- """
- @author: luojunhui
- 输入文章连接,输出账号信息,并且把账号存储到数据库中
- """
- import datetime
- from tqdm import tqdm
- from applications import WeixinSpider, longArticlesMySQL


class weixinAccountCrawler(object):
    """
    Crawl WeChat official-account info from article links and persist it.
    """

    def __init__(self, target_url_list):
        self.db_client = longArticlesMySQL()
        self.spider = WeixinSpider()
        self.url_list = target_url_list

    def get_account_detail(self, url):
        """
        Fetch account info via an article link and save it to the database.
        :param url: WeChat article URL
        :return:
        """
        account_detail = self.spider.get_account_by_url(content_url=url)
        account_obj = account_detail['data']['data']
        account_name = account_obj['account_name']
        gh_id = account_obj['wx_gh']
        self.insert_account_into_database(account_name, gh_id)

    def insert_account_into_database(self, account_name, gh_id, category=None):
        """
        Insert an account record into long_articles_accounts.
        :param account_name: account display name
        :param gh_id: gh_id of the WeChat account
        :param category: account category; defaults to "daily-account-mining"
        :return:
        """
        if not category:
            category = "daily-account-mining"
        insert_sql = """
            INSERT INTO long_articles_accounts
            (gh_id, account_source, account_name, account_category, init_date)
            VALUES
            (%s, %s, %s, %s, %s)
        """
        self.db_client.update(
            sql=insert_sql,
            params=(gh_id, "weixin", account_name, category, str(datetime.date.today()))
        )

    def deal(self):
        """
        Entry point: crawl every URL in the list.
        :return:
        """
        for url in tqdm(self.url_list):
            self.get_account_detail(url)


if __name__ == '__main__':
    url_list = [
        'https://mp.weixin.qq.com/s/Q9Je-eNKcHNjh8S-NqQLgg'
    ]
    wac = weixinAccountCrawler(url_list)
    wac.deal()