weixinAccountCrawler.py 1.8 KB

  1. """
  2. @author: luojunhui
  3. 输入文章连接,输出账号信息,并且把账号存储到数据库中
  4. """
  5. import datetime
  6. from tqdm import tqdm
  7. from applications import WeixinSpider, longArticlesMySQL
class weixinAccountCrawler(object):
    """
    weixinAccountCrawler: crawl account info from article URLs and persist it
    """

    def __init__(self, target_url_list):
        self.db_client = longArticlesMySQL()
        self.spider = WeixinSpider()
        self.url_list = target_url_list

    def get_account_detail(self, url):
        """
        Fetch the account info behind an article URL.
        :param url: article URL
        :return:
        """
        account_detail = self.spider.get_account_by_url(content_url=url)
        # the spider wraps the payload in two levels of "data"
        account_obj = account_detail['data']['data']
        account_name = account_obj['account_name']
        gh_id = account_obj['wx_gh']
        self.insert_account_into_database(account_name, gh_id)

    def insert_account_into_database(self, account_name, gh_id, category=None):
        """
        Insert one account record into long_articles_accounts.
        :param category: account category, defaults to "daily-account-mining"
        :param account_name:
        :param gh_id:
        :return:
        """
        if not category:
            category = "daily-account-mining"
        insert_sql = """
            INSERT INTO long_articles_accounts
                (gh_id, account_source, account_name, account_category, init_date)
            VALUES
                (%s, %s, %s, %s, %s)
        """
        self.db_client.update(
            sql=insert_sql,
            params=(gh_id, "weixin", account_name, category, str(datetime.date.today()))
        )

    def deal(self):
        """
        Entry point: crawl every URL in the list.
        :return:
        """
        for url in tqdm(self.url_list):
            self.get_account_detail(url)


if __name__ == '__main__':
    url_list = [
        'https://mp.weixin.qq.com/s/Q9Je-eNKcHNjh8S-NqQLgg'
    ]
    wac = weixinAccountCrawler(url_list)
    wac.deal()
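
The script depends on the in-house `applications` package, so it cannot run outside that environment. As a sanity check of the data flow, here is a minimal self-contained dry run, assuming only the nested response shape that `get_account_detail` already relies on; `FakeSpider` and `FakeDB` are hypothetical stand-ins for `WeixinSpider` and `longArticlesMySQL`, not part of the real package:

import datetime


class FakeSpider:
    """Returns a canned response in the nested shape the crawler expects."""

    def get_account_by_url(self, content_url):
        return {
            'data': {
                'data': {
                    'account_name': 'demo-account',
                    'wx_gh': 'gh_0000000000000',
                }
            }
        }


class FakeDB:
    """Prints the SQL and params instead of writing to MySQL."""

    def update(self, sql, params):
        print(sql.strip())
        print(params)


spider, db = FakeSpider(), FakeDB()
account_obj = spider.get_account_by_url(
    content_url='https://mp.weixin.qq.com/s/Q9Je-eNKcHNjh8S-NqQLgg'
)['data']['data']
db.update(
    sql="INSERT INTO long_articles_accounts ... VALUES (%s, %s, %s, %s, %s)",
    params=(account_obj['wx_gh'], "weixin", account_obj['account_name'],
            "daily-account-mining", str(datetime.date.today())),
)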