weixinAssociationCrawler.py 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
"""
@author: luojunhui
WeChat association crawler
"""
  5. import json
  6. from tqdm import tqdm
  7. from applications import PQMySQL
  8. from applications.spiderTool import SpiderTools
  9. class weixinAssociation(object):
  10. """
  11. 微信联想方法
  12. """
  13. pq_mysql_client = PQMySQL()
  14. spider_tool = SpiderTools()
  15. @classmethod
  16. def getAssociationAccounts(cls):
  17. """
  18. 获取已经联想过的账号
  19. :return:
  20. """
  21. select_sql = f"""
  22. SELECT distinct(gh_id)
  23. FROM long_articles_accounts
  24. where is_using = 1 and account_category = 'association';"""
  25. account_id_tuple = cls.pq_mysql_client.select(select_sql)
  26. account_id_list = [list(i) for i in account_id_tuple]
  27. return account_id_list
  28. @classmethod
  29. def deal(cls):
  30. """
  31. main function
  32. :return:
  33. """
  34. account_info_list = cls.getAssociationAccounts()
  35. for line in tqdm(account_info_list[1:]):
  36. gh_id = line[0]
  37. cls.spider_tool.searchEachAccountArticlesSinglePage(
  38. gh_id=gh_id,
  39. category="association"
  40. )
  41. w = weixinAssociation()
  42. w.deal()