weixinCategoryCrawler.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. """
  2. @author: luojunhui
  3. 抓取全局品类文章
  4. """
  5. import json
  6. import time
  7. from applications import WeixinSpider
  8. class weixinCategory(object):
  9. """
  10. 微信全局品类账号抓取
  11. """
  12. def __init__(self, spider_client):
  13. self.spider_client = spider_client
  14. self.spider = WeixinSpider()
  15. def getAccountList(self):
  16. """
  17. 获取账号
  18. :return:
  19. """
  20. now_time = int(time.time())
  21. twenty_hours_ago = now_time - 3600 * 20
  22. sql = f"""select * from long_article_accounts_outside where latest_article_timestamp < {twenty_hours_ago};"""
  23. account_tuple = self.spider_client.select(sql)
  24. result = [
  25. {
  26. "gh_id": i[0],
  27. "platform": i[1],
  28. "account_name": i[2],
  29. "category": i[3],
  30. "latest_timestamp": i[4]
  31. } for i in account_tuple
  32. ]
  33. return result
  34. def update_data_into_mysql(self, msg_list):
  35. """
  36. 将数据更新到数据库
  37. :return:
  38. """
  39. for obj in msg_list['data']['data']:
  40. print(json.dumps(obj, ensure_ascii=False, indent=4))
  41. def updateEachAccountArticles(self, gh_id, latest_time_stamp):
  42. """
  43. 更新账号文章
  44. :return:
  45. """
  46. index = None
  47. msg_list = self.spider.update_msg_list(ghId=gh_id, index=index)
  48. latest_time_stamp_in_this_msg = msg_list['data']['data'][-1]['BaseInfo']
  49. print(latest_time_stamp_in_this_msg)
  50. if __name__ == '__main__':
  51. wc = weixinCategory(spider_client="123")
  52. wc.updateEachAccountArticles("gh_ddafea4bcc29", latest_time_stamp=1)