12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- """
- @author: luojunhui
- 抓取全局品类文章
- """
- import json
- import time
- from applications import WeixinSpider
class weixinCategory(object):
    """
    Crawl articles for WeChat accounts in the global category list.
    """

    def __init__(self, spider_client):
        """
        :param spider_client: database client; within this class only its
            ``select(sql)`` method is called.
        """
        self.spider_client = spider_client
        self.spider = WeixinSpider()

    def getAccountList(self):
        """
        Fetch the accounts whose latest article timestamp is more than
        20 hours in the past.

        :return: list of dicts with the keys ``gh_id``, ``platform``,
            ``account_name``, ``category`` and ``latest_timestamp``, taken
            from the first five columns of each returned row.
        """
        now_time = int(time.time())
        twenty_hours_ago = now_time - 3600 * 20
        # The interpolated value is an int computed locally, not caller input.
        sql = f"""select * from long_article_accounts_outside where latest_article_timestamp < {twenty_hours_ago};"""
        account_tuple = self.spider_client.select(sql)
        # NOTE(review): relies on the table's column order because of
        # ``select *`` -- confirm against the schema.
        return [
            {
                "gh_id": row[0],
                "platform": row[1],
                "account_name": row[2],
                "category": row[3],
                "latest_timestamp": row[4],
            }
            for row in account_tuple
        ]

    def update_data_into_mysql(self, msg_list):
        """
        Dump every entry of ``msg_list['data']['data']`` as indented JSON.

        Despite the name, no database write happens here yet; each entry is
        only printed.

        :param msg_list: response dict holding the entries under
            ``['data']['data']``.
        :return: None
        """
        for obj in msg_list['data']['data']:
            print(json.dumps(obj, ensure_ascii=False, indent=4))

    def updateEachAccountArticles(self, gh_id, latest_time_stamp):
        """
        Request the message list of one account and print the ``BaseInfo``
        of its last entry.

        :param gh_id: account id forwarded to the spider as ``ghId``.
        :param latest_time_stamp: currently unused by this method.
        :return: None
        """
        index = None
        msg_list = self.spider.update_msg_list(ghId=gh_id, index=index)
        articles = msg_list['data']['data']
        # An empty message list has no last entry; indexing [-1] would raise
        # IndexError, so there is simply nothing to report.
        if not articles:
            return
        print(articles[-1]['BaseInfo'])
if __name__ == "__main__":
    # Manual smoke run: build the crawler with a placeholder client and
    # request the message list of one hard-coded account.
    category_crawler = weixinCategory(spider_client="123")
    category_crawler.updateEachAccountArticles(
        gh_id="gh_ddafea4bcc29",
        latest_time_stamp=1,
    )
|