|
@@ -12,7 +12,8 @@ from tqdm import tqdm
|
|
from datetime import datetime
|
|
from datetime import datetime
|
|
|
|
|
|
from config import accountBaseInfo
|
|
from config import accountBaseInfo
|
|
-from applications import PQMySQL, WeixinSpider, Functions
|
|
|
|
|
|
+from applications import PQMySQL
|
|
|
|
+from tasks.task4 import update_articles
|
|
from applications.decoratorApi import retryOnTimeout
|
|
from applications.decoratorApi import retryOnTimeout
|
|
|
|
|
|
|
|
|
|
@@ -22,8 +23,6 @@ class UpdateMsgDaily(object):
|
|
"""
|
|
"""
|
|
|
|
|
|
db_client = PQMySQL()
|
|
db_client = PQMySQL()
|
|
- spider = WeixinSpider()
|
|
|
|
- functions = Functions()
|
|
|
|
|
|
|
|
@classmethod
|
|
@classmethod
|
|
def getAccountIdDict(cls):
|
|
def getAccountIdDict(cls):
|
|
@@ -72,189 +71,6 @@ class UpdateMsgDaily(object):
|
|
}
|
|
}
|
|
requests.request("POST", url=url, headers=headers, data=json.dumps(payload), timeout=10)
|
|
requests.request("POST", url=url, headers=headers, data=json.dumps(payload), timeout=10)
|
|
|
|
|
|
- @classmethod
|
|
|
|
- def findAccountLatestUpdateTime(cls, gh_id):
|
|
|
|
- """
|
|
|
|
- 获取账号的最近更新id
|
|
|
|
- :param gh_id:
|
|
|
|
- :return:
|
|
|
|
- """
|
|
|
|
- sql = f"""
|
|
|
|
- select accountName, updateTime
|
|
|
|
- from official_articles_v2
|
|
|
|
- where ghId = '{gh_id}'
|
|
|
|
- order by updateTime DESC;
|
|
|
|
- """
|
|
|
|
- result = cls.db_client.select(sql)
|
|
|
|
- if result:
|
|
|
|
- account_name, update_time = result[0]
|
|
|
|
- return {"update_time": update_time, "account_type": "history"}
|
|
|
|
- else:
|
|
|
|
- return {
|
|
|
|
- "update_time": int(time.time()) - 30 * 24 * 60 * 60,
|
|
|
|
- "account_type": "new",
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- @classmethod
|
|
|
|
- def updateMsgList(cls, gh_id, account_name, msg_list):
|
|
|
|
- """
|
|
|
|
- 把消息数据更新到数据库中
|
|
|
|
- :param account_name:
|
|
|
|
- :param gh_id:
|
|
|
|
- :param msg_list:
|
|
|
|
- :return:
|
|
|
|
- """
|
|
|
|
- for info in msg_list:
|
|
|
|
- baseInfo = info.get("BaseInfo", {})
|
|
|
|
- appMsgId = info.get("AppMsg", {}).get("BaseInfo", {}).get("AppMsgId", None)
|
|
|
|
- createTime = (
|
|
|
|
- info.get("AppMsg", {}).get("BaseInfo", {}).get("CreateTime", None)
|
|
|
|
- )
|
|
|
|
- updateTime = (
|
|
|
|
- info.get("AppMsg", {}).get("BaseInfo", {}).get("UpdateTime", None)
|
|
|
|
- )
|
|
|
|
- Type = info.get("AppMsg", {}).get("BaseInfo", {}).get("Type", None)
|
|
|
|
- detail_article_list = info.get("AppMsg", {}).get("DetailInfo", [])
|
|
|
|
- if detail_article_list:
|
|
|
|
- for article in detail_article_list:
|
|
|
|
- title = article.get("Title", None)
|
|
|
|
- Digest = article.get("Digest", None)
|
|
|
|
- ItemIndex = article.get("ItemIndex", None)
|
|
|
|
- ContentUrl = article.get("ContentUrl", None)
|
|
|
|
- SourceUrl = article.get("SourceUrl", None)
|
|
|
|
- CoverImgUrl = article.get("CoverImgUrl", None)
|
|
|
|
- CoverImgUrl_1_1 = article.get("CoverImgUrl_1_1", None)
|
|
|
|
- CoverImgUrl_235_1 = article.get("CoverImgUrl_235_1", None)
|
|
|
|
- ItemShowType = article.get("ItemShowType", None)
|
|
|
|
- IsOriginal = article.get("IsOriginal", None)
|
|
|
|
- ShowDesc = article.get("ShowDesc", None)
|
|
|
|
- show_stat = cls.functions.show_desc_to_sta(ShowDesc)
|
|
|
|
- ori_content = article.get("ori_content", None)
|
|
|
|
- show_view_count = show_stat.get("show_view_count", 0)
|
|
|
|
- show_like_count = show_stat.get("show_like_count", 0)
|
|
|
|
- show_zs_count = show_stat.get("show_zs_count", 0)
|
|
|
|
- show_pay_count = show_stat.get("show_pay_count", 0)
|
|
|
|
- wx_sn = (
|
|
|
|
- ContentUrl.split("&sn=")[1].split("&")[0]
|
|
|
|
- if ContentUrl
|
|
|
|
- else None
|
|
|
|
- )
|
|
|
|
- info_tuple = (
|
|
|
|
- gh_id,
|
|
|
|
- account_name,
|
|
|
|
- appMsgId,
|
|
|
|
- title,
|
|
|
|
- Type,
|
|
|
|
- createTime,
|
|
|
|
- updateTime,
|
|
|
|
- Digest,
|
|
|
|
- ItemIndex,
|
|
|
|
- ContentUrl,
|
|
|
|
- SourceUrl,
|
|
|
|
- CoverImgUrl,
|
|
|
|
- CoverImgUrl_1_1,
|
|
|
|
- CoverImgUrl_235_1,
|
|
|
|
- ItemShowType,
|
|
|
|
- IsOriginal,
|
|
|
|
- ShowDesc,
|
|
|
|
- ori_content,
|
|
|
|
- show_view_count,
|
|
|
|
- show_like_count,
|
|
|
|
- show_zs_count,
|
|
|
|
- show_pay_count,
|
|
|
|
- wx_sn,
|
|
|
|
- json.dumps(baseInfo, ensure_ascii=False),
|
|
|
|
- )
|
|
|
|
- try:
|
|
|
|
- insert_sql = f"""
|
|
|
|
- INSERT INTO official_articles_v2
|
|
|
|
- (ghId, accountName, appMsgId, title, Type, createTime, updateTime, Digest, ItemIndex, ContentUrl, SourceUrl, CoverImgUrl, CoverImgUrl_1_1, CoverImgUrl_255_1, ItemShowType, IsOriginal, ShowDesc, ori_content, show_view_count, show_like_count, show_zs_count, show_pay_count, wx_sn, baseInfo)
|
|
|
|
- values
|
|
|
|
- (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
|
- """
|
|
|
|
- cls.db_client.update(sql=insert_sql, params=info_tuple)
|
|
|
|
- except Exception as e:
|
|
|
|
- try:
|
|
|
|
- update_sql = f"""
|
|
|
|
- UPDATE official_articles_v2
|
|
|
|
- SET show_view_count = %s, show_like_count=%s
|
|
|
|
- WHERE wx_sn = %s;
|
|
|
|
- """
|
|
|
|
- cls.db_client.update(
|
|
|
|
- sql=update_sql,
|
|
|
|
- params=(show_view_count, show_like_count, wx_sn),
|
|
|
|
- )
|
|
|
|
- except Exception as e:
|
|
|
|
- print("失败-{}".format(e))
|
|
|
|
- continue
|
|
|
|
-
|
|
|
|
- @classmethod
|
|
|
|
- def getAccountArticleList(cls, gh_id, last_update_time, cursor=None):
|
|
|
|
- """
|
|
|
|
- 输入ghid获取账号的文章list
|
|
|
|
- :return:
|
|
|
|
- """
|
|
|
|
- try:
|
|
|
|
- response = cls.spider.update_msg_list(ghId=gh_id, index=cursor)
|
|
|
|
- except Exception as e:
|
|
|
|
- response = {
|
|
|
|
- "error": str(e),
|
|
|
|
- "info": "更新文章接口请求失败",
|
|
|
|
- "gh_id": gh_id,
|
|
|
|
- "time": datetime.now().__str__()
|
|
|
|
- }
|
|
|
|
- # 之后可以考虑抛出阿里云日志
|
|
|
|
- print(response)
|
|
|
|
- return
|
|
|
|
- msg_list = response.get("data", {}).get("data")
|
|
|
|
- if msg_list:
|
|
|
|
- last_article_in_this_msg = msg_list[-1]
|
|
|
|
- last_time_stamp_in_this_msg = last_article_in_this_msg["AppMsg"][
|
|
|
|
- "BaseInfo"
|
|
|
|
- ]["UpdateTime"]
|
|
|
|
- last_url = last_article_in_this_msg["AppMsg"]["DetailInfo"][0]["ContentUrl"]
|
|
|
|
- # 校验是否抓到的是同一个账号
|
|
|
|
- try:
|
|
|
|
- resdata = cls.spider.get_account_by_url(last_url)
|
|
|
|
- except Exception as e:
|
|
|
|
- resdata = {
|
|
|
|
- "error": str(e),
|
|
|
|
- "info": "通过链接获取账号信息失败",
|
|
|
|
- "gh_id": gh_id,
|
|
|
|
- "time": datetime.now().__str__()
|
|
|
|
- }
|
|
|
|
- print(resdata)
|
|
|
|
- return
|
|
|
|
- check_name = resdata["data"].get("data", {}).get("account_name")
|
|
|
|
- check_id = resdata["data"].get("data", {}).get("wx_gh")
|
|
|
|
- if check_id == gh_id:
|
|
|
|
- cls.updateMsgList(gh_id, check_name, msg_list)
|
|
|
|
- if last_time_stamp_in_this_msg > last_update_time:
|
|
|
|
- next_cursor = response["data"]["next_cursor"]
|
|
|
|
- return cls.getAccountArticleList(
|
|
|
|
- gh_id=gh_id,
|
|
|
|
- last_update_time=last_update_time,
|
|
|
|
- cursor=next_cursor,
|
|
|
|
- )
|
|
|
|
- else:
|
|
|
|
- response = {
|
|
|
|
- "code": 1002,
|
|
|
|
- "info": "抓取时候账号校验失败",
|
|
|
|
- "error": None,
|
|
|
|
- "gh_id": gh_id,
|
|
|
|
- "time_stamp": datetime.now().__str__(),
|
|
|
|
- }
|
|
|
|
- print(response)
|
|
|
|
- else:
|
|
|
|
- response = {
|
|
|
|
- "code": 1003,
|
|
|
|
- "info": "账号为抓取到内容",
|
|
|
|
- "error": None,
|
|
|
|
- "gh_id": gh_id,
|
|
|
|
- "time_stamp": datetime.now().__str__(),
|
|
|
|
- }
|
|
|
|
- print(response)
|
|
|
|
-
|
|
|
|
@classmethod
|
|
@classmethod
|
|
def checkEachAccount(cls, gh_id):
|
|
def checkEachAccount(cls, gh_id):
|
|
"""
|
|
"""
|
|
@@ -285,14 +101,11 @@ class UpdateMsgDaily(object):
|
|
更新文章任务
|
|
更新文章任务
|
|
:return:
|
|
:return:
|
|
"""
|
|
"""
|
|
- account_list = cls.getAccountIdDict()
|
|
|
|
|
|
+ account_dict = cls.getAccountIdDict()
|
|
|
|
+ account_list = list(account_dict.keys())
|
|
for account_id in tqdm(account_list):
|
|
for account_id in tqdm(account_list):
|
|
- account_info = cls.findAccountLatestUpdateTime(account_id)
|
|
|
|
- latest_time = account_info["update_time"]
|
|
|
|
try:
|
|
try:
|
|
- cls.getAccountArticleList(
|
|
|
|
- gh_id=account_id, last_update_time=latest_time
|
|
|
|
- )
|
|
|
|
|
|
+ update_articles(gh_id=account_id)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
response = {
|
|
response = {
|
|
"code": 1001,
|
|
"code": 1001,
|
|
@@ -337,8 +150,17 @@ def job_with_thread(job_func):
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
UMD = UpdateMsgDaily()
|
|
UMD = UpdateMsgDaily()
|
|
|
|
|
|
- schedule.every().day.at("21:00").do(job_with_thread, UMD.updateJob)
|
|
|
|
- schedule.every().day.at("21:30").do(job_with_thread, UMD.checkJob)
|
|
|
|
|
|
+ try:
|
|
|
|
+ schedule.every().day.at("21:00").do(job_with_thread, UMD.updateJob)
|
|
|
|
+
|
|
|
|
+ except Exception as error:
|
|
|
|
+ UMD.bot(account_list=["更新文章定时任务异常终止", str(error)])
|
|
|
|
+
|
|
|
|
+ try:
|
|
|
|
+ schedule.every().day.at("21:30").do(job_with_thread, UMD.checkJob)
|
|
|
|
+
|
|
|
|
+ except Exception as error:
|
|
|
|
+ UMD.bot(account_list=['校验账号任务异常终止', str(error)])
|
|
|
|
|
|
while True:
|
|
while True:
|
|
schedule.run_pending()
|
|
schedule.run_pending()
|