|
@@ -8,7 +8,7 @@ from tqdm import tqdm
|
|
|
from datetime import datetime, timedelta
|
|
|
import schedule
|
|
|
|
|
|
-from applications import longArticlesMySQL, PQMySQL, WeixinSpider, Functions
|
|
|
+from applications import longArticlesMySQL, PQMySQL, WeixinSpider, Functions, log, bot
|
|
|
|
|
|
|
|
|
class DailyDataManager(object):
|
|
@@ -40,6 +40,11 @@ class DailyDataManager(object):
|
|
|
);
|
|
|
"""
|
|
|
result_list = cls.pqMysql.select(sql2)
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="getPublishedArticles",
|
|
|
+ message="一共获取 {} 篇文章数据".format(len(result_list))
|
|
|
+ )
|
|
|
return result_list
|
|
|
|
|
|
@classmethod
|
|
@@ -86,9 +91,18 @@ class DailyDataManager(object):
|
|
|
dt_str
|
|
|
)
|
|
|
)
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="updateInfo",
|
|
|
+ message="插入数据成功, video_id 是: {}".format(video_id)
|
|
|
+ )
|
|
|
except Exception as e:
|
|
|
- print(e)
|
|
|
- pass
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="updateInfo",
|
|
|
+ status="fail",
|
|
|
+ message="插入数据失败, 失败原因是: {}".format(e)
|
|
|
+ )
|
|
|
|
|
|
@classmethod
|
|
|
def getRootSourceIds(cls, data_info):
|
|
@@ -97,9 +111,32 @@ class DailyDataManager(object):
|
|
|
:return:
|
|
|
"""
|
|
|
url = data_info[0]
|
|
|
- article_detail = cls.wxSpider.get_article_text(url)
|
|
|
- mini_info = article_detail['data']['data']['mini_program']
|
|
|
- return data_info[1].decode(), mini_info, data_info[2]
|
|
|
+ try:
|
|
|
+ article_detail = cls.wxSpider.get_article_text(url)
|
|
|
+ mini_info = article_detail['data']['data']['mini_program']
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="getRootSourceIds",
|
|
|
+ message="获取文章链接对应的 rootSourceId 成功",
|
|
|
+ data={
|
|
|
+ "ContentUrl": url,
|
|
|
+ "wxSn": data_info[1].decode(),
|
|
|
+ "createTime": data_info[2],
|
|
|
+ "miniInfo": mini_info
|
|
|
+ }
|
|
|
+ )
|
|
|
+ return data_info[1].decode(), mini_info, data_info[2]
|
|
|
+ except Exception as e:
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="getRootSourceIds",
|
|
|
+ status="fail",
|
|
|
+ message="获取文章链接对应的 rootSourceId失败, 报错信息是: {}".format(e),
|
|
|
+ data={
|
|
|
+ "ContentUrl": url
|
|
|
+ }
|
|
|
+ )
|
|
|
+ return
|
|
|
|
|
|
@classmethod
|
|
|
def getMinigramInfo(cls, rootSourceId):
|
|
@@ -171,7 +208,23 @@ class DailyDataManager(object):
|
|
|
L[key] = temp
|
|
|
return L
|
|
|
|
|
|
- return summarize(result_list)
|
|
|
+ try:
|
|
|
+ response = summarize(result_list)
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="getMinigramInfo",
|
|
|
+ message="计算source_id信息成功",
|
|
|
+ data=response
|
|
|
+ )
|
|
|
+ return response
|
|
|
+ except Exception as e:
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="getMinigramInfo",
|
|
|
+ message="获取 source_id信息失败, 报错信息是: {}".format(e),
|
|
|
+ status="fail"
|
|
|
+ )
|
|
|
+ return None
|
|
|
|
|
|
@classmethod
|
|
|
def updateDetail(cls):
|
|
@@ -179,16 +232,20 @@ class DailyDataManager(object):
|
|
|
:return:
|
|
|
"""
|
|
|
today = datetime.today()
|
|
|
- # 获取昨天的日期
|
|
|
+ # 获取三天前的日期
|
|
|
yesterday = today - timedelta(days=3)
|
|
|
yesterday_str = yesterday.__str__().split(" ")[0]
|
|
|
- print(yesterday_str)
|
|
|
sql = f"""
|
|
|
- select distinct root_source_id
|
|
|
- from long_articles_detail_info
|
|
|
- where publish_dt >= '{yesterday_str}';
|
|
|
+ select distinct root_source_id
|
|
|
+ from long_articles_detail_info
|
|
|
+ where publish_dt >= '{yesterday_str}';
|
|
|
"""
|
|
|
source_id_list = cls.pqMysql.select(sql)
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="updateDetail",
|
|
|
+ message="获取前三天的 rootSourceId, 一共有 {} 条记录".format(len(source_id_list))
|
|
|
+ )
|
|
|
for item in tqdm(source_id_list):
|
|
|
s_id = item[0]
|
|
|
try:
|
|
@@ -201,9 +258,9 @@ class DailyDataManager(object):
|
|
|
fission_2 = result[key][3]
|
|
|
# print(s_id, recall_dt, first_level, fission_0, fission_1, fission_2)
|
|
|
update_sql = f"""
|
|
|
- UPDATE long_articles_detail_info
|
|
|
- set first_level = %s, fission_0 = %s, fission_1 = %s, fission_2 = %s
|
|
|
- where root_source_id = %s and recall_dt = %s;
|
|
|
+ UPDATE long_articles_detail_info
|
|
|
+ set first_level = %s, fission_0 = %s, fission_1 = %s, fission_2 = %s
|
|
|
+ where root_source_id = %s and recall_dt = %s;
|
|
|
"""
|
|
|
try:
|
|
|
cls.pqMysql.update(
|
|
@@ -213,9 +270,19 @@ class DailyDataManager(object):
|
|
|
)
|
|
|
)
|
|
|
except Exception as e:
|
|
|
- print("insert error", e)
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="updateDetail",
|
|
|
+ status="fail",
|
|
|
+ message="mysql 更新失败, 报错信息是 {}".format(e)
|
|
|
+ )
|
|
|
except Exception as e:
|
|
|
- print(e)
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="updateDetail",
|
|
|
+ status="fail",
|
|
|
+ message="更新单条数据失败, 报错信息是 {}".format(e)
|
|
|
+ )
|
|
|
|
|
|
|
|
|
def updateArticlesJob():
|
|
@@ -227,7 +294,11 @@ def updateArticlesJob():
|
|
|
article_list = DDM.getPublishedArticles()
|
|
|
for article in tqdm(article_list):
|
|
|
DDM.updateInfo(article)
|
|
|
- print("文章更新完成---{}".format(datetime.today().__str__()))
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="updateArticlesJob",
|
|
|
+ message="文章更新完成---{}".format(datetime.today().__str__())
|
|
|
+ )
|
|
|
|
|
|
|
|
|
def updateMinigramInfoJob():
|
|
@@ -236,14 +307,24 @@ def updateMinigramInfoJob():
|
|
|
:return:
|
|
|
"""
|
|
|
DDM = DailyDataManager()
|
|
|
- DDM.updateDetail()
|
|
|
- print("小程序更新完成---{}".format(datetime.today().__str__()))
|
|
|
+ try:
|
|
|
+ DDM.updateDetail()
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="updateMinigramInfoJob",
|
|
|
+ message="小程序更新完成---{}".format(datetime.today().__str__())
|
|
|
+ )
|
|
|
+ except Exception as e:
|
|
|
+ log(
|
|
|
+ task="updateMinigramInfoDaily",
|
|
|
+ function="updateMinigramInfoJob",
|
|
|
+ status="fail",
|
|
|
+ message="小程序更新失败---{}, 报错信息是: {}".format(datetime.today().__str__(), e)
|
|
|
+ )
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
|
- # updateArticlesJob()
|
|
|
- # updateMinigramInfoJob()
|
|
|
schedule.every().day.at("01:00").do(Functions().job_with_thread, updateArticlesJob)
|
|
|
|
|
|
schedule.every().day.at("03:30").do(Functions().job_with_thread, updateMinigramInfoJob)
|