|
@@ -12,6 +12,8 @@ from argparse import ArgumentParser
|
|
|
|
|
|
from applications import longArticlesMySQL, PQMySQL, WeixinSpider, Functions, log, bot
|
|
|
|
|
|
+TASK_NAME = "updateMinigramInfoDaily"
|
|
|
+
|
|
|
def get_yesterday():
    """
    Return the current local date-time shifted back by exactly one day.

    The result keeps the time-of-day component of "now"; it is not truncated
    to midnight.

    :return: naive ``datetime`` equal to ``datetime.today()`` minus one day
    """
    # Spell out the unit so the size of the offset is obvious at the call site.
    return datetime.today() - timedelta(days=1)
|
|
@@ -21,13 +23,13 @@ class DailyDataManager(object):
|
|
|
"""
|
|
|
daily 数据每日更新
|
|
|
"""
|
|
|
- laMysql = longArticlesMySQL()
|
|
|
- pqMysql = PQMySQL()
|
|
|
- wxSpider = WeixinSpider()
|
|
|
+ long_articles_db = longArticlesMySQL()
|
|
|
+ pq_db = PQMySQL()
|
|
|
+ wx_spider = WeixinSpider()
|
|
|
functions = Functions()
|
|
|
|
|
|
@classmethod
|
|
|
- def getPublishedArticles(cls, biz_date):
|
|
|
+ def get_published_articles(cls, biz_date):
|
|
|
"""
|
|
|
获取已经发布的文章的信息, createTime 选择为前一天的 0 点并且转化为时间戳
|
|
|
:return:
|
|
@@ -43,22 +45,22 @@ class DailyDataManager(object):
|
|
|
-- select distinct account_name from account_avg_info_v2
|
|
|
-- );
|
|
|
"""
|
|
|
- result_list = cls.pqMysql.select(sql2)
|
|
|
+ result_list = cls.pq_db.select(sql2)
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
- function="getPublishedArticles",
|
|
|
+ task=TASK_NAME,
|
|
|
+ function="get_published_articles",
|
|
|
message="一共获取 {} 篇文章数据".format(len(result_list))
|
|
|
)
|
|
|
return result_list
|
|
|
|
|
|
@classmethod
|
|
|
- def updateInfo(cls, line):
|
|
|
+ def update_article_info(cls, line):
|
|
|
"""
|
|
|
update info into mysql
|
|
|
:return:
|
|
|
"""
|
|
|
try:
|
|
|
- wx_sn, mini_info, create_time = cls.getRootSourceIds(line)
|
|
|
+ wx_sn, mini_info, create_time = cls.get_root_source_ids(line)
|
|
|
dt_object = datetime.fromtimestamp(create_time)
|
|
|
publish_dt = dt_object.strftime('%Y-%m-%d')
|
|
|
one_day = timedelta(days=1)
|
|
@@ -81,7 +83,7 @@ class DailyDataManager(object):
|
|
|
values
|
|
|
(%s, %s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
"""
|
|
|
- cls.pqMysql.update(
|
|
|
+ cls.pq_db.update(
|
|
|
sql=insert_sql,
|
|
|
params=(
|
|
|
wx_sn,
|
|
@@ -96,31 +98,31 @@ class DailyDataManager(object):
|
|
|
)
|
|
|
)
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
- function="updateInfo",
|
|
|
+ task=TASK_NAME,
|
|
|
+ function="update_article_info",
|
|
|
message="插入数据成功, video_id 是: {}".format(video_id)
|
|
|
)
|
|
|
except Exception as e:
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
- function="updateInfo",
|
|
|
+ task=TASK_NAME,
|
|
|
+ function="update_article_info",
|
|
|
status="fail",
|
|
|
message="插入数据失败, 失败原因是".format(e)
|
|
|
)
|
|
|
|
|
|
@classmethod
|
|
|
- def getRootSourceIds(cls, data_info):
|
|
|
+ def get_root_source_ids(cls, data_info):
|
|
|
"""
|
|
|
通过抓取接口获取 data_info
|
|
|
:return:
|
|
|
"""
|
|
|
url = data_info[0]
|
|
|
try:
|
|
|
- article_detail = cls.wxSpider.get_article_text(url)
|
|
|
+ article_detail = cls.wx_spider.get_article_text(url)
|
|
|
mini_info = article_detail['data']['data']['mini_program']
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
- function="getRootSourceIds",
|
|
|
+ task=TASK_NAME,
|
|
|
+ function="get_root_source_ids",
|
|
|
message="获取文章链接对应的 rootSourceId 成功",
|
|
|
data={
|
|
|
"ContentUrl": url,
|
|
@@ -132,8 +134,8 @@ class DailyDataManager(object):
|
|
|
return data_info[1].decode(), mini_info, data_info[2]
|
|
|
except Exception as e:
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
- function="getRootSourceIds",
|
|
|
+ task=TASK_NAME,
|
|
|
+ function="get_root_source_ids",
|
|
|
status="fail",
|
|
|
message="获取文章链接对应的 rootSourceId失败, 报错信息是: {}".format(e),
|
|
|
data={
|
|
@@ -143,7 +145,7 @@ class DailyDataManager(object):
|
|
|
return
|
|
|
|
|
|
@classmethod
|
|
|
- def getMinigramInfo(cls, rootSourceId):
|
|
|
+ def get_minigram_info(cls, rootSourceId):
|
|
|
"""
|
|
|
|
|
|
:param rootSourceId:
|
|
@@ -154,7 +156,7 @@ class DailyDataManager(object):
|
|
|
from changwen_data_base_v2
|
|
|
where rootsourceid = '{rootSourceId}';
|
|
|
"""
|
|
|
- result_list = cls.laMysql.select(sql)
|
|
|
+ result_list = cls.long_articles_db.select(sql)
|
|
|
|
|
|
def summarize(values):
|
|
|
"""
|
|
@@ -215,23 +217,23 @@ class DailyDataManager(object):
|
|
|
try:
|
|
|
response = summarize(result_list)
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
- function="getMinigramInfo",
|
|
|
+ task=TASK_NAME,
|
|
|
+ function="get_minigram_info",
|
|
|
message="计算source_id信息成功",
|
|
|
data=response
|
|
|
)
|
|
|
return response
|
|
|
except Exception as e:
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
- function="getMinigramInfo",
|
|
|
+ task=TASK_NAME,
|
|
|
+ function="get_minigram_info",
|
|
|
message="获取 source_id信息失败, 报错信息是: {}".format(e),
|
|
|
status="fail"
|
|
|
)
|
|
|
return None
|
|
|
|
|
|
@classmethod
|
|
|
- def updateDetail(cls, biz_date):
|
|
|
+ def update_minigram_detail(cls, biz_date):
|
|
|
"""
|
|
|
:return:
|
|
|
"""
|
|
@@ -244,16 +246,16 @@ class DailyDataManager(object):
|
|
|
from long_articles_detail_info
|
|
|
where publish_dt >= '{stats_date_str}';
|
|
|
"""
|
|
|
- source_id_list = cls.pqMysql.select(sql)
|
|
|
+ source_id_list = cls.pq_db.select(sql)
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
- function="updateDetail",
|
|
|
+ task=TASK_NAME,
|
|
|
+ function="update_minigram_detail",
|
|
|
message="获取前三天的 rootSourceId, 一共有 {} 条记录".format(len(source_id_list))
|
|
|
)
|
|
|
for item in tqdm(source_id_list):
|
|
|
s_id = item[0]
|
|
|
try:
|
|
|
- result = cls.getMinigramInfo(s_id)
|
|
|
+ result = cls.get_minigram_info(s_id)
|
|
|
for key in result:
|
|
|
recall_dt = key
|
|
|
first_level = result[key][0]
|
|
@@ -267,7 +269,7 @@ class DailyDataManager(object):
|
|
|
where root_source_id = %s and recall_dt = %s;
|
|
|
"""
|
|
|
try:
|
|
|
- cls.pqMysql.update(
|
|
|
+ cls.pq_db.update(
|
|
|
sql=update_sql,
|
|
|
params=(
|
|
|
first_level, fission_0, fission_1, fission_2, s_id, recall_dt
|
|
@@ -275,15 +277,15 @@ class DailyDataManager(object):
|
|
|
)
|
|
|
except Exception as e:
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
- function="updateDetail",
|
|
|
+ task=TASK_NAME,
|
|
|
+ function="update_minigram_detail",
|
|
|
status="fail",
|
|
|
message="mysql 更新失败, 报错信息是 {}".format(e)
|
|
|
)
|
|
|
except Exception as e:
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
- function="updateDetail",
|
|
|
+ task=TASK_NAME,
|
|
|
+ function="update_minigram_detail",
|
|
|
status="fail",
|
|
|
message="更新单条数据失败, 报错信息是 {}".format(e)
|
|
|
)
|
|
@@ -296,12 +298,12 @@ def updateArticlesJob(biz_date=None):
|
|
|
"""
|
|
|
if not biz_date:
|
|
|
biz_date = get_yesterday()
|
|
|
- DDM = DailyDataManager()
|
|
|
- article_list = DDM.getPublishedArticles(biz_date)
|
|
|
+ data_manager = DailyDataManager()
|
|
|
+ article_list = data_manager.get_published_articles(biz_date)
|
|
|
for article in tqdm(article_list):
|
|
|
- DDM.updateInfo(article)
|
|
|
+ data_manager.update_article_info(article)
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
+ task=TASK_NAME,
|
|
|
function="updateArticlesJob",
|
|
|
message="文章更新完成---{}".format(biz_date.__str__())
|
|
|
)
|
|
@@ -314,18 +316,18 @@ def updateMinigramInfoJob(biz_date=None):
|
|
|
"""
|
|
|
if not biz_date:
|
|
|
biz_date = get_yesterday()
|
|
|
- DDM = DailyDataManager()
|
|
|
+ data_manager = DailyDataManager()
|
|
|
try:
|
|
|
- DDM.updateDetail(biz_date)
|
|
|
+ data_manager.update_minigram_detail(biz_date)
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
- function="updateArticlesJob",
|
|
|
+ task=TASK_NAME,
|
|
|
+ function="updateMinigramInfoJob",
|
|
|
message="小程序更新完成---{}".format(biz_date.__str__())
|
|
|
)
|
|
|
except Exception as e:
|
|
|
log(
|
|
|
- task="updateMinigramInfoDaily",
|
|
|
- function="updateArticlesJob",
|
|
|
+ task=TASK_NAME,
|
|
|
+ function="updateMinigramInfoJob",
|
|
|
status="fail",
|
|
|
message="小程序更新失败---{}, 报错信息是: {}".format(biz_date.__str__(), e)
|
|
|
)
|
|
@@ -352,7 +354,7 @@ def main():
|
|
|
schedule.run_pending()
|
|
|
time.sleep(1)
|
|
|
# log(
|
|
|
- # task="updateMinigramInfoDaily",
|
|
|
+ # task=TASK_NAME,
|
|
|
# function="main",
|
|
|
# message="更新文章小程序信息任务正常执行"
|
|
|
# )
|