|
@@ -3,13 +3,19 @@
|
|
@description Update Minigram Info Daily
|
|
@description Update Minigram Info Daily
|
|
"""
|
|
"""
|
|
import time
|
|
import time
|
|
|
|
+import sys
|
|
|
|
|
|
from tqdm import tqdm
|
|
from tqdm import tqdm
|
|
from datetime import datetime, timedelta
|
|
from datetime import datetime, timedelta
|
|
import schedule
|
|
import schedule
|
|
|
|
+from argparse import ArgumentParser
|
|
|
|
|
|
from applications import longArticlesMySQL, PQMySQL, WeixinSpider, Functions, log, bot
|
|
from applications import longArticlesMySQL, PQMySQL, WeixinSpider, Functions, log, bot
|
|
|
|
|
|
|
|
def get_yesterday():
    """
    Return the current local datetime moved back by exactly one day.

    The time-of-day part of "now" is kept as-is; nothing is truncated to midnight here.

    :return: a naive ``datetime`` equal to ``datetime.today()`` minus 24 hours
    """
    one_day = timedelta(days=1)
    return datetime.today() - one_day
|
|
|
|
+
|
|
|
|
|
|
class DailyDataManager(object):
|
|
class DailyDataManager(object):
|
|
"""
|
|
"""
|
|
@@ -21,20 +27,18 @@ class DailyDataManager(object):
|
|
functions = Functions()
|
|
functions = Functions()
|
|
|
|
|
|
@classmethod
|
|
@classmethod
|
|
- def getPublishedArticles(cls):
|
|
|
|
|
|
+ def getPublishedArticles(cls, biz_date):
|
|
"""
|
|
"""
|
|
获取已经发布的文章的信息, createTime 选择为前一天的 0 点并且转化为时间戳
|
|
获取已经发布的文章的信息, createTime 选择为前一天的 0 点并且转化为时间戳
|
|
:return:
|
|
:return:
|
|
"""
|
|
"""
|
|
- today = datetime.today()
|
|
|
|
- # 获取昨天的日期
|
|
|
|
- yesterday = today - timedelta(days=1)
|
|
|
|
- yesterday_midnight = datetime(year=yesterday.year, month=yesterday.month, day=yesterday.day)
|
|
|
|
- yesterday_timestamp = yesterday_midnight.timestamp()
|
|
|
|
|
|
+ biz_date_midnight = datetime(year=biz_date.year, month=biz_date.month, day=biz_date.day)
|
|
|
|
+ biz_date_ts = biz_date_midnight.timestamp()
|
|
|
|
+ biz_date_end_ts = biz_date_ts + 24 * 60 * 60 - 1
|
|
sql2 = f"""
|
|
sql2 = f"""
|
|
select ContentUrl, wx_sn, createTime
|
|
select ContentUrl, wx_sn, createTime
|
|
from official_articles_v2
|
|
from official_articles_v2
|
|
- where createTime >= {yesterday_timestamp};
|
|
|
|
|
|
+ where createTime between {biz_date_ts} and {biz_date_end_ts};
|
|
-- and accountName in (
|
|
-- and accountName in (
|
|
-- select distinct account_name from account_avg_info_v2
|
|
-- select distinct account_name from account_avg_info_v2
|
|
-- );
|
|
-- );
|
|
@@ -146,7 +150,7 @@ class DailyDataManager(object):
|
|
:return:
|
|
:return:
|
|
"""
|
|
"""
|
|
sql = f"""
|
|
sql = f"""
|
|
- select type, machinecode, create_time, first_level_dt
|
|
|
|
|
|
+ select type, machinecode, create_time, first_level_dt
|
|
from changwen_data_base_v2
|
|
from changwen_data_base_v2
|
|
where rootsourceid = '{rootSourceId}';
|
|
where rootsourceid = '{rootSourceId}';
|
|
"""
|
|
"""
|
|
@@ -227,18 +231,18 @@ class DailyDataManager(object):
|
|
return None
|
|
return None
|
|
|
|
|
|
@classmethod
|
|
@classmethod
|
|
- def updateDetail(cls):
|
|
|
|
|
|
+ def updateDetail(cls, biz_date):
|
|
"""
|
|
"""
|
|
:return:
|
|
:return:
|
|
"""
|
|
"""
|
|
today = datetime.today()
|
|
today = datetime.today()
|
|
# 获取三天前的日期
|
|
# 获取三天前的日期
|
|
- yesterday = today - timedelta(days=3)
|
|
|
|
- yesterday_str = yesterday.__str__().split(" ")[0]
|
|
|
|
|
|
+ stats_date = biz_date - timedelta(days=3)
|
|
|
|
+ stats_date_str = stats_date.__str__().split(" ")[0]
|
|
sql = f"""
|
|
sql = f"""
|
|
select distinct root_source_id
|
|
select distinct root_source_id
|
|
from long_articles_detail_info
|
|
from long_articles_detail_info
|
|
- where publish_dt >= '{yesterday_str}';
|
|
|
|
|
|
+ where publish_dt >= '{stats_date_str}';
|
|
"""
|
|
"""
|
|
source_id_list = cls.pqMysql.select(sql)
|
|
source_id_list = cls.pqMysql.select(sql)
|
|
log(
|
|
log(
|
|
@@ -285,55 +289,73 @@ class DailyDataManager(object):
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
def updateArticlesJob(biz_date=None):
    """
    Update article data for one business date.

    Fetches the rows returned by ``DailyDataManager.getPublishedArticles(biz_date)``,
    passes each one to ``DailyDataManager.updateInfo`` and finally writes a
    completion record to the log.

    :param biz_date: datetime of the day to process; any falsy value (the default
        ``None`` included) selects ``get_yesterday()``
    :return: None
    """
    # Fall back to yesterday when the caller (e.g. the scheduler) passes no date.
    biz_date = biz_date or get_yesterday()

    manager = DailyDataManager()
    published = manager.getPublishedArticles(biz_date)
    # tqdm only wraps the iterable to display progress.
    for item in tqdm(published):
        manager.updateInfo(item)

    finish_message = "文章更新完成---{}".format(str(biz_date))
    log(
        task="updateMinigramInfoDaily",
        function="updateArticlesJob",
        message=finish_message
    )
|
|
|
|
|
|
|
|
|
|
def updateMinigramInfoJob(biz_date=None):
    """
    Update mini-program data for the days leading up to one business date.

    Delegates the work to ``DailyDataManager.updateDetail(biz_date)`` and logs the
    outcome. Any exception raised along the way is caught and recorded as a
    ``status="fail"`` log entry instead of propagating, so a failed run does not
    take down the caller.

    :param biz_date: datetime of the day to process; any falsy value (the default
        ``None`` included) selects ``get_yesterday()``
    :return: None
    """
    if not biz_date:
        biz_date = get_yesterday()
    DDM = DailyDataManager()
    try:
        DDM.updateDetail(biz_date)
        log(
            task="updateMinigramInfoDaily",
            # Report this job's own name; it was previously logged as "updateArticlesJob".
            function="updateMinigramInfoJob",
            message="小程序更新完成---{}".format(biz_date.__str__())
        )
    except Exception as e:
        # Best-effort job: record the failure and return normally.
        log(
            task="updateMinigramInfoDaily",
            function="updateMinigramInfoJob",
            status="fail",
            message="小程序更新失败---{}, 报错信息是: {}".format(biz_date.__str__(), e)
        )
|
|
|
|
|
|
|
|
def main(argv=None):
    """
    Command-line entry point.

    With ``--run-date YYYYMMDD`` both jobs are executed once for that date and the
    function returns. Without it, the two jobs are registered with ``schedule``
    (article job at 01:30, mini-program job at 03:30, every day) and the function
    loops forever, polling the scheduler once per second.

    :param argv: optional list of argument strings; ``None`` makes argparse read
        ``sys.argv[1:]``
    :return: None (manual mode only; daily mode never returns)
    :raises SystemExit: from argparse, for ``--help`` or a malformed ``--run-date``
    """
    parser = ArgumentParser()
    parser.add_argument(
        "--run-date",
        # argparse %-formats help strings, so literal percent signs must be doubled;
        # bare "%Y%m%d" makes argparse raise instead of printing the usage text.
        help="Run only once for date in format of %%Y%%m%%d. "
             "If no specified, run as daily jobs.",
    )
    args = parser.parse_args(argv)

    if args.run_date:
        try:
            biz_date = datetime.strptime(args.run_date, "%Y%m%d")
        except ValueError:
            # Report bad input as a normal usage error (exit status 2), not a traceback.
            parser.error(
                "--run-date must be in format YYYYMMDD, got {!r}".format(args.run_date)
            )
        print("Run in manual mode. Date: {}".format(args.run_date))
        updateArticlesJob(biz_date)
        updateMinigramInfoJob(biz_date)
        return

    print("Run in daily mode.")
    schedule.every().day.at("01:30").do(Functions().job_with_thread, updateArticlesJob)
    schedule.every().day.at("03:30").do(Functions().job_with_thread, updateMinigramInfoJob)

    # Poll the scheduler forever; the one-second sleep avoids a busy loop.
    while True:
        schedule.run_pending()
        time.sleep(1)


if __name__ == '__main__':
    main()
|