|
@@ -1,37 +1,94 @@
|
|
|
-import json
|
|
|
+"""
|
|
|
+@author luojunhui
|
|
|
+@description Update Daily
|
|
|
+"""
|
|
|
+import time
|
|
|
|
|
|
-from concurrent.futures.thread import ThreadPoolExecutor
|
|
|
from tqdm import tqdm
|
|
|
from datetime import datetime, timedelta
|
|
|
+import schedule
|
|
|
|
|
|
-from applications import AdMySQL, PQMySQL, WeixinSpider
|
|
|
+from applications import longArticlesMySQL, PQMySQL, WeixinSpider, Functions
|
|
|
|
|
|
|
|
|
class DailyDataManager(object):
|
|
|
"""
|
|
|
daily 数据每日更新
|
|
|
"""
|
|
|
- ad_mysql = AdMySQL()
|
|
|
- pq_mysql = PQMySQL()
|
|
|
- wx_spider = WeixinSpider()
|
|
|
+ laMysql = longArticlesMySQL()
|
|
|
+ pqMysql = PQMySQL()
|
|
|
+ wxSpider = WeixinSpider()
|
|
|
+ functions = Functions()
|
|
|
|
|
|
@classmethod
def getPublishedArticles(cls):
    """
    Fetch the articles created since 00:00 of the previous day.

    The lower bound is computed at call time from the process's local
    clock and compared against ``createTime`` as an epoch timestamp in
    seconds. Only articles whose ``accountName`` appears in
    ``account_avg_info_v2`` are returned.

    :return: the result of ``cls.pqMysql.select`` for the query; each row
        carries ContentUrl, wx_sn and createTime, in that order.
    """
    # Start of yesterday as a naive local datetime.
    yesterday_midnight = (datetime.today() - timedelta(days=1)).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    # timestamp() returns a float; truncate so the SQL literal is an
    # integer epoch rather than something like "1724774400.0".
    yesterday_timestamp = int(yesterday_midnight.timestamp())
    # The interpolated value is an int computed above, never external input.
    sql2 = f"""
        select ContentUrl, wx_sn, createTime
        from official_articles_v2
        where createTime >= {yesterday_timestamp}
        and accountName in (
            select distinct account_name from account_avg_info_v2
        );
    """
    return cls.pqMysql.select(sql2)
|
|
|
|
|
|
@classmethod
def updateInfo(cls, line):
    """
    Insert one article's mini-program entries into long_articles_detail_info.

    Every mini-program entry of the article is written once per recall
    date: the publish date and each of the two following days.

    :param line: one article row, passed unchanged to ``getRootSourceIds``.
    :return: None. Any failure is printed together with the offending row
        and is not re-raised, so a batch caller can continue with the
        next article.
    """
    # Plain string: the statement has no interpolated parts, the values
    # are bound through ``params``.
    insert_sql = """
        INSERT INTO long_articles_detail_info
        (wx_sn, mini_title, mini_name, cover_url, video_index, root_source_id, video_id, publish_dt, recall_dt)
        values
        (%s, %s, %s, %s, %s, %s, %s, %s, %s);
    """
    try:
        wx_sn, mini_info, create_time = cls.getRootSourceIds(line)
        publish_day = datetime.fromtimestamp(create_time)
        publish_dt = publish_day.strftime('%Y-%m-%d')
        # Publish date plus the next two days.
        recall_dt_str_list = [
            (publish_day + timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(3)
        ]

        # Parse every entry once, before the first insert: a malformed
        # entry then fails the article as a whole instead of leaving
        # part of its rows written.
        rows = []
        for index, item in enumerate(mini_info, 1):
            path = item['path']
            rows.append((
                wx_sn,
                item['title'],
                # NOTE(review): 'nike_name' is the key the original code
                # reads; presumably the payload's own spelling - confirm.
                item['nike_name'],
                item['image_url'],
                index,  # 1-based position of the entry within the article
                path.split("rootSourceId%3D")[-1],
                path.split("videos%3Fid%3D")[1].split("%26su%3D")[0],
                publish_dt,
            ))

        for dt_str in recall_dt_str_list:
            for row in rows:
                cls.pqMysql.update(sql=insert_sql, params=row + (dt_str,))
    except Exception as e:
        # Best-effort by design: report which row failed and move on.
        print("updateInfo failed for {}: {}".format(line, e))
|
|
|
+
|
|
|
@classmethod
|
|
|
def getRootSourceIds(cls, data_info):
|
|
|
"""
|
|
@@ -39,9 +96,7 @@ class DailyDataManager(object):
|
|
|
:return:
|
|
|
"""
|
|
|
url = data_info[0]
|
|
|
- article_detail = cls.wx_spider.get_article_text(url)
|
|
|
- print(url)
|
|
|
- print(article_detail)
|
|
|
+ article_detail = cls.wxSpider.get_article_text(url)
|
|
|
mini_info = article_detail['data']['data']['mini_program']
|
|
|
return data_info[1].decode(), mini_info, data_info[2]
|
|
|
|
|
@@ -49,7 +104,7 @@ class DailyDataManager(object):
|
|
|
def getMinigramInfo(cls, rootSourceId):
|
|
|
"""
|
|
|
|
|
|
- :param rootIdTuple:
|
|
|
+ :param rootSourceId:
|
|
|
:return:
|
|
|
"""
|
|
|
sql = f"""
|
|
@@ -57,7 +112,7 @@ class DailyDataManager(object):
|
|
|
from changwen_data_base_v2
|
|
|
where rootsourceid = '{rootSourceId}';
|
|
|
"""
|
|
|
- result_list = cls.ad_mysql.select(sql)
|
|
|
+ result_list = cls.laMysql.select(sql)
|
|
|
|
|
|
def summarize(values):
|
|
|
"""
|
|
@@ -117,32 +172,21 @@ class DailyDataManager(object):
|
|
|
|
|
|
return summarize(result_list)
|
|
|
|
|
|
- @classmethod
|
|
|
- def getArticleInfo(cls, trace_id):
|
|
|
- """
|
|
|
- 通过 trace_id来获取文章信息
|
|
|
- :param trace_id:
|
|
|
- :return:
|
|
|
- """
|
|
|
- sql = f"""
|
|
|
- SELECT account_name, article_title
|
|
|
- FROM long_articles_video
|
|
|
- WHERE trace_id = '{trace_id}';
|
|
|
- """
|
|
|
- info = cls.pq_mysql.select(sql)
|
|
|
- return info[0]
|
|
|
-
|
|
|
@classmethod
|
|
|
def updateDetail(cls):
|
|
|
"""
|
|
|
-
|
|
|
:return:
|
|
|
"""
|
|
|
+ today = datetime.today()
|
|
|
+ # Date three days ago (the variable names below still say "yesterday")
|
|
|
+ yesterday = today - timedelta(days=3)
|
|
|
+ yesterday_str = yesterday.__str__().split(" ")[0]
|
|
|
sql = f"""
|
|
|
select distinct root_source_id
|
|
|
- from long_articles_detail_info;
|
|
|
+ from long_articles_detail_info
|
|
|
+ where publish_dt >= '{yesterday_str}';
|
|
|
"""
|
|
|
- source_id_list = cls.pq_mysql.select(sql)
|
|
|
+ source_id_list = cls.pqMysql.select(sql)
|
|
|
for item in tqdm(source_id_list):
|
|
|
s_id = item[0]
|
|
|
try:
|
|
@@ -153,14 +197,13 @@ class DailyDataManager(object):
|
|
|
fission_0 = result[key][1]
|
|
|
fission_1 = result[key][2]
|
|
|
fission_2 = result[key][3]
|
|
|
- print(key, first_level, fission_0, fission_1, fission_2)
|
|
|
update_sql = f"""
|
|
|
UPDATE long_articles_detail_info
|
|
|
set first_level = %s, fission_0 = %s, fission_1 = %s, fission_2 = %s
|
|
|
where root_source_id = %s and recall_dt = %s;
|
|
|
"""
|
|
|
try:
|
|
|
- cls.pq_mysql.update(
|
|
|
+ cls.pqMysql.update(
|
|
|
sql=update_sql,
|
|
|
params=(
|
|
|
first_level, fission_0, fission_1, fission_2, s_id, recall_dt
|
|
@@ -172,8 +215,34 @@ class DailyDataManager(object):
|
|
|
print(e)
|
|
|
|
|
|
|
|
|
def updateArticlesJob():
    """
    Scheduled job: refresh article data.

    Loads the rows returned by ``getPublishedArticles`` and passes each
    one to ``updateInfo``, then prints a completion message with the
    current time.

    :return: None
    """
    manager = DailyDataManager()
    published = manager.getPublishedArticles()
    for row in tqdm(published):
        manager.updateInfo(row)
    finished_at = str(datetime.today())
    print("文章更新完成---{}".format(finished_at))
|
|
|
+
|
|
|
+
|
|
|
def updateMinigramInfoJob():
    """
    Scheduled job: refresh mini-program data for the previous three days.

    Delegates to ``DailyDataManager.updateDetail`` and then prints a
    completion message with the current time.

    :return: None
    """
    manager = DailyDataManager()
    manager.updateDetail()
    finished_at = str(datetime.today())
    print("小程序更新完成---{}".format(finished_at))
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Daily schedule as ("HH:MM", job) pairs, registered in this order.
    daily_jobs = (
        ("01:00", updateArticlesJob),
        ("04:30", updateMinigramInfoJob),
    )
    for run_at, job in daily_jobs:
        # A separate Functions instance per registration; its
        # job_with_thread receives the job as its argument.
        schedule.every().day.at(run_at).do(Functions().job_with_thread, job)

    # Check for due jobs once per second, forever.
    while True:
        schedule.run_pending()
        time.sleep(1)
|