|
@@ -7,12 +7,21 @@ import time
|
|
import json
|
|
import json
|
|
import traceback
|
|
import traceback
|
|
|
|
|
|
|
|
+import urllib.parse
|
|
from tqdm import tqdm
|
|
from tqdm import tqdm
|
|
from datetime import datetime
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
+
|
|
from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi
|
|
from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi
|
|
|
|
|
|
# Table holding the official-account articles processed by this module.
ARTICLE_TABLE = "official_articles_v2"

# Values compared against response['code'] returned by
# WeixinSpider().get_article_text() in update_publish_timestamp.
ARTICLE_DELETE_CODE = 25005  # recorded as DELETE_STATUS
ARTICLE_SUCCESS_CODE = 0  # payload is parsed for timestamp / mini programs

# Sentinel values written to the publish_timestamp column in place of a real
# timestamp. get_articles selects rows holding DEFAULT_STATUS or
# REQUEST_FAIL_STATUS, so those two are (re)processed on each run.
DEFAULT_STATUS = 0  # presumably "not processed yet" -- confirm against schema
REQUEST_FAIL_STATUS = -1  # fetching or parsing the article raised
DELETE_STATUS = -2  # response code was ARTICLE_DELETE_CODE
UNKNOWN_STATUS = -3  # response code was neither success nor delete
|
|
|
|
|
|
|
|
|
|
def get_accounts_v1():
|
|
def get_accounts_v1():
|
|
@@ -446,15 +455,109 @@ def check_job():
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
def get_articles(db_client):
    """
    Select the articles whose publish details still need to be fetched.

    A row qualifies when its ``publish_timestamp`` column holds
    ``DEFAULT_STATUS`` or ``REQUEST_FAIL_STATUS``, so rows whose previous
    attempt failed are picked up again.

    :param db_client: database client exposing ``select(sql)``
    :return: the result of ``db_client.select``; callers index each row as
        ``(ContentUrl, wx_sn)``
    """
    # Render the IN-list explicitly instead of interpolating a Python tuple's
    # repr, which would produce invalid SQL such as "(0,)" for a single value.
    pending_statuses = ", ".join(
        str(status) for status in (DEFAULT_STATUS, REQUEST_FAIL_STATUS)
    )
    sql = f"""
        SELECT ContentUrl, wx_sn
        FROM {ARTICLE_TABLE}
        WHERE publish_timestamp IN ({pending_statuses});"""
    return db_client.select(sql)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def _extract_root_source_ids(mini_program):
    """Collect the ``rootSourceId`` query value from each mini-program card."""
    root_source_ids = []
    for card in mini_program or []:
        try:
            query = urllib.parse.parse_qs(urllib.parse.unquote(card['path']))
            root_source_ids.append(query['rootSourceId'][0])
        except (KeyError, IndexError, TypeError):
            # One card without a usable path / rootSourceId must not throw
            # away the article's valid publish timestamp; skip just that card.
            print(f"skip mini_program card without rootSourceId: {card!r}")
    return root_source_ids


def update_publish_timestamp(db_client, row):
    """
    Fetch one article's detail and store its publish timestamp and the
    root source ids of its mini-program cards.

    The value written to ``publish_timestamp`` depends on the response code:

    * ``ARTICLE_DELETE_CODE``  -> ``DELETE_STATUS``
    * ``ARTICLE_SUCCESS_CODE`` -> the payload's ``publish_timestamp`` divided
      by 1000 and truncated to int (the code treats the payload value as
      milliseconds)
    * any other code           -> ``UNKNOWN_STATUS``
    * any exception            -> ``REQUEST_FAIL_STATUS``

    :param db_client: database client exposing ``update(sql=..., params=...)``
    :param row: sequence whose first item is the article URL and second item
        is the ``wx_sn`` identifying the row to update
    :return: None
    """
    url = row[0]
    wx_sn = row[1]
    # Only a successful response can contribute ids; every other outcome
    # stores an empty list.
    root_source_id_list = []
    try:
        response = WeixinSpider().get_article_text(url)
        response_code = response['code']

        if response_code == ARTICLE_DELETE_CODE:
            publish_timestamp_s = DELETE_STATUS
        elif response_code == ARTICLE_SUCCESS_CODE:
            data = response['data']['data']
            publish_timestamp_ms = data['publish_timestamp']
            publish_timestamp_s = int(publish_timestamp_ms / 1000)
            root_source_id_list = _extract_root_source_ids(
                data.get('mini_program', [])
            )
        else:
            publish_timestamp_s = UNKNOWN_STATUS
    except Exception as e:
        # Mark the row as failed so get_articles selects it again next run.
        publish_timestamp_s = REQUEST_FAIL_STATUS
        root_source_id_list = []
        print(e, traceback.format_exc())

    # Values are passed as parameters; only the table name is interpolated.
    update_sql = f"""
        UPDATE {ARTICLE_TABLE}
        SET publish_timestamp = %s, root_source_id_list = %s
        WHERE wx_sn = %s;
    """
    db_client.update(
        sql=update_sql,
        params=(
            publish_timestamp_s,
            json.dumps(root_source_id_list, ensure_ascii=False),
            wx_sn,
        ),
    )
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def get_article_detail_job():
    """
    Fetch publish details for every pending article.

    Opens a database connection, selects the pending articles through
    ``get_articles`` and calls ``update_publish_timestamp`` for each one.
    If the connection cannot be created, an alert is sent through ``bot``
    and the job returns without processing anything. A failure on a single
    article is printed and does not stop the remaining articles.

    :return: None
    """
    try:
        db_client = PQMySQL()
    except Exception as e:
        error_msg = traceback.format_exc()
        bot(
            title="获取文章详情任务连接数据库失败",
            detail={
                "job": "get_article_detail_job",
                # Send the message text, not the exception object, so the
                # payload stays serializable however bot encodes it.
                "error": str(e),
                "msg": error_msg
            }
        )
        return

    article_tuple = get_articles(db_client)
    for article in tqdm(article_tuple):
        try:
            update_publish_timestamp(db_client=db_client, row=article)
        except Exception as e:
            # Keep going: one bad article must not abort the whole batch.
            print(e)
            print(traceback.format_exc())
|
|
|
|
+
|
|
|
|
+
|
|
def main():
    """
    Run the update job, the check job and the article detail job, in that
    order, one after another.

    :return: None
    """
    for job in (update_job, check_job, get_article_detail_job):
        job()
|
|
|
|
|
|
|
|
|
|
# Run every job once when this file is executed as a script.
if __name__ == '__main__':
    main()
|