|
@@ -13,27 +13,10 @@ from datetime import datetime
|
|
|
|
|
|
|
|
|
|
from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi
|
|
from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi
|
|
|
|
+from applications.const import updatePublishedMsgTaskConst
|
|
|
|
|
|
-ARTICLE_TABLE = "official_articles_v2"
|
|
|
|
-ARTICLE_DELETE_CODE = 25005
|
|
|
|
-ARTICLE_SUCCESS_CODE = 0
|
|
|
|
-
|
|
|
|
-DEFAULT_STATUS = 0
|
|
|
|
-REQUEST_FAIL_STATUS = -1
|
|
|
|
-DELETE_STATUS = -2
|
|
|
|
-UNKNOWN_STATUS = -3
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-def get_accounts_v1():
|
|
|
|
- """
|
|
|
|
- 获取账号信息
|
|
|
|
- :return: [{}, {},...], [{}, {}, {}...]
|
|
|
|
- """
|
|
|
|
- with open("config/accountInfoV0914.json", encoding="utf-8") as f:
|
|
|
|
- account_list = json.loads(f.read())
|
|
|
|
- subscription_account = [i for i in account_list if i['type'] == '订阅号']
|
|
|
|
- server_account = [i for i in account_list if i['type'] == '服务号']
|
|
|
|
- return subscription_account, server_account
|
|
|
|
|
|
+ARTICLE_TABLE = "official_articles"
|
|
|
|
+const = updatePublishedMsgTaskConst()
|
|
|
|
|
|
|
|
|
|
def get_account_using_status():
|
|
def get_account_using_status():
|
|
@@ -57,7 +40,7 @@ def get_accounts():
|
|
"ghId": line[1],
|
|
"ghId": line[1],
|
|
"follower_count": line[2],
|
|
"follower_count": line[2],
|
|
"account_init_time": int(line[3] / 1000),
|
|
"account_init_time": int(line[3] / 1000),
|
|
- "account_type": line[4],
|
|
|
|
|
|
+ "account_type": line[4], # 订阅号 or 服务号
|
|
"account_auth": line[5]
|
|
"account_auth": line[5]
|
|
"""
|
|
"""
|
|
using_account_set = get_account_using_status()
|
|
using_account_set = get_account_using_status()
|
|
@@ -69,8 +52,8 @@ def get_accounts():
|
|
else:
|
|
else:
|
|
item['using_status'] = 0
|
|
item['using_status'] = 0
|
|
account_list.append(item)
|
|
account_list.append(item)
|
|
- subscription_account = [i for i in account_list if i['account_type'] in {0, 1}]
|
|
|
|
- server_account = [i for i in account_list if i['account_type'] == 2]
|
|
|
|
|
|
+ subscription_account = [i for i in account_list if i['account_type'] in const.SUBSCRIBE_TYPE_SET]
|
|
|
|
+ server_account = [i for i in account_list if i['account_type'] == const.SERVICE_TYPE]
|
|
return subscription_account, server_account
|
|
return subscription_account, server_account
|
|
|
|
|
|
|
|
|
|
@@ -313,7 +296,7 @@ def check_single_account(db_client, account_item):
|
|
try:
|
|
try:
|
|
latest_update_time = db_client.select(sql)[0][0]
|
|
latest_update_time = db_client.select(sql)[0][0]
|
|
# 判断该账号当天发布的文章是否被收集
|
|
# 判断该账号当天发布的文章是否被收集
|
|
- if account_type in {0, 1}:
|
|
|
|
|
|
+ if account_type in const.SUBSCRIBE_TYPE_SET:
|
|
if int(latest_update_time) > int(today_timestamp):
|
|
if int(latest_update_time) > int(today_timestamp):
|
|
return True
|
|
return True
|
|
else:
|
|
else:
|
|
@@ -462,8 +445,8 @@ def get_articles(db_client):
|
|
"""
|
|
"""
|
|
sql = f"""
|
|
sql = f"""
|
|
SELECT ContentUrl, wx_sn
|
|
SELECT ContentUrl, wx_sn
|
|
- FROM official_articles_v2
|
|
|
|
- WHERE publish_timestamp in {(DEFAULT_STATUS, REQUEST_FAIL_STATUS)};"""
|
|
|
|
|
|
+ FROM {ARTICLE_TABLE}
|
|
|
|
+ WHERE publish_timestamp in {(const.DEFAULT_STATUS, const.REQUEST_FAIL_STATUS)};"""
|
|
response = db_client.select(sql)
|
|
response = db_client.select(sql)
|
|
return response
|
|
return response
|
|
|
|
|
|
@@ -481,10 +464,10 @@ def update_publish_timestamp(db_client, row):
|
|
response = WeixinSpider().get_article_text(url)
|
|
response = WeixinSpider().get_article_text(url)
|
|
response_code = response['code']
|
|
response_code = response['code']
|
|
|
|
|
|
- if response_code == ARTICLE_DELETE_CODE:
|
|
|
|
- publish_timestamp_s = DELETE_STATUS
|
|
|
|
|
|
+ if response_code == const.ARTICLE_DELETE_CODE:
|
|
|
|
+ publish_timestamp_s = const.DELETE_STATUS
|
|
root_source_id_list = []
|
|
root_source_id_list = []
|
|
- elif response_code == ARTICLE_SUCCESS_CODE:
|
|
|
|
|
|
+ elif response_code == const.ARTICLE_SUCCESS_CODE:
|
|
data = response['data']['data']
|
|
data = response['data']['data']
|
|
publish_timestamp_ms = data['publish_timestamp']
|
|
publish_timestamp_ms = data['publish_timestamp']
|
|
publish_timestamp_s = int(publish_timestamp_ms / 1000)
|
|
publish_timestamp_s = int(publish_timestamp_ms / 1000)
|
|
@@ -499,16 +482,16 @@ def update_publish_timestamp(db_client, row):
|
|
else:
|
|
else:
|
|
root_source_id_list = []
|
|
root_source_id_list = []
|
|
else:
|
|
else:
|
|
- publish_timestamp_s = UNKNOWN_STATUS
|
|
|
|
|
|
+ publish_timestamp_s = const.UNKNOWN_STATUS
|
|
root_source_id_list = []
|
|
root_source_id_list = []
|
|
except Exception as e:
|
|
except Exception as e:
|
|
- publish_timestamp_s = REQUEST_FAIL_STATUS
|
|
|
|
|
|
+ publish_timestamp_s = const.REQUEST_FAIL_STATUS
|
|
root_source_id_list = []
|
|
root_source_id_list = []
|
|
error_msg = traceback.format_exc()
|
|
error_msg = traceback.format_exc()
|
|
print(e, error_msg)
|
|
print(e, error_msg)
|
|
|
|
|
|
update_sql = f"""
|
|
update_sql = f"""
|
|
- UPDATE official_articles_v2
|
|
|
|
|
|
+ UPDATE {ARTICLE_TABLE}
|
|
SET publish_timestamp = %s, root_source_id_list = %s
|
|
SET publish_timestamp = %s, root_source_id_list = %s
|
|
WHERE wx_sn = %s;
|
|
WHERE wx_sn = %s;
|
|
"""
|
|
"""
|
|
@@ -519,7 +502,7 @@ def update_publish_timestamp(db_client, row):
|
|
json.dumps(root_source_id_list, ensure_ascii=False),
|
|
json.dumps(root_source_id_list, ensure_ascii=False),
|
|
wx_sn
|
|
wx_sn
|
|
))
|
|
))
|
|
- if publish_timestamp_s == REQUEST_FAIL_STATUS:
|
|
|
|
|
|
+ if publish_timestamp_s == const.REQUEST_FAIL_STATUS:
|
|
return row
|
|
return row
|
|
else:
|
|
else:
|
|
return None
|
|
return None
|
|
@@ -566,10 +549,10 @@ def get_article_detail_job():
|
|
|
|
|
|
# 通过msgId 来修改publish_timestamp
|
|
# 通过msgId 来修改publish_timestamp
|
|
update_sql = f"""
|
|
update_sql = f"""
|
|
- UPDATE official_articles_v2 oav
|
|
|
|
|
|
+ UPDATE {ARTICLE_TABLE} oav
|
|
JOIN (
|
|
JOIN (
|
|
SELECT appMsgId, MAX(publish_timestamp) AS publish_timestamp
|
|
SELECT appMsgId, MAX(publish_timestamp) AS publish_timestamp
|
|
- FROM official_articles_v2
|
|
|
|
|
|
+ FROM {ARTICLE_TABLE}
|
|
WHERE publish_timestamp > %s
|
|
WHERE publish_timestamp > %s
|
|
GROUP BY appMsgId
|
|
GROUP BY appMsgId
|
|
) vv
|
|
) vv
|
|
@@ -584,8 +567,8 @@ def get_article_detail_job():
|
|
|
|
|
|
# 若还是无 publish_timestamp,用update_time当作 publish_timestamp
|
|
# 若还是无 publish_timestamp,用update_time当作 publish_timestamp
|
|
update_sql_2 = f"""
|
|
update_sql_2 = f"""
|
|
- UPDATE official_articles_v2
|
|
|
|
- SET publish_timestamp = update_time
|
|
|
|
|
|
+ UPDATE {ARTICLE_TABLE}
|
|
|
|
+ SET publish_timestamp = updateTime
|
|
WHERE publish_timestamp < %s;
|
|
WHERE publish_timestamp < %s;
|
|
"""
|
|
"""
|
|
db_client.update(
|
|
db_client.update(
|