|
@@ -11,10 +11,12 @@ import schedule
|
|
|
from tqdm import tqdm
|
|
|
from datetime import datetime
|
|
|
|
|
|
-from applications import PQMySQL, WeixinSpider, Functions, log, bot
|
|
|
+from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi
|
|
|
|
|
|
+ARTICLE_TABLE = "official_articles_v2"
|
|
|
|
|
|
-def getAccounts():
|
|
|
+
|
|
|
+def get_accounts_v1():
|
|
|
"""
|
|
|
获取账号信息
|
|
|
:return: [{}, {},...], [{}, {}, {}...]
|
|
@@ -26,15 +28,34 @@ def getAccounts():
|
|
|
return subscription_account, server_account
|
|
|
|
|
|
|
|
|
-def insertEachMsg(db_client, gh_id, account_name, msg_list):
|
|
|
+def get_accounts():
|
|
|
+ """
|
|
|
+ 从 aigc 数据库中获取目前处于发布状态的账号
|
|
|
+ :return:
|
|
|
+ "name": line[0],
|
|
|
+ "ghId": line[1],
|
|
|
+ "follower_count": line[2],
|
|
|
+ "account_init_time": int(line[3] / 1000),
|
|
|
+ "account_type": line[4],
|
|
|
+ "account_auth": line[5]
|
|
|
+ """
|
|
|
+ account_list = aiditApi.get_publish_account_from_aigc()
|
|
|
+ subscription_account = [i for i in account_list if i['account_type'] in {0, 1}]
|
|
|
+ server_account = [i for i in account_list if i['account_type'] == 2]
|
|
|
+ return subscription_account, server_account
|
|
|
+
|
|
|
+
|
|
|
+def insert_each_msg(db_client, item, account_name, msg_list):
|
|
|
"""
|
|
|
把消息数据更新到数据库中
|
|
|
+ :param item:
|
|
|
:param db_client:
|
|
|
:param account_name:
|
|
|
- :param gh_id:
|
|
|
:param msg_list:
|
|
|
:return:
|
|
|
"""
|
|
|
+ gh_id = item['ghId']
|
|
|
+ account_init_timestamp = item['account_init_timestamp']
|
|
|
for info in msg_list:
|
|
|
baseInfo = info.get("BaseInfo", {})
|
|
|
appMsgId = info.get("AppMsg", {}).get("BaseInfo", {}).get("AppMsgId", None)
|
|
@@ -62,6 +83,7 @@ def insertEachMsg(db_client, gh_id, account_name, msg_list):
|
|
|
show_zs_count = show_stat.get("show_zs_count", 0)
|
|
|
show_pay_count = show_stat.get("show_pay_count", 0)
|
|
|
wx_sn = ContentUrl.split("&sn=")[1].split("&")[0] if ContentUrl else None
|
|
|
+ status = 1 if int(time.time()) - account_init_timestamp > 14 * 24 * 3600 else 0
|
|
|
info_tuple = (
|
|
|
gh_id,
|
|
|
account_name,
|
|
@@ -87,19 +109,20 @@ def insertEachMsg(db_client, gh_id, account_name, msg_list):
|
|
|
show_pay_count,
|
|
|
wx_sn,
|
|
|
json.dumps(baseInfo, ensure_ascii=False),
|
|
|
- Functions().str_to_md5(title)
|
|
|
+ Functions().str_to_md5(title),
|
|
|
+ status
|
|
|
)
|
|
|
try:
|
|
|
insert_sql = f"""
|
|
|
- INSERT INTO official_articles_v2
|
|
|
- (ghId, accountName, appMsgId, title, Type, createTime, updateTime, Digest, ItemIndex, ContentUrl, SourceUrl, CoverImgUrl, CoverImgUrl_1_1, CoverImgUrl_255_1, ItemShowType, IsOriginal, ShowDesc, ori_content, show_view_count, show_like_count, show_zs_count, show_pay_count, wx_sn, baseInfo, title_md5)
|
|
|
+ INSERT INTO {ARTICLE_TABLE}
|
|
|
+ (ghId, accountName, appMsgId, title, Type, createTime, updateTime, Digest, ItemIndex, ContentUrl, SourceUrl, CoverImgUrl, CoverImgUrl_1_1, CoverImgUrl_255_1, ItemShowType, IsOriginal, ShowDesc, ori_content, show_view_count, show_like_count, show_zs_count, show_pay_count, wx_sn, baseInfo, title_md5, status)
|
|
|
values
|
|
|
- (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
+ (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
"""
|
|
|
db_client.update(sql=insert_sql, params=info_tuple)
|
|
|
log(
|
|
|
task="updatePublishedMsgDaily",
|
|
|
- function="insertEachMsg",
|
|
|
+ function="insert_each_msg",
|
|
|
message="插入文章数据成功",
|
|
|
data={
|
|
|
"info": info_tuple
|
|
@@ -109,7 +132,7 @@ def insertEachMsg(db_client, gh_id, account_name, msg_list):
|
|
|
except Exception as e:
|
|
|
try:
|
|
|
update_sql = f"""
|
|
|
- UPDATE official_articles_v2
|
|
|
+ UPDATE {ARTICLE_TABLE}
|
|
|
SET show_view_count = %s, show_like_count=%s
|
|
|
WHERE wx_sn = %s;
|
|
|
"""
|
|
@@ -117,7 +140,7 @@ def insertEachMsg(db_client, gh_id, account_name, msg_list):
|
|
|
params=(show_view_count, show_like_count, wx_sn))
|
|
|
log(
|
|
|
task="updatePublishedMsgDaily",
|
|
|
- function="insertEachMsg",
|
|
|
+ function="insert_each_msg",
|
|
|
message="更新文章数据成功",
|
|
|
data={
|
|
|
"wxSn": wx_sn,
|
|
@@ -129,23 +152,24 @@ def insertEachMsg(db_client, gh_id, account_name, msg_list):
|
|
|
except Exception as e:
|
|
|
log(
|
|
|
task="updatePublishedMsgDaily",
|
|
|
- function="insertEachMsg",
|
|
|
+ function="insert_each_msg",
|
|
|
message="更新文章失败, 报错原因是: {}".format(e),
|
|
|
status="fail"
|
|
|
)
|
|
|
continue
|
|
|
|
|
|
|
|
|
-def updateEachAccount(db_client, gh_id, account_name, latest_update_time, cursor=None):
|
|
|
+def update_each_account(db_client, item, account_name, latest_update_time, cursor=None):
|
|
|
"""
|
|
|
更新每一个账号信息
|
|
|
+ :param item:
|
|
|
:param account_name:
|
|
|
:param cursor:
|
|
|
:param latest_update_time: 最新更新时间
|
|
|
:param db_client: 数据库连接信息
|
|
|
- :param gh_id: 公众号 gh_id
|
|
|
:return: None
|
|
|
"""
|
|
|
+ gh_id = item['ghId']
|
|
|
response = WeixinSpider().update_msg_list(ghId=gh_id, index=cursor)
|
|
|
msg_list = response.get("data", {}).get("data", {})
|
|
|
if msg_list:
|
|
@@ -156,31 +180,31 @@ def updateEachAccount(db_client, gh_id, account_name, latest_update_time, cursor
|
|
|
resdata = WeixinSpider().get_account_by_url(last_url)
|
|
|
check_id = resdata['data'].get('data', {}).get('wx_gh')
|
|
|
if check_id == gh_id:
|
|
|
- insertEachMsg(
|
|
|
+ insert_each_msg(
|
|
|
db_client=db_client,
|
|
|
- gh_id=gh_id,
|
|
|
+ item=item,
|
|
|
account_name=account_name,
|
|
|
msg_list=msg_list
|
|
|
)
|
|
|
if last_time_stamp_in_this_msg > latest_update_time:
|
|
|
next_cursor = response['data']['next_cursor']
|
|
|
- return updateEachAccount(
|
|
|
+ return update_each_account(
|
|
|
db_client=db_client,
|
|
|
- gh_id=gh_id,
|
|
|
+ item=item,
|
|
|
account_name=account_name,
|
|
|
latest_update_time=latest_update_time,
|
|
|
cursor=next_cursor
|
|
|
)
|
|
|
log(
|
|
|
task="updatePublishedMsgDaily",
|
|
|
- function="updateEachAccount",
|
|
|
+ function="update_each_account",
|
|
|
message="账号文章更新成功",
|
|
|
data=response
|
|
|
)
|
|
|
else:
|
|
|
log(
|
|
|
task="updatePublishedMsgDaily",
|
|
|
- function="updateEachAccount",
|
|
|
+ function="update_each_account",
|
|
|
message="账号文章更新失败",
|
|
|
status="fail",
|
|
|
data=response
|
|
@@ -188,7 +212,7 @@ def updateEachAccount(db_client, gh_id, account_name, latest_update_time, cursor
|
|
|
return
|
|
|
|
|
|
|
|
|
-def checkAccountInfo(db_client, gh_id):
|
|
|
+def check_account_info(db_client, gh_id):
|
|
|
"""
|
|
|
通过 gh_id查询视频信息
|
|
|
:param db_client:
|
|
@@ -196,10 +220,10 @@ def checkAccountInfo(db_client, gh_id):
|
|
|
:return:
|
|
|
"""
|
|
|
sql = f"""
|
|
|
- select accountName, updateTime
|
|
|
- from official_articles_v2
|
|
|
- where ghId = '{gh_id}'
|
|
|
- order by updateTime DESC;
|
|
|
+ SELECT accountName, updateTime
|
|
|
+ FROM {ARTICLE_TABLE}
|
|
|
+ WHERE ghId = '{gh_id}'
|
|
|
+ ORDER BY updateTime DESC;
|
|
|
"""
|
|
|
result = db_client.select(sql)
|
|
|
if result:
|
|
@@ -217,25 +241,26 @@ def checkAccountInfo(db_client, gh_id):
|
|
|
}
|
|
|
|
|
|
|
|
|
-def updateSingleJob(db_client, gh_id):
|
|
|
+def update_single_account(db_client, sub_item):
|
|
|
"""
|
|
|
|
|
|
+ :param sub_item:
|
|
|
:param db_client:
|
|
|
- :param gh_id:
|
|
|
:return:
|
|
|
"""
|
|
|
- account_info = checkAccountInfo(db_client, gh_id)
|
|
|
+ gh_id = sub_item['ghId']
|
|
|
+ account_info = check_account_info(db_client, gh_id)
|
|
|
account_name = account_info['account_name']
|
|
|
update_time = account_info['update_time']
|
|
|
- updateEachAccount(
|
|
|
+ update_each_account(
|
|
|
db_client=db_client,
|
|
|
- gh_id=gh_id,
|
|
|
+ item=sub_item,
|
|
|
account_name=account_name,
|
|
|
latest_update_time=update_time
|
|
|
)
|
|
|
|
|
|
|
|
|
-def checkSingleAccount(db_client, account_item):
|
|
|
+def check_single_account(db_client, account_item):
|
|
|
"""
|
|
|
校验每个账号是否更新
|
|
|
:param db_client:
|
|
@@ -243,21 +268,21 @@ def checkSingleAccount(db_client, account_item):
|
|
|
:return: True / False
|
|
|
"""
|
|
|
gh_id = account_item['ghId']
|
|
|
- account_type = account_item['type']
|
|
|
+ account_type = account_item['account_type']
|
|
|
today_str = datetime.today().strftime("%Y-%m-%d")
|
|
|
today_date_time = datetime.strptime(today_str, "%Y-%m-%d")
|
|
|
today_timestamp = today_date_time.timestamp()
|
|
|
sql = f"""
|
|
|
- select updateTime
|
|
|
- from official_articles_v2
|
|
|
- where ghId = '{gh_id}'
|
|
|
- order by updateTime
|
|
|
- desc;
|
|
|
+ SELECT updateTime
|
|
|
+ FROM {ARTICLE_TABLE}
|
|
|
+ WHERE ghId = '{gh_id}'
|
|
|
+ ORDER BY updateTime
|
|
|
+ DESC;
|
|
|
"""
|
|
|
try:
|
|
|
latest_update_time = db_client.select(sql)[0][0]
|
|
|
# 判断该账号当天发布的文章是否被收集
|
|
|
- if account_type == "订阅号":
|
|
|
+ if account_type in {0, 1}:
|
|
|
if int(latest_update_time) > int(today_timestamp):
|
|
|
return True
|
|
|
else:
|
|
@@ -272,31 +297,31 @@ def checkSingleAccount(db_client, account_item):
|
|
|
return False
|
|
|
|
|
|
|
|
|
-def updateJob():
|
|
|
+def update_job():
|
|
|
"""
|
|
|
更新任务
|
|
|
:return:
|
|
|
"""
|
|
|
db_client = PQMySQL()
|
|
|
- sub_accounts, server_accounts = getAccounts()
|
|
|
+ sub_accounts, server_accounts = get_accounts()
|
|
|
s_count = 0
|
|
|
f_count = 0
|
|
|
for sub_item in tqdm(sub_accounts):
|
|
|
try:
|
|
|
- updateSingleJob(db_client, sub_item['ghId'])
|
|
|
+ update_single_account(db_client, sub_item)
|
|
|
s_count += 1
|
|
|
time.sleep(5)
|
|
|
except Exception as e:
|
|
|
f_count += 1
|
|
|
log(
|
|
|
task="updatePublishedMsgDaily",
|
|
|
- function="updateJob",
|
|
|
+ function="update_job",
|
|
|
message="单个账号文章更新失败, 报错信息是: {}".format(e),
|
|
|
status="fail",
|
|
|
)
|
|
|
log(
|
|
|
task="updatePublishedMsgDaily",
|
|
|
- function="updateJob",
|
|
|
+ function="update_job",
|
|
|
message="订阅号更新完成",
|
|
|
data={
|
|
|
"success": s_count,
|
|
@@ -316,30 +341,30 @@ def updateJob():
|
|
|
|
|
|
for sub_item in tqdm(server_accounts):
|
|
|
try:
|
|
|
- updateSingleJob(db_client, sub_item['ghId'])
|
|
|
+ update_single_account(db_client, sub_item)
|
|
|
time.sleep(5)
|
|
|
except Exception as e:
|
|
|
print(e)
|
|
|
|
|
|
|
|
|
-def checkJob():
|
|
|
+def check_job():
|
|
|
"""
|
|
|
校验任务
|
|
|
:return:
|
|
|
"""
|
|
|
db_client = PQMySQL()
|
|
|
- sub_accounts, server_accounts = getAccounts()
|
|
|
+ sub_accounts, server_accounts = get_accounts()
|
|
|
fail_list = []
|
|
|
# account_list = sub_accounts + server_accounts
|
|
|
account_list = sub_accounts
|
|
|
# check and rework if fail
|
|
|
for sub_item in tqdm(account_list):
|
|
|
- res = checkSingleAccount(db_client, sub_item)
|
|
|
+ res = check_single_account(db_client, sub_item)
|
|
|
if not res:
|
|
|
- updateSingleJob(db_client, sub_item['ghId'])
|
|
|
+ update_single_account(db_client, sub_item)
|
|
|
# check whether success and bot if fails
|
|
|
for sub_item in tqdm(account_list):
|
|
|
- res = checkSingleAccount(db_client, sub_item)
|
|
|
+ res = check_single_account(db_client, sub_item)
|
|
|
if not res:
|
|
|
fail_list.append(sub_item)
|
|
|
if fail_list:
|
|
@@ -363,16 +388,10 @@ def job_with_thread(job_func):
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
- schedule.every().day.at("20:50").do(job_with_thread, updateJob)
|
|
|
+ schedule.every().day.at("20:50").do(job_with_thread, update_job)
|
|
|
|
|
|
- schedule.every().day.at("21:45").do(job_with_thread, checkJob)
|
|
|
+ schedule.every().day.at("21:45").do(job_with_thread, check_job)
|
|
|
|
|
|
while True:
|
|
|
schedule.run_pending()
|
|
|
time.sleep(1)
|
|
|
- # log(
|
|
|
- # task="updatePublishedMsgDaily",
|
|
|
- # function="main",
|
|
|
- # message="更新公众号文章信息任务正常执行"
|
|
|
- # )
|
|
|
-
|