|
@@ -22,7 +22,7 @@ def monitor_article(article):
|
|
|
"""
|
|
|
校验单篇文章是否
|
|
|
"""
|
|
|
- gh_id, account_name, title, url, wx_sn, publish_date = article
|
|
|
+ gh_id, account_name, title, url, wx_sn, publish_date, root_source_id_list = article
|
|
|
try:
|
|
|
response = spider.get_article_text(url, is_cache=False)
|
|
|
response_code = response["code"]
|
|
@@ -30,13 +30,13 @@ def monitor_article(article):
|
|
|
error_detail = response.get("msg")
|
|
|
insert_sql = f"""
|
|
|
INSERT IGNORE INTO illegal_articles
|
|
|
- (gh_id, account_name, title, wx_sn, publish_date, illegal_reason)
|
|
|
+ (gh_id, account_name, title, wx_sn, publish_date, illegal_reason, root_source_id_list)
|
|
|
VALUES
|
|
|
- (%s, %s, %s, %s, %s, %s);
|
|
|
+ (%s, %s, %s, %s, %s, %s, %s);
|
|
|
"""
|
|
|
affected_rows = long_articles_db_client.save(
|
|
|
query=insert_sql,
|
|
|
- params=(gh_id, account_name, title, wx_sn, publish_date, error_detail),
|
|
|
+ params=(gh_id, account_name, title, wx_sn, publish_date, error_detail, root_source_id_list),
|
|
|
)
|
|
|
if affected_rows:
|
|
|
bot(
|
|
@@ -71,7 +71,7 @@ def get_article_list(run_date):
|
|
|
int(datetime.strptime(run_date, "%Y-%m-%d").timestamp()) - const.MONITOR_PERIOD
|
|
|
)
|
|
|
select_sql = f"""
|
|
|
- SELECT ghId, accountName, title, ContentUrl, wx_sn, from_unixtime(publish_timestamp) AS publish_timestamp
|
|
|
+ SELECT ghId, accountName, title, ContentUrl, wx_sn, from_unixtime(publish_timestamp) AS publish_timestamp, root_source_id_list
|
|
|
FROM official_articles_v2
|
|
|
WHERE publish_timestamp >= {monitor_start_timestamp}
|
|
|
ORDER BY publish_timestamp DESC;
|