|
@@ -20,21 +20,16 @@ class OutsideGzhArticlesManager:
|
|
self.denet_client.connect()
|
|
self.denet_client.connect()
|
|
self.feishu_bot_api = FeishuBotApi()
|
|
self.feishu_bot_api = FeishuBotApi()
|
|
|
|
|
|
- def process_illegal_article(
|
|
|
|
- self, account_name, title, reason, publish_timestamp, account_source
|
|
|
|
- ):
|
|
|
|
- self.feishu_bot_api.bot(
|
|
|
|
- title="文章违规告警",
|
|
|
|
- detail={
|
|
|
|
- "account_name": account_name,
|
|
|
|
- "title": title,
|
|
|
|
- "reason": reason,
|
|
|
|
- "publish_timestamp": publish_timestamp,
|
|
|
|
- "account_source": account_source,
|
|
|
|
- },
|
|
|
|
- env="dev"
|
|
|
|
|
|
def update_article_illegal_status(self, article_id, illegal_reason):
    """Flag one monitored article as illegal and record the reason.

    Issues a single parameterized UPDATE against
    ``outside_gzh_account_monitor`` through ``self.long_articles_client.save``.

    Args:
        article_id: value matched against the ``id`` column.
        illegal_reason: text stored in the ``illegal_reason`` column.

    Returns:
        None. Whatever ``save`` returns is discarded, as before.
    """
    # Plain string, not an f-string: nothing is interpolated here, every
    # value is bound by the client through the %s placeholders.
    update_query = """
        update outside_gzh_account_monitor
        set illegal_status = %s, illegal_reason = %s
        where id = %s and illegal_status = %s
    """
    # The guard must test illegal_status (not illegal_reason): the trailing 0
    # restricts the update to rows that are still unflagged, so an article
    # that was already marked keeps its first recorded reason.
    # NOTE(review): assumes 0 = not flagged / 1 = flagged, as implied by the
    # monitor's "illegal_status = 0" fetch filter - confirm against the schema.
    self.long_articles_client.save(
        query=update_query,
        params=(1, illegal_reason, article_id, 0),
    )
|
|
)
|
|
- return
|
|
|
|
|
|
|
|
|
|
|
|
class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
|
|
class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
|
|
@@ -59,7 +54,7 @@ class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
|
|
fetch_response = get_article_list_from_account(gh_id)
|
|
fetch_response = get_article_list_from_account(gh_id)
|
|
msg_list = fetch_response.get("data", {}).get("data", [])
|
|
msg_list = fetch_response.get("data", {}).get("data", [])
|
|
if msg_list:
|
|
if msg_list:
|
|
- for msg in msg_list[:1]:
|
|
|
|
|
|
+ for msg in tqdm(msg_list, desc=f"insert account {account['account_name']}"):
|
|
self.save_each_msg_to_db(msg, account)
|
|
self.save_each_msg_to_db(msg, account)
|
|
|
|
|
|
else:
|
|
else:
|
|
@@ -70,20 +65,31 @@ class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
|
|
detail_info = msg["AppMsg"]["DetailInfo"]
|
|
detail_info = msg["AppMsg"]["DetailInfo"]
|
|
app_msg_id = base_info["AppMsgId"]
|
|
app_msg_id = base_info["AppMsgId"]
|
|
create_timestamp = base_info["CreateTime"]
|
|
create_timestamp = base_info["CreateTime"]
|
|
- update_timestamp = base_info["UpdateTime"]
|
|
|
|
publish_type = base_info["Type"]
|
|
publish_type = base_info["Type"]
|
|
|
|
|
|
# insert each article
|
|
# insert each article
|
|
- for article in detail_info[:1]:
|
|
|
|
|
|
+ for article in detail_info:
|
|
link = article["ContentUrl"]
|
|
link = article["ContentUrl"]
|
|
article_detail = get_article_detail(link)
|
|
article_detail = get_article_detail(link)
|
|
response_code = article_detail["code"]
|
|
response_code = article_detail["code"]
|
|
if response_code == 25012:
|
|
if response_code == 25012:
|
|
illegal_reason = article_detail.get("msg")
|
|
illegal_reason = article_detail.get("msg")
|
|
# bot and return
|
|
# bot and return
|
|
|
|
+ self.feishu_bot_api.bot(
|
|
|
|
+ title="文章违规告警",
|
|
|
|
+ detail={
|
|
|
|
+ "account_name": article["account_name"],
|
|
|
|
+ "title": article['title'],
|
|
|
|
+ "reason": illegal_reason,
|
|
|
|
+ "publish_timestamp": create_timestamp,
|
|
|
|
+ "account_source": article["account_source"]
|
|
|
|
+ },
|
|
|
|
+ env="dev"
|
|
|
|
+ )
|
|
|
|
+
|
|
elif response_code == 0:
|
|
elif response_code == 0:
|
|
insert_query = f"""
|
|
insert_query = f"""
|
|
- insert into outside_gzh_account_monitor
|
|
|
|
|
|
+ insert ignore into outside_gzh_account_monitor
|
|
(account_name, gh_id, account_source, account_type, app_msg_id, publish_type, position, title, link,
|
|
(account_name, gh_id, account_source, account_type, app_msg_id, publish_type, position, title, link,
|
|
channel_content_id, crawler_timestamp, publish_timestamp)
|
|
channel_content_id, crawler_timestamp, publish_timestamp)
|
|
values
|
|
values
|
|
@@ -102,8 +108,6 @@ class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
|
|
article["Title"],
|
|
article["Title"],
|
|
link,
|
|
link,
|
|
article_detail["data"]["data"]["channel_content_id"],
|
|
article_detail["data"]["data"]["channel_content_id"],
|
|
- create_timestamp,
|
|
|
|
- update_timestamp,
|
|
|
|
int(time.time()),
|
|
int(time.time()),
|
|
int(article_detail["data"]["data"]["publish_timestamp"] / 1000),
|
|
int(article_detail["data"]["data"]["publish_timestamp"] / 1000),
|
|
),
|
|
),
|
|
@@ -112,12 +116,13 @@ class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
|
|
-class OutsideGzhArticlesMonitor(OutsideGzhArticlesCollector):
|
|
|
|
|
|
+class OutsideGzhArticlesMonitor(OutsideGzhArticlesManager):
|
|
|
|
|
|
def fetch_article_list_to_check(self):
|
|
def fetch_article_list_to_check(self):
|
|
publish_timestamp_threshold = int(time.time()) - 7 * 24 * 3600
|
|
publish_timestamp_threshold = int(time.time()) - 7 * 24 * 3600
|
|
fetch_query = f"""
|
|
fetch_query = f"""
|
|
- select account_name, gh_id, account_source, account_type, title, link
|
|
|
|
|
|
+ select id, account_name, gh_id, account_source, account_type,
|
|
|
|
+ title, link, from_unixtime(publish_timestamp) as publish_date
|
|
from outside_gzh_account_monitor
|
|
from outside_gzh_account_monitor
|
|
where illegal_status = 0 and publish_timestamp > {publish_timestamp_threshold};
|
|
where illegal_status = 0 and publish_timestamp > {publish_timestamp_threshold};
|
|
"""
|
|
"""
|
|
@@ -134,13 +139,19 @@ class OutsideGzhArticlesMonitor(OutsideGzhArticlesCollector):
|
|
response_code = article_detail["code"]
|
|
response_code = article_detail["code"]
|
|
if response_code == 25012:
|
|
if response_code == 25012:
|
|
illegal_reason = article_detail.get("msg")
|
|
illegal_reason = article_detail.get("msg")
|
|
- self.process_illegal_article(
|
|
|
|
- account_name=article["account_name"],
|
|
|
|
- title=article["title"],
|
|
|
|
- reason=illegal_reason,
|
|
|
|
- publish_timestamp=article["publish_timestamp"],
|
|
|
|
- account_source=article["account_source"],
|
|
|
|
|
|
+ self.feishu_bot_api.bot(
|
|
|
|
+ title="文章违规告警",
|
|
|
|
+ detail={
|
|
|
|
+ "account_name": article["account_name"],
|
|
|
|
+ "title": article['title'],
|
|
|
|
+ "reason": illegal_reason,
|
|
|
|
+ "publish_date": article["publish_date"],
|
|
|
|
+ "account_source": article["account_source"]
|
|
|
|
+ },
|
|
|
|
+ env="dev"
|
|
)
|
|
)
|
|
|
|
+ article_id = article["id"]
|
|
|
|
+ self.update_article_illegal_status(article_id, illegal_reason)
|
|
else:
|
|
else:
|
|
return
|
|
return
|
|
|
|
|
|
@@ -151,7 +162,5 @@ class OutsideGzhArticlesMonitor(OutsideGzhArticlesCollector):
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
- collector = OutsideGzhArticlesCollector()
|
|
|
|
- accounts = collector.fetch_outside_account_list()
|
|
|
|
- for account_ in tqdm(accounts[1:2]):
|
|
|
|
- collector.fetch_each_account(account_)
|
|
|
|
|
|
+ monitor = OutsideGzhArticlesMonitor()
|
|
|
|
+ monitor.deal()
|