|
@@ -6,8 +6,7 @@
|
|
|
import time
|
|
import time
|
|
|
|
|
|
|
|
from tqdm import tqdm
|
|
from tqdm import tqdm
|
|
|
-
|
|
|
|
|
-from applications import WeixinSpider, Functions
|
|
|
|
|
|
|
+from applications import WeixinSpider, Functions, llm_sensitivity
|
|
|
|
|
|
|
|
# 常量
|
|
# 常量
|
|
|
ACCOUNT_GOOD_STATUS = 1
|
|
ACCOUNT_GOOD_STATUS = 1
|
|
@@ -65,9 +64,12 @@ class weixinCategory(object):
|
|
|
show_like_count = show_stat.get("show_like_count", DEFAULT_LIKE_COUNT)
|
|
show_like_count = show_stat.get("show_like_count", DEFAULT_LIKE_COUNT)
|
|
|
insert_sql = f"""
|
|
insert_sql = f"""
|
|
|
insert into crawler_meta_article
|
|
insert into crawler_meta_article
|
|
|
- (platform, mode, category, out_account_id, article_index, title, link, read_cnt, like_cnt, description, publish_time, crawler_time, status, unique_index)
|
|
|
|
|
|
|
+ (
|
|
|
|
|
+ platform, mode, category, out_account_id, article_index, title, link, read_cnt, like_cnt,
|
|
|
|
|
+ description, publish_time, crawler_time, status, unique_index, llm_sensitivity
|
|
|
|
|
+ )
|
|
|
VALUES
|
|
VALUES
|
|
|
- (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
|
|
|
|
+ (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
"""
|
|
"""
|
|
|
self.db_client_lam.update(
|
|
self.db_client_lam.update(
|
|
|
sql=insert_sql,
|
|
sql=insert_sql,
|
|
@@ -86,6 +88,7 @@ class weixinCategory(object):
|
|
|
int(time.time()),
|
|
int(time.time()),
|
|
|
DEFAULT_ARTICLE_STATUS,
|
|
DEFAULT_ARTICLE_STATUS,
|
|
|
self.function.generateGzhId(obj["ContentUrl"]),
|
|
self.function.generateGzhId(obj["ContentUrl"]),
|
|
|
|
|
+ obj.get("llm_sensitivity", -1)
|
|
|
),
|
|
),
|
|
|
)
|
|
)
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
@@ -121,6 +124,18 @@ class weixinCategory(object):
|
|
|
msg_list = response.get("data", {}).get("data")
|
|
msg_list = response.get("data", {}).get("data")
|
|
|
if msg_list:
|
|
if msg_list:
|
|
|
last_article_in_this_msg = msg_list[-1]
|
|
last_article_in_this_msg = msg_list[-1]
|
|
|
|
|
+
|
|
|
|
|
+ article_titles = []
|
|
|
|
|
+ for msg in msg_list:
|
|
|
|
|
+ for article in msg['AppMsg']['DetailInfo']:
|
|
|
|
|
+ article_titles.append(article['Title'])
|
|
|
|
|
+ sensitive_results = llm_sensitivity.check_titles(article_titles, True)
|
|
|
|
|
+ for msg in msg_list:
|
|
|
|
|
+ for article in msg['AppMsg']['DetailInfo']:
|
|
|
|
|
+ sensitive_hit = sensitive_results.get(article['Title'], None)
|
|
|
|
|
+ if sensitive_hit:
|
|
|
|
|
+ article['llm_sensitivity'] = sensitive_hit['hit_rule']
|
|
|
|
|
+
|
|
|
self.insert_data_into_db(
|
|
self.insert_data_into_db(
|
|
|
gh_id=gh_id, category=category, article_list=msg_list
|
|
gh_id=gh_id, category=category, article_list=msg_list
|
|
|
)
|
|
)
|