|
@@ -8,7 +8,7 @@ import time
|
|
|
from tqdm import tqdm
|
|
|
from pymysql.cursors import DictCursor
|
|
|
|
|
|
-from applications import WeixinSpider, Functions, llm_sensitivity, log
|
|
|
+from applications import WeixinSpider, Functions, log
|
|
|
from coldStartTasks.filter import article_crawler_duplicate_filter
|
|
|
from config import apolloConfig
|
|
|
|
|
@@ -158,18 +158,6 @@ class weixinCategory(object):
|
|
|
print(e)
|
|
|
return success_records
|
|
|
|
|
|
- def update_article_sensitive_status(self, category, unique_index, status):
|
|
|
- """
|
|
|
- 更新文章敏感状态
|
|
|
- :return:
|
|
|
- """
|
|
|
- update_sql = f"""
|
|
|
- update crawler_meta_article
|
|
|
- set llm_sensitivity = %s
|
|
|
- where category = %s and unique_index = %s;
|
|
|
- """
|
|
|
- self.db_client_lam.update(sql=update_sql, params=(status, category, unique_index))
|
|
|
-
|
|
|
def update_latest_account_timestamp(self, gh_id):
|
|
|
"""
|
|
|
更新账号的最新时间戳
|
|
@@ -242,18 +230,6 @@ class weixinCategory(object):
|
|
|
print("success")
|
|
|
except Exception as e:
|
|
|
print("fail because of {}".format(e))
|
|
|
- success_titles = [x['title'] for x in success_records]
|
|
|
- if success_titles:
|
|
|
- try:
|
|
|
- sensitive_results = llm_sensitivity.check_titles(success_titles)
|
|
|
- for record, sensitive_result in zip(success_records, sensitive_results):
|
|
|
- self.update_article_sensitive_status(
|
|
|
- category=category,
|
|
|
- unique_index=record['unique_index'],
|
|
|
- status=sensitive_result['hit_rule']
|
|
|
- )
|
|
|
- except Exception as e:
|
|
|
- print("failed to update sensitive status: {}".format(e))
|
|
|
|
|
|
def deal(self, category_list, date_str):
|
|
|
"""
|