Kaynağa Gözat

Update weixinCategoryCrawler: fix sensitivity check

StrayWarrior 5 ay önce
ebeveyn
işleme
b9114b2266

+ 2 - 0
applications/llm_sensitivity.py

@@ -95,6 +95,8 @@ def check_titles(titles, retun_map=False):
             json_data = json.loads(res)
         except Exception as e:
             print(e)
+        if isinstance(json_data, dict):
+            json_data = [json_data]
         if not json_data:
             for title in current_batch:
                 try:

+ 14 - 10
coldStartTasks/crawler/weixinCategoryCrawler.py

@@ -170,6 +170,7 @@ class weixinCategory(object):
         :return:
         """
         for category in category_list:
+            success_records = []
             account_list = self.get_account_list(category)
             for account in tqdm(account_list):
                 try:
@@ -179,23 +180,26 @@ class weixinCategory(object):
                         timestamp = int(account['latest_timestamp'].timestamp())
                     except Exception as e:
                         timestamp = DEFAULT_TIMESTAMP
-                    success_records = self.update_each_account(
+                    success_records += self.update_each_account(
                         gh_id=gh_id,
                         category=category,
                         latest_time_stamp=timestamp
                     )
-                    success_titles = [x['title'] for x in success_records]
-                    if success_titles:
-                        sensitive_results = llm_sensitivity.check_titles(success_titles)
-                        for record, sensitive_result in zip(success_records, sensitive_results):
-                            self.update_article_sensitive_status(
-                                category=category,
-                                unique_index=record['unique_index'],
-                                status=sensitive_result['hit_rule']
-                            )
                     print("success")
                 except Exception as e:
                     print("fail because of {}".format(e))
+            success_titles = [x['title'] for x in success_records]
+            if success_titles:
+                try:
+                    sensitive_results = llm_sensitivity.check_titles(success_titles)
+                    for record, sensitive_result in zip(success_records, sensitive_results):
+                        self.update_article_sensitive_status(
+                            category=category,
+                            unique_index=record['unique_index'],
+                            status=sensitive_result['hit_rule']
+                        )
+                except Exception as e:
+                    print("failed to update sensitive status: {}".format(e))
 
     def deal_accounts(self, account_list):
         """