Browse Source

处理爬虫模型

罗俊辉 1 year ago
parent
commit
547b3bd13f
1 changed file with 5 additions and 2 deletions
  1. process_data.py (+5 -2)

+ 5 - 2
process_data.py

@@ -78,6 +78,8 @@ class DataProcessor(object):
             case "spider":
                 if item['type'] == "spider":
                     item_features = [item[i] for i in spider_features]
+                else:
+                    return None, None
         keywords_textrank = self.title_processor(video_id)
         if keywords_textrank:
             for i in range(3):
@@ -135,8 +137,9 @@ class DataProcessor(object):
             y_list = []
             for video_obj in tqdm(x_data):
                 our_label, features = self.generate_train_label(video_obj, y_data, c)
-                x_list.append(features)
-                y_list.append(our_label)
+                if features:
+                    x_list.append(features)
+                    y_list.append(our_label)
             with open("data/produce_data/x_data_{}_{}_{}_{}.json".format(c, self.flag, dt, self.c), "w") as f1:
                 f1.write(json.dumps(x_list, ensure_ascii=False))