Browse Source

仅通过标题tag 分析全部数据

罗俊辉 1 year ago
parent
commit
4d59f5d750
1 changed file with 14 additions and 11 deletions
  1. 14 11
      data_process/process_data_for_lightgbm.py

+ 14 - 11
data_process/process_data_for_lightgbm.py

@@ -197,17 +197,20 @@ class AllProcess(object):
         df = []
         for line in tqdm(dt_list):
             title = line[0]
-            title_tags = list(jieba.analyse.textrank(title, topK=4))
-            temp = list(line)
-            if title_tags:
-                for i in range(4):
-                    try:
-                        temp.append(title_tags[i])
-                    except:
-                        temp.append(None)
-                df.append(temp[1:])
-            else:
-                continue
+            try:
+                title_tags = list(jieba.analyse.textrank(title, topK=4))
+                temp = list(line)
+                if title_tags:
+                    for i in range(4):
+                        try:
+                            temp.append(title_tags[i])
+                        except:
+                            temp.append(None)
+                    df.append(temp[1:])
+                else:
+                    continue
+            except Exception as e:
+                print("title is empty\t", e)
         df = pd.DataFrame(df, columns=self.all_features)
         df.to_json(des_path, orient='records')