|
@@ -197,17 +197,20 @@ class AllProcess(object):
|
|
|
df = []
|
|
|
for line in tqdm(dt_list):
|
|
|
title = line[0]
|
|
|
- title_tags = list(jieba.analyse.textrank(title, topK=4))
|
|
|
- temp = list(line)
|
|
|
- if title_tags:
|
|
|
- for i in range(4):
|
|
|
- try:
|
|
|
- temp.append(title_tags[i])
|
|
|
- except:
|
|
|
- temp.append(None)
|
|
|
- df.append(temp[1:])
|
|
|
- else:
|
|
|
- continue
|
|
|
+ try:
|
|
|
+ title_tags = list(jieba.analyse.textrank(title, topK=4))
|
|
|
+ temp = list(line)
|
|
|
+ if title_tags:
|
|
|
+ for i in range(4):
|
|
|
+ try:
|
|
|
+ temp.append(title_tags[i])
|
|
|
+ except:
|
|
|
+ temp.append(None)
|
|
|
+ df.append(temp[1:])
|
|
|
+ else:
|
|
|
+ continue
|
|
|
+ except Exception as e:
|
|
|
+ print("title is empty\t", e)
|
|
|
df = pd.DataFrame(df, columns=self.all_features)
|
|
|
df.to_json(des_path, orient='records')
|
|
|
|