@@ -221,8 +221,17 @@ class SpiderProcess(object):
title_tags = list(jieba.analyse.textrank(title, topK=3))
temp.append(lop)
temp.append(duration)
- for i in range(3):
- temp.append(title_tags[i] if title_tags[i] else None)
+ if title_tags:
+ for i in range(3):
+ try:
+ temp.append(title_tags[i])
+ except:
+ temp.append(None)
+ else:
+
df.append(temp[1:])
df = pd.DataFrame(df, columns=['title', 'channel', 'out_user_id', 'mode', 'out_play_cnt', 'out_like_cnt', 'out_share_cnt', 'lop', 'duration', 'tag1', 'tag2', 'tag3'])
df.to_excel("data/train_data/spider_data_{}.xlsx".format(datetime.datetime.today().strftime("y%m%d")))