Browse Source

generate label for mysql

罗俊辉 1 year ago
parent
commit
db42c79fe7
1 changed file with 11 additions and 2 deletions
  1. process_data.py (+11 -2)

+ 11 - 2
process_data.py

@@ -221,8 +221,17 @@ class SpiderProcess(object):
             title_tags = list(jieba.analyse.textrank(title, topK=3))
             temp.append(lop)
             temp.append(duration)
-            for i in range(3):
-                temp.append(title_tags[i] if title_tags[i] else None)
+            if title_tags:
+                for i in range(3):
+                    try:
+                        temp.append(title_tags[i])
+                    except:
+                        temp.append(None)
+            else:
+                temp.append(None)
+                temp.append(None)
+                temp.append(None)
+
             df.append(temp[1:])
         df = pd.DataFrame(df, columns=['title', 'channel', 'out_user_id', 'mode', 'out_play_cnt', 'out_like_cnt', 'out_share_cnt', 'lop', 'duration', 'tag1', 'tag2', 'tag3'])
         df.to_excel("data/train_data/spider_data_{}.xlsx".format(datetime.datetime.today().strftime("y%m%d")))