|
@@ -47,13 +47,17 @@ class DataProcessor(object):
|
|
|
"out_collection_cnt",
|
|
|
]
|
|
|
item_features = [item[i] for i in userful_features]
|
|
|
- keywords_textrank, keywords_tf = self.title_processor(video_id)
|
|
|
- if keywords_tf and keywords_textrank:
|
|
|
- item_features.append(",".join(keywords_textrank))
|
|
|
- item_features.append(",".join(keywords_tf))
|
|
|
+ keywords_textrank = self.title_processor(video_id)
|
|
|
+ if keywords_textrank:
|
|
|
+ for i in range(3):
|
|
|
+ try:
|
|
|
+ item_features.append(keywords_textrank[i])
|
|
|
+ except:
|
|
|
+ item_features.append(None)
|
|
|
else:
|
|
|
item_features.append(None)
|
|
|
item_features.append(None)
|
|
|
+ item_features.append(None)
|
|
|
label_dt = generate_label_date(dt)
|
|
|
label_obj = y_ori_data.get(label_dt, {}).get(video_id)
|
|
|
if label_obj:
|
|
@@ -72,11 +76,10 @@ class DataProcessor(object):
|
|
|
try:
|
|
|
title = self.client.select(sql)
|
|
|
keywords_textrank = jieba.analyse.textrank(title, topK=3)
|
|
|
- keywords_tfidf = jieba.analyse.extract_tags(title, topK=3)
|
|
|
- return list(keywords_textrank), list(keywords_tfidf)
|
|
|
+ return list(keywords_textrank)
|
|
|
except Exception as e:
|
|
|
print(video_id, "\t", e)
|
|
|
- return [], []
|
|
|
+ return []
|
|
|
|
|
|
def producer(self):
|
|
|
"""
|