|
@@ -1,7 +1,8 @@
|
|
|
"""
|
|
|
@author: luojunhui
|
|
|
"""
|
|
|
import asyncio

import jieba.analyse
import pandas as pd

from .model_init import models
|
|
|
|
|
@@ -10,10 +11,35 @@ class ParamProcess(object):
|
|
|
"""
|
|
|
Process params; inherits models.
|
|
|
"""
|
|
|
+
|
|
|
def __init__(self):
    """
    Bind the shared model objects onto this instance.

    Copies ``model_v1``, ``model_v2`` and ``label_encoder`` from the
    module-level ``models`` object so the other methods can reach them
    through ``self``.
    """
    self.model_v1 = models.model_v1
    self.model_v2 = models.model_v2
    # Stored as ``label_encoder`` (not ``layer_encoder``): that is the
    # attribute name process_label reads.
    self.label_encoder = models.label_encoder
|
|
|
+
|
|
|
async def title_to_tags(self, features):
    """
    Turn the video title into keyword tags and return the features as a
    DataFrame without the ``title`` column.

    The keys ``tag_1``, ``tag_2`` and ``tag_3`` are always written into
    ``features`` (the mapping is modified in place); a slot for which
    TextRank produced no keyword is set to ``None``.

    :param features: dict of feature values; ``title`` may be missing,
        ``None`` or empty, in which case all three tags are ``None``
    :return: pandas DataFrame built from ``features`` (including the tag
        columns) with the ``title`` column removed
    """
    title = (features.get('title') or '').strip()
    # TextRank is only run when there is text left after stripping.
    keywords = list(jieba.analyse.textrank(title, topK=3)) if title else []
    # Pad to three entries so every tag key is always present.
    keywords += [None] * (3 - len(keywords))
    for slot, keyword in enumerate(keywords[:3], start=1):
        features['tag_{}'.format(slot)] = keyword

    # pandas refuses a dict made only of scalars ("must pass an index"),
    # so such a dict is wrapped in a list to get a single-row frame.
    if any(pd.api.types.is_list_like(value) for value in features.values()):
        df = pd.DataFrame(features)
    else:
        df = pd.DataFrame([features])
    # drop() returns a new frame, so its result has to be the return value.
    return df.drop(columns='title', errors='ignore')
|
|
|
|
|
|
async def predict_score(self, version, features):
|
|
|
"""
|
|
@@ -24,9 +50,12 @@ class ParamProcess(object):
|
|
|
"""
|
|
|
match version:
|
|
|
case "v1":
|
|
|
- return await self.model_v1.predict(features)
|
|
|
+ result = await self.model_v1(features)
|
|
|
+ print(result)
|
|
|
+ return result
|
|
|
case "v2":
|
|
|
- return await self.model_v2.predict(features)
|
|
|
+ result = await self.model_v2.predict(features)
|
|
|
+ return result
|
|
|
|
|
|
async def process_label(self, params):
|
|
|
"""
|
|
@@ -36,15 +65,27 @@ class ParamProcess(object):
|
|
|
"""
|
|
|
version = params['version']
|
|
|
features = params['features']
|
|
|
+ features = await self.title_to_tags(features)
|
|
|
match version:
|
|
|
case "v1":
|
|
|
# 全部转化为类别
|
|
|
- print("all to string cate")
|
|
|
- # features = []
|
|
|
+ str_column = [
|
|
|
+ "channel",
|
|
|
+ "type",
|
|
|
+ "tag1",
|
|
|
+ "tag2",
|
|
|
+ "tag3"
|
|
|
+ ]
|
|
|
+ for key in str_column:
|
|
|
+ features[key] = self.label_encoder.fit_transform(features[key])
|
|
|
return version, features
|
|
|
case "v2":
|
|
|
- print("all to float cate")
|
|
|
- # features = []
|
|
|
+ float_column = ["out_play_cnt", "out_like_cnt", "out_share_cnt", "lop", "duration"]
|
|
|
+ str_column = ["channel", "mode", "out_user_id", "tag1", "tag2", "tag3"]
|
|
|
+ for key in float_column:
|
|
|
+ features[key] = pd.to_numeric(features[key], errors="coerce")
|
|
|
+ for key in str_column:
|
|
|
+ features[key] = self.label_encoder.fit_transform(features[key])
|
|
|
return version, features
|
|
|
|
|
|
async def process(self, params):
    """
    Run the whole pipeline for one request: prepare the features, then
    score them.

    :param params: request mapping, handed unchanged to ``process_label``
    :return: the value returned by ``predict_score`` for the prepared
        features
    """
    version, features = await self.process_label(params)
    # Existing trace of the prepared input, written to stdout.
    print(version, features)
    return await self.predict_score(version, features)
|