123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196 |
- """
- @author: luojunhui
- """
- import jieba.analyse
- import pandas as pd
- from .model_init import models
class ParamProcess(object):
    """
    Validate a scoring request, convert its features into a DataFrame and
    run the model selected by the request's ``version`` ("v1" or "v2").

    The two models and the label encoder are taken from the shared
    ``models`` object imported at module level.
    """

    # Number of tag columns (tag1..tagN) derived from the video title.
    _TAG_COUNT = 3
    # Score threshold per model version; a score above it marks a good video.
    _BENCHMARKS = {"v1": 0.06, "v2": 0.3}

    def __init__(self):
        self.model_v1 = models.model_v1
        self.model_v2 = models.model_v2
        self.label_encoder = models.label_encoder
        # Feature names every request of the given version must provide.
        self.features_v1 = ["channel", "type", "title"]
        self.features_v2 = ["channel", "out_user_id", "mode", "out_play_cnt", "out_like_cnt", "out_share_cnt", "title",
                            "lop", "duration"]

    @staticmethod
    def _error(code, message):
        """Build the failure envelope shared by every error path."""
        return {"code": code, "message": message, "data": None}

    @staticmethod
    def _build_response(version, scores, benchmark):
        """Wrap the first predicted score (if any) in the success envelope."""
        if scores:
            score = scores[0]
            is_good_video = 1 if score > benchmark else 0
        else:
            # The model produced no prediction: report no score, not good.
            score = None
            is_good_video = 0
        return {
            "code": 0,
            "message": "success",
            "data": {
                # The key is spelled "vision" in the existing response
                # format; it is kept as-is for backward compatibility.
                "vision": version,
                "score": score,
                "benchmark": benchmark,
                "is_good_video": is_good_video,
            },
        }

    def _validate(self, params):
        """Return an error envelope for invalid params, or None when valid."""
        version = params.get("version")
        if version not in ("v1", "v2"):
            return self._error(1, "参数错误,version 应该是 v1 or v2, 传参是{}".format(version))
        required = self.features_v1 if version == "v1" else self.features_v2

        features = params.get("features")
        if features is None:
            # Without this guard len(None) below would raise an uncaught TypeError.
            return self._error(1, "参数错误, 缺少参数{}".format("features"))
        if len(features) != len(required):
            return self._error(
                1,
                "参数错误,{},features长度应该是 {},传参长度是{}".format(version, len(required), len(features)),
            )
        for name in required:
            if name not in features:
                return self._error(1, "参数错误, 缺少参数{}".format(name))
        return None

    async def title_to_tags(self, features):
        """
        Extract up to three keywords from the video title into the
        ``tag1``..``tag3`` entries and convert the features to a DataFrame.

        Missing tags (short, empty or absent title) are set to ``None``.
        ``features`` is updated in place with the tag entries.

        :param features: dict of raw features; must contain ``title``
        :return: single-row DataFrame of the features without the ``title`` column
        """
        title = features['title']
        title = title.strip() if title else ""
        # Skip keyword extraction entirely when there is no usable title.
        keywords = list(jieba.analyse.textrank(title, topK=self._TAG_COUNT)) if title else []
        for idx in range(self._TAG_COUNT):
            # Always write the same key ("tagN") whether or not a keyword
            # exists, so downstream column lookups never miss.
            features['tag{}'.format(idx + 1)] = keywords[idx] if idx < len(keywords) else None
        frame = pd.DataFrame([features])
        return frame.drop('title', axis=1)

    async def predict_score(self, version, features):
        """
        Run the model for ``version`` and wrap its first score in a response.

        :param version: model version, "v1" or "v2"
        :param features: the encoded feature DataFrame produced by process_label
        :return: success envelope with the score data, or None for an unknown version
        """
        if version == "v1":
            # NOTE(review): model_v1 is invoked directly while model_v2 goes
            # through .predict — confirm this difference is intentional.
            scores = list(self.model_v1(features))
            return self._build_response("v1", scores, self._BENCHMARKS["v1"])
        if version == "v2":
            scores = list(self.model_v2.predict(features))
            return self._build_response("v2", scores, self._BENCHMARKS["v2"])
        return None

    async def process_label(self, params):
        """
        Encode the categorical features and coerce the numeric ones.

        :param params: received request params with ``version`` and ``features``
        :return: tuple ``(version, features_dataframe)``; None for an unknown version
        """
        version = params['version']
        features = await self.title_to_tags(params['features'])
        if version == "v1":
            # v1 treats every column as categorical.
            float_column = []
            str_column = ["channel", "type", "tag1", "tag2", "tag3"]
        elif version == "v2":
            float_column = ["out_play_cnt", "out_like_cnt", "out_share_cnt", "lop", "duration"]
            str_column = ["channel", "mode", "out_user_id", "tag1", "tag2", "tag3"]
        else:
            return None
        for key in float_column:
            # Unparseable values become NaN instead of raising.
            features[key] = pd.to_numeric(features[key], errors="coerce")
        for key in str_column:
            # NOTE(review): fit_transform re-fits the encoder on this single
            # row on every call — confirm whether transform() on a
            # pre-fitted encoder was intended.
            features[key] = self.label_encoder.fit_transform(features[key])
        return version, features

    async def process(self, params):
        """
        Validate the request, prepare its features and return the prediction.

        :param params: dict with ``version`` ("v1"/"v2") and ``features``
        :return: response dict with ``code`` (0 success, 1 bad params,
                 2 internal error), ``message`` and ``data``
        """
        rejection = self._validate(params)
        if rejection is not None:
            return rejection

        try:
            version, features = await self.process_label(params)
        except Exception as e:
            return self._error(2, "系统错误,定位在 process_label, 报错内容是{}:".format(e))

        try:
            return await self.predict_score(version, features)
        except Exception as e:
            return self._error(2, "系统异常, 定位在 predict_score, 报错是{}:".format(e))
|