""" @author: luojunhui """ import jieba.analyse import pandas as pd from .model_init import models class ParamProcess(object): """ 处理 params, 继承 models """ def __init__(self): self.model_v1 = models.model_v1 self.model_v2 = models.model_v2 self.label_encoder = models.label_encoder self.features_v1 = ["channel", "type", "title"] self.features_v2 = ["channel", "out_user_id", "mode", "out_play_cnt", "out_like_cnt", "out_share_cnt", "title", "lop", "duration"] async def title_to_tags(self, features): """ process video title to tags and transform features_json_to_dataFrame :param features: :return: """ title = features['title'] if title: title = title.strip() title_tags = list(jieba.analyse.textrank(title, topK=3)) if title_tags: for i in range(3): try: features['tag{}'.format(i + 1)] = title_tags[i] except: features['tag_{}'.format(i + 1)] = None else: features['tag1'] = None features['tag2'] = None features['tag3'] = None df = pd.DataFrame([features]) df = df.drop('title', axis=1) return df async def predict_score(self, version, features): """ 预测 :param version: 模型版本 :param features: 视频被 label_encoder 之后的features :return: data """ match version: case "v1": result = self.model_v1(features) result = list(result) if result: obj = { "vision": "v1", "score": result[0], "benchmark": 0.06, "is_good_video": 1 if result[0] > 0.06 else 0 } else: obj = { "vision": "v1", "score": None, "benchmark": 0.06, "is_good_video": 0 } return { "code": 0, "message": "success", "data": obj } case "v2": result = self.model_v2.predict(features) result = list(result) if result: obj = { "vision": "v2", "score": result[0], "benchmark": 0.3, "is_good_video": 1 if result[0] > 0.3 else 0 } else: obj = { "vision": "v2", "score": None, "benchmark": 0.3, "is_good_video": 0 } return { "code": 0, "message": "success", "data": obj } async def process_label(self, params): """ 处理类别 features 和 float features :param params: 接收到的参数 :return: 转化好的类别特征的 dataframe """ version = params['version'] features = params['features'] features = await self.title_to_tags(features) match version: case "v1": # 全部转化为类别 str_column = [ "channel", "type", "tag1", "tag2", "tag3" ] for key in str_column: features[key] = self.label_encoder.fit_transform(features[key]) return version, features case "v2": float_column = ["out_play_cnt", "out_like_cnt", "out_share_cnt", "lop", "duration"] str_column = ["channel", "mode", "out_user_id", "tag1", "tag2", "tag3"] for key in float_column: features[key] = pd.to_numeric(features[key], errors="coerce") for key in str_column: features[key] = self.label_encoder.fit_transform(features[key]) return version, features async def process(self, params): """ 处理 :param params: :return: """ # check params v = params.get("version") if v == "v1": features = params.get("features") if len(features) != 3: return { "code": 1, "message": "参数错误,v1,features长度应该是 3,传参长度是{}".format(len(features)), "data": None } for feature in self.features_v1: if feature in features: continue else: return { "code": 1, "message": "参数错误, 缺少参数{}".format(feature), "data": None } if v == "v2": features = params.get("features") if len(features) != 9: return { "code": 1, "message": "参数错误,v2,features长度应该是 9,传参长度是{}".format(len(features)), "data": None } for feature in self.features_v2: if feature in features: continue else: return { "code": 1, "message": "参数错误, 缺少参数{}".format(feature), "data": None } else: return { "code": 1, "message": "参数错误,version 应该是 v1 or v2, 传参是{}".format(v), "data": None } try: version, features = await self.process_label(params) except Exception as e: return { "code": 2, "message": "系统错误,定位在 process_label, 报错内容是{}:".format(e), "data": None } try: res = await self.predict_score(version, features) return res except Exception as e: return { "code": 2, "message": "系统异常, 定位在 predict_score, 报错是{}:".format(e), "data": None }