functions.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. """
  2. @author: luojunhui
  3. """
  4. import jieba.analyse
  5. import pandas as pd
  6. from .model_init import models
  7. class ParamProcess(object):
  8. """
  9. 处理 params, 继承 models
  10. """
  11. def __init__(self):
  12. self.model_v1 = models.model_v1
  13. self.model_v2 = models.model_v2
  14. self.label_encoder = models.label_encoder
  15. async def title_to_tags(self, features):
  16. """
  17. process video title to tags and transform features_json_to_dataFrame
  18. :param features:
  19. :return:
  20. """
  21. title = features['title']
  22. if title:
  23. title = title.strip()
  24. title_tags = list(jieba.analyse.textrank(title, topK=3))
  25. if title_tags:
  26. for i in range(3):
  27. try:
  28. features['tag{}'.format(i + 1)] = title_tags[i]
  29. except:
  30. features['tag_{}'.format(i + 1)] = None
  31. else:
  32. features['tag1'] = None
  33. features['tag2'] = None
  34. features['tag3'] = None
  35. df = pd.DataFrame([features])
  36. # print("data_frame", df.columns)
  37. df = df.drop('title', axis=1)
  38. return df
  39. async def predict_score(self, version, features):
  40. """
  41. 预测
  42. :param version: 模型版本
  43. :param features: 视频被 label_encoder 之后的features
  44. :return: score: 返回的分数
  45. """
  46. match version:
  47. case "v1":
  48. result = self.model_v1(features)
  49. result = list(result)
  50. if result:
  51. obj = {
  52. "vision": "v1",
  53. "score": result[0],
  54. "benchmark": 0.06,
  55. "is_good_video": 1 if result[0] > 0.06 else 0
  56. }
  57. else:
  58. obj = {
  59. "vision": "v1",
  60. "score": None,
  61. "benchmark": 0.06,
  62. "is_good_video": 0
  63. }
  64. return obj
  65. case "v2":
  66. result = self.model_v2.predict(features)
  67. result = list(result)
  68. if result:
  69. obj = {
  70. "vision": "v2",
  71. "score": result[0],
  72. "benchmark": 0.3,
  73. "is_good_video": 1 if result[0] > 0.3 else 0
  74. }
  75. else:
  76. obj = {
  77. "vision": "v2",
  78. "score": None,
  79. "benchmark": 0.3,
  80. "is_good_video": 0
  81. }
  82. return obj
  83. async def process_label(self, params):
  84. """
  85. 处理类别 features 和 float features
  86. :param params: 接收到的参数
  87. :return:
  88. """
  89. version = params['version']
  90. features = params['features']
  91. features = await self.title_to_tags(features)
  92. match version:
  93. case "v1":
  94. # 全部转化为类别
  95. str_column = [
  96. "channel",
  97. "type",
  98. "tag1",
  99. "tag2",
  100. "tag3"
  101. ]
  102. for key in str_column:
  103. features[key] = self.label_encoder.fit_transform(features[key])
  104. return version, features
  105. case "v2":
  106. float_column = ["out_play_cnt", "out_like_cnt", "out_share_cnt", "lop", "duration"]
  107. str_column = ["channel", "mode", "out_user_id", "tag1", "tag2", "tag3"]
  108. for key in float_column:
  109. features[key] = pd.to_numeric(features[key], errors="coerce")
  110. for key in str_column:
  111. features[key] = self.label_encoder.fit_transform(features[key])
  112. return version, features
  113. async def process(self, params):
  114. """
  115. 处理
  116. :param params:
  117. :return:
  118. """
  119. version, features = await self.process_label(params)
  120. # print(version, features)
  121. return await self.predict_score(version, features)