automation_provide_util.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. import logging
  2. from typing import List, Dict, Any
  3. from model.automation_provide_job import SaveFilterConditionParam, SearchFilterConfigItem
# Configure logging: basicConfig sets the root logger level to ERROR at import
# time; `logger` is the module-level logger used by the utilities below.
logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)
  7. # ==================== 核心工具类 ====================
  8. class AutoProvideUtil:
  9. # 配置键与类型的映射(对应Java的configKeyAndTypeMap)
  10. CONFIG_KEY_AND_TYPE_MAP = {
  11. "点赞量": 3,
  12. "发布量": 4,
  13. "收藏量": 7,
  14. "评论量": 9,
  15. "转发量": 27,
  16. "内容发布时间": 12,
  17. "发布时间": 25,
  18. "视频时长": 26,
  19. "观众年龄50+占比": 128,
  20. "观众年龄50+TGI": 129,
  21. "观众性别男性占比": 130,
  22. "观众性别男性TGI": 131,
  23. "观众性别女性占比": 132,
  24. "观众性别女性TGI": 133,
  25. "视频时长(秒)": 134,
  26. "分享量/点赞量": 138
  27. }
  28. # 配置键与规则键的映射(对应Java的configKeyAndRuleKeyMap)
  29. CONFIG_KEY_AND_RULE_KEY_MAP = {
  30. "点赞量": "likeCnt",
  31. "视频时长(秒)": "videoDuration_s",
  32. "观众年龄50+占比": "audienceAge50Rate",
  33. "观众年龄50+TGI": "audienceAge50TGI",
  34. "分享量/点赞量": "shareDivLink"
  35. }
  36. @classmethod
  37. def parse_apollo_config(cls, config_jsons: List[Dict[str, str]]) -> List[SaveFilterConditionParam]:
  38. """解析Apollo配置,转换为SaveFilterConditionParam列表"""
  39. try:
  40. # 将字典列表转换为SearchFilterConfigItem列表
  41. config_items = [
  42. SearchFilterConfigItem(
  43. key=item.get("key", ""),
  44. operator=item.get("operator", ""),
  45. value=item.get("value", "")
  46. )
  47. for item in config_jsons
  48. ]
  49. return cls.parse_apollo_config_items(config_items)
  50. except Exception as e:
  51. logger.error(f"parse apollo config json error: {config_jsons}", exc_info=e)
  52. return []
  53. @classmethod
  54. def parse_apollo_config_items(cls, config_items: List[SearchFilterConfigItem]) -> List[SaveFilterConditionParam]:
  55. """解析SearchFilterConfigItem列表,生成过滤参数"""
  56. save_filter_conditions = []
  57. if not config_items:
  58. return save_filter_conditions
  59. for config in config_items:
  60. key = config.key.strip()
  61. operator_str = config.operator.strip()
  62. value = config.value.strip()
  63. # 空值检查
  64. if not all([key, operator_str, value]):
  65. continue
  66. # 检查key是否在映射中
  67. if key not in cls.CONFIG_KEY_AND_TYPE_MAP:
  68. continue
  69. # 构建过滤参数对象
  70. save_filter_condition = SaveFilterConditionParam(
  71. condition_type=cls.CONFIG_KEY_AND_TYPE_MAP[key],
  72. operator=cls.operator_convert(operator_str),
  73. data=[value]
  74. )
  75. save_filter_conditions.append(save_filter_condition)
  76. return save_filter_conditions
  77. @staticmethod
  78. def operator_convert(operator: str) -> int:
  79. """将中文操作符转换为对应的数字编码"""
  80. operator_map = {
  81. "大于": 1,
  82. "等于": 2,
  83. "小于": 3,
  84. "包含": 4,
  85. "不包含": 5,
  86. "介于": 6
  87. }
  88. return operator_map.get(operator, -1)
  89. @classmethod
  90. def parse_filter_map_to_rule_str(cls, config_jsons: List[Dict[str, str]]) -> str:
  91. """将过滤配置映射转换为规则字符串"""
  92. if not config_jsons:
  93. return ""
  94. try:
  95. # 转换为SearchFilterConfigItem列表
  96. config_items = [
  97. SearchFilterConfigItem(
  98. key=item.get("key", ""),
  99. operator=item.get("operator", ""),
  100. value=item.get("value", "")
  101. )
  102. for item in config_jsons
  103. ]
  104. return cls.parse_filter_config_to_rule_str(config_items)
  105. except Exception as e:
  106. logger.error(f"parse filter config json error: {config_jsons}", exc_info=e)
  107. return ""
  108. @classmethod
  109. def parse_filter_config_to_rule_str(cls, config_items: List[SearchFilterConfigItem]) -> str:
  110. """将SearchFilterConfigItem列表转换为规则字符串"""
  111. if not config_items:
  112. return ""
  113. condition_str_list = []
  114. for config in config_items:
  115. key = config.key.strip()
  116. operator_str = config.operator.strip()
  117. value = config.value.strip()
  118. # 空值检查
  119. if not all([key, operator_str, value]):
  120. continue
  121. # 检查key是否在规则映射中
  122. if key not in cls.CONFIG_KEY_AND_RULE_KEY_MAP:
  123. continue
  124. # 拼接条件字符串
  125. rule_key = cls.CONFIG_KEY_AND_RULE_KEY_MAP[key]
  126. operator = cls.operator_convert_str(operator_str)
  127. condition_str_list.append(f"{rule_key}{operator}{value}")
  128. return "&&".join(condition_str_list)
  129. @staticmethod
  130. def operator_convert_str(operator: str) -> str:
  131. """将中文操作符转换为符号操作符"""
  132. operator_map = {
  133. "大于": ">",
  134. "等于": "==",
  135. "小于": "<",
  136. "大于等于": ">=",
  137. "小于等于": "<=",
  138. "不等于": "!="
  139. }
  140. if operator not in operator_map:
  141. raise RuntimeError(f"不支持的操作符: {operator}")
  142. return operator_map[operator]
  143. @classmethod
  144. def extract_content_rule_feature(cls, content_detail: Dict[str, Any], fans_portrait: Dict[str, Any]) -> Dict[str, float]:
  145. """提取内容规则特征"""
  146. context = {}
  147. cls.extract_content_detail_feature(context, content_detail)
  148. cls.extract_fans_portrait_feature(context, fans_portrait)
  149. return context
  150. @classmethod
  151. def extract_content_detail_feature(cls, context: Dict[str, float], content_detail: Dict[str, Any]) -> None:
  152. """提取内容详情特征"""
  153. if not content_detail:
  154. return
  155. content_detail_data = content_detail.get("data", {})
  156. if not content_detail_data:
  157. return
  158. # 提取点赞量
  159. if "like_count" in content_detail_data:
  160. like_cnt = content_detail_data.get("like_count", 0.0)
  161. context["likeCnt"] = float(like_cnt)
  162. # 提取视频时长
  163. video_url_list = content_detail_data.get("video_url_list", [])
  164. if video_url_list and isinstance(video_url_list, list) and len(video_url_list) > 0:
  165. first_video = video_url_list[0]
  166. if isinstance(first_video, dict) and "video_duration" in first_video:
  167. video_duration = first_video.get("video_duration", 0.0)
  168. context["videoDuration_s"] = float(video_duration)
  169. # 提取分享量/点赞量比值
  170. like_cnt = content_detail_data.get("like_count", 0.0)
  171. share_cnt = content_detail_data.get("share_count", 0.0)
  172. if like_cnt and share_cnt: # 避免除以0
  173. context["shareCnt"] = float(share_cnt)
  174. context["shareDivLink"] = cls.safe_div(share_cnt, like_cnt)
  175. @classmethod
  176. def extract_fans_portrait_feature(cls, context: Dict[str, float], fans_portrait: Dict[str, Any]) -> None:
  177. """提取粉丝画像特征"""
  178. if not fans_portrait:
  179. return
  180. fans_portrait_data = fans_portrait.get("data", {})
  181. if not fans_portrait_data:
  182. return
  183. # 提取50+年龄画像
  184. age_info = fans_portrait_data.get("年龄", {})
  185. age_50_plus = age_info.get("50-", {})
  186. if age_50_plus:
  187. # 处理百分比字符串
  188. percentage_str = age_50_plus.get("percentage", "0%").replace("%", "")
  189. preference_str = age_50_plus.get("preference", "0%").replace("%", "")
  190. audience_age50_rate = float(percentage_str) / 100
  191. audience_age50_tgi = float(preference_str)
  192. context["audienceAge50Rate"] = audience_age50_rate
  193. context["audienceAge50TGI"] = audience_age50_tgi
  194. @staticmethod
  195. def safe_div(numerator: float, denominator: float) -> float:
  196. """安全除法,避免除以0"""
  197. try:
  198. return float(numerator) / float(denominator)
  199. except ZeroDivisionError:
  200. return 0.0
  201. # ==================== 使用示例 ====================
  202. if __name__ == "__main__":
  203. # 1. 测试解析Apollo配置
  204. test_config = [
  205. {"key": "点赞量", "operator": "大于", "value": "1000"},
  206. {"key": "视频时长(秒)", "operator": "小于", "value": "60"}
  207. ]
  208. params = AutoProvideUtil.parse_apollo_config(test_config)
  209. print("解析后的过滤参数:")
  210. for param in params:
  211. print(f"类型: {param.condition_type}, 操作符: {param.operator}, 值: {param.data}")
  212. # 2. 测试转换为规则字符串
  213. rule_str = AutoProvideUtil.parse_filter_map_to_rule_str(test_config)
  214. print(f"\n规则字符串: {rule_str}")
  215. # 3. 测试提取特征
  216. test_content_detail = {
  217. "data": {
  218. "like_count": 2000.0,
  219. "share_count": 500.0,
  220. "video_url_list": [{"video_duration": 45.0}]
  221. }
  222. }
  223. test_fans_portrait = {
  224. "data": {
  225. "年龄": {"50-": {"percentage": "25%", "preference": "120%"}}
  226. }
  227. }
  228. features = AutoProvideUtil.extract_content_rule_feature(test_content_detail, test_fans_portrait)
  229. print("\n提取的特征:")
  230. for k, v in features.items():
  231. print(f"{k}: {v}")