import logging
from typing import Any, Dict, List, Optional, Tuple

from model.automation_provide_job import SaveFilterConditionParam, SearchFilterConfigItem

# Configure logging.
logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)


# ==================== Core utility class ====================
class AutoProvideUtil:
    """Helpers that turn filter configuration into filter parameters and rule strings.

    The class is a stateless namespace: every method is a ``classmethod`` or
    ``staticmethod``. It covers three tasks:

    * converting ``{"key", "operator", "value"}`` configuration entries into
      ``SaveFilterConditionParam`` objects,
    * converting the same entries into an ``&&``-joined rule string, and
    * extracting numeric features from content-detail / fans-portrait payloads
      into a flat ``context`` dict keyed by rule key.
    """

    # Filter display name -> numeric condition type
    # (mirrors the Java ``configKeyAndTypeMap``).
    CONFIG_KEY_AND_TYPE_MAP = {
        "点赞量": 3,
        "发布量": 4,
        "收藏量": 7,
        "评论量": 9,
        "转发量": 27,
        "内容发布时间": 12,
        "发布时间": 25,
        "视频时长": 26,
        "观众年龄50+占比": 128,
        "观众年龄50+TGI": 129,
        "观众性别男性占比": 130,
        "观众性别男性TGI": 131,
        "观众性别女性占比": 132,
        "观众性别女性TGI": 133,
        "视频时长(秒)": 134,
        "分享量/点赞量": 138,
    }

    # Filter display name -> rule key used in rule strings and feature contexts
    # (mirrors the Java ``configKeyAndRuleKeyMap``).
    CONFIG_KEY_AND_RULE_KEY_MAP = {
        "点赞量": "likeCnt",
        "视频时长(秒)": "videoDuration_s",
        "观众年龄50+占比": "audienceAge50Rate",
        "观众年龄50+TGI": "audienceAge50TGI",
        "分享量/点赞量": "shareDivLink",
    }

    # Chinese operator label -> numeric operator code (see ``operator_convert``).
    # Built once here instead of on every call.
    _OPERATOR_CODE_MAP = {
        "大于": 1,
        "等于": 2,
        "小于": 3,
        "包含": 4,
        "不包含": 5,
        "介于": 6,
    }

    # Chinese operator label -> comparison symbol (see ``operator_convert_str``).
    # NOTE: this table and ``_OPERATOR_CODE_MAP`` intentionally keep the label
    # sets they had originally; they do not accept the same labels.
    _OPERATOR_SYMBOL_MAP = {
        "大于": ">",
        "等于": "==",
        "小于": "<",
        "大于等于": ">=",
        "小于等于": "<=",
        "不等于": "!=",
    }

    @staticmethod
    def _to_config_items(config_jsons: List[Dict[str, str]]) -> List[SearchFilterConfigItem]:
        """Build one ``SearchFilterConfigItem`` per dict, defaulting missing fields to ``""``."""
        return [
            SearchFilterConfigItem(
                key=item.get("key", ""),
                operator=item.get("operator", ""),
                value=item.get("value", ""),
            )
            for item in config_jsons
        ]

    @staticmethod
    def _clean_fields(config: SearchFilterConfigItem) -> Optional[Tuple[str, str, str]]:
        """Return the stripped ``(key, operator, value)`` of *config*.

        Returns ``None`` when any of the three fields is ``None``, empty, or
        whitespace-only, so callers can skip the entry.
        """
        # ``or ""`` treats a null field like a blank one instead of failing on
        # ``None.strip()``.
        key = (config.key or "").strip()
        operator_str = (config.operator or "").strip()
        value = (config.value or "").strip()
        if not (key and operator_str and value):
            return None
        return key, operator_str, value

    @classmethod
    def parse_apollo_config(cls, config_jsons: List[Dict[str, str]]) -> List[SaveFilterConditionParam]:
        """Parse Apollo configuration dicts into ``SaveFilterConditionParam`` objects.

        Args:
            config_jsons: Dicts with ``"key"``, ``"operator"`` and ``"value"``
                entries; missing entries default to ``""``.

        Returns:
            The parsed conditions. An empty list is returned for empty/``None``
            input, and also when parsing raises (the error is logged).
        """
        if not config_jsons:
            return []
        try:
            return cls.parse_apollo_config_items(cls._to_config_items(config_jsons))
        except Exception:
            # Best-effort boundary: log with traceback and fall back to "no filters".
            logger.exception("parse apollo config json error: %s", config_jsons)
            return []

    @classmethod
    def parse_apollo_config_items(
        cls, config_items: List[SearchFilterConfigItem]
    ) -> List[SaveFilterConditionParam]:
        """Convert ``SearchFilterConfigItem`` entries into filter parameters.

        Entries with a blank key/operator/value, or whose key is not in
        ``CONFIG_KEY_AND_TYPE_MAP``, are skipped. An operator label unknown to
        ``operator_convert`` is NOT skipped: the condition is emitted with
        operator code ``-1``.

        Args:
            config_items: Items to convert; may be empty or ``None``.

        Returns:
            One ``SaveFilterConditionParam`` per accepted item, in input order,
            each carrying the stripped value as a single-element ``data`` list.
        """
        save_filter_conditions: List[SaveFilterConditionParam] = []
        for config in config_items or []:
            fields = cls._clean_fields(config)
            if fields is None:
                continue
            key, operator_str, value = fields

            condition_type = cls.CONFIG_KEY_AND_TYPE_MAP.get(key)
            if condition_type is None:
                continue

            save_filter_conditions.append(
                SaveFilterConditionParam(
                    condition_type=condition_type,
                    operator=cls.operator_convert(operator_str),
                    data=[value],
                )
            )
        return save_filter_conditions

    @staticmethod
    def operator_convert(operator: str) -> int:
        """Map a Chinese operator label to its numeric code, or ``-1`` if unknown."""
        return AutoProvideUtil._OPERATOR_CODE_MAP.get(operator, -1)

    @classmethod
    def parse_filter_map_to_rule_str(cls, config_jsons: List[Dict[str, str]]) -> str:
        """Convert filter configuration dicts into a rule string.

        Args:
            config_jsons: Dicts with ``"key"``, ``"operator"`` and ``"value"``
                entries; missing entries default to ``""``.

        Returns:
            The rule string produced by ``parse_filter_config_to_rule_str``.
            ``""`` is returned for empty/``None`` input, and also when parsing
            raises (for example on an unsupported operator); the error is logged.
        """
        if not config_jsons:
            return ""
        try:
            return cls.parse_filter_config_to_rule_str(cls._to_config_items(config_jsons))
        except Exception:
            # Best-effort boundary: log with traceback and fall back to "no rule".
            logger.exception("parse filter config json error: %s", config_jsons)
            return ""

    @classmethod
    def parse_filter_config_to_rule_str(cls, config_items: List[SearchFilterConfigItem]) -> str:
        """Convert ``SearchFilterConfigItem`` entries into an ``&&``-joined rule string.

        Entries with a blank key/operator/value, or whose key is not in
        ``CONFIG_KEY_AND_RULE_KEY_MAP``, are skipped. Each accepted entry
        becomes ``<ruleKey><symbol><value>`` with no separators.

        Args:
            config_items: Items to convert; may be empty or ``None``.

        Returns:
            The joined conditions, or ``""`` when nothing is accepted.

        Raises:
            RuntimeError: If an accepted entry uses an operator label that
                ``operator_convert_str`` does not support.
        """
        condition_str_list: List[str] = []
        for config in config_items or []:
            fields = cls._clean_fields(config)
            if fields is None:
                continue
            key, operator_str, value = fields

            rule_key = cls.CONFIG_KEY_AND_RULE_KEY_MAP.get(key)
            if rule_key is None:
                continue

            symbol = cls.operator_convert_str(operator_str)
            condition_str_list.append(f"{rule_key}{symbol}{value}")
        return "&&".join(condition_str_list)

    @staticmethod
    def operator_convert_str(operator: str) -> str:
        """Map a Chinese operator label to its comparison symbol.

        Raises:
            RuntimeError: If *operator* is not a supported label.
        """
        symbol_map = AutoProvideUtil._OPERATOR_SYMBOL_MAP
        if operator not in symbol_map:
            raise RuntimeError(f"不支持的操作符: {operator}")
        return symbol_map[operator]

    @classmethod
    def extract_content_rule_feature(
        cls, content_detail: Dict[str, Any], fans_portrait: Dict[str, Any]
    ) -> Dict[str, float]:
        """Collect rule features from a content-detail and a fans-portrait payload.

        Returns:
            A new dict filled by ``extract_content_detail_feature`` and then
            ``extract_fans_portrait_feature``; features that cannot be derived
            are simply absent.
        """
        context: Dict[str, float] = {}
        cls.extract_content_detail_feature(context, content_detail)
        cls.extract_fans_portrait_feature(context, fans_portrait)
        return context

    @classmethod
    def extract_content_detail_feature(
        cls, context: Dict[str, float], content_detail: Dict[str, Any]
    ) -> None:
        """Add content-detail features to *context* in place.

        Reads ``content_detail["data"]`` and writes, when available:

        * ``likeCnt`` from ``like_count``,
        * ``videoDuration_s`` from ``video_url_list[0]["video_duration"]``,
        * ``shareCnt`` and ``shareDivLink`` (share / like) when both
          ``like_count`` and ``share_count`` are truthy.

        A field that is missing or ``None`` is skipped. Does nothing when the
        payload or its ``"data"`` entry is empty.
        """
        if not content_detail:
            return
        detail = content_detail.get("data")
        if not detail:
            return

        # Like count. A present-but-null value is treated like a missing one
        # rather than being passed to float().
        like_cnt = detail.get("like_count")
        if like_cnt is not None:
            context["likeCnt"] = float(like_cnt)

        # Video duration, taken from the first entry of the video list.
        video_url_list = detail.get("video_url_list")
        if isinstance(video_url_list, list) and video_url_list:
            first_video = video_url_list[0]
            if isinstance(first_video, dict):
                video_duration = first_video.get("video_duration")
                if video_duration is not None:
                    context["videoDuration_s"] = float(video_duration)

        # Share/like ratio. Both counts must be truthy, which also keeps a zero
        # like count out of the denominator.
        share_cnt = detail.get("share_count")
        if like_cnt and share_cnt:
            context["shareCnt"] = float(share_cnt)
            context["shareDivLink"] = cls.safe_div(share_cnt, like_cnt)

    @classmethod
    def extract_fans_portrait_feature(
        cls, context: Dict[str, float], fans_portrait: Dict[str, Any]
    ) -> None:
        """Add fans-portrait features to *context* in place.

        Reads ``fans_portrait["data"]["年龄"]["50-"]`` and writes
        ``audienceAge50Rate`` (``percentage`` with ``%`` removed, divided by
        100) and ``audienceAge50TGI`` (``preference`` with ``%`` removed). A
        missing, ``None`` or empty ``percentage``/``preference`` counts as
        ``"0%"``. Does nothing when any level of that path is missing or empty.

        Raises:
            ValueError: If a non-empty ``percentage``/``preference`` is not
                numeric once ``%`` is removed.
        """
        if not fans_portrait:
            return
        portrait = fans_portrait.get("data")
        if not portrait:
            return

        # NOTE(review): the bucket key is "50-" although the config labels say
        # "50+"; presumably this matches the upstream payload -- confirm.
        age_info = portrait.get("年龄") or {}
        age_50_plus = age_info.get("50-") or {}
        if not age_50_plus:
            return

        # Strip the percent sign before parsing.
        percentage_str = (age_50_plus.get("percentage") or "0%").replace("%", "")
        preference_str = (age_50_plus.get("preference") or "0%").replace("%", "")

        # Parse both values before writing so a bad value leaves context untouched.
        audience_age50_rate = float(percentage_str) / 100
        audience_age50_tgi = float(preference_str)
        context["audienceAge50Rate"] = audience_age50_rate
        context["audienceAge50TGI"] = audience_age50_tgi

    @staticmethod
    def safe_div(numerator: float, denominator: float) -> float:
        """Return ``numerator / denominator`` as floats, or ``0.0`` on division by zero."""
        try:
            return float(numerator) / float(denominator)
        except ZeroDivisionError:
            return 0.0


# ==================== Usage example ====================
def _run_examples() -> None:
    """Exercise the three main entry points with sample data and print the results."""
    # 1. Parse Apollo configuration.
    test_config = [
        {"key": "点赞量", "operator": "大于", "value": "1000"},
        {"key": "视频时长(秒)", "operator": "小于", "value": "60"},
    ]
    params = AutoProvideUtil.parse_apollo_config(test_config)
    print("解析后的过滤参数:")
    for param in params:
        print(f"类型: {param.condition_type}, 操作符: {param.operator}, 值: {param.data}")

    # 2. Convert the same configuration into a rule string.
    rule_str = AutoProvideUtil.parse_filter_map_to_rule_str(test_config)
    print(f"\n规则字符串: {rule_str}")

    # 3. Extract features.
    test_content_detail = {
        "data": {
            "like_count": 2000.0,
            "share_count": 500.0,
            "video_url_list": [{"video_duration": 45.0}],
        }
    }
    test_fans_portrait = {
        "data": {
            "年龄": {"50-": {"percentage": "25%", "preference": "120%"}}
        }
    }
    features = AutoProvideUtil.extract_content_rule_feature(test_content_detail, test_fans_portrait)
    print("\n提取的特征:")
    for k, v in features.items():
        print(f"{k}: {v}")


if __name__ == "__main__":
    _run_examples()