| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270 |
- import logging
- from typing import List, Dict, Any
- from model.automation_provide_job import SaveFilterConditionParam, SearchFilterConfigItem
# Logging setup: basicConfig configures the root logger with level ERROR
# as a side effect of importing this module.
logging.basicConfig(level=logging.ERROR)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# ==================== Core utility class ====================
class AutoProvideUtil:
    """Convert filter configuration into search parameters, rule strings and features.

    The class is a stateless namespace: every method is a ``classmethod`` or
    ``staticmethod``. Configuration items expose ``key``, ``operator`` and
    ``value`` attributes; keys and operators are Chinese display names that
    are translated through the lookup tables below.
    """

    # Config key -> condition type code (mirrors the Java configKeyAndTypeMap).
    CONFIG_KEY_AND_TYPE_MAP = {
        "点赞量": 3,
        "发布量": 4,
        "收藏量": 7,
        "评论量": 9,
        "转发量": 27,
        "内容发布时间": 12,
        "发布时间": 25,
        "视频时长": 26,
        "观众年龄50+占比": 128,
        "观众年龄50+TGI": 129,
        "观众性别男性占比": 130,
        "观众性别男性TGI": 131,
        "观众性别女性占比": 132,
        "观众性别女性TGI": 133,
        "视频时长(秒)": 134,
        "分享量/点赞量": 138,
    }

    # Config key -> rule variable name (mirrors the Java configKeyAndRuleKeyMap).
    CONFIG_KEY_AND_RULE_KEY_MAP = {
        "点赞量": "likeCnt",
        "视频时长(秒)": "videoDuration_s",
        "观众年龄50+占比": "audienceAge50Rate",
        "观众年龄50+TGI": "audienceAge50TGI",
        "分享量/点赞量": "shareDivLink",
    }

    # Operator display name -> numeric code returned by operator_convert.
    _OPERATOR_CODE_MAP = {
        "大于": 1,
        "等于": 2,
        "小于": 3,
        "包含": 4,
        "不包含": 5,
        "介于": 6,
    }

    # Operator display name -> comparison symbol returned by operator_convert_str.
    _OPERATOR_SYMBOL_MAP = {
        "大于": ">",
        "等于": "==",
        "小于": "<",
        "大于等于": ">=",
        "小于等于": "<=",
        "不等于": "!=",
    }

    @staticmethod
    def _clean_field(raw: Any) -> str:
        """Return *raw* as a stripped string; ``None`` becomes the empty string."""
        return "" if raw is None else str(raw).strip()

    @staticmethod
    def _to_config_items(config_jsons: List[Dict[str, str]]) -> List["SearchFilterConfigItem"]:
        """Build one SearchFilterConfigItem per dict; missing fields default to ""."""
        return [
            SearchFilterConfigItem(
                key=item.get("key", ""),
                operator=item.get("operator", ""),
                value=item.get("value", ""),
            )
            for item in config_jsons
        ]

    @classmethod
    def _usable_fields(cls, config_items: List[Any], key_map: Dict[str, Any]) -> List[tuple]:
        """Return cleaned ``(key, operator, value)`` tuples for the usable items.

        An item is usable when all three fields are non-empty after stripping
        and its key is present in *key_map*; every other item is skipped.
        """
        usable = []
        for config in config_items:
            key = cls._clean_field(config.key)
            operator_str = cls._clean_field(config.operator)
            value = cls._clean_field(config.value)
            if key and operator_str and value and key in key_map:
                usable.append((key, operator_str, value))
        return usable

    @classmethod
    def parse_apollo_config(
        cls, config_jsons: List[Dict[str, str]]
    ) -> List["SaveFilterConditionParam"]:
        """Parse Apollo config dicts into a list of SaveFilterConditionParam.

        Args:
            config_jsons: Dicts with ``key``, ``operator`` and ``value`` entries.

        Returns:
            The parsed conditions. An empty list is returned for empty/``None``
            input, and also when parsing fails (the failure is logged).
        """
        # Nothing to parse: return early instead of failing inside the try
        # block and logging a misleading error.
        if not config_jsons:
            return []
        try:
            return cls.parse_apollo_config_items(cls._to_config_items(config_jsons))
        except Exception:
            # Best-effort boundary: a malformed config must not break the caller.
            logger.exception("parse apollo config json error: %s", config_jsons)
            return []

    @classmethod
    def parse_apollo_config_items(
        cls, config_items: List["SearchFilterConfigItem"]
    ) -> List["SaveFilterConditionParam"]:
        """Turn config items into SaveFilterConditionParam objects.

        Items with an empty field or a key missing from
        ``CONFIG_KEY_AND_TYPE_MAP`` are skipped. An operator that
        ``operator_convert`` does not know is stored as ``-1``.
        """
        if not config_items:
            return []
        return [
            SaveFilterConditionParam(
                condition_type=cls.CONFIG_KEY_AND_TYPE_MAP[key],
                operator=cls.operator_convert(operator_str),
                data=[value],
            )
            for key, operator_str, value in cls._usable_fields(
                config_items, cls.CONFIG_KEY_AND_TYPE_MAP
            )
        ]

    @staticmethod
    def operator_convert(operator: str) -> int:
        """Map an operator display name to its numeric code, or -1 if unknown."""
        return AutoProvideUtil._OPERATOR_CODE_MAP.get(operator, -1)

    @classmethod
    def parse_filter_map_to_rule_str(cls, config_jsons: List[Dict[str, str]]) -> str:
        """Convert filter config dicts into a rule string.

        Returns:
            Conditions joined with ``&&``; the empty string for empty input or
            when conversion fails (the failure is logged).
        """
        if not config_jsons:
            return ""
        try:
            return cls.parse_filter_config_to_rule_str(cls._to_config_items(config_jsons))
        except Exception:
            # Best-effort boundary: includes unsupported operators raised below.
            logger.exception("parse filter config json error: %s", config_jsons)
            return ""

    @classmethod
    def parse_filter_config_to_rule_str(
        cls, config_items: List["SearchFilterConfigItem"]
    ) -> str:
        """Convert config items into a rule string such as ``likeCnt>1000&&...``.

        Items with an empty field or a key missing from
        ``CONFIG_KEY_AND_RULE_KEY_MAP`` are skipped.

        Raises:
            RuntimeError: If a usable item carries an unsupported operator.
        """
        if not config_items:
            return ""
        conditions = [
            f"{cls.CONFIG_KEY_AND_RULE_KEY_MAP[key]}{cls.operator_convert_str(operator_str)}{value}"
            for key, operator_str, value in cls._usable_fields(
                config_items, cls.CONFIG_KEY_AND_RULE_KEY_MAP
            )
        ]
        return "&&".join(conditions)

    @staticmethod
    def operator_convert_str(operator: str) -> str:
        """Map an operator display name to its comparison symbol.

        Raises:
            RuntimeError: If *operator* is not a supported operator name.
        """
        symbols = AutoProvideUtil._OPERATOR_SYMBOL_MAP
        if operator not in symbols:
            raise RuntimeError(f"不支持的操作符: {operator}")
        return symbols[operator]

    @classmethod
    def extract_content_rule_feature(
        cls, content_detail: Dict[str, Any], fans_portrait: Dict[str, Any]
    ) -> Dict[str, float]:
        """Collect rule features from a content detail and a fans portrait payload.

        Returns:
            A new dict holding whichever of ``likeCnt``, ``videoDuration_s``,
            ``shareCnt``, ``shareDivLink``, ``audienceAge50Rate`` and
            ``audienceAge50TGI`` could be extracted.
        """
        context = {}
        cls.extract_content_detail_feature(context, content_detail)
        cls.extract_fans_portrait_feature(context, fans_portrait)
        return context

    @classmethod
    def extract_content_detail_feature(
        cls, context: Dict[str, float], content_detail: Dict[str, Any]
    ) -> None:
        """Write content-detail features from ``content_detail["data"]`` into *context*.

        Fields that are absent or ``None`` are skipped. Values that ``float()``
        cannot convert still raise ``ValueError``/``TypeError``.
        """
        if not content_detail:
            return
        data = content_detail.get("data", {})
        if not data:
            return

        # Like count.
        like_raw = data.get("like_count")
        if like_raw is not None:
            context["likeCnt"] = float(like_raw)

        # Video duration, read from the first entry of video_url_list.
        videos = data.get("video_url_list")
        if isinstance(videos, list) and videos:
            first_video = videos[0]
            if isinstance(first_video, dict) and first_video.get("video_duration") is not None:
                context["videoDuration_s"] = float(first_video["video_duration"])

        # Share/like ratio. Only the denominator has to be non-zero; a numeric
        # share count of 0 is valid data and yields a ratio of 0.0. Other falsy
        # share values (missing, None, "") are still treated as "no data".
        share_raw = data.get("share_count")
        if like_raw and (share_raw or share_raw == 0):
            context["shareCnt"] = float(share_raw)
            context["shareDivLink"] = cls.safe_div(share_raw, like_raw)

    @staticmethod
    def _percent_to_float(raw: Any) -> float:
        """Parse a value such as ``"25%"`` or ``25`` into a float.

        Missing, ``None`` and empty values are read as ``"0%"``.
        """
        return float(str(raw or "0%").replace("%", ""))

    @classmethod
    def extract_fans_portrait_feature(
        cls, context: Dict[str, float], fans_portrait: Dict[str, Any]
    ) -> None:
        """Write the 50+ audience age features from ``fans_portrait["data"]`` into *context*.

        ``audienceAge50Rate`` is the ``percentage`` value divided by 100;
        ``audienceAge50TGI`` is the ``preference`` value with any ``%`` removed.
        """
        if not fans_portrait:
            return
        data = fans_portrait.get("data", {})
        if not data:
            return

        # NOTE(review): the bucket key is "50-" although the feature is named
        # 50+; presumably that is the upstream label -- confirm with the API.
        age_info = data.get("年龄") or {}
        age_50_plus = age_info.get("50-") or {}
        if not age_50_plus:
            return

        context["audienceAge50Rate"] = cls._percent_to_float(age_50_plus.get("percentage")) / 100
        context["audienceAge50TGI"] = cls._percent_to_float(age_50_plus.get("preference"))

    @staticmethod
    def safe_div(numerator: float, denominator: float) -> float:
        """Return ``numerator / denominator`` as floats, or 0.0 when dividing by zero."""
        try:
            return float(numerator) / float(denominator)
        except ZeroDivisionError:
            return 0.0
# ==================== Usage example ====================
if __name__ == "__main__":
    # 1. Parse an Apollo-style config into filter parameters.
    demo_config = [
        {"key": "点赞量", "operator": "大于", "value": "1000"},
        {"key": "视频时长(秒)", "operator": "小于", "value": "60"},
    ]
    parsed_conditions = AutoProvideUtil.parse_apollo_config(demo_config)
    print("解析后的过滤参数:")
    for condition in parsed_conditions:
        print(
            f"类型: {condition.condition_type}, 操作符: {condition.operator}, 值: {condition.data}"
        )

    # 2. Convert the same config into a rule string.
    demo_rule = AutoProvideUtil.parse_filter_map_to_rule_str(demo_config)
    print(f"\n规则字符串: {demo_rule}")

    # 3. Extract rule features from sample payloads.
    demo_content_detail = {
        "data": {
            "like_count": 2000.0,
            "share_count": 500.0,
            "video_url_list": [{"video_duration": 45.0}],
        }
    }
    demo_fans_portrait = {
        "data": {
            "年龄": {"50-": {"percentage": "25%", "preference": "120%"}},
        }
    }
    extracted = AutoProvideUtil.extract_content_rule_feature(
        demo_content_detail, demo_fans_portrait
    )
    print("\n提取的特征:")
    for feature_name, feature_value in extracted.items():
        print(f"{feature_name}: {feature_value}")
|