
feat: add scripts

zhaohaipeng 2 weeks ago
parent commit
4ffc0aaa1a

+ 193 - 0
client/CrawlerClient.py

@@ -0,0 +1,193 @@
+import json
+import logging
+from typing import List, Dict, Optional, Any
+
+import requests
+
+from model.automation_provide_job import DouYinSearchConfig, ChannelSearchAndDetailDTO
+
+# ==================== Configuration & enum definitions ====================
+# Logging configuration
+# logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+# logger = logging.getLogger(__name__)
+
+
+class CrawlerClient:
+    def __init__(self):
+        self.base_url = "http://crawapi.piaoquantv.com"
+        self.base_ip = "http://8.217.190.241:8888"
+
+    def keyword_search(self, search_config: DouYinSearchConfig) -> Dict[str, Any]:
+        """
+        Overload: keyword search driven by a DouYinSearchConfig object
+        """
+        return self.keyword_search_base(
+            keywords=search_config.search_content,
+            contentType=search_config.content_type,
+            sortType=search_config.sort_type,
+            publishTime=search_config.publish_time,
+            duration=search_config.duration,
+            cursor=search_config.cursor,
+            account_id=search_config.account_id
+        )
+
+    def keyword_search_base(
+            self,
+            keywords: str,
+            contentType: Optional[str] = None,
+            sortType: Optional[str] = None,
+            publishTime: Optional[str] = None,
+            duration: Optional[str] = None,
+            cursor: Optional[str] = None,
+            account_id: Optional[int] = None,
+    ) -> Dict[str, Any]:
+        """
+        Base keyword search method (mirrors the Java keywordSearch overloads)
+        """
+        if not keywords:
+            raise RuntimeError("keywords is not empty")
+
+        # Build the API URL
+        url = f"{self.base_url}/crawler/dou_yin/keyword"
+
+        # Apply defaults
+        content_type = contentType if contentType else "视频"
+        sort_type = sortType if sortType else "综合排序"
+        publish_time = publishTime if publishTime else "不限"
+        duration_val = duration if duration else "不限"
+        account_id = account_id if account_id else 98
+
+        # Build the request payload
+        param_json = {
+            "keyword": keywords,
+            "content_type": content_type,
+            "sort_type": sort_type,
+            "publish_time": publish_time,
+            "duration": duration_val,
+            "cursor": cursor if cursor else "",
+            "accountId": account_id
+        }
+
+        # Send the POST request and return the parsed response
+        return self._post(url, param_json)
+
+    def dou_yin_keywords_search(
+            self,
+            search_config: DouYinSearchConfig,
+            is_need_content_detail: bool = False,
+            is_need_fans_portrait: bool = False
+    ) -> List[ChannelSearchAndDetailDTO]:
+        """
+        Douyin keyword search; returns the full result list including content details / fan portraits
+        """
+        search_result_json = {}
+        try:
+            # Run the keyword search
+            search_result_json = self.keyword_search(search_config)
+        except Exception:
+            # logger.error(f"Keyword {search_config.search_content} search failed", exc_info=True)
+            pass
+
+        # Parse the search result list
+        search_result = search_result_json.get("data", [])
+        if not isinstance(search_result, list) or len(search_result) == 0:
+            return []
+
+        # logger.info(f"关键词 {search_config.search_content} 搜索视频数: {len(search_result)}")
+
+        # Build the result list
+        result = []
+        for search_json in search_result:
+            dto = ChannelSearchAndDetailDTO()
+
+            # Extract basic fields
+            channel_content_id = search_json.get("aweme_id", "")
+            author_info = search_json.get("author", {})
+            channel_account_id = author_info.get("sec_uid", "")
+
+            # Initialize detail and portrait placeholders
+            content_detail = {}
+            fans_portrait = {}
+
+            try:
+                if is_need_content_detail and channel_content_id:
+                    content_detail = self.get_content_detail_by_id(channel_content_id)
+            except Exception:
+                # logger.error(f"Failed to fetch content detail for external video {channel_content_id}", exc_info=True)
+                pass
+            try:
+                if is_need_fans_portrait and channel_account_id:
+                    fans_portrait = self.get_fans_portrait_by_id(channel_account_id)
+            except Exception:
+                # logger.error(f"Failed to fetch the fan portrait for external account {channel_account_id}", exc_info=True)
+                pass
+            # Populate DTO fields
+            dto.search_content = search_config.search_content
+            dto.search_result = search_json
+            dto.channel_content_id = channel_content_id
+            dto.channel_account_id = channel_account_id
+            dto.content_detail = content_detail
+            dto.fans_portrait = fans_portrait
+
+            result.append(dto)
+
+        return result
+
+    def get_content_detail_by_id(self, content_id: str) -> Dict[str, Any]:
+        """根据内容ID获取详情(无缓存)"""
+        if not content_id:
+            return {}
+
+        url = f"{self.base_ip}/crawler/dou_yin/detail"
+        param_json = {"content_id": content_id}
+        return self._post(url, param_json)
+
+    def get_fans_portrait_by_id(self, account_id: str) -> Dict[str, Any]:
+        """根据账号ID获取粉丝画像(无缓存)"""
+        if not account_id:
+            return {}
+
+        url = f"{self.base_url}/crawler/dou_yin/re_dian_bao/account_fans_portrait"
+        param_json = {
+            "account_id": account_id,
+            "need_province": False,
+            "need_city": False,
+            "need_city_level": False,
+            "need_gender": False,
+            "need_age": True,
+            "need_phone_brand": False,
+            "need_phone_price": False
+        }
+        return self._post(url, param_json)
+
+    @classmethod
+    def _post(cls, url: str, params: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Generic POST helper (mirrors the Java private post method)
+        """
+        # logger.info(f"invoke crawler api request. url:{url}, request:{params}")
+
+        # Send the POST request (json= sets the application/json content type)
+        response_str = requests.post(url, json=params).text
+        response_str = response_str if response_str else "{}"
+
+        # Parse the response
+        try:
+            resp_json = json.loads(response_str)
+        except json.JSONDecodeError:
+            # logger.error(f"响应JSON解析失败: {response_str}")
+            resp_json = {}
+
+        # logger.info(f"invoke crawler api result. respJson: {resp_json}")
+
+        # Check the response code
+        if resp_json.get("code") != "0":
+            raise RuntimeError(resp_json.get("msg", "API call failed"))
+
+        # Return the data field
+        return resp_json.get("data", {})
+
+
+# ==================== Usage example ====================
+if __name__ == "__main__":
+    pass
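+    # Minimal usage sketch (illustrative only): assumes the crawler API behind
+    # base_url is reachable and that account 98 is a valid default search account.
+    client = CrawlerClient()
+    config = DouYinSearchConfig(
+        search_content="健康养生",  # example keyword, not from the original code
+        sort_type="综合排序",
+        account_id=98,
+    )
+    dtos = client.dou_yin_keywords_search(config, is_need_content_detail=True, is_need_fans_portrait=True)
+    for dto in dtos:
+        print(dto.channel_content_id, dto.channel_account_id)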

+ 0 - 0
model/__init__.py


+ 39 - 0
model/automation_provide_job.py

@@ -0,0 +1,39 @@
+from dataclasses import dataclass
+from typing import Dict, Optional, Any, List
+
+
+@dataclass
+class DouYinSearchConfig:
+    """抖音搜索配置类"""
+    search_content: str  # 搜索内容(关键词/图片URL)
+    content_type: Optional[str] = None  # 内容类型(视频/图文等)
+    sort_type: Optional[str] = None  # 排序类型(综合排序/最新等)
+    publish_time: Optional[str] = None  # 发布时间(不限/近7天等)
+    duration: Optional[str] = None  # 时长(不限/1分钟内等)
+    cursor: Optional[str] = None  # 分页游标
+    account_id: Optional[int] = None  # 使用的账号
+
+
+@dataclass
+class ChannelSearchAndDetailDTO:
+    """渠道搜索结果详情DTO"""
+    search_content: Optional[str] = None  # 搜索内容
+    search_result: Optional[Dict[str, Any]] = None  # 原始搜索结果
+    channel_content_id: Optional[str] = None  # 内容ID
+    channel_account_id: Optional[str] = None  # 账号ID
+    content_detail: Optional[Dict[str, Any]] = None  # 内容详情
+    fans_portrait: Optional[Dict[str, Any]] = None  # 粉丝画像
+
+
+@dataclass
+class SearchFilterConfigItem:
+    """Single filter entry from the search filter config (key / operator / value)."""
+    key: str
+    operator: str
+    value: str
+
+
+@dataclass
+class SaveFilterConditionParam:
+    """Filter condition in the downstream save format (numeric type and operator codes)."""
+    condition_type: int
+    operator: int
+    data: List[str]
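+
+# Illustrative construction (values chosen for the example; they mirror the
+# mappings in util/automation_provide_util.py, where "点赞量" -> 3 and "大于" -> 1):
+# SearchFilterConfigItem(key="点赞量", operator="大于", value="1000")
+# SaveFilterConditionParam(condition_type=3, operator=1, data=["1000"])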

+ 203 - 0
script/dou_yin_keywords_search.py

@@ -0,0 +1,203 @@
+import json
+from typing import List, Any, Optional
+
+from simpleeval import simple_eval
+
+from client.CrawlerClient import CrawlerClient
+from model.automation_provide_job import DouYinSearchConfig, ChannelSearchAndDetailDTO, SearchFilterConfigItem
+from util.automation_provide_util import AutoProvideUtil
+
+crawler_client = CrawlerClient()
+
+preFilterThreshold = 3
+
+result_txt_file = '/Users/zhao/Desktop/tzld/文档/分析文档/关键词分析.txt'
+
+
+def write_result_file(content, mode='a+'):
+    with open(result_txt_file, mode) as f:
+        f.write(content)
+        f.write("\n")
+
+
+def log_info_print_title():
+    write_result_file(
+        "视频ID,品类,关键词,爬取计划,结果,原因,搜索使用的账号ID,排序方式,站外视频ID,站外账号ID,过滤结果,分享量,点赞量,分享量/点赞量,视频时长(秒),观众年龄50+占比,观众年龄50+TGI,过滤规则表达式", 'w')
+
+
+def log_info_print(log_json: dict[str, Any], account_id: Optional[int] = None):
+    video_id = log_json["videoId"]
+    keywords = log_json['keywords']
+    crawler_plan_id = log_json.get("crawlerPlanId", "")
+    result = log_json.get("result", False)
+    reason = log_json.get("reason", "")
+    merge_cate2 = log_json['mergeSecondLevelCate']
+    sort_type = json.loads(log_json.get("modelValueConfig", "{}")).get("sortType")
+    ext_json = json.loads(log_json.get("ext", "{}"))
+    account_id = account_id if account_id else 0
+    if not ext_json:
+        write_result_file(f"{video_id},{merge_cate2},{keywords},'{crawler_plan_id},'{result},{reason},{account_id},{sort_type}")
+        return
+    for channel_content_id in ext_json:
+        channel_ext_info = ext_json[channel_content_id]
+        filter_result = channel_ext_info.get("result", False)
+        rule_str = channel_ext_info.get("rule", "")
+        rule_context = channel_ext_info.get('ruleContext', {})
+        share_cnt = rule_context.get('shareCnt', 0)
+        video_duration_s = rule_context.get('videoDuration_s', 0)
+        like_cnt = rule_context.get('likeCnt', 0)
+        audience_age_50_rate = rule_context.get('audienceAge50Rate', 0)
+        audience_age_50_tgi = rule_context.get('audienceAge50TGI', 0)
+        share_div_link = rule_context.get('shareDivLink', 0)
+
+        channel_account_id = ""
+        if "contentDetail" in channel_ext_info:
+            channel_account_id = channel_ext_info["contentDetail"].get("channelAccountId")
+        elif "fanPortrait" in channel_ext_info:
+            channel_account_id = channel_ext_info["fanPortrait"].get("channelAccountId")
+
+        write_result_file(f"{video_id},{merge_cate2},{keywords},'{crawler_plan_id},'{result},{reason},{account_id},{sort_type},'{channel_content_id},{channel_account_id},{filter_result},"
+                          f"{share_cnt},{like_cnt},{share_div_link},{video_duration_s},{audience_age_50_rate},{audience_age_50_tgi},{rule_str}")
+
+
+def keywords_search(keywords: str, sort_type: str, account_id=None) -> List[ChannelSearchAndDetailDTO]:
+    search_config = DouYinSearchConfig(
+        search_content=keywords,
+        sort_type=sort_type,
+        account_id=account_id
+    )
+    return crawler_client.dou_yin_keywords_search(search_config, True, True)
+
+
+def eval_expr(expr: str, context: dict) -> bool:
+    expr = expr.replace("&&", " and ").replace("||", " or ")
+    return bool(simple_eval(expr, names=context))
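+# Illustrative: eval_expr("likeCnt>1000&&videoDuration_s<60",
+#                         {"likeCnt": 2000.0, "videoDuration_s": 45.0}) evaluates to True.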
+
+
+def keywords_search_and_filter(keywords: str, sort_type: str, account_id: int, log_json: dict[str, Any], filters: List[SearchFilterConfigItem]) -> dict[str, Any]:
+    need_copy_keys = ["videoId", "accountFilters", "contentFilters", "mergeSecondLevelCate", "keywords"]
+    result_json = {}
+    for key in need_copy_keys:
+        result_json[key] = log_json.get(key)
+
+    log_ext_info = {}
+    result_json['ext'] = log_ext_info
+    result_json['result'] = True
+    result_json['modelValueConfig'] = {"sortType": sort_type}
+
+    rule_str = AutoProvideUtil.parse_filter_config_to_rule_str(filters)
+
+    channel_search_and_detail_dtos = keywords_search(keywords, sort_type, account_id)
+    if not channel_search_and_detail_dtos:
+        result_json["result"] = False
+        result_json['reason'] = '关键词搜索结果为空'
+        return result_json
+
+    cnt = 0
+    for channel_search_and_detail_dto in channel_search_and_detail_dtos:
+        channel_content_id = channel_search_and_detail_dto.channel_content_id
+        channel_account_id = channel_search_and_detail_dto.channel_account_id
+
+        content_detail = channel_search_and_detail_dto.content_detail
+        fans_portrait = channel_search_and_detail_dto.fans_portrait
+
+        ext_json = {}
+        log_ext_info[channel_content_id] = ext_json
+
+        if content_detail:
+            content_detail['channelAccountId'] = channel_account_id
+            content_detail['channelContentId'] = channel_content_id
+            ext_json['contentDetail'] = content_detail
+
+        if fans_portrait:
+            fans_portrait['channelAccountId'] = channel_account_id
+            fans_portrait['channelContentId'] = channel_content_id
+            ext_json['fanPortrait'] = fans_portrait
+
+        if (not content_detail) and (not fans_portrait):
+            ext_json["result"] = False
+            continue
+        rule_context = AutoProvideUtil.extract_content_rule_feature(content_detail=content_detail, fans_portrait=fans_portrait)
+        ext_json['ruleContext'] = rule_context
+        ext_json['rule'] = rule_str
+
+        if not rule_context:
+            cnt += 1
+            continue
+
+        result = eval_expr(expr=rule_str, context=rule_context)
+        ext_json['result'] = result
+        if result:
+            cnt += 1
+    if cnt <= preFilterThreshold:
+        result_json["result"] = False
+        result_json['reason'] = '该关键词首页满足条件的视频数不足'
+
+    return result_json
+
+
+def keywords_not_login_comprehensive_sort(keywords: str, log_json: dict[str, Any], filters: List[SearchFilterConfigItem]):
+    """
+    Not logged in, comprehensive sort
+    """
+    account_id = 0
+    log_json = keywords_search_and_filter(keywords=keywords, sort_type="综合排序", account_id=account_id, log_json=log_json, filters=filters)
+    log_json['ext'] = json.dumps(log_json['ext'], ensure_ascii=False)
+    log_json['modelValueConfig'] = json.dumps(log_json['modelValueConfig'], ensure_ascii=False)
+    log_info_print(log_json, account_id=account_id)
+
+
+def keywords_login_comprehensive_sort(keywords: str, log_json: dict[str, Any], filters: List[SearchFilterConfigItem]):
+    """
+    Logged in, comprehensive sort
+    """
+    account_id = 771431186
+    log_json = keywords_search_and_filter(keywords=keywords, sort_type="综合排序", account_id=account_id, log_json=log_json, filters=filters)
+    log_json['ext'] = json.dumps(log_json['ext'], ensure_ascii=False)
+    log_json['modelValueConfig'] = json.dumps(log_json['modelValueConfig'], ensure_ascii=False)
+    log_info_print(log_json, account_id=account_id)
+
+
+def keywords_login_like_sort(keywords: str, log_json: dict[str, Any], filters: List[SearchFilterConfigItem]):
+    """
+    Logged in, sorted by most likes
+    """
+    account_id = 771431186
+    log_json = keywords_search_and_filter(keywords=keywords, sort_type="最多点赞", account_id=account_id, log_json=log_json, filters=filters)
+    log_json['ext'] = json.dumps(log_json['ext'], ensure_ascii=False)
+    log_json['modelValueConfig'] = json.dumps(log_json['modelValueConfig'], ensure_ascii=False)
+    log_info_print(log_json, account_id=account_id)
+
+
+def handle_log_json(log_json: dict[str, Any]):
+    log_info_print(log_json)
+
+    # Parse the account/content filter configs attached to this log entry
+    keywords = log_json['keywords']
+    account_filters = json.loads(log_json.get("accountFilters", "[]"))
+    content_filters = json.loads(log_json.get("contentFilters", '[]'))
+    search_filter_config_items = []
+    for filter_item in account_filters + content_filters:
+        search_filter_config_items.append(SearchFilterConfigItem(**filter_item))
+
+    keywords_not_login_comprehensive_sort(keywords, log_json, search_filter_config_items)
+    keywords_login_comprehensive_sort(keywords, log_json, search_filter_config_items)
+    keywords_login_like_sort(keywords, log_json, search_filter_config_items)
+
+
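+# Assumed input format for main() (inferred from handle_log_json / log_info_print):
+# one JSON object per line, containing at least videoId, keywords,
+# mergeSecondLevelCate, and JSON-encoded accountFilters / contentFilters lists;
+# crawlerPlanId, result, reason, modelValueConfig and ext are optional.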
+def main():
+    file_path = "/Users/zhao/Downloads/keywords_filter_test_sample.json"
+    log_list = []
+    with open(file_path, "r", encoding="utf-8") as f:
+        for line in f:
+            log_list.append(json.loads(line))
+
+    log_info_print_title()
+    for log in log_list:
+        handle_log_json(log)
+
+
+if __name__ == '__main__':
+    main()

+ 270 - 0
util/automation_provide_util.py

@@ -0,0 +1,270 @@
+import logging
+from typing import List, Dict, Any
+
+from model.automation_provide_job import SaveFilterConditionParam, SearchFilterConfigItem
+
+# Configure logging
+logging.basicConfig(level=logging.ERROR)
+logger = logging.getLogger(__name__)
+
+
+# ==================== Core utility class ====================
+class AutoProvideUtil:
+    # Mapping from config keys to condition types (mirrors the Java configKeyAndTypeMap)
+    CONFIG_KEY_AND_TYPE_MAP = {
+        "点赞量": 3,
+        "发布量": 4,
+        "收藏量": 7,
+        "评论量": 9,
+        "转发量": 27,
+        "内容发布时间": 12,
+        "发布时间": 25,
+        "视频时长": 26,
+        "观众年龄50+占比": 128,
+        "观众年龄50+TGI": 129,
+        "观众性别男性占比": 130,
+        "观众性别男性TGI": 131,
+        "观众性别女性占比": 132,
+        "观众性别女性TGI": 133,
+        "视频时长(秒)": 134,
+        "分享量/点赞量": 138
+    }
+
+    # Mapping from config keys to rule keys (mirrors the Java configKeyAndRuleKeyMap)
+    CONFIG_KEY_AND_RULE_KEY_MAP = {
+        "点赞量": "likeCnt",
+        "视频时长(秒)": "videoDuration_s",
+        "观众年龄50+占比": "audienceAge50Rate",
+        "观众年龄50+TGI": "audienceAge50TGI",
+        "分享量/点赞量": "shareDivLink"
+    }
+
+    @classmethod
+    def parse_apollo_config(cls, config_jsons: List[Dict[str, str]]) -> List[SaveFilterConditionParam]:
+        """解析Apollo配置,转换为SaveFilterConditionParam列表"""
+        try:
+            # Convert the list of dicts into SearchFilterConfigItem objects
+            config_items = [
+                SearchFilterConfigItem(
+                    key=item.get("key", ""),
+                    operator=item.get("operator", ""),
+                    value=item.get("value", "")
+                )
+                for item in config_jsons
+            ]
+            return cls.parse_apollo_config_items(config_items)
+        except Exception as e:
+            logger.error(f"parse apollo config json error: {config_jsons}", exc_info=e)
+            return []
+
+    @classmethod
+    def parse_apollo_config_items(cls, config_items: List[SearchFilterConfigItem]) -> List[SaveFilterConditionParam]:
+        """解析SearchFilterConfigItem列表,生成过滤参数"""
+        save_filter_conditions = []
+
+        if not config_items:
+            return save_filter_conditions
+
+        for config in config_items:
+            key = config.key.strip()
+            operator_str = config.operator.strip()
+            value = config.value.strip()
+
+            # Skip empty values
+            if not all([key, operator_str, value]):
+                continue
+
+            # Skip keys that are not in the mapping
+            if key not in cls.CONFIG_KEY_AND_TYPE_MAP:
+                continue
+
+            # Build the filter parameter object
+            save_filter_condition = SaveFilterConditionParam(
+                condition_type=cls.CONFIG_KEY_AND_TYPE_MAP[key],
+                operator=cls.operator_convert(operator_str),
+                data=[value]
+            )
+            save_filter_conditions.append(save_filter_condition)
+
+        return save_filter_conditions
+
+    @staticmethod
+    def operator_convert(operator: str) -> int:
+        """将中文操作符转换为对应的数字编码"""
+        operator_map = {
+            "大于": 1,
+            "等于": 2,
+            "小于": 3,
+            "包含": 4,
+            "不包含": 5,
+            "介于": 6
+        }
+        return operator_map.get(operator, -1)
+
+    @classmethod
+    def parse_filter_map_to_rule_str(cls, config_jsons: List[Dict[str, str]]) -> str:
+        """将过滤配置映射转换为规则字符串"""
+        if not config_jsons:
+            return ""
+
+        try:
+            # Convert into SearchFilterConfigItem objects
+            config_items = [
+                SearchFilterConfigItem(
+                    key=item.get("key", ""),
+                    operator=item.get("operator", ""),
+                    value=item.get("value", "")
+                )
+                for item in config_jsons
+            ]
+            return cls.parse_filter_config_to_rule_str(config_items)
+        except Exception as e:
+            logger.error(f"parse filter config json error: {config_jsons}", exc_info=e)
+            return ""
+
+    @classmethod
+    def parse_filter_config_to_rule_str(cls, config_items: List[SearchFilterConfigItem]) -> str:
+        """将SearchFilterConfigItem列表转换为规则字符串"""
+        if not config_items:
+            return ""
+
+        condition_str_list = []
+        for config in config_items:
+            key = config.key.strip()
+            operator_str = config.operator.strip()
+            value = config.value.strip()
+
+            # Skip empty values
+            if not all([key, operator_str, value]):
+                continue
+
+            # Skip keys that are not in the rule mapping
+            if key not in cls.CONFIG_KEY_AND_RULE_KEY_MAP:
+                continue
+
+            # Build the condition string
+            rule_key = cls.CONFIG_KEY_AND_RULE_KEY_MAP[key]
+            operator = cls.operator_convert_str(operator_str)
+            condition_str_list.append(f"{rule_key}{operator}{value}")
+
+        return "&&".join(condition_str_list)
+
+    @staticmethod
+    def operator_convert_str(operator: str) -> str:
+        """将中文操作符转换为符号操作符"""
+        operator_map = {
+            "大于": ">",
+            "等于": "==",
+            "小于": "<",
+            "大于等于": ">=",
+            "小于等于": "<=",
+            "不等于": "!="
+        }
+        if operator not in operator_map:
+            raise RuntimeError(f"不支持的操作符: {operator}")
+        return operator_map[operator]
+
+    @classmethod
+    def extract_content_rule_feature(cls, content_detail: Dict[str, Any], fans_portrait: Dict[str, Any]) -> Dict[str, float]:
+        """提取内容规则特征"""
+        context = {}
+        cls.extract_content_detail_feature(context, content_detail)
+        cls.extract_fans_portrait_feature(context, fans_portrait)
+        return context
+
+    @classmethod
+    def extract_content_detail_feature(cls, context: Dict[str, float], content_detail: Dict[str, Any]) -> None:
+        """提取内容详情特征"""
+        if not content_detail:
+            return
+
+        content_detail_data = content_detail.get("data", {})
+        if not content_detail_data:
+            return
+
+        # Extract like count
+        if "like_count" in content_detail_data:
+            like_cnt = content_detail_data.get("like_count", 0.0)
+            context["likeCnt"] = float(like_cnt)
+
+        # Extract video duration
+        video_url_list = content_detail_data.get("video_url_list", [])
+        if video_url_list and isinstance(video_url_list, list) and len(video_url_list) > 0:
+            first_video = video_url_list[0]
+            if isinstance(first_video, dict) and "video_duration" in first_video:
+                video_duration = first_video.get("video_duration", 0.0)
+                context["videoDuration_s"] = float(video_duration)
+
+        # Extract share count and the share/like ratio
+        like_cnt = content_detail_data.get("like_count", 0.0)
+        share_cnt = content_detail_data.get("share_count", 0.0)
+        if like_cnt and share_cnt:  # avoid division by zero
+            context["shareCnt"] = float(share_cnt)
+            context["shareDivLink"] = cls.safe_div(share_cnt, like_cnt)
+
+    @classmethod
+    def extract_fans_portrait_feature(cls, context: Dict[str, float], fans_portrait: Dict[str, Any]) -> None:
+        """提取粉丝画像特征"""
+        if not fans_portrait:
+            return
+
+        fans_portrait_data = fans_portrait.get("data", {})
+        if not fans_portrait_data:
+            return
+
+        # Extract the 50+ age segment
+        age_info = fans_portrait_data.get("年龄", {})
+        age_50_plus = age_info.get("50-", {})
+        if age_50_plus:
+            # Strip the percentage signs
+            percentage_str = age_50_plus.get("percentage", "0%").replace("%", "")
+            preference_str = age_50_plus.get("preference", "0%").replace("%", "")
+
+            audience_age50_rate = float(percentage_str) / 100
+            audience_age50_tgi = float(preference_str)
+
+            context["audienceAge50Rate"] = audience_age50_rate
+            context["audienceAge50TGI"] = audience_age50_tgi
+
+    @staticmethod
+    def safe_div(numerator: float, denominator: float) -> float:
+        """安全除法,避免除以0"""
+        try:
+            return float(numerator) / float(denominator)
+        except ZeroDivisionError:
+            return 0.0
+
+
+# ==================== Usage example ====================
+if __name__ == "__main__":
+    # 1. Parse the Apollo config
+    test_config = [
+        {"key": "点赞量", "operator": "大于", "value": "1000"},
+        {"key": "视频时长(秒)", "operator": "小于", "value": "60"}
+    ]
+    params = AutoProvideUtil.parse_apollo_config(test_config)
+    print("解析后的过滤参数:")
+    for param in params:
+        print(f"类型: {param.condition_type}, 操作符: {param.operator}, 值: {param.data}")
+
+    # 2. Convert to a rule string
+    rule_str = AutoProvideUtil.parse_filter_map_to_rule_str(test_config)
+    print(f"\nRule string: {rule_str}")
+
+    # 3. Extract features
+    test_content_detail = {
+        "data": {
+            "like_count": 2000.0,
+            "share_count": 500.0,
+            "video_url_list": [{"video_duration": 45.0}]
+        }
+    }
+    test_fans_portrait = {
+        "data": {
+            "年龄": {"50-": {"percentage": "25%", "preference": "120%"}}
+        }
+    }
+    features = AutoProvideUtil.extract_content_rule_feature(test_content_detail, test_fans_portrait)
+    print("\n提取的特征:")
+    for k, v in features.items():
+        print(f"{k}: {v}")