2 months ago · 4ffc0aaa1a
--- a/client/CrawlerClient.py
+++ b/client/CrawlerClient.py
@@ -0,0 +1,193 @@
 
				+import json
			
 
				+import logging
			
 
				+from typing import List, Dict, Optional, Any
			
 
				+
			
 
				+import requests
			
 
				+
			
 
				+from model.automation_provide_job import DouYinSearchConfig, ChannelSearchAndDetailDTO
			
 
				+
			
 
				+# ==================== 配置与枚举定义 ====================
			
 
				+# 日志配置
			
 
				+# logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
			
 
				+# logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+class CrawlerClient:
			
 
				+    def __init__(self):
			
 
				+        self.base_url = "http://crawapi.piaoquantv.com"
			
 
				+        self.base_ip = "http://8.217.190.241:8888"
			
 
				+
			
 
				+    def keyword_search(self, search_config: DouYinSearchConfig) -> Dict[str, Any]:
			
 
				+        """
			
 
				+        重载方法：通过DouYinSearchConfig对象进行关键词搜索
			
 
				+        """
			
 
				+        return self.keyword_search_base(
			
 
				+            keywords=search_config.search_content,
			
 
				+            contentType=search_config.content_type,
			
 
				+            sortType=search_config.sort_type,
			
 
				+            publishTime=search_config.publish_time,
			
 
				+            duration=search_config.duration,
			
 
				+            cursor=search_config.cursor,
			
 
				+            account_id=search_config.account_id
			
 
				+        )
			
 
				+
			
 
				+    def keyword_search_base(
			
 
				+            self,
			
 
				+            keywords: str,
			
 
				+            contentType: Optional[str] = None,
			
 
				+            sortType: Optional[str] = None,
			
 
				+            publishTime: Optional[str] = None,
			
 
				+            duration: Optional[str] = None,
			
 
				+            cursor: Optional[str] = None,
			
 
				+            account_id: Optional[int] = None,
			
 
				+    ) -> Dict[str, Any]:
			
 
				+        """
			
 
				+        基础关键词搜索方法（对应Java的keywordSearch重载方法）
			
 
				+        """
			
 
				+        if not keywords:
			
 
				+            raise RuntimeError("keywords is not empty")
			
 
				+
			
 
				+        # 拼接API URL
			
 
				+        url = f"{self.base_url}/crawler/dou_yin/keyword"
			
 
				+
			
 
				+        # 设置默认值
			
 
				+        content_type = contentType if contentType else "视频"
			
 
				+        sort_type = sortType if sortType else "综合排序"
			
 
				+        publish_time = publishTime if publishTime else "不限"
			
 
				+        duration_val = duration if duration else "不限"
			
 
				+        account_id = account_id if account_id else 98
			
 
				+
			
 
				+        # 构建请求参数
			
 
				+        param_json = {
			
 
				+            "keyword": keywords,
			
 
				+            "content_type": content_type,
			
 
				+            "sort_type": sort_type,
			
 
				+            "publish_time": publish_time,
			
 
				+            "duration": duration_val,
			
 
				+            "cursor": cursor if cursor else "",
			
 
				+            "accountId": account_id
			
 
				+        }
			
 
				+
			
 
				+        # 发送POST请求并处理响应
			
 
				+        return self._post(url, param_json)
			
 
				+
			
 
				+    def dou_yin_keywords_search(
			
 
				+            self,
			
 
				+            search_config: DouYinSearchConfig,
			
 
				+            is_need_content_detail: bool = False,
			
 
				+            is_need_fans_portrait: bool = False
			
 
				+    ) -> List[ChannelSearchAndDetailDTO]:
			
 
				+        """
			
 
				+        抖音关键词搜索，返回包含详情/粉丝画像的完整结果列表
			
 
				+        """
			
 
				+        search_result_json = {}
			
 
				+        try:
			
 
				+            # 执行关键词搜索
			
 
				+            search_result_json = self.keyword_search(search_config)
			
 
				+        except Exception as e:
			
 
				+            pass
			
 
				+            # logger.error(f"关键词 {search_config.search_content} 搜索异常", exc_info=e)
			
 
				+
			
 
				+        # 解析搜索结果列表
			
 
				+        search_result = search_result_json.get("data", [])
			
 
				+        if not isinstance(search_result, list) or len(search_result) == 0:
			
 
				+            return []
			
 
				+
			
 
				+        # logger.info(f"关键词 {search_config.search_content} 搜索视频数: {len(search_result)}")
			
 
				+
			
 
				+        # 构建返回结果
			
 
				+        result = []
			
 
				+        for search_json in search_result:
			
 
				+            dto = ChannelSearchAndDetailDTO()
			
 
				+
			
 
				+            # 提取基础信息
			
 
				+            channel_content_id = search_json.get("aweme_id", "")
			
 
				+            author_info = search_json.get("author", {})
			
 
				+            channel_account_id = author_info.get("sec_uid", "")
			
 
				+
			
 
				+            # 初始化详情和画像
			
 
				+            content_detail = {}
			
 
				+            fans_portrait = {}
			
 
				+
			
 
				+            try:
			
 
				+                if is_need_content_detail and channel_content_id:
			
 
				+                    content_detail = self.get_content_detail_by_id(channel_content_id)
			
 
				+            except Exception as e:
			
 
				+                # logger.error(f"获取站外视频 {channel_content_id} 的内容详情异常", exc_info=e)
			
 
				+                pass
			
 
				+            try:
			
 
				+                if is_need_fans_portrait and channel_account_id:
			
 
				+                    fans_portrait = self.get_fans_portrait_by_id(channel_account_id)
			
 
				+            except Exception as e:
			
 
				+                # logger.error(f"获取站外视频对应账号 {channel_account_id} 的粉丝画像异常", exc_info=e)
			
 
				+                pass
			
 
				+            # 填充DTO字段
			
 
				+            dto.search_content = search_config.search_content
			
 
				+            dto.search_result = search_json
			
 
				+            dto.channel_content_id = channel_content_id
			
 
				+            dto.channel_account_id = channel_account_id
			
 
				+            dto.content_detail = content_detail
			
 
				+            dto.fans_portrait = fans_portrait
			
 
				+
			
 
				+            result.append(dto)
			
 
				+
			
 
				+        return result
			
 
				+
			
 
				+    def get_content_detail_by_id(self, content_id: str) -> Dict[str, Any]:
			
 
				+        """根据内容ID获取详情（无缓存）"""
			
 
				+        if not content_id:
			
 
				+            return {}
			
 
				+
			
 
				+        url = f"{self.base_ip}/crawler/dou_yin/detail"
			
 
				+        param_json = {"content_id": content_id}
			
 
				+        return self._post(url, param_json)
			
 
				+
			
 
				+    def get_fans_portrait_by_id(self, account_id: str) -> Dict[str, Any]:
			
 
				+        """根据账号ID获取粉丝画像（无缓存）"""
			
 
				+        if not account_id:
			
 
				+            return {}
			
 
				+
			
 
				+        url = f"{self.base_url}/crawler/dou_yin/re_dian_bao/account_fans_portrait"
			
 
				+        param_json = {
			
 
				+            "account_id": account_id,
			
 
				+            "need_province": False,
			
 
				+            "need_city": False,
			
 
				+            "need_city_level": False,
			
 
				+            "need_gender": False,
			
 
				+            "need_age": True,
			
 
				+            "need_phone_brand": False,
			
 
				+            "need_phone_price": False
			
 
				+        }
			
 
				+        return self._post(url, param_json)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def _post(cls, url: str, params: Dict[str, Any]) -> Dict[str, Any]:
			
 
				+        """
			
 
				+        通用POST请求方法（对应Java的post私有方法）
			
 
				+        """
			
 
				+        # logger.info(f"invoke crawler api request. url:{url}, request:{params}")
			
 
				+
			
 
				+        # 发送POST请求
			
 
				+        response_str = requests.post(url, json.dumps(params)).text
			
 
				+        response_str = response_str if response_str else "{}"
			
 
				+
			
 
				+        # 解析响应
			
 
				+        try:
			
 
				+            resp_json = json.loads(response_str)
			
 
				+        except json.JSONDecodeError:
			
 
				+            # logger.error(f"响应JSON解析失败: {response_str}")
			
 
				+            resp_json = {}
			
 
				+
			
 
				+        # logger.info(f"invoke crawler api result. respJson: {resp_json}")
			
 
				+
			
 
				+        # 检查响应码
			
 
				+        if resp_json.get("code") != "0":
			
 
				+            raise RuntimeError(resp_json.get("msg", "API调用失败"))
			
 
				+
			
 
				+        # 返回data字段
			
 
				+        return resp_json.get("data", {})
			
 
				+
			
 
				+
			
 
				+# ==================== 使用示例 ====================
			
 
if __name__ == "__main__":
    # Placeholder: running this module directly does nothing.
    pass
			
--- a/model/__init__.py
+++ b/model/__init__.py
--- a/model/automation_provide_job.py
+++ b/model/automation_provide_job.py
@@ -0,0 +1,39 @@
 
				+from dataclasses import dataclass
			
 
				+from typing import Dict, Optional, Any, List
			
 
				+
			
 
				+
			
 
@dataclass
class DouYinSearchConfig:
    """Douyin search configuration."""
    search_content: str  # Search content (keyword / image URL)
    content_type: Optional[str] = None  # Content type (video / image-text, etc.)
    sort_type: Optional[str] = None  # Sort order (comprehensive / newest, etc.)
    publish_time: Optional[str] = None  # Publish-time filter (unlimited / last 7 days, etc.)
    duration: Optional[str] = None  # Duration filter (unlimited / under 1 minute, etc.)
    cursor: Optional[str] = None  # Pagination cursor
    account_id: Optional[int] = None  # Account used to run the search
			
 
				+
			
 
				+
			
 
@dataclass
class ChannelSearchAndDetailDTO:
    """Channel search result together with its detail data."""
    search_content: Optional[str] = None  # Search content that produced this hit
    search_result: Optional[Dict[str, Any]] = None  # Raw search result entry
    channel_content_id: Optional[str] = None  # Content id
    channel_account_id: Optional[str] = None  # Account id
    content_detail: Optional[Dict[str, Any]] = None  # Content detail
    fans_portrait: Optional[Dict[str, Any]] = None  # Fans portrait
			
 
				+
			
 
				+
			
 
@dataclass
class SearchFilterConfigItem:
    """One filter condition: a key, an operator name and a comparison value."""
    key: str  # Filter name, e.g. "点赞量"
    operator: str  # Operator name, e.g. "大于"
    value: str  # Value the key is compared against
			
 
				+
			
 
				+
			
 
@dataclass
class SaveFilterConditionParam:
    """Filter condition expressed with numeric type and operator codes."""
    condition_type: int  # Numeric code of the condition type
    operator: int  # Numeric code of the operator
    data: List[str]  # Comparison value(s)
			
--- a/script/dou_yin_keywords_search.py
+++ b/script/dou_yin_keywords_search.py
@@ -0,0 +1,203 @@
 
				+import json
			
 
				+from typing import List, Any, Optional
			
 
				+
			
 
				+from simpleeval import simple_eval
			
 
				+
			
 
				+from client.CrawlerClient import CrawlerClient
			
 
				+from model.automation_provide_job import DouYinSearchConfig, ChannelSearchAndDetailDTO, SearchFilterConfigItem
			
 
				+from util.automation_provide_util import AutoProvideUtil
			
 
				+
			
 
# Shared crawler client used by keywords_search().
crawler_client = CrawlerClient()

# A keyword is marked as insufficient when the number of counted videos
# is <= this threshold (see keywords_search_and_filter).
preFilterThreshold = 3

# Output file that write_result_file() writes to.
result_txt_file = '/Users/zhao/Desktop/tzld/文档/分析文档/关键词分析.txt'
			
 
				+
			
 
				+
			
 
				+def write_result_file(content, mode='a+'):
			
 
				+    with open(result_txt_file, mode) as f:
			
 
				+        f.write(content)
			
 
				+        f.write("\n")
			
 
				+
			
 
				+
			
 
				+def log_info_print_title():
			
 
				+    write_result_file(
			
 
				+        "视频ID,品类,关键词,爬取计划,结果,原因,搜索使用的账号ID,排序方式,站外视频ID,站外账号ID,过滤结果,分享量,点赞量,分享量/点赞量,视频时长（秒）,观众年龄50+占比,观众年龄50+TGI,过滤规则表达式", 'w')
			
 
				+
			
 
				+
			
 
				+def log_info_print(log_json: dict[str, Any], account_id: Optional[int] = None):
			
 
				+    video_id = log_json["videoId"]
			
 
				+    keywords = log_json['keywords']
			
 
				+    crawler_plan_id = log_json.get("crawlerPlanId", "")
			
 
				+    result = log_json.get("result", False)
			
 
				+    reason = log_json.get("reason", "")
			
 
				+    merge_cate2 = log_json['mergeSecondLevelCate']
			
 
				+    sort_type = json.loads(log_json.get("modelValueConfig", "{}")).get("sortType")
			
 
				+    ext_json = json.loads(log_json.get("ext", "{}"))
			
 
				+    account_id = account_id if account_id else 0
			
 
				+    if not ext_json:
			
 
				+        write_result_file(f"{video_id},{merge_cate2},{keywords},'{crawler_plan_id},'{result},{reason},{account_id},{sort_type}")
			
 
				+        return
			
 
				+    for channel_content_id in ext_json:
			
 
				+        channel_ext_info = ext_json[channel_content_id]
			
 
				+        filter_result = channel_ext_info.get("result", False)
			
 
				+        rule_str = channel_ext_info.get("rule", "")
			
 
				+        rule_context = channel_ext_info.get('ruleContext', {})
			
 
				+        share_cnt = rule_context.get('shareCnt', 0)
			
 
				+        video_duration_s = rule_context.get('videoDuration_s', 0)
			
 
				+        like_cnt = rule_context.get('likeCnt', 0)
			
 
				+        audience_age_50_rate = rule_context.get('audienceAge50Rate', 0)
			
 
				+        audience_age_50_tgi = rule_context.get('audienceAge50TGI', 0)
			
 
				+        share_div_link = rule_context.get('shareDivLink', 0)
			
 
				+
			
 
				+        channel_account_id = ""
			
 
				+        if "contentDetail" in channel_ext_info:
			
 
				+            channel_account_id = channel_ext_info["contentDetail"].get("channelAccountId")
			
 
				+        elif "fanPortrait" in channel_ext_info:
			
 
				+            channel_account_id = channel_ext_info["fanPortrait"].get("channelAccountId")
			
 
				+
			
 
				+        write_result_file(f"{video_id},{merge_cate2},{keywords},'{crawler_plan_id},'{result},{reason},{account_id},{sort_type},'{channel_content_id},{channel_account_id},{filter_result},"
			
 
				+                          f"{share_cnt},{like_cnt},{share_div_link},{video_duration_s},{audience_age_50_rate},{audience_age_50_tgi},{rule_str}")
			
 
				+
			
 
				+
			
 
				+def keywords_search(keywords: str, sort_type: str, account_id=None) -> List[ChannelSearchAndDetailDTO]:
			
 
				+    search_config = DouYinSearchConfig(
			
 
				+        search_content=keywords,
			
 
				+        sort_type=sort_type,
			
 
				+        account_id=account_id
			
 
				+    )
			
 
				+    return crawler_client.dou_yin_keywords_search(search_config, True, True)
			
 
				+
			
 
				+
			
 
				+def eval_expr(expr: str, context: dict) -> bool:
			
 
				+    expr = expr.replace("&&", " and ").replace("||", " or ")
			
 
				+    return bool(simple_eval(expr, names=context))
			
 
				+
			
 
				+
			
 
				+def keywords_search_and_filter(keywords: str, sort_type: str, account_id: int, log_json: dict[str, Any], filters: List[SearchFilterConfigItem]) -> dict[str, Any]:
			
 
				+    need_copy_keys = ["videoId", "accountFilters", "contentFilters", "mergeSecondLevelCate", "keywords"]
			
 
				+    result_json = {}
			
 
				+    for key in need_copy_keys:
			
 
				+        result_json[key] = log_json.get(key)
			
 
				+
			
 
				+    log_ext_info = {}
			
 
				+    result_json['ext'] = log_ext_info
			
 
				+    result_json['result'] = True
			
 
				+    result_json['modelValueConfig'] = {"sortType": sort_type}
			
 
				+
			
 
				+    rule_str = AutoProvideUtil.parse_filter_config_to_rule_str(filters)
			
 
				+
			
 
				+    channel_search_and_detail_dtos = keywords_search(keywords, sort_type, account_id)
			
 
				+    if not channel_search_and_detail_dtos:
			
 
				+        result_json["result"] = False
			
 
				+        result_json['reason'] = '关键词搜索结果为空'
			
 
				+        return result_json
			
 
				+
			
 
				+    cnt = 0
			
 
				+    for channel_search_and_detail_dto in channel_search_and_detail_dtos:
			
 
				+        channel_content_id = channel_search_and_detail_dto.channel_content_id
			
 
				+        channel_account_id = channel_search_and_detail_dto.channel_account_id
			
 
				+
			
 
				+        content_detail = channel_search_and_detail_dto.content_detail
			
 
				+        fans_portrait = channel_search_and_detail_dto.fans_portrait
			
 
				+
			
 
				+        ext_json = {}
			
 
				+        log_ext_info[channel_content_id] = ext_json
			
 
				+
			
 
				+        if content_detail:
			
 
				+            content_detail['channelAccountId'] = channel_account_id
			
 
				+            content_detail['channelContentId'] = channel_content_id
			
 
				+            ext_json['contentDetail'] = content_detail
			
 
				+
			
 
				+        if fans_portrait:
			
 
				+            fans_portrait['channelAccountId'] = channel_account_id
			
 
				+            fans_portrait['channelContentId'] = channel_content_id
			
 
				+            ext_json['fanPortrait'] = fans_portrait
			
 
				+
			
 
				+        if (not content_detail) and (not fans_portrait):
			
 
				+            ext_json["result"] = False
			
 
				+            continue
			
 
				+        rule_context = AutoProvideUtil.extract_content_rule_feature(content_detail=content_detail, fans_portrait=fans_portrait)
			
 
				+        ext_json['ruleContext'] = rule_context
			
 
				+        ext_json['rule'] = rule_str
			
 
				+
			
 
				+        if not rule_context:
			
 
				+            cnt += 1
			
 
				+            continue
			
 
				+
			
 
				+        result = eval_expr(expr=rule_str, context=rule_context)
			
 
				+        ext_json['result'] = result
			
 
				+        if result:
			
 
				+            cnt += 1
			
 
				+    if cnt <= preFilterThreshold:
			
 
				+        log_json["result"] = False
			
 
				+        log_json['reason'] = '该关键词首页满足条件的视频数不足'
			
 
				+
			
 
				+    return {}
			
 
				+
			
 
				+
			
 
				+def keywords_not_login_comprehensive_sort(keywords: str, log_json: dict[str, Any], filters: List[SearchFilterConfigItem]):
			
 
				+    """
			
 
				+    未登录，综合排序
			
 
				+    """
			
 
				+    account_id = 0
			
 
				+    log_json = keywords_search_and_filter(keywords=keywords, sort_type="综合排序", account_id=account_id, log_json=log_json, filters=filters)
			
 
				+    log_json['ext'] = json.dumps(log_json['ext'], ensure_ascii=False)
			
 
				+    log_json['modelValueConfig'] = json.dumps(log_json['modelValueConfig'], ensure_ascii=False)
			
 
				+    log_info_print(log_json, account_id=account_id)
			
 
				+
			
 
				+
			
 
				+def keywords_login_comprehensive_sort(keywords: str, log_json: dict[str, Any], filters: List[SearchFilterConfigItem]):
			
 
				+    """
			
 
				+    登录，综合排序
			
 
				+    """
			
 
				+    account_id = 771431186
			
 
				+    log_json = keywords_search_and_filter(keywords=keywords, sort_type="综合排序", account_id=account_id, log_json=log_json, filters=filters)
			
 
				+    log_json['ext'] = json.dumps(log_json['ext'], ensure_ascii=False)
			
 
				+    log_json['modelValueConfig'] = json.dumps(log_json['modelValueConfig'], ensure_ascii=False)
			
 
				+    log_info_print(log_json, account_id=account_id)
			
 
				+
			
 
				+
			
 
				+def keywords_login_like_sort(keywords: str, log_json: dict[str, Any], filters: List[SearchFilterConfigItem]):
			
 
				+    """
			
 
				+    登录状态，最多点赞
			
 
				+    """
			
 
				+    account_id = 771431186
			
 
				+    log_json = keywords_search_and_filter(keywords=keywords, sort_type="最多点赞", account_id=account_id, log_json=log_json, filters=filters)
			
 
				+    log_json['ext'] = json.dumps(log_json['ext'], ensure_ascii=False)
			
 
				+    log_json['modelValueConfig'] = json.dumps(log_json['modelValueConfig'], ensure_ascii=False)
			
 
				+    log_info_print(log_json, account_id=account_id)
			
 
				+
			
 
				+
			
 
				+def handle_log_json(log_json: dict[str, Any]):
			
 
				+    log_info_print(log_json)
			
 
				+
			
 
				+    # 未登录，最多点赞
			
 
				+    keywords = log_json['keywords']
			
 
				+    account_filters = json.loads(log_json.get("accountFilters", "[]"))
			
 
				+    content_filters = json.loads(log_json.get("contentFilters", '[]'))
			
 
				+    search_filter_config_tems = []
			
 
				+    for filter_item in account_filters + content_filters:
			
 
				+        search_filter_config_tems.append(SearchFilterConfigItem(**filter_item))
			
 
				+
			
 
				+    keywords_not_login_comprehensive_sort(keywords, log_json, search_filter_config_tems)
			
 
				+    keywords_login_comprehensive_sort(keywords, log_json, search_filter_config_tems)
			
 
				+    keywords_login_like_sort(keywords, log_json, search_filter_config_tems)
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    file_path = "/Users/zhao/Downloads/keywords_filter_test_sample.json"
			
 
				+    log_list = []
			
 
				+    with open(file_path, "r", encoding="utf-8") as f:
			
 
				+        line = f.readline()
			
 
				+        while line:
			
 
				+            log_list.append(json.loads(line))
			
 
				+            line = f.readline()
			
 
				+
			
 
				+    log_info_print_title()
			
 
				+    for log in log_list:
			
 
				+        handle_log_json(log)
			
 
				+
			
 
				+
			
 
# Run the replay only when this file is executed as a script.
if __name__ == '__main__':
    main()
			
--- a/util/automation_provide_util.py
+++ b/util/automation_provide_util.py
@@ -0,0 +1,270 @@
 
				+import logging
			
 
				+from typing import List, Dict, Any
			
 
				+
			
 
				+from model.automation_provide_job import SaveFilterConditionParam, SearchFilterConfigItem
			
 
				+
			
 
# Logging setup: root logger at ERROR level, plus a module-level logger.
logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+# ==================== 核心工具类 ====================
			
 
				+class AutoProvideUtil:
			
 
				+    # 配置键与类型的映射（对应Java的configKeyAndTypeMap）
			
 
				+    CONFIG_KEY_AND_TYPE_MAP = {
			
 
				+        "点赞量": 3,
			
 
				+        "发布量": 4,
			
 
				+        "收藏量": 7,
			
 
				+        "评论量": 9,
			
 
				+        "转发量": 27,
			
 
				+        "内容发布时间": 12,
			
 
				+        "发布时间": 25,
			
 
				+        "视频时长": 26,
			
 
				+        "观众年龄50+占比": 128,
			
 
				+        "观众年龄50+TGI": 129,
			
 
				+        "观众性别男性占比": 130,
			
 
				+        "观众性别男性TGI": 131,
			
 
				+        "观众性别女性占比": 132,
			
 
				+        "观众性别女性TGI": 133,
			
 
				+        "视频时长（秒）": 134,
			
 
				+        "分享量/点赞量": 138
			
 
				+    }
			
 
				+
			
 
				+    # 配置键与规则键的映射（对应Java的configKeyAndRuleKeyMap）
			
 
				+    CONFIG_KEY_AND_RULE_KEY_MAP = {
			
 
				+        "点赞量": "likeCnt",
			
 
				+        "视频时长（秒）": "videoDuration_s",
			
 
				+        "观众年龄50+占比": "audienceAge50Rate",
			
 
				+        "观众年龄50+TGI": "audienceAge50TGI",
			
 
				+        "分享量/点赞量": "shareDivLink"
			
 
				+    }
			
 
				+
			
 
				+    @classmethod
			
 
				+    def parse_apollo_config(cls, config_jsons: List[Dict[str, str]]) -> List[SaveFilterConditionParam]:
			
 
				+        """解析Apollo配置，转换为SaveFilterConditionParam列表"""
			
 
				+        try:
			
 
				+            # 将字典列表转换为SearchFilterConfigItem列表
			
 
				+            config_items = [
			
 
				+                SearchFilterConfigItem(
			
 
				+                    key=item.get("key", ""),
			
 
				+                    operator=item.get("operator", ""),
			
 
				+                    value=item.get("value", "")
			
 
				+                )
			
 
				+                for item in config_jsons
			
 
				+            ]
			
 
				+            return cls.parse_apollo_config_items(config_items)
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"parse apollo config json error: {config_jsons}", exc_info=e)
			
 
				+            return []
			
 
				+
			
 
				+    @classmethod
			
 
				+    def parse_apollo_config_items(cls, config_items: List[SearchFilterConfigItem]) -> List[SaveFilterConditionParam]:
			
 
				+        """解析SearchFilterConfigItem列表，生成过滤参数"""
			
 
				+        save_filter_conditions = []
			
 
				+
			
 
				+        if not config_items:
			
 
				+            return save_filter_conditions
			
 
				+
			
 
				+        for config in config_items:
			
 
				+            key = config.key.strip()
			
 
				+            operator_str = config.operator.strip()
			
 
				+            value = config.value.strip()
			
 
				+
			
 
				+            # 空值检查
			
 
				+            if not all([key, operator_str, value]):
			
 
				+                continue
			
 
				+
			
 
				+            # 检查key是否在映射中
			
 
				+            if key not in cls.CONFIG_KEY_AND_TYPE_MAP:
			
 
				+                continue
			
 
				+
			
 
				+            # 构建过滤参数对象
			
 
				+            save_filter_condition = SaveFilterConditionParam(
			
 
				+                condition_type=cls.CONFIG_KEY_AND_TYPE_MAP[key],
			
 
				+                operator=cls.operator_convert(operator_str),
			
 
				+                data=[value]
			
 
				+            )
			
 
				+            save_filter_conditions.append(save_filter_condition)
			
 
				+
			
 
				+        return save_filter_conditions
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def operator_convert(operator: str) -> int:
			
 
				+        """将中文操作符转换为对应的数字编码"""
			
 
				+        operator_map = {
			
 
				+            "大于": 1,
			
 
				+            "等于": 2,
			
 
				+            "小于": 3,
			
 
				+            "包含": 4,
			
 
				+            "不包含": 5,
			
 
				+            "介于": 6
			
 
				+        }
			
 
				+        return operator_map.get(operator, -1)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def parse_filter_map_to_rule_str(cls, config_jsons: List[Dict[str, str]]) -> str:
			
 
				+        """将过滤配置映射转换为规则字符串"""
			
 
				+        if not config_jsons:
			
 
				+            return ""
			
 
				+
			
 
				+        try:
			
 
				+            # 转换为SearchFilterConfigItem列表
			
 
				+            config_items = [
			
 
				+                SearchFilterConfigItem(
			
 
				+                    key=item.get("key", ""),
			
 
				+                    operator=item.get("operator", ""),
			
 
				+                    value=item.get("value", "")
			
 
				+                )
			
 
				+                for item in config_jsons
			
 
				+            ]
			
 
				+            return cls.parse_filter_config_to_rule_str(config_items)
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"parse filter config json error: {config_jsons}", exc_info=e)
			
 
				+            return ""
			
 
				+
			
 
				+    @classmethod
			
 
				+    def parse_filter_config_to_rule_str(cls, config_items: List[SearchFilterConfigItem]) -> str:
			
 
				+        """将SearchFilterConfigItem列表转换为规则字符串"""
			
 
				+        if not config_items:
			
 
				+            return ""
			
 
				+
			
 
				+        condition_str_list = []
			
 
				+        for config in config_items:
			
 
				+            key = config.key.strip()
			
 
				+            operator_str = config.operator.strip()
			
 
				+            value = config.value.strip()
			
 
				+
			
 
				+            # 空值检查
			
 
				+            if not all([key, operator_str, value]):
			
 
				+                continue
			
 
				+
			
 
				+            # 检查key是否在规则映射中
			
 
				+            if key not in cls.CONFIG_KEY_AND_RULE_KEY_MAP:
			
 
				+                continue
			
 
				+
			
 
				+            # 拼接条件字符串
			
 
				+            rule_key = cls.CONFIG_KEY_AND_RULE_KEY_MAP[key]
			
 
				+            operator = cls.operator_convert_str(operator_str)
			
 
				+            condition_str_list.append(f"{rule_key}{operator}{value}")
			
 
				+
			
 
				+        return "&&".join(condition_str_list)
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def operator_convert_str(operator: str) -> str:
			
 
				+        """将中文操作符转换为符号操作符"""
			
 
				+        operator_map = {
			
 
				+            "大于": ">",
			
 
				+            "等于": "==",
			
 
				+            "小于": "<",
			
 
				+            "大于等于": ">=",
			
 
				+            "小于等于": "<=",
			
 
				+            "不等于": "!="
			
 
				+        }
			
 
				+        if operator not in operator_map:
			
 
				+            raise RuntimeError(f"不支持的操作符: {operator}")
			
 
				+        return operator_map[operator]
			
 
				+
			
 
				+    @classmethod
			
 
				+    def extract_content_rule_feature(cls, content_detail: Dict[str, Any], fans_portrait: Dict[str, Any]) -> Dict[str, float]:
			
 
				+        """提取内容规则特征"""
			
 
				+        context = {}
			
 
				+        cls.extract_content_detail_feature(context, content_detail)
			
 
				+        cls.extract_fans_portrait_feature(context, fans_portrait)
			
 
				+        return context
			
 
				+
			
 
				+    @classmethod
			
 
				+    def extract_content_detail_feature(cls, context: Dict[str, float], content_detail: Dict[str, Any]) -> None:
			
 
				+        """提取内容详情特征"""
			
 
				+        if not content_detail:
			
 
				+            return
			
 
				+
			
 
				+        content_detail_data = content_detail.get("data", {})
			
 
				+        if not content_detail_data:
			
 
				+            return
			
 
				+
			
 
				+        # 提取点赞量
			
 
				+        if "like_count" in content_detail_data:
			
 
				+            like_cnt = content_detail_data.get("like_count", 0.0)
			
 
				+            context["likeCnt"] = float(like_cnt)
			
 
				+
			
 
				+        # 提取视频时长
			
 
				+        video_url_list = content_detail_data.get("video_url_list", [])
			
 
				+        if video_url_list and isinstance(video_url_list, list) and len(video_url_list) > 0:
			
 
				+            first_video = video_url_list[0]
			
 
				+            if isinstance(first_video, dict) and "video_duration" in first_video:
			
 
				+                video_duration = first_video.get("video_duration", 0.0)
			
 
				+                context["videoDuration_s"] = float(video_duration)
			
 
				+
			
 
				+        # 提取分享量/点赞量比值
			
 
				+        like_cnt = content_detail_data.get("like_count", 0.0)
			
 
				+        share_cnt = content_detail_data.get("share_count", 0.0)
			
 
				+        if like_cnt and share_cnt:  # 避免除以0
			
 
				+            context["shareCnt"] = float(share_cnt)
			
 
				+            context["shareDivLink"] = cls.safe_div(share_cnt, like_cnt)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def extract_fans_portrait_feature(cls, context: Dict[str, float], fans_portrait: Dict[str, Any]) -> None:
			
 
				+        """提取粉丝画像特征"""
			
 
				+        if not fans_portrait:
			
 
				+            return
			
 
				+
			
 
				+        fans_portrait_data = fans_portrait.get("data", {})
			
 
				+        if not fans_portrait_data:
			
 
				+            return
			
 
				+
			
 
				+        # 提取50+年龄画像
			
 
				+        age_info = fans_portrait_data.get("年龄", {})
			
 
				+        age_50_plus = age_info.get("50-", {})
			
 
				+        if age_50_plus:
			
 
				+            # 处理百分比字符串
			
 
				+            percentage_str = age_50_plus.get("percentage", "0%").replace("%", "")
			
 
				+            preference_str = age_50_plus.get("preference", "0%").replace("%", "")
			
 
				+
			
 
				+            audience_age50_rate = float(percentage_str) / 100
			
 
				+            audience_age50_tgi = float(preference_str)
			
 
				+
			
 
				+            context["audienceAge50Rate"] = audience_age50_rate
			
 
				+            context["audienceAge50TGI"] = audience_age50_tgi
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def safe_div(numerator: float, denominator: float) -> float:
			
 
				+        """安全除法，避免除以0"""
			
 
				+        try:
			
 
				+            return float(numerator) / float(denominator)
			
 
				+        except ZeroDivisionError:
			
 
				+            return 0.0
			
 
				+
			
 
				+
			
 
				+# ==================== 使用示例 ====================
			
 
if __name__ == "__main__":
    # 1. Parse an Apollo-style filter config into filter parameters.
    test_config = [
        {"key": "点赞量", "operator": "大于", "value": "1000"},
        {"key": "视频时长（秒）", "operator": "小于", "value": "60"}
    ]
    params = AutoProvideUtil.parse_apollo_config(test_config)
    print("解析后的过滤参数：")
    for param in params:
        print(f"类型: {param.condition_type}, 操作符: {param.operator}, 值: {param.data}")

    # 2. Convert the same config into a rule expression string.
    rule_str = AutoProvideUtil.parse_filter_map_to_rule_str(test_config)
    print(f"\n规则字符串: {rule_str}")

    # 3. Extract rule features from sample detail / portrait payloads.
    test_content_detail = {
        "data": {
            "like_count": 2000.0,
            "share_count": 500.0,
            "video_url_list": [{"video_duration": 45.0}]
        }
    }
    test_fans_portrait = {
        "data": {
            "年龄": {"50-": {"percentage": "25%", "preference": "120%"}}
        }
    }
    features = AutoProvideUtil.extract_content_rule_feature(test_content_detail, test_fans_portrait)
    print("\n提取的特征：")
    for k, v in features.items():
        print(f"{k}: {v}")