Просмотр исходного кода

videoContentList 加 channel_name 强过滤,解决跨渠道客户(gzyhc/wxm)数据污染

- selectForRecommend 新增 channelName 参数(MyBatis 动态 AND channel_name=#{channelName})
- ContentPlatformPlanServiceImpl 加 resolveChannelName(param):
    type=2/3 → 群/企微合作-稳定;type=0/1/4 → 公众号合作-即转-稳定;
    type=999/空时 ghName 非空也兜回公众号
- 3 个 demand 池 fetcher(场景/人群需求/优质相似)统一注入 channelName
- docs/recommend-ordering.md 同步:新增常量、resolveChannelName 章节、SQL 段补充强过滤
刘立冬 7 часов назад
Родитель
Сommit
e0ee74db9a

+ 1 - 0
api-module/src/main/java/com/tzld/piaoquan/api/dao/mapper/contentplatform/ext/ContentPlatformDemandVideoMapperExt.java

@@ -37,6 +37,7 @@ public interface ContentPlatformDemandVideoMapperExt {
      * excludeSelfTitle=true 时过滤掉 video.title == demand_content_title 的行。
      */
     List<ContentPlatformDemandVideo> selectForRecommend(@Param("dt") String dt,
+                                                       @Param("channelName") String channelName,
                                                        @Param("crowdSegment") String crowdSegment,
                                                        @Param("demandStrategy") String demandStrategy,
                                                        @Param("dimension") String dimension,

+ 33 - 8
api-module/src/main/java/com/tzld/piaoquan/api/service/contentplatform/impl/ContentPlatformPlanServiceImpl.java

@@ -615,6 +615,9 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
     private static final String DEMAND_STRATEGY_PRIOR_SCENE = "人群需求-场景";
     private static final String DEMAND_STRATEGY_POSTERIOR = "优质相似";
     private static final String PRIOR_PREMIUM_DIMENSION = "传播的头部";
+    /** type → channel_name mapping (hard filter): for customers whose crowd_segment spans channels (e.g. gzyhc/wxm), the data source is selected by entry platform. */
+    private static final String CHANNEL_NAME_GZH = "公众号合作-即转-稳定";
+    private static final String CHANNEL_NAME_QW  = "群/企微合作-稳定";
     private static final double PRIOR_GROUP_KEEP_RATIO = 0.5;
     private static final String POSTERIOR_FILTER_ABS_LIKE = "绝对高效率%";
     private static final String POSTERIOR_FILTER_REL_LIKE = "相对裂变率%";
@@ -623,6 +626,25 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
     private static final String SOURCE_POSTERIOR = "posterior";
     private static final String SOURCE_HOT = "hot";
 
+    /**
+     * Derives the channel_name (crowd_channel) value used as a hard filter on the demand pools.
+     * Signal priority:
+     *   1. type is one of the mapped values — 0/1/4 → CHANNEL_NAME_GZH; 2/3 → CHANNEL_NAME_QW
+     *   2. type is null or any other value (e.g. 999) and ghName (official-account parameter) has text → CHANNEL_NAME_GZH
+     *   3. otherwise null, i.e. no channel_name restriction (previous behavior is kept)
+     * type values: 0 auto-reply (official-account entry) / 1 service-account push / 2 WeCom community / 3 WeCom auto-reply / 4 official-account push / 999 unrestricted.
+     * Purpose: stops customers whose crowd_segment spans channels (e.g. gzyhc/wxm) from being polluted by the other side's data under the WeCom / official-account entry.
+     */
+    private String resolveChannelName(VideoContentListParam param) {
+        Integer type = param.getType();
+        if (type != null) {
+            if (type == 2 || type == 3) return CHANNEL_NAME_QW;
+            if (type == 0 || type == 1 || type == 4) return CHANNEL_NAME_GZH;
+        }
+        if (StringUtils.hasText(param.getGhName())) return CHANNEL_NAME_GZH;
+        return null;
+    }
+
     @Override
     public Page<VideoContentItemVO> getVideoContentList(VideoContentListParam param) {
         ContentPlatformAccount user = LoginUserContext.getUser();
@@ -841,12 +863,13 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
         }
         String crowdSegment = user.getChannel();
         String ghName = StringUtils.hasText(param.getGhName()) ? param.getGhName() : null;
+        String channelName = resolveChannelName(param);
 
         List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommend(
-                dt, crowdSegment, DEMAND_STRATEGY_PRIOR_SCENE, null, null, null, ghName, null, limit, false);
+                dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR_SCENE, null, null, null, ghName, null, limit, false);
         if (ghName != null && rows.isEmpty()) {
             rows = demandVideoMapperExt.selectForRecommend(
-                    dt, crowdSegment, DEMAND_STRATEGY_PRIOR_SCENE, null, null, null, null, null, limit, false);
+                    dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR_SCENE, null, null, null, null, null, limit, false);
         }
         // 1. 同 video_id 取 total_rov 最大的代表行(SQL 已排序,putIfAbsent 保留首次)
         LinkedHashMap<Long, ContentPlatformDemandVideo> bestPerVideo = new LinkedHashMap<>();
@@ -888,15 +911,16 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
         }
         String crowdSegment = user.getChannel();
         String ghName = StringUtils.hasText(param.getGhName()) ? param.getGhName() : null;
+        String channelName = resolveChannelName(param);
         int fetchLimit = Math.max(limit * 3, DEMAND_CANDIDATE_LIMIT);
 
         List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommend(
-                dt, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, ghName, null, fetchLimit, false);
+                dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, ghName, null, fetchLimit, false);
 
         // 退化:该 ghName 无数据 → 退回渠道粒度
         if (ghName != null && rows.isEmpty()) {
             rows = demandVideoMapperExt.selectForRecommend(
-                    dt, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, null, null, fetchLimit, false);
+                    dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, null, null, fetchLimit, false);
         }
 
         Function<ContentPlatformDemandVideo, String> keyFn = r ->
@@ -956,19 +980,20 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
         }
         String crowdSegment = user.getChannel();
         String ghName = StringUtils.hasText(param.getGhName()) ? param.getGhName() : null;
+        String channelName = resolveChannelName(param);
         int fetchLimit = Math.max(limit * 3, DEMAND_CANDIDATE_LIMIT);
 
         List<ContentPlatformDemandVideo> stageAbs = demandVideoMapperExt.selectForRecommend(
-                dt, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, POSTERIOR_FILTER_ABS_LIKE, ghName, POSTERIOR_DRIVE_DIMENSION_TIME, fetchLimit, true);
+                dt, channelName, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, POSTERIOR_FILTER_ABS_LIKE, ghName, POSTERIOR_DRIVE_DIMENSION_TIME, fetchLimit, true);
         List<ContentPlatformDemandVideo> stageRel = demandVideoMapperExt.selectForRecommend(
-                dt, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, POSTERIOR_FILTER_REL_LIKE, ghName, POSTERIOR_DRIVE_DIMENSION_TIME, fetchLimit, true);
+                dt, channelName, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, POSTERIOR_FILTER_REL_LIKE, ghName, POSTERIOR_DRIVE_DIMENSION_TIME, fetchLimit, true);
 
         // 退化:该 ghName 在两阶段都无数据 → 退回渠道粒度(drive_dimension_time 仍严格为"昨日")
         if (ghName != null && stageAbs.isEmpty() && stageRel.isEmpty()) {
             stageAbs = demandVideoMapperExt.selectForRecommend(
-                    dt, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, POSTERIOR_FILTER_ABS_LIKE, null, POSTERIOR_DRIVE_DIMENSION_TIME, fetchLimit, true);
+                    dt, channelName, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, POSTERIOR_FILTER_ABS_LIKE, null, POSTERIOR_DRIVE_DIMENSION_TIME, fetchLimit, true);
             stageRel = demandVideoMapperExt.selectForRecommend(
-                    dt, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, POSTERIOR_FILTER_REL_LIKE, null, POSTERIOR_DRIVE_DIMENSION_TIME, fetchLimit, true);
+                    dt, channelName, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, POSTERIOR_FILTER_REL_LIKE, null, POSTERIOR_DRIVE_DIMENSION_TIME, fetchLimit, true);
         }
 
         Function<ContentPlatformDemandVideo, String> keyFn = r ->

+ 3 - 0
api-module/src/main/resources/mapper/contentplatform/ext/ContentPlatformDemandVideoMapperExt.xml

@@ -88,6 +88,9 @@
                match_text, title, cover, video, experiment_id, status, create_timestamp, update_timestamp
         FROM content_platform_demand_video
         WHERE dt = #{dt} AND status = 1
+        <if test="channelName != null and channelName != ''">
+            AND channel_name = #{channelName}
+        </if>
         <if test="crowdSegment != null and crowdSegment != ''">
             AND crowd_segment = #{crowdSegment}
         </if>

+ 26 - 7
docs/recommend-ordering.md

@@ -35,12 +35,28 @@ source 空(默认)  ──────►  四路随机穿插 (getInterleave
 | `DEMAND_STRATEGY_PRIOR_SCENE` | `"人群需求-场景"` | 场景池过滤值(旧名「先验需求-场景」) |
 | `DEMAND_STRATEGY_POSTERIOR` | `"优质相似"` | 优质相似池过滤值(posterior,旧名「后验需求」) |
 | `PRIOR_PREMIUM_DIMENSION` | `"传播的头部"` | prior 池 dimension 强过滤 |
+| `CHANNEL_NAME_GZH` | `"公众号合作-即转-稳定"` | 公众号入口对应 `channel_name` 强过滤值 |
+| `CHANNEL_NAME_QW`  | `"群/企微合作-稳定"`     | 企微入口对应 `channel_name` 强过滤值 |
 | `PRIOR_GROUP_KEEP_RATIO` | `0.5` | prior 池"特征组"按 total_rov 分位保留比例 |
 | `POSTERIOR_FILTER_ABS_LIKE` | `"绝对高效率%"` | posterior 池 A 段 `demand_filter_sort_strategy` LIKE |
 | `POSTERIOR_FILTER_REL_LIKE` | `"相对裂变率%"` | posterior 池 B 段 `demand_filter_sort_strategy` LIKE |
 | `POSTERIOR_DRIVE_DIMENSION_TIME` | `"昨日"` | posterior 池强制 `drive_dimension_time` |
 
-公共强过滤(所有 demand 池 SQL):`dt = max(dt)` AND `status = 1` AND `crowd_segment = user.channel`。
+公共强过滤(所有 demand 池 SQL):`dt = max(dt)` AND `status = 1` AND `crowd_segment = user.channel` AND `channel_name = resolveChannelName(param)`(`resolveChannelName` 返回 `null` 时不加此条件)。
+
+### 2.1 `resolveChannelName(param)` — 入口 → `channel_name` 映射
+
+为避免同一 `crowd_segment` 在公众号系与企微系都存在(已确认 `gzyhc` / `wxm` 双渠道并存)时,选企微入口却拉到公众号数据(或反之),**3 个 demand 池 SQL 在源头加 `channel_name` 强过滤**。映射依据 `VideoContentListParam` 的 `type` 与 `ghName`:
+
+| `param.type` | 含义 | 映射 `channel_name` |
+|---|---|---|
+| `0` 自动回复 / `1` 服务号推送 / `4` 公众号推送 | 公众号系 | `CHANNEL_NAME_GZH` |
+| `2` 企微-社群 / `3` 企微-自动回复 | 企微系 | `CHANNEL_NAME_QW` |
+| `999` / 其它 | 不限平台 | 进入 fallback |
+
+**Fallback**:`type` 空或不在上表时,若 `param.ghName` 非空(只有公众号入口会带 ghName)→ `CHANNEL_NAME_GZH`;否则返回 `null`(不加 channel_name 过滤,保留旧行为)。
+
+> 设计动机:`crowd_segment` 是按客户(广告主)切的,而同一客户可能同时投公众号 + 企微;数据落库时按"人群_渠道"打 `channel_name`。前端切换入口后 `type` 一定会变,所以让 `type` 当一级路由信号最稳。
 
 ---
 
@@ -54,13 +70,14 @@ source 空(默认)  ──────►  四路随机穿插 (getInterleave
 SELECT ... FROM content_platform_demand_video
 WHERE dt=:maxDt AND status=1
   AND crowd_segment=:userChannel
+  AND channel_name=:resolvedChannelName  -- 若 resolveChannelName 命中(公众号/企微)
   AND demand_strategy='人群需求-场景'
-  AND channel_level3=:ghName        -- 若传入
+  AND channel_level3=:ghName             -- 若传入
 ORDER BY total_rov DESC, score DESC
 LIMIT 10000
 ```
 
-退化:如果 `ghName` 非空但查 0 条,**去掉 `ghName` 再查一次**(拿全渠道兜底)。
+退化:如果 `ghName` 非空但查 0 条,**去掉 `ghName` 再查一次**(拿全渠道兜底)。`channel_name` 在退化时**保留**(强过滤,跨渠道污染必须先挡掉)。
 
 后处理(顺序):
 1. 同 `videoId` 去重,保留首次出现(SQL 已按 `total_rov DESC, score DESC` 排序 → 首次 = 该视频的"最强代表需求"行)
@@ -80,11 +97,12 @@ LIMIT 10000
 单段查询:
 
 ```sql
-SELECT ... WHERE ... AND demand_strategy='人群需求' AND dimension='传播的头部' ...
+SELECT ... WHERE ... AND channel_name=:resolvedChannelName  -- 可空:为 null 时不加此条件
+  AND demand_strategy='人群需求' AND dimension='传播的头部' ...
 ORDER BY total_rov DESC, score DESC LIMIT 30000
 ```
 
-退化:查询为空且 `ghName` 非空 → 去 `ghName` 重查。
+退化:查询为空且 `ghName` 非空 → 去 `ghName` 重查(`channel_name` 仍保留)
 
 **[新] 特征组分位裁剪 (`retainTopGroupsByTotalRov`,`keepRatio=0.5`)**:
 - 按 `(point_type, standard_element)` 分组,取每组 `max(total_rov)`(即该特征的人群需求强度)
@@ -114,7 +132,8 @@ A、B 两段独立查询:
 
 ```sql
 -- A 段: demand_filter_sort_strategy LIKE '绝对高效率%'
-SELECT ... WHERE ... AND demand_strategy='优质相似'
+SELECT ... WHERE ... AND channel_name=:resolvedChannelName  -- 可空:为 null 时不加此条件
+  AND demand_strategy='优质相似'
   AND demand_filter_sort_strategy LIKE '绝对高效率%'
   AND drive_dimension_time='昨日'
   AND (title IS NULL OR demand_content_title IS NULL OR title <> demand_content_title)
@@ -124,7 +143,7 @@ ORDER BY total_rov DESC, score DESC LIMIT 30000
 SELECT ... AND demand_filter_sort_strategy LIKE '相对裂变率%' AND drive_dimension_time='昨日' ...
 ```
 
-退化:A、B 都空且 `ghName` 非空 → 去 `ghName` 重查(`drive_dimension_time='昨日'` 仍保留)
+退化:A、B 都空且 `ghName` 非空 → 去 `ghName` 重查(`drive_dimension_time='昨日'` 与 `channel_name` 强过滤仍保留)
 
 每段进 `groupAndTopK`:
 - 分组键:`demand_content_id`