| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
- from .base import BaseStrategy
- class I2I(BaseStrategy):
- @staticmethod
- def base(title: str, limit: int = 50) -> str:
- query = f"""
- -- ① 源标题(可限定,强烈建议)
- WITH src_title_user AS (
- SELECT DISTINCT title AS 源标题, union_id
- FROM loghubods.title_union_id_info
- WHERE title = '{title}'
- ),
- -- ② 源标题 → 联想标题 共现
- co_occur AS (
- SELECT
- s.源标题, t.title AS 联想标题, COUNT(*) AS 联想次数
- FROM src_title_user s
- JOIN loghubods.title_union_id_info t ON s.union_id = t.union_id
- WHERE s.源标题 != t.title
- GROUP BY s.源标题, t.title
- ),
- -- ③ 联想标题的 uid 覆盖量(独立算)
- title_uid_cnt AS (
- SELECT
- title, COUNT(DISTINCT union_id) AS 联想标题_uid数量
- FROM loghubods.title_union_id_info
- GROUP BY title
- )
- -- ④ 合并
- SELECT c.源标题, c.联想标题, c.联想次数, u.联想标题_uid数量, c.联想次数 / (u.联想标题_uid数量 + 1000) AS 联想标题_uid覆盖率
- FROM co_occur c
- LEFT JOIN title_uid_cnt u ON c.联想标题 = u.title
- ORDER BY 联想标题_uid覆盖率 DESC
- LIMIT {limit};
- """
- return query
- @staticmethod
- def batch_base(title_list, limit: int = 1000):
- title_tuple = tuple(title_list)
- query = f"""
- SELECT src_title, rec_title, collinear_cnt, base_cnt, rec_collinear_ratio
- FROM loghubods.t2i_records
- WHERE src_title IN {title_tuple}
- AND data_version = 'v1' ORDER BY rec_collinear_ratio DESC
- LIMIT {limit};
- """
- return query
- @staticmethod
- def strategy_v1(title_list, limit: int = 500):
- title_tuple = tuple(title_list)
- query = f"""
- SELECT associated_title AS recommend_title
- ,sum(association_count) as collinear_cnt
- ,sum(associated_title_uid_count) as base_cnt
- ,sum(association_count) / (sum(associated_title_uid_count) + 10000) AS recommend_score
- FROM loghubods.i2i_table
- WHERE dt = MAX_PT('i2i_table')
- AND source_title IN {title_tuple}
- GROUP BY recommend_title
- ORDER BY recommend_score DESC
- LIMIT {limit};
- """
- return query
|