from .base import BaseStrategy class I2I(BaseStrategy): @staticmethod def base(title: str, limit: int = 50) -> str: query = f""" -- ① 源标题(可限定,强烈建议) WITH src_title_user AS ( SELECT DISTINCT title AS 源标题, union_id FROM loghubods.title_union_id_info WHERE title = '{title}' ), -- ② 源标题 → 联想标题 共现 co_occur AS ( SELECT s.源标题, t.title AS 联想标题, COUNT(*) AS 联想次数 FROM src_title_user s JOIN loghubods.title_union_id_info t ON s.union_id = t.union_id WHERE s.源标题 != t.title GROUP BY s.源标题, t.title ), -- ③ 联想标题的 uid 覆盖量(独立算) title_uid_cnt AS ( SELECT title, COUNT(DISTINCT union_id) AS 联想标题_uid数量 FROM loghubods.title_union_id_info GROUP BY title ) -- ④ 合并 SELECT c.源标题, c.联想标题, c.联想次数, u.联想标题_uid数量, c.联想次数 / (u.联想标题_uid数量 + 1000) AS 联想标题_uid覆盖率 FROM co_occur c LEFT JOIN title_uid_cnt u ON c.联想标题 = u.title ORDER BY 联想标题_uid覆盖率 DESC LIMIT {limit}; """ return query @staticmethod def batch_base(title_list, limit: int = 1000): title_tuple = tuple(title_list) query = f""" SELECT src_title, rec_title, collinear_cnt, base_cnt, rec_collinear_ratio FROM loghubods.t2i_records WHERE src_title IN {title_tuple} AND data_version = 'v1' ORDER BY rec_collinear_ratio DESC LIMIT {limit}; """ return query @staticmethod def strategy_v1(title_list, limit: int = 500): title_tuple = tuple(title_list) query = f""" SELECT associated_title AS recommend_title ,sum(association_count) as collinear_cnt ,sum(associated_title_uid_count) as base_cnt ,sum(association_count) / (sum(associated_title_uid_count) + 10000) AS recommend_score FROM loghubods.i2i_table WHERE dt = MAX_PT('i2i_table') AND source_title IN {title_tuple} GROUP BY recommend_title ORDER BY recommend_score DESC LIMIT {limit}; """ return query