Browse Source

offline-recommend

luojunhui 3 weeks ago
parent
commit
c041557265

+ 8 - 8
app/recommend/offline_recommend/core.py

@@ -75,7 +75,7 @@ class BaseOffRecommendUtils:
     async def get_recommend_articles_for_batch_titles(self, title_list: List[str], strategy: str) -> List[Dict[str, str]]:
         match strategy:
             case "v1":
-                query = I2I.batch_summary(title_list)
+                query = I2I.strategy_v1(title_list)
 
             case _:
                 query = I2I.batch_base(title_list)
@@ -128,15 +128,15 @@ class BaseOfflineRecommend(BaseOffRecommendUtils):
             {
                 "account_name": account_name,
                 "gh_id": gh_id,
-                "recommend_title": item.rec_title,
-                "collinear_cnt": item.total_collinear_cnt,
-                "base_cnt": item.total_base_cnt,
-                "collinear_ratio": item.rec_collinear_ratio,
+                "recommend_title": item.recommend_title,
+                "collinear_cnt": item.collinear_cnt,
+                "base_cnt": item.base_cnt,
+                "recommend_score": item.recommend_score,
             }
             for item in recommend_articles
-            if item.rec_title
-               and item.rec_title not in self.filter_title
-               and item.rec_title not in published_titles
+            if item.recommend_title
+               and item.recommend_title not in self.filter_title
+               and item.recommend_title not in published_titles
         ]
         return candidate_articles
 

+ 6 - 1
app/recommend/offline_recommend/strategy/get_top_article.py

@@ -17,7 +17,12 @@ class GetTopArticleStrategy(BaseStrategy):
     @staticmethod
     def strategy_v1() -> str:
+        """Return the SQL text that selects an account's top first-position titles.
+
+        The statement reads ``datastat_sort_strategy`` and leaves a single
+        ``%s`` placeholder for ``gh_id``. Rows are kept only when
+        ``position = 1``, ``date_str >= '20250501'`` and ``view_count > 1000``;
+        these filters sit in the WHERE clause, so they apply per row before
+        grouping. The surviving rows are grouped by ``title``, ordered by
+        ``sum(view_count) / sum(fans)`` descending, and capped at 25 rows.
+
+        Returns:
+            The query string; each result row carries ``title``,
+            ``total_view_count`` and ``total_fan_count``.
+        """
         query = """
-            SELECT date_str, title, view_count from datastat_sort_strategy where position = 1 and gh_id = %s
+            SELECT title, sum(view_count) as total_view_count, sum(fans) as total_fan_count
+            FROM datastat_sort_strategy
+            WHERE position = 1 and gh_id = %s AND date_str >= '20250501' AND view_count > 1000
+            GROUP BY title
+            ORDER BY sum(view_count) / sum(fans) DESC
+            LIMIT 25;
         """
         return query
 

+ 18 - 0
app/recommend/offline_recommend/strategy/i2i.py

@@ -65,3 +65,21 @@ class I2I(BaseStrategy):
             LIMIT {limit};
         """
         return query
+
+
+    @staticmethod
+    def strategy_v1(title_list, limit: int = 500):
+        """Build the v1 item-to-item recommendation query for a batch of titles.
+
+        For every ``associated_title`` linked to one of the given source
+        titles, the query sums ``association_count`` (``collinear_cnt``) and
+        ``associated_title_uid_count`` (``base_cnt``) and ranks the titles by
+        ``collinear_cnt / (base_cnt + 10000)``, highest first. Only the
+        partition selected by ``MAX_PT('i2i_table')`` is read.
+
+        Args:
+            title_list: Source titles to expand; must contain at least one.
+            limit: Maximum number of recommended titles to return.
+
+        Returns:
+            The SQL text as a string.
+
+        Raises:
+            ValueError: If ``title_list`` is empty, since ``IN ()`` is not
+                valid SQL and would only fail later inside the database.
+        """
+        titles = tuple(title_list)
+        if not titles:
+            raise ValueError("title_list must contain at least one title")
+
+        def _quote(title) -> str:
+            # Backslash-escape so a quote inside a title cannot terminate the
+            # literal early. NOTE(review): assumes the engine honours
+            # backslash escapes in string constants - confirm for this SQL dialect.
+            escaped = str(title).replace("\\", "\\\\").replace("'", "\\'")
+            return f"'{escaped}'"
+
+        # Render the IN list explicitly: interpolating the tuple itself emits a
+        # trailing comma for a single title ("('a',)"), which is invalid SQL.
+        title_literals = ", ".join(_quote(title) for title in titles)
+        query = f"""
+            SELECT  associated_title AS recommend_title
+                ,sum(association_count) as collinear_cnt
+                ,sum(associated_title_uid_count) as base_cnt
+                ,sum(association_count) / (sum(associated_title_uid_count) + 10000) AS recommend_score
+            FROM    loghubods.i2i_table
+            WHERE     dt = MAX_PT('i2i_table')
+            AND   source_title IN ({title_literals})
+            GROUP BY recommend_title
+            ORDER BY recommend_score DESC
+            LIMIT {int(limit)};
+            """
+        return query