罗俊辉 преди 10 месеца
родител
ревизия
fe5cdaa936
променени са 3 файла, в които са добавени 88 реда и са изтрити 14 реда
  1. 11 1
      stratrgy/strategy.py
  2. 76 12
      tasks/task2.py
  3. 1 1
      tasks/task3.py

+ 11 - 1
stratrgy/strategy.py

@@ -38,13 +38,23 @@ class ArticlePoolStrategy(object):
         账号-位置-阅读倍数
         :return:
         """
+        L = []
         for line in detail_list:
             key = "{}_{}".format(line['gh_id'], line['index'])
             article_read = line['increase_read_count']
             avg_read = accountBaseInfo[key]['readAvg']
             # 计算比率
             level_rate = article_read / avg_read - 1
-            print(level_rate)
+            print(level_rate, article_read, avg_read)
+            print(line)
+            print("\n")
+            obj = {
+                "key": key,
+                "avg_read": avg_read,
+                "article_read": article_read,
+
+            }
+
 
 
 

+ 76 - 12
tasks/task2.py

@@ -1,9 +1,13 @@
 """
 @author: luojunhui
 """
+import datetime
+import json
+
+from tqdm import tqdm
 
 from applications import AIDTApi, DeNetMysql, PQMySQL, Functions
-from config import poolLevelConfig, cateMap, coldPoolArticlesNum
+from config import poolLevelConfig, cateMap, coldPoolArticlesNum, accountBaseInfo
 
 
 class ColdStartTask(object):
@@ -14,7 +18,20 @@ class ColdStartTask(object):
     DeMysql = DeNetMysql()
     PqMysql = PQMySQL()
     Fun = Functions()
-    pool4 = poolLevelConfig['1']
+    pool3 = poolLevelConfig['3']
+
+    @classmethod
+    def generate_account_dict(cls):
+        """
+        Build the account-name -> gh_id lookup from accountBaseInfo.
+        :return: dict mapping each entry's 'accountName' to its 'ghId'
+        """
+        # Entries that share an accountName collapse to the last one iterated.
+        name_to_gh_id = {
+            info['accountName']: info['ghId']
+            for info in accountBaseInfo.values()
+        }
+        return name_to_gh_id
 
     @classmethod
     def getTopArticles(cls, category, limit_count):
@@ -23,7 +40,7 @@ class ColdStartTask(object):
         :return:
         """
         sql = f"""
-            select content_id, content_link, title 
+            select content_channel_id, content_link, title 
             from cold_start_article_pool
             where category = '{category}'
             order by view_count DESC, publish_time_stamp DESC
@@ -33,14 +50,61 @@ class ColdStartTask(object):
         return result
 
     @classmethod
-    def computeScore(cls):
+    def getAccountScoreList(cls, title_list, account_name):
+        """
+        Fetch the title scores computed for one account via Fun.getTitleScore.
+        :return: the 'score_list' entry found under account_name in the response
+        """
+        response = cls.Fun.getTitleScore(title_list=title_list, account_name=account_name)
+        return response[account_name]['score_list']
+
+    @classmethod
+    def splitToAccount(cls, obj_list):
+        """
+        Pre-distribute articles: store each account's 30 best-scoring article ids.
+        :param obj_list: article dicts; each must provide 'id' and 'title'
+        :return: None; one row per account goes into article_pre_distribute_account
+        """
+        account_dict = cls.generate_account_dict()
+        title_list = [article['title'] for article in obj_list]
+        # The statement text and the date are identical for every account: build them once.
+        date_str = datetime.datetime.today().strftime("%Y-%m-%d")
+        insert_sql = """
+            INSERT INTO article_pre_distribute_account
+            (gh_id, date, article_list)
+            VALUES 
+            (%s, %s, %s);
+            """
+        for account, gh_id in tqdm(account_dict.items()):
+            score_list = cls.getAccountScoreList(title_list=title_list, account_name=account)
+            scored = [(article['id'], score) for article, score in zip(obj_list, score_list)]
+            top_articles = sorted(scored, key=lambda pair: pair[1], reverse=True)[:30]
+            try:
+                # Use the PQMySQL instance held on the class; calling update() on the bare
+                # PQMySQL class would be missing `self` if update is an instance method.
+                cls.PqMysql.update(sql=insert_sql, params=(gh_id, date_str, json.dumps(top_articles, ensure_ascii=False)))
+            except Exception as e:
+                print("插入出现问题----{}".format(e))
+
+        print("成功更新完成")
+
+    @classmethod
+    def findArticlesDaily(cls):
         """
         和每个账号计算相关性分数
         :return:
         """
-        category_list = ["军事政法", "健康养生", "宗教历史"]
+        category_list = [
+            "军事政法",
+            "健康养生",
+            "宗教历史",
+            "情感生活",
+            "娱乐八卦",
+            "新闻媒体"
+        ]
         L = []
-        for category in category_list:
+        for category in tqdm(category_list):
+            print("{} is processing......".format(category))
             limit_count = coldPoolArticlesNum * cateMap.get(category, 0.1)
             article_tuple = cls.getTopArticles(category, int(limit_count))
             title_list = [article[2] for article in article_tuple]
@@ -63,17 +127,17 @@ class ColdStartTask(object):
         把文章send至第四层
         :return:
         """
-        result = cls.computeScore()
-        army = [i for i in result if i['cate'] == '军事政法']
-        healthy = [i for i in result if i['cate'] == '健康养生']
-        history = [i for i in result if i['cate'] == '宗教历史']
-        url_list = [i['url'] for i in result]
+        # 获取6个品类的数据
+        target_article_list = cls.findArticlesDaily()
+        # 预分配账号
+        cls.splitToAccount(target_article_list)
+
         # 再加一次配比,每个品类的数量占比
         cls.AidApi.updateArticleIntoCrawlerPlan(
             plan_id=plan_id,
             plan_name=plan_name,
             plan_tag=plan_tag,
-            url_list=[i['url'] for i in result]
+            url_list=[i['url'] for i in target_article_list]
         )
 
 

+ 1 - 1
tasks/task3.py

@@ -12,7 +12,7 @@ from stratrgy import ArticlePoolStrategy
 
 class SendToMultiLevels(object):
     """
-    冷启分配任务
+    流量池任务
     """
     AidApi = AIDTApi()
     DeMysql = DeNetMysql()