فهرست منبع

修改文件夹 strategy -- > flow_pool

删除无用代码 distribution.py, strategy.py
luojunhui 5 ماه پیش
والد
کامیت
269cc0bdfa
5 فایلهای تغییر یافته به همراه 3 افزوده شده و 246 حذف شده
  1. 3 0
      flow_pool/__init__.py
  2. 0 0
      flow_pool/upLevel.py
  3. 0 4
      stratrgy/__init__.py
  4. 0 140
      stratrgy/distribution.py
  5. 0 102
      stratrgy/strategy.py

+ 3 - 0
flow_pool/__init__.py

@@ -0,0 +1,3 @@
+"""
+@author: luojunhui
+"""

+ 0 - 0
stratrgy/upLevel.py → flow_pool/upLevel.py


+ 0 - 4
stratrgy/__init__.py

@@ -1,4 +0,0 @@
-"""
-@author: luojunhui
-"""
-from .strategy import ArticlePoolStrategy

+ 0 - 140
stratrgy/distribution.py

@@ -1,140 +0,0 @@
-"""
-@author: luojunhui
-分发逻辑
-"""
-import json
-import datetime
-from applications import PQMySQL, WeixinSpider
-from tqdm import tqdm
-from config import accountBaseInfo
-
-
-class ArticleDistribution(object):
-    """
-    冷启文章分发逻辑
-    """
-    account_position_dict = {
-        "gh_058e41145a0c": 30,
-        "gh_0e4fd9e88386": 30,
-        "gh_744cb16f6e16": 30,
-        "gh_ac43eb24376d": 30,
-        "gh_970460d9ccec": 30,
-        "gh_56ca3dae948c": 30,
-        "gh_c91b42649690": 30,
-        "gh_6d205db62f04": 30,
-        "gh_e24da99dc899": 30,
-        "gh_4c058673c07e": 30,
-        "gh_03d32e83122f": 30,
-        "gh_c69776baf2cd": 30,
-        "gh_30816d8adb52": 30,
-        "gh_789a40fe7935": 30,
-        "gh_95ed5ecf9363": 30,
-        "gh_3e91f0624545": 30,
-        "gh_57573f01b2ee": 30,
-        "gh_9877c8541764": 30,
-        "gh_6cfd1132df94": 30,
-        "gh_008ef23062ee": 30,
-        "gh_5ae65db96cb7": 30,
-        "gh_be8c29139989": 30,
-        "gh_51e4ad40466d": 30,
-        "gh_d4dffc34ac39": 30,
-        "gh_89ef4798d3ea": 30,
-        "gh_b15de7c99912": 30,
-        "gh_9f8dc5b0c74e": 30,
-        "gh_7b4a5f86d68c": 30,
-        "gh_c5cdf60d9ab4": 5,
-        "gh_0c89e11f8bf3": 5,
-        "gh_e0eb490115f5": 5,
-        "gh_a2901d34f75b": 5,
-        "gh_d5f935d0d1f2": 30
-    }
-    pq_mysql_client = PQMySQL()
-    Spider = WeixinSpider()
-
-    @classmethod
-    def generate_account_dict(cls):
-        """
-        生成account_list
-        :return:
-        """
-        account_dict = {}
-        for key in accountBaseInfo:
-            account_name = accountBaseInfo[key]['accountName']
-            account_gh_id = accountBaseInfo[key]['ghId']
-            account_dict[account_name] = account_gh_id
-        return account_dict
-
-    @classmethod
-    def findArticleScoreList(cls, url_md5):
-        """
-        获取文章的相关账号的相关性分数
-        :param url_md5:
-        :return:
-        """
-        sql = f"""
-        select account_score, ori_account from association_articles where url_md5 = '{url_md5}';
-        """
-        response = cls.pq_mysql_client.select(sql=sql)
-        return response
-
-    @classmethod
-    def association_split(cls, article_list):
-        """
-        联想类型文章分发逻辑
-        :param article_list:
-        :return:
-        """
-        account_name_map = cls.generate_account_dict()
-        L = {}
-        for article in tqdm(article_list):
-            link = article['url']
-            url_md5 = article['url_md5']
-            title = article['title']
-            c_id = article['id']
-            title_match_list = cls.findArticleScoreList(url_md5)
-            title_match_list = sorted(title_match_list, key=lambda x: x[0], reverse=True)
-            # print("标题:\t", title)
-            # print("相关账号:\t", title_match_list)
-            # print("\n")
-            for account_tuple in title_match_list:
-                account_name = account_tuple[1]
-                score = account_tuple[0]
-                account_gh_id = account_name_map[account_name]
-                if cls.account_position_dict.get(account_gh_id):
-                    try:
-                        # channel_content_id = cls.Spider.get_article_text(link)['data']['data']['channel_content_id']
-                        channel_content_id = c_id
-                    except:
-                        print(link)
-                        channel_content_id = url_md5
-                    # channel_content_id = "id"
-                    if cls.account_position_dict[account_gh_id] > 0:
-                        if L.get(account_gh_id):
-                            if len(L[account_gh_id]) >= 10:
-                                continue
-                            else:
-                                L[account_gh_id].append([channel_content_id, score])
-                        else:
-                            L[account_gh_id] = [[channel_content_id, score]]
-                        cls.account_position_dict[account_gh_id] -= 1
-                    else:
-                        continue
-        for account in tqdm(L):
-            date_str = datetime.datetime.today().strftime("%Y-%m-%d")
-            print(account, date_str, json.dumps(L[account], ensure_ascii=False))
-            insert_sql = f"""
-            INSERT INTO article_pre_distribute_account
-            (gh_id, date, article_list)
-            VALUES
-            (%s, %s, %s);
-            """
-            try:
-                PQMySQL.update(sql=insert_sql, params=(account, date_str, json.dumps(L[account], ensure_ascii=False)))
-            except Exception as e:
-                print("插入出现问题----{}".format(e))
-        return L
-
-
-
-
-

+ 0 - 102
stratrgy/strategy.py

@@ -1,102 +0,0 @@
-"""
-@author: luojunhui
-"""
-import json
-
-from tqdm import tqdm
-
-from applications.functions import Functions
-from config import accountBaseInfo, pool_level_detail
-
-
-class ArticlePoolStrategy(object):
-    """
-    长文策略池
-    """
-    Fun = Functions()
-
-    @classmethod
-    def getData(cls, article_list):
-        """
-        :param article_list: 每天召回的文章list
-        """
-        detail_list = []
-        print("查询文章url......")
-        id_tuple = [i['id'] for i in article_list]
-        detail_dict = cls.Fun.matchLinkByIdTuple(channel_id_tuple=tuple(id_tuple))
-        for i in tqdm(article_list):
-            content_id = i['id']
-            i['gh_key'] = detail_dict[content_id]['gh_key']
-            i['url'] = detail_dict[content_id]['url']
-            i['title'] = detail_dict[content_id]['title']
-            detail_list.append(i)
-        print("查询完成, 开始排序")
-        return detail_list
-
-    @classmethod
-    def splitByStrategy(cls, detail_list):
-        """
-        账号-位置-阅读倍数
-        :return:
-        """
-        L = []
-        for line in detail_list:
-            key = line['gh_key']
-            article_read = line['read_count']
-            if accountBaseInfo.get(key):
-                avg_read = accountBaseInfo[key]['readAvg']
-                # 计算比率
-                level_rate = article_read / avg_read - 1
-                obj = {
-                    "key": key,
-                    "avg_read": avg_read,
-                    "article_read": article_read,
-                    "level_rate": level_rate,
-                    "url": line['url'],
-                    "title": line['title']
-                }
-                L.append(obj)
-        L = sorted(L, key=lambda x: x["level_rate"], reverse=True)
-        result = {
-            "Level1": [],
-            "Level2": []
-        }
-        c1 = 0
-        c2 = 0
-        for line in L:
-            gh_key = line['key']
-            if pool_level_detail.get(gh_key):
-                now_level = pool_level_detail[gh_key]
-            else:
-                now_level = "3"
-            match now_level:
-                case "2":
-                    # 往1层升
-                    if line['level_rate'] > 0.1 and line['avg_read'] >= 1000:
-                        now_title_list = [i['title'] for i in result['Level1']]
-                        if cls.Fun.TitleSimilarity(now_title_list, line['title']):
-                            continue
-                        else:
-                            result['Level1'].append(line)
-                case "3":
-                    if line['level_rate'] > 0.1 and line['avg_read'] >= 100:
-                        now_title_list = [i['title'] for i in result['Level2']]
-                        if cls.Fun.TitleSimilarity(now_title_list, line['title']):
-                            continue
-                        else:
-                            result['Level2'].append(line)
-                case "1":
-                    continue
-                    # if line['level_rate'] > 1.0:
-                    #     now_title_list = [i['title'] for i in result['Level1']]
-                    #     if cls.Fun.TitleSimilarity(now_title_list, line['title']):
-                    #         continue
-                    #     else:
-                    #         result['Level1'].append(line)
-        return result
-
-
-
-
-
-