8 månader sedan · 269cc0bdfa
--- a/flow_pool/__init__.py
+++ b/flow_pool/__init__.py
@@ -0,0 +1,3 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
--- a/flow_pool/upLevel.py
+++ b/flow_pool/upLevel.py
--- a/stratrgy/__init__.py
+++ b/stratrgy/__init__.py
@@ -1,4 +0,0 @@
 
				-"""
			
 
				-@author: luojunhui
			
 
				-"""
			
 
				-from .strategy import ArticlePoolStrategy
			
--- a/stratrgy/distribution.py
+++ b/stratrgy/distribution.py
@@ -1,140 +0,0 @@
 
				-"""
			
 
				-@author: luojunhui
			
 
				-分发逻辑
			
 
				-"""
			
 
				-import json
			
 
				-import datetime
			
 
				-from applications import PQMySQL, WeixinSpider
			
 
				-from tqdm import tqdm
			
 
				-from config import accountBaseInfo
			
 
				-
			
 
				-
			
 
				-class ArticleDistribution(object):
			
 
				-    """
			
 
				-    冷启文章分发逻辑
			
 
				-    """
			
 
				-    account_position_dict = {
			
 
				-        "gh_058e41145a0c": 30,
			
 
				-        "gh_0e4fd9e88386": 30,
			
 
				-        "gh_744cb16f6e16": 30,
			
 
				-        "gh_ac43eb24376d": 30,
			
 
				-        "gh_970460d9ccec": 30,
			
 
				-        "gh_56ca3dae948c": 30,
			
 
				-        "gh_c91b42649690": 30,
			
 
				-        "gh_6d205db62f04": 30,
			
 
				-        "gh_e24da99dc899": 30,
			
 
				-        "gh_4c058673c07e": 30,
			
 
				-        "gh_03d32e83122f": 30,
			
 
				-        "gh_c69776baf2cd": 30,
			
 
				-        "gh_30816d8adb52": 30,
			
 
				-        "gh_789a40fe7935": 30,
			
 
				-        "gh_95ed5ecf9363": 30,
			
 
				-        "gh_3e91f0624545": 30,
			
 
				-        "gh_57573f01b2ee": 30,
			
 
				-        "gh_9877c8541764": 30,
			
 
				-        "gh_6cfd1132df94": 30,
			
 
				-        "gh_008ef23062ee": 30,
			
 
				-        "gh_5ae65db96cb7": 30,
			
 
				-        "gh_be8c29139989": 30,
			
 
				-        "gh_51e4ad40466d": 30,
			
 
				-        "gh_d4dffc34ac39": 30,
			
 
				-        "gh_89ef4798d3ea": 30,
			
 
				-        "gh_b15de7c99912": 30,
			
 
				-        "gh_9f8dc5b0c74e": 30,
			
 
				-        "gh_7b4a5f86d68c": 30,
			
 
				-        "gh_c5cdf60d9ab4": 5,
			
 
				-        "gh_0c89e11f8bf3": 5,
			
 
				-        "gh_e0eb490115f5": 5,
			
 
				-        "gh_a2901d34f75b": 5,
			
 
				-        "gh_d5f935d0d1f2": 30
			
 
				-    }
			
 
				-    pq_mysql_client = PQMySQL()
			
 
				-    Spider = WeixinSpider()
			
 
				-
			
 
				-    @classmethod
			
 
				-    def generate_account_dict(cls):
			
 
				-        """
			
 
				-        生成account_list
			
 
				-        :return:
			
 
				-        """
			
 
				-        account_dict = {}
			
 
				-        for key in accountBaseInfo:
			
 
				-            account_name = accountBaseInfo[key]['accountName']
			
 
				-            account_gh_id = accountBaseInfo[key]['ghId']
			
 
				-            account_dict[account_name] = account_gh_id
			
 
				-        return account_dict
			
 
				-
			
 
				-    @classmethod
			
 
				-    def findArticleScoreList(cls, url_md5):
			
 
				-        """
			
 
				-        获取文章的相关账号的相关性分数
			
 
				-        :param url_md5:
			
 
				-        :return:
			
 
				-        """
			
 
				-        sql = f"""
			
 
				-        select account_score, ori_account from association_articles where url_md5 = '{url_md5}';
			
 
				-        """
			
 
				-        response = cls.pq_mysql_client.select(sql=sql)
			
 
				-        return response
			
 
				-
			
 
				-    @classmethod
			
 
				-    def association_split(cls, article_list):
			
 
				-        """
			
 
				-        联想类型文章分发逻辑
			
 
				-        :param article_list:
			
 
				-        :return:
			
 
				-        """
			
 
				-        account_name_map = cls.generate_account_dict()
			
 
				-        L = {}
			
 
				-        for article in tqdm(article_list):
			
 
				-            link = article['url']
			
 
				-            url_md5 = article['url_md5']
			
 
				-            title = article['title']
			
 
				-            c_id = article['id']
			
 
				-            title_match_list = cls.findArticleScoreList(url_md5)
			
 
				-            title_match_list = sorted(title_match_list, key=lambda x: x[0], reverse=True)
			
 
				-            # print("标题:\t", title)
			
 
				-            # print("相关账号:\t", title_match_list)
			
 
				-            # print("\n")
			
 
				-            for account_tuple in title_match_list:
			
 
				-                account_name = account_tuple[1]
			
 
				-                score = account_tuple[0]
			
 
				-                account_gh_id = account_name_map[account_name]
			
 
				-                if cls.account_position_dict.get(account_gh_id):
			
 
				-                    try:
			
 
				-                        # channel_content_id = cls.Spider.get_article_text(link)['data']['data']['channel_content_id']
			
 
				-                        channel_content_id = c_id
			
 
				-                    except:
			
 
				-                        print(link)
			
 
				-                        channel_content_id = url_md5
			
 
				-                    # channel_content_id = "id"
			
 
				-                    if cls.account_position_dict[account_gh_id] > 0:
			
 
				-                        if L.get(account_gh_id):
			
 
				-                            if len(L[account_gh_id]) >= 10:
			
 
				-                                continue
			
 
				-                            else:
			
 
				-                                L[account_gh_id].append([channel_content_id, score])
			
 
				-                        else:
			
 
				-                            L[account_gh_id] = [[channel_content_id, score]]
			
 
				-                        cls.account_position_dict[account_gh_id] -= 1
			
 
				-                    else:
			
 
				-                        continue
			
 
				-        for account in tqdm(L):
			
 
				-            date_str = datetime.datetime.today().strftime("%Y-%m-%d")
			
 
				-            print(account, date_str, json.dumps(L[account], ensure_ascii=False))
			
 
				-            insert_sql = f"""
			
 
				-            INSERT INTO article_pre_distribute_account
			
 
				-            (gh_id, date, article_list)
			
 
				-            VALUES
			
 
				-            (%s, %s, %s);
			
 
				-            """
			
 
				-            try:
			
 
				-                PQMySQL.update(sql=insert_sql, params=(account, date_str, json.dumps(L[account], ensure_ascii=False)))
			
 
				-            except Exception as e:
			
 
				-                print("插入出现问题----{}".format(e))
			
 
				-        return L
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
--- a/stratrgy/strategy.py
+++ b/stratrgy/strategy.py
@@ -1,102 +0,0 @@
 
				-"""
			
 
				-@author: luojunhui
			
 
				-"""
			
 
				-import json
			
 
				-
			
 
				-from tqdm import tqdm
			
 
				-
			
 
				-from applications.functions import Functions
			
 
				-from config import accountBaseInfo, pool_level_detail
			
 
				-
			
 
				-
			
 
				-class ArticlePoolStrategy(object):
			
 
				-    """
			
 
				-    长文策略池
			
 
				-    """
			
 
				-    Fun = Functions()
			
 
				-
			
 
				-    @classmethod
			
 
				-    def getData(cls, article_list):
			
 
				-        """
			
 
				-        :param article_list: 每天召回的文章list
			
 
				-        """
			
 
				-        detail_list = []
			
 
				-        print("查询文章url......")
			
 
				-        id_tuple = [i['id'] for i in article_list]
			
 
				-        detail_dict = cls.Fun.matchLinkByIdTuple(channel_id_tuple=tuple(id_tuple))
			
 
				-        for i in tqdm(article_list):
			
 
				-            content_id = i['id']
			
 
				-            i['gh_key'] = detail_dict[content_id]['gh_key']
			
 
				-            i['url'] = detail_dict[content_id]['url']
			
 
				-            i['title'] = detail_dict[content_id]['title']
			
 
				-            detail_list.append(i)
			
 
				-        print("查询完成, 开始排序")
			
 
				-        return detail_list
			
 
				-
			
 
				-    @classmethod
			
 
				-    def splitByStrategy(cls, detail_list):
			
 
				-        """
			
 
				-        账号-位置-阅读倍数
			
 
				-        :return:
			
 
				-        """
			
 
				-        L = []
			
 
				-        for line in detail_list:
			
 
				-            key = line['gh_key']
			
 
				-            article_read = line['read_count']
			
 
				-            if accountBaseInfo.get(key):
			
 
				-                avg_read = accountBaseInfo[key]['readAvg']
			
 
				-                # 计算比率
			
 
				-                level_rate = article_read / avg_read - 1
			
 
				-                obj = {
			
 
				-                    "key": key,
			
 
				-                    "avg_read": avg_read,
			
 
				-                    "article_read": article_read,
			
 
				-                    "level_rate": level_rate,
			
 
				-                    "url": line['url'],
			
 
				-                    "title": line['title']
			
 
				-                }
			
 
				-                L.append(obj)
			
 
				-        L = sorted(L, key=lambda x: x["level_rate"], reverse=True)
			
 
				-        result = {
			
 
				-            "Level1": [],
			
 
				-            "Level2": []
			
 
				-        }
			
 
				-        c1 = 0
			
 
				-        c2 = 0
			
 
				-        for line in L:
			
 
				-            gh_key = line['key']
			
 
				-            if pool_level_detail.get(gh_key):
			
 
				-                now_level = pool_level_detail[gh_key]
			
 
				-            else:
			
 
				-                now_level = "3"
			
 
				-            match now_level:
			
 
				-                case "2":
			
 
				-                    # 往1层升
			
 
				-                    if line['level_rate'] > 0.1 and line['avg_read'] >= 1000:
			
 
				-                        now_title_list = [i['title'] for i in result['Level1']]
			
 
				-                        if cls.Fun.TitleSimilarity(now_title_list, line['title']):
			
 
				-                            continue
			
 
				-                        else:
			
 
				-                            result['Level1'].append(line)
			
 
				-                case "3":
			
 
				-                    if line['level_rate'] > 0.1 and line['avg_read'] >= 100:
			
 
				-                        now_title_list = [i['title'] for i in result['Level2']]
			
 
				-                        if cls.Fun.TitleSimilarity(now_title_list, line['title']):
			
 
				-                            continue
			
 
				-                        else:
			
 
				-                            result['Level2'].append(line)
			
 
				-                case "1":
			
 
				-                    continue
			
 
				-                    # if line['level_rate'] > 1.0:
			
 
				-                    #     now_title_list = [i['title'] for i in result['Level1']]
			
 
				-                    #     if cls.Fun.TitleSimilarity(now_title_list, line['title']):
			
 
				-                    #         continue
			
 
				-                    #     else:
			
 
				-                    #         result['Level1'].append(line)
			
 
				-        return result
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-