|
@@ -1,140 +0,0 @@
|
|
|
-"""
|
|
|
-@author: luojunhui
|
|
|
-分发逻辑
|
|
|
-"""
|
|
|
-import json
|
|
|
-import datetime
|
|
|
-from applications import PQMySQL, WeixinSpider
|
|
|
-from tqdm import tqdm
|
|
|
-from config import accountBaseInfo
|
|
|
-
|
|
|
-
|
|
|
class ArticleDistribution(object):
    """
    Cold-start article distribution logic.

    Reads per-article account relevance scores from ``association_articles``
    and writes the per-account selections to
    ``article_pre_distribute_account``.
    """
    # Remaining number of articles each account (keyed by gh_id) may still be
    # given. ``association_split`` decrements these counters in place, so the
    # quota is shared by every call made on this class within one process.
    account_position_dict = {
        "gh_058e41145a0c": 30,
        "gh_0e4fd9e88386": 30,
        "gh_744cb16f6e16": 30,
        "gh_ac43eb24376d": 30,
        "gh_970460d9ccec": 30,
        "gh_56ca3dae948c": 30,
        "gh_c91b42649690": 30,
        "gh_6d205db62f04": 30,
        "gh_e24da99dc899": 30,
        "gh_4c058673c07e": 30,
        "gh_03d32e83122f": 30,
        "gh_c69776baf2cd": 30,
        "gh_30816d8adb52": 30,
        "gh_789a40fe7935": 30,
        "gh_95ed5ecf9363": 30,
        "gh_3e91f0624545": 30,
        "gh_57573f01b2ee": 30,
        "gh_9877c8541764": 30,
        "gh_6cfd1132df94": 30,
        "gh_008ef23062ee": 30,
        "gh_5ae65db96cb7": 30,
        "gh_be8c29139989": 30,
        "gh_51e4ad40466d": 30,
        "gh_d4dffc34ac39": 30,
        "gh_89ef4798d3ea": 30,
        "gh_b15de7c99912": 30,
        "gh_9f8dc5b0c74e": 30,
        "gh_7b4a5f86d68c": 30,
        "gh_c5cdf60d9ab4": 5,
        "gh_0c89e11f8bf3": 5,
        "gh_e0eb490115f5": 5,
        "gh_a2901d34f75b": 5,
        "gh_d5f935d0d1f2": 30
    }
    # Upper bound on the number of articles collected for one account by a
    # single ``association_split`` call.
    max_articles_per_account = 10
    pq_mysql_client = PQMySQL()
    Spider = WeixinSpider()

    @classmethod
    def generate_account_dict(cls):
        """
        Build the account-name -> gh_id lookup from ``accountBaseInfo``.

        :return: dict mapping each entry's ``accountName`` to its ``ghId``
        """
        account_dict = {}
        for key in accountBaseInfo:
            info = accountBaseInfo[key]
            account_dict[info['accountName']] = info['ghId']
        return account_dict

    @classmethod
    def findArticleScoreList(cls, url_md5):
        """
        Fetch the relevance scores of the accounts associated with an article.

        :param url_md5: value matched against ``association_articles.url_md5``
        :return: whatever ``pq_mysql_client.select`` returns; the caller
                 indexes each row as ``(account_score, ori_account)``
        """
        # NOTE(review): url_md5 is interpolated straight into the SQL text.
        # If it can ever come from untrusted input this is injectable; switch
        # to a parameterized query once the signature of ``select`` is
        # confirmed to accept query parameters.
        sql = f"""
            select account_score, ori_account from association_articles where url_md5 = '{url_md5}';
        """
        return cls.pq_mysql_client.select(sql=sql)

    @classmethod
    def association_split(cls, article_list):
        """
        Distribute association-type articles to accounts.

        For every article the matching accounts are visited from the highest
        score to the lowest. An account receives the article when it still
        has quota in ``account_position_dict`` and has collected fewer than
        ``max_articles_per_account`` articles in this call. Each account's
        selection is then inserted into ``article_pre_distribute_account``.

        :param article_list: iterable of dicts providing at least the keys
                             ``url_md5`` and ``id``
        :return: dict mapping gh_id to a list of ``[article id, score]`` pairs
        """
        account_name_map = cls.generate_account_dict()
        quota = cls.account_position_dict
        L = {}
        for article in tqdm(article_list):
            channel_content_id = article['id']
            title_match_list = sorted(
                cls.findArticleScoreList(article['url_md5']),
                key=lambda x: x[0],
                reverse=True
            )
            for account_tuple in title_match_list:
                score = account_tuple[0]
                # Accounts that are missing from the config, or that have no
                # quota entry, are skipped instead of aborting the whole run.
                account_gh_id = account_name_map.get(account_tuple[1])
                if quota.get(account_gh_id, 0) <= 0:
                    continue
                selected = L.setdefault(account_gh_id, [])
                if len(selected) >= cls.max_articles_per_account:
                    continue
                selected.append([channel_content_id, score])
                # NOTE(review): the quota is assumed to be consumed once per
                # assigned article - confirm against the intended rule.
                quota[account_gh_id] -= 1

        date_str = datetime.datetime.today().strftime("%Y-%m-%d")
        insert_sql = """
            INSERT INTO article_pre_distribute_account
            (gh_id, date, article_list)
            VALUES
            (%s, %s, %s);
        """
        for account in tqdm(L):
            article_json = json.dumps(L[account], ensure_ascii=False)
            print(account, date_str, article_json)
            try:
                # Write through the shared client instance (the same object
                # used for reads) rather than through the PQMySQL class.
                cls.pq_mysql_client.update(
                    sql=insert_sql,
                    params=(account, date_str, article_json)
                )
            except Exception as e:
                # Best effort: one failed insert must not stop the others.
                print("插入出现问题----{}".format(e))
        return L
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|