distribution.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. """
  2. @author: luojunhui
  3. 分发逻辑
  4. """
  5. import json
  6. import datetime
  7. from applications import PQMySQL, WeixinSpider
  8. from tqdm import tqdm
  9. from config import accountBaseInfo
class ArticleDistribution(object):
    """
    Cold-start article distribution logic.

    The methods visible in this file are all classmethods that operate on
    the class-level attributes below.
    """
    # Maps an account gh_id to a per-account article quota.
    # association_split() skips accounts whose value is missing or not
    # positive and decrements the value in place when it assigns an article,
    # so this dict is shared, mutable state for the lifetime of the process.
    account_position_dict = {
        "gh_058e41145a0c": 30,
        "gh_0e4fd9e88386": 30,
        "gh_744cb16f6e16": 30,
        "gh_ac43eb24376d": 30,
        "gh_970460d9ccec": 30,
        "gh_56ca3dae948c": 30,
        "gh_c91b42649690": 30,
        "gh_6d205db62f04": 30,
        "gh_e24da99dc899": 30,
        "gh_4c058673c07e": 30,
        "gh_03d32e83122f": 30,
        "gh_c69776baf2cd": 30,
        "gh_30816d8adb52": 30,
        "gh_789a40fe7935": 30,
        "gh_95ed5ecf9363": 30,
        "gh_3e91f0624545": 30,
        "gh_57573f01b2ee": 30,
        "gh_9877c8541764": 30,
        "gh_6cfd1132df94": 30,
        "gh_008ef23062ee": 30,
        "gh_5ae65db96cb7": 30,
        "gh_be8c29139989": 30,
        "gh_51e4ad40466d": 30,
        "gh_d4dffc34ac39": 30,
        "gh_89ef4798d3ea": 30,
        "gh_b15de7c99912": 30,
        "gh_9f8dc5b0c74e": 30,
        "gh_7b4a5f86d68c": 30,
        "gh_c5cdf60d9ab4": 5,
        "gh_0c89e11f8bf3": 5,
        "gh_e0eb490115f5": 5,
        "gh_a2901d34f75b": 5,
        "gh_d5f935d0d1f2": 30
    }
    # Shared MySQL client, constructed once when the class body is executed
    # (i.e. when this module is imported).
    pq_mysql_client = PQMySQL()
    # Shared WeixinSpider instance, also constructed when the class body is
    # executed. In the visible code it is referenced only from a
    # commented-out line in association_split().
    Spider = WeixinSpider()
  51. @classmethod
  52. def generate_account_dict(cls):
  53. """
  54. 生成account_list
  55. :return:
  56. """
  57. account_dict = {}
  58. for key in accountBaseInfo:
  59. account_name = accountBaseInfo[key]['accountName']
  60. account_gh_id = accountBaseInfo[key]['ghId']
  61. account_dict[account_name] = account_gh_id
  62. return account_dict
  63. @classmethod
  64. def findArticleScoreList(cls, url_md5):
  65. """
  66. 获取文章的相关账号的相关性分数
  67. :param url_md5:
  68. :return:
  69. """
  70. sql = f"""
  71. select account_score, ori_account from association_articles where url_md5 = '{url_md5}';
  72. """
  73. response = cls.pq_mysql_client.select(sql=sql)
  74. return response
  75. @classmethod
  76. def association_split(cls, article_list):
  77. """
  78. 联想类型文章分发逻辑
  79. :param article_list:
  80. :return:
  81. """
  82. account_name_map = cls.generate_account_dict()
  83. L = {}
  84. for article in tqdm(article_list):
  85. link = article['url']
  86. url_md5 = article['url_md5']
  87. title = article['title']
  88. c_id = article['id']
  89. title_match_list = cls.findArticleScoreList(url_md5)
  90. title_match_list = sorted(title_match_list, key=lambda x: x[0], reverse=True)
  91. # print("标题:\t", title)
  92. # print("相关账号:\t", title_match_list)
  93. # print("\n")
  94. for account_tuple in title_match_list:
  95. account_name = account_tuple[1]
  96. score = account_tuple[0]
  97. account_gh_id = account_name_map[account_name]
  98. if cls.account_position_dict.get(account_gh_id):
  99. try:
  100. # channel_content_id = cls.Spider.get_article_text(link)['data']['data']['channel_content_id']
  101. channel_content_id = c_id
  102. except:
  103. print(link)
  104. channel_content_id = url_md5
  105. # channel_content_id = "id"
  106. if cls.account_position_dict[account_gh_id] > 0:
  107. if L.get(account_gh_id):
  108. if len(L[account_gh_id]) >= 10:
  109. continue
  110. else:
  111. L[account_gh_id].append([channel_content_id, score])
  112. else:
  113. L[account_gh_id] = [[channel_content_id, score]]
  114. cls.account_position_dict[account_gh_id] -= 1
  115. else:
  116. continue
  117. for account in tqdm(L):
  118. date_str = datetime.datetime.today().strftime("%Y-%m-%d")
  119. print(account, date_str, json.dumps(L[account], ensure_ascii=False))
  120. insert_sql = f"""
  121. INSERT INTO article_pre_distribute_account
  122. (gh_id, date, article_list)
  123. VALUES
  124. (%s, %s, %s);
  125. """
  126. try:
  127. PQMySQL.update(sql=insert_sql, params=(account, date_str, json.dumps(L[account], ensure_ascii=False)))
  128. except Exception as e:
  129. print("插入出现问题----{}".format(e))
  130. return L