article_exit_with_title.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. """
  2. @author: luojunhui
  3. """
  4. import traceback
  5. import pandas as pd
  6. from applications import PQMySQL, longArticlesMySQL, bot, log
  7. from applications.aiditApi import get_generated_article_list
  8. def get_level_up_articles() -> set:
  9. """
  10. :return:
  11. """
  12. pool_level2 = "20240804003153130851174"
  13. pool_level1 = "20240802171417146947657"
  14. pool_level0 = "20240802143345289374071"
  15. pool_level2_result = get_generated_article_list(pool_level2)
  16. title_list_2 = [i[1] for i in pool_level2_result]
  17. pool_level1_result = get_generated_article_list(pool_level1)
  18. title_list_1 = [i[1] for i in pool_level1_result]
  19. pool_level0_result = get_generated_article_list(pool_level0)
  20. title_list_0 = [i[1] for i in pool_level0_result]
  21. title_list = title_list_1 + title_list_0 + title_list_2
  22. good_title_set = set(title_list)
  23. return good_title_set
  24. class ArticleExitWithTitle(object):
  25. """
  26. 文章退场表格维护
  27. """
  28. def __init__(self):
  29. self.INIT_STATUS = 0
  30. self.pq_client = None
  31. self.lam_client = None
  32. def init_database(self) -> bool:
  33. """
  34. 初始化数据库
  35. :return:
  36. """
  37. try:
  38. self.pq_client = PQMySQL()
  39. except Exception as e:
  40. bot(
  41. title="文章退场管理任务,数据库连接失败",
  42. detail={
  43. "e": str(e),
  44. "error_msg": traceback.format_exc(),
  45. "server": "old server"
  46. }
  47. )
  48. return False
  49. try:
  50. self.lam_client = longArticlesMySQL()
  51. except Exception as e:
  52. bot(
  53. title="文章退场管理任务,数据库连接失败",
  54. detail={
  55. "e": str(e),
  56. "error_msg": traceback.format_exc(),
  57. "server": "new server"
  58. }
  59. )
  60. return True
  61. def bad_article_manager(self, read_times_on_avg_threshold, discovery_times_threshold) -> list[str]:
  62. """
  63. 找出质量很差的文章标题,将该标题设置为退场状态
  64. :return:
  65. """
  66. sql = f"""
  67. SELECT
  68. title, max(read_rate) as max_rate, count(1) as title_count
  69. FROM
  70. datastat_sort_strategy
  71. WHERE position > 2 and fans > 10000
  72. GROUP BY title
  73. HAVING title_count >= {discovery_times_threshold} and max_rate < {read_times_on_avg_threshold};
  74. """
  75. articles = self.lam_client.select(sql)
  76. return [i[0] for i in articles]
  77. def record_title_list(self, title_list, status) -> int:
  78. """
  79. 修改标题状态
  80. :param status:
  81. :param title_list:
  82. :return: None
  83. """
  84. fail_list = []
  85. insert_count = 0
  86. for title in title_list:
  87. insert_sql = f"""
  88. INSERT INTO cold_start_title_pool
  89. (title, status)
  90. values
  91. (%s, %s)
  92. """
  93. try:
  94. self.lam_client.update(
  95. sql=insert_sql,
  96. params=(title, status)
  97. )
  98. insert_count += 1
  99. except Exception as e:
  100. update_sql = f"""
  101. UPDATE cold_start_title_pool
  102. SET status = %s
  103. where title = %s and status = %s;
  104. """
  105. try:
  106. self.lam_client.update(
  107. sql=update_sql,
  108. params=(status, title, self.INIT_STATUS)
  109. )
  110. except Exception as e:
  111. error_msg = traceback.format_exc()
  112. log(
  113. task="article_exit_with_title",
  114. function="record_title_list",
  115. status="fail",
  116. data={
  117. "e": str(e),
  118. "error_msg": error_msg,
  119. }
  120. )
  121. fail_list.append(title)
  122. if fail_list:
  123. bot(
  124. title="冷启动文章标题退场,sql操作失败",
  125. detail=fail_list
  126. )
  127. return -1
  128. else:
  129. return insert_count
  130. def main():
  131. """
  132. main function
  133. :return:
  134. """
  135. UP_LEVEL_STATUS = 1
  136. ARTICLE_EXIT_STATUS = -1
  137. READ_TIMES_ON_AVG_THRESHOLD = 0.5
  138. DISCOVERY_TIMES_THRESHOLD = 10
  139. article_title_manager = ArticleExitWithTitle()
  140. article_title_manager.init_database()
  141. # 处理晋级标题
  142. up_level_title = get_level_up_articles()
  143. up_level_success_count = article_title_manager.record_title_list(title_list=up_level_title, status=UP_LEVEL_STATUS)
  144. # up_level_success_count = 0
  145. # 处理退场标题
  146. exit_article_list = article_title_manager.bad_article_manager(
  147. read_times_on_avg_threshold=READ_TIMES_ON_AVG_THRESHOLD,
  148. discovery_times_threshold=DISCOVERY_TIMES_THRESHOLD
  149. )
  150. exit_success_count = article_title_manager.record_title_list(title_list=exit_article_list, status=ARTICLE_EXIT_STATUS)
  151. if exit_success_count >= 0 and up_level_success_count >= 0:
  152. bot(
  153. title="冷启动文章晋级, 退场完成",
  154. detail={
  155. "已经晋级文章数量": up_level_success_count,
  156. "已经退场文章数控": exit_success_count,
  157. "阅读均值倍数阈值": READ_TIMES_ON_AVG_THRESHOLD,
  158. "探索次数阈值": DISCOVERY_TIMES_THRESHOLD
  159. },
  160. mention=False
  161. )
  162. if __name__ == '__main__':
  163. main()