article_exit_with_title.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. """
  2. @author: luojunhui
  3. """
  4. import traceback
  5. import pandas as pd
  6. from applications import PQMySQL, longArticlesMySQL, bot, log
  7. from applications.aiditApi import get_generated_article_list
  8. def get_level_up_articles() -> set:
  9. """
  10. :return:
  11. """
  12. pool_level2 = "20240804003153130851174"
  13. pool_level1 = "20240802171417146947657"
  14. pool_level0 = "20240802143345289374071"
  15. pool_level2_result = get_generated_article_list(pool_level2)
  16. title_list_2 = [i[1] for i in pool_level2_result]
  17. pool_level1_result = get_generated_article_list(pool_level1)
  18. title_list_1 = [i[1] for i in pool_level1_result]
  19. pool_level0_result = get_generated_article_list(pool_level0)
  20. title_list_0 = [i[1] for i in pool_level0_result]
  21. title_list = title_list_1 + title_list_0 + title_list_2
  22. good_title_set = set(title_list)
  23. return good_title_set
  24. class ArticleExitWithTitle(object):
  25. """
  26. 文章退场表格维护
  27. """
  28. def __init__(self):
  29. self.INIT_STATUS = 0
  30. self.pq_client = None
  31. self.lam_client = None
  32. def init_database(self) -> bool:
  33. """
  34. 初始化数据库
  35. :return:
  36. """
  37. try:
  38. self.pq_client = PQMySQL()
  39. except Exception as e:
  40. bot(
  41. title="文章退场管理任务,数据库连接失败",
  42. detail={
  43. "e": str(e),
  44. "error_msg": traceback.format_exc(),
  45. "server": "old server"
  46. }
  47. )
  48. return False
  49. try:
  50. self.lam_client = longArticlesMySQL()
  51. except Exception as e:
  52. bot(
  53. title="文章退场管理任务,数据库连接失败",
  54. detail={
  55. "e": str(e),
  56. "error_msg": traceback.format_exc(),
  57. "server": "new server"
  58. }
  59. )
  60. return True
  61. def get_discovery_published_articles(self) -> pd.DataFrame:
  62. """
  63. :return:
  64. """
  65. sql = f"""
  66. SELECT
  67. title, max(read_rate), count(1) as title_count
  68. FROM
  69. datastat_sort_strategy
  70. WHERE position > 2 and fans > 10000
  71. GROUP BY title;
  72. """
  73. articles = self.pq_client.select(sql)
  74. article_df = pd.DataFrame(articles, columns=['title', 'max_read_times_on_avg', 'articles_count'])
  75. return article_df
  76. def bad_article_manager(self, read_times_on_avg_threshold, discovery_times_threshold) -> list[str]:
  77. """
  78. 找出质量很差的文章标题,将该标题设置为退场状态
  79. :return:
  80. """
  81. discovery_published_articles_df = self.get_discovery_published_articles()
  82. target_bad_dataframe = discovery_published_articles_df[
  83. (discovery_published_articles_df['max_read_times_on_avg'] < read_times_on_avg_threshold)
  84. & (discovery_published_articles_df['articles_count'] < discovery_times_threshold)
  85. ]
  86. target_bad_title_list = target_bad_dataframe['title'].tolist()
  87. return target_bad_title_list
  88. def record_title_list(self, title_list, status) -> int:
  89. """
  90. 修改标题状态
  91. :param status:
  92. :param title_list:
  93. :return: None
  94. """
  95. fail_list = []
  96. insert_count = 0
  97. for title in title_list:
  98. insert_sql = f"""
  99. INSERT INTO cold_start_title_pool
  100. (title, status)
  101. values
  102. (%s, %s)
  103. """
  104. try:
  105. self.lam_client.update(
  106. sql=insert_sql,
  107. params=(title, status)
  108. )
  109. insert_count += 1
  110. except Exception as e:
  111. update_sql = f"""
  112. UPDATE cold_start_title_pool
  113. SET status = %s
  114. where title = %s and status = %s;
  115. """
  116. try:
  117. self.lam_client.update(
  118. sql=update_sql,
  119. params=(status, title, self.INIT_STATUS)
  120. )
  121. except Exception as e:
  122. error_msg = traceback.format_exc()
  123. log(
  124. task="article_exit_with_title",
  125. function="record_title_list",
  126. status="fail",
  127. data={
  128. "e": str(e),
  129. "error_msg": error_msg,
  130. }
  131. )
  132. fail_list.append(title)
  133. if fail_list:
  134. bot(
  135. title="冷启动文章标题退场,sql操作失败",
  136. detail=fail_list
  137. )
  138. return -1
  139. else:
  140. return insert_count
  141. def main():
  142. """
  143. main function
  144. :return:
  145. """
  146. UP_LEVEL_STATUS = 1
  147. ARTICLE_EXIT_STATUS = -1
  148. READ_TIMES_ON_AVG_THRESHOLD = 0.5
  149. DISCOVERY_TIMES_THRESHOLD = 3
  150. article_title_manager = ArticleExitWithTitle()
  151. article_title_manager.init_database()
  152. # 处理晋级标题
  153. up_level_title = get_level_up_articles()
  154. up_level_success_count = article_title_manager.record_title_list(title_list=up_level_title, status=UP_LEVEL_STATUS)
  155. # 处理退场标题
  156. exit_article_list = article_title_manager.bad_article_manager(
  157. read_times_on_avg_threshold=READ_TIMES_ON_AVG_THRESHOLD,
  158. discovery_times_threshold=DISCOVERY_TIMES_THRESHOLD
  159. )
  160. exit_success_count = article_title_manager.record_title_list(title_list=exit_article_list, status=ARTICLE_EXIT_STATUS)
  161. if exit_success_count >= 0 and up_level_success_count >= 0:
  162. bot(
  163. title="冷启动文章晋级, 退场完成",
  164. detail={
  165. "已经晋级文章数量": up_level_success_count,
  166. "已经退场文章数控": exit_success_count,
  167. "阅读均值倍数阈值": READ_TIMES_ON_AVG_THRESHOLD,
  168. "探索次数阈值": DISCOVERY_TIMES_THRESHOLD
  169. },
  170. mention=False
  171. )
  172. if __name__ == '__main__':
  173. main()