123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
- """
- @author: luojunhui
- """
- import traceback
- import pandas as pd
- from applications import PQMySQL, longArticlesMySQL, bot, log
- from applications.aiditApi import get_generated_article_list
def get_level_up_articles() -> set:
    """
    Gather the titles of the generated articles found in the three level pools.

    ``get_generated_article_list`` is called once per pool identifier, and
    element ``1`` of every returned row is taken as the title (presumably the
    title column of that helper's result rows -- confirm against the helper).

    :return: de-duplicated set of the collected titles
    """
    # Queried in the same order as before: level 2, then level 1, then level 0.
    pool_ids = (
        "20240804003153130851174",  # pool_level2
        "20240802171417146947657",  # pool_level1
        "20240802143345289374071",  # pool_level0
    )
    collected_titles = []
    for pool_id in pool_ids:
        collected_titles.extend(
            row[1] for row in get_generated_article_list(pool_id)
        )
    # De-duplicate only after every pool has been fetched.
    return set(collected_titles)
class ArticleExitWithTitle(object):
    """
    Maintain the ``cold_start_title_pool`` table used for title promotion/exit.

    Two database clients are held on the instance:

    * ``pq_client`` (``PQMySQL``) -- read from ``datastat_sort_strategy``.
    * ``lam_client`` (``longArticlesMySQL``) -- write to
      ``cold_start_title_pool``.

    Both stay ``None`` until ``init_database`` has connected them.
    """

    def __init__(self):
        # Status a row must currently hold for the fallback UPDATE in
        # record_title_list to overwrite it.
        self.INIT_STATUS = 0
        self.pq_client = None
        self.lam_client = None

    def init_database(self) -> bool:
        """
        Create both database clients.

        An alert is sent through ``bot`` for whichever connection fails.

        :return: True when both clients were created, False as soon as one
            of them fails
        """
        try:
            self.pq_client = PQMySQL()
        except Exception as e:
            bot(
                title="文章退场管理任务,数据库连接失败",
                detail={
                    "e": str(e),
                    "error_msg": traceback.format_exc(),
                    "server": "old server"
                }
            )
            return False

        try:
            self.lam_client = longArticlesMySQL()
        except Exception as e:
            bot(
                title="文章退场管理任务,数据库连接失败",
                detail={
                    "e": str(e),
                    "error_msg": traceback.format_exc(),
                    "server": "new server"
                }
            )
            # A failed connection must be reported as a failure; falling
            # through to "return True" would leave lam_client as None while
            # signalling success to the caller.
            return False

        return True

    def get_discovery_published_articles(self) -> pd.DataFrame:
        """
        Load per-title statistics from ``datastat_sort_strategy``.

        Only rows with ``position > 2`` and ``fans > 10000`` are considered,
        grouped by title.

        :return: DataFrame with the columns ``title``,
            ``max_read_times_on_avg`` (``max(read_rate)`` of the group) and
            ``articles_count`` (number of rows in the group)
        """
        sql = """
            SELECT
                title, max(read_rate), count(1) as title_count
            FROM
                datastat_sort_strategy
            WHERE position > 2 and fans > 10000
            GROUP BY title;
        """
        articles = self.pq_client.select(sql)
        article_df = pd.DataFrame(
            articles,
            columns=['title', 'max_read_times_on_avg', 'articles_count']
        )
        return article_df

    def bad_article_manager(self, read_times_on_avg_threshold, discovery_times_threshold) -> list[str]:
        """
        Find the titles that qualify for exit.

        A title is selected when its ``max_read_times_on_avg`` is strictly
        below ``read_times_on_avg_threshold`` AND its ``articles_count`` is
        strictly below ``discovery_times_threshold``.

        :param read_times_on_avg_threshold: exclusive upper bound on the
            title's maximum read rate
        :param discovery_times_threshold: exclusive upper bound on the
            title's article count
        :return: list of matching titles
        """
        discovery_published_articles_df = self.get_discovery_published_articles()
        low_read_rate = (
            discovery_published_articles_df['max_read_times_on_avg']
            < read_times_on_avg_threshold
        )
        low_article_count = (
            discovery_published_articles_df['articles_count']
            < discovery_times_threshold
        )
        target_bad_dataframe = discovery_published_articles_df[
            low_read_rate & low_article_count
        ]
        return target_bad_dataframe['title'].tolist()

    def record_title_list(self, title_list, status) -> int:
        """
        Store ``status`` for every title in ``cold_start_title_pool``.

        Each title is first inserted. If the insert raises (presumably
        because the title already exists -- confirm against the table's key
        definition), the row is updated instead, but only when its current
        status equals ``self.INIT_STATUS``. A title whose update also raises
        is logged and collected as a failure.

        :param title_list: iterable of titles to record
        :param status: status value to store for each title
        :return: number of titles that were newly inserted, or -1 when at
            least one title failed both the insert and the update (an alert
            listing those titles is sent through ``bot``)
        """
        # The statements are parameterized and identical for every title, so
        # they are built once instead of once per loop iteration.
        insert_sql = """
            INSERT INTO cold_start_title_pool
            (title, status)
            values
            (%s, %s)
        """
        update_sql = """
            UPDATE cold_start_title_pool
            SET status = %s
            where title = %s and status = %s;
        """
        fail_list = []
        insert_count = 0
        for title in title_list:
            try:
                self.lam_client.update(
                    sql=insert_sql,
                    params=(title, status)
                )
                insert_count += 1
            except Exception:
                # Insert failed: fall back to updating the existing row.
                try:
                    self.lam_client.update(
                        sql=update_sql,
                        params=(status, title, self.INIT_STATUS)
                    )
                except Exception as e:
                    error_msg = traceback.format_exc()
                    log(
                        task="article_exit_with_title",
                        function="record_title_list",
                        status="fail",
                        data={
                            "e": str(e),
                            "error_msg": error_msg,
                        }
                    )
                    fail_list.append(title)

        if fail_list:
            bot(
                title="冷启动文章标题退场,sql操作失败",
                detail=fail_list
            )
            return -1
        return insert_count
def main():
    """
    Run the cold-start title promotion and exit job.

    Steps:

    1. Connect the databases; stop if that fails.
    2. Record every title returned by ``get_level_up_articles`` with the
       promotion status.
    3. Record every title returned by ``bad_article_manager`` with the exit
       status.
    4. Send a summary through ``bot`` when neither recording step reported
       a failure (a return value of -1).

    :return: None
    """
    UP_LEVEL_STATUS = 1
    ARTICLE_EXIT_STATUS = -1
    READ_TIMES_ON_AVG_THRESHOLD = 0.5
    DISCOVERY_TIMES_THRESHOLD = 3

    article_title_manager = ArticleExitWithTitle()
    if not article_title_manager.init_database():
        # init_database has already sent an alert; continuing would only run
        # queries against a client that is still None.
        return

    # Handle promoted titles.
    up_level_title = get_level_up_articles()
    up_level_success_count = article_title_manager.record_title_list(
        title_list=up_level_title,
        status=UP_LEVEL_STATUS
    )

    # Handle exited titles.
    exit_article_list = article_title_manager.bad_article_manager(
        read_times_on_avg_threshold=READ_TIMES_ON_AVG_THRESHOLD,
        discovery_times_threshold=DISCOVERY_TIMES_THRESHOLD
    )
    exit_success_count = article_title_manager.record_title_list(
        title_list=exit_article_list,
        status=ARTICLE_EXIT_STATUS
    )

    # record_title_list returns -1 on failure (and alerts on its own), so the
    # summary is only sent when both steps succeeded.
    if exit_success_count >= 0 and up_level_success_count >= 0:
        bot(
            title="冷启动文章晋级, 退场完成",
            detail={
                "已经晋级文章数量": up_level_success_count,
                "已经退场文章数控": exit_success_count,
                "阅读均值倍数阈值": READ_TIMES_ON_AVG_THRESHOLD,
                "探索次数阈值": DISCOVERY_TIMES_THRESHOLD
            },
            mention=False
        )
# Run the job only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|