"""
Maintenance task for the cold-start title pool: promotes well-performing
titles and retires poorly-performing ones.

@author: luojunhui
"""
import json
import traceback

from datetime import datetime, timedelta

from applications import PQMySQL, longArticlesMySQL, bot, log
from applications.aiditApi import get_generated_article_list
from config import apolloConfig

config = apolloConfig()
# Thresholds for both exit strategies, stored as a JSON document in the
# remote configuration under the key "article_exit_threshold".
article_exit_threshold = json.loads(config.getConfigValue("article_exit_threshold"))


def get_level_up_articles() -> set:
    """
    Collect the titles of every article in the promoted generate pools.

    :return: set of titles; the element at index 1 of each record returned by
        ``get_generated_article_list`` is taken as the title
    """
    generate_pool_ids = [
        "20240804003153130851174",
        "20240802171417146947657",
        "20240802143345289374071",
    ]
    return {
        article[1]
        for pool_id in generate_pool_ids
        for article in get_generated_article_list(pool_id)
    }


class ArticleTitleStatusManager(object):
    """
    Maintains the article exit table (``cold_start_title_pool``).
    """

    # The statements are identical for every title, so they are built once
    # here instead of on each loop iteration in ``save_titles``.
    INSERT_TITLE_SQL = """
        INSERT INTO cold_start_title_pool
        (title, status)
        values
        (%s, %s)
    """
    UPDATE_TITLE_SQL = """
        UPDATE cold_start_title_pool
        SET status = %s
        where title = %s and status = %s;
    """

    def __init__(self):
        # Status a title must currently hold for ``save_titles`` to overwrite it.
        self.INIT_STATUS = 0
        # Both clients stay None until ``init_database`` succeeds.
        self.pq_client = None
        self.lam_client = None

    def init_database(self) -> bool:
        """
        Open both database connections.

        A bot alert is sent for whichever connection fails.

        :return: True only when both connections were created, False otherwise
        """
        try:
            self.pq_client = PQMySQL()
        except Exception as e:
            bot(
                title="文章退场管理任务,数据库连接失败",
                detail={
                    "e": str(e),
                    "error_msg": traceback.format_exc(),
                    "server": "old server"
                }
            )
            return False

        try:
            self.lam_client = longArticlesMySQL()
        except Exception as e:
            bot(
                title="文章退场管理任务,数据库连接失败",
                detail={
                    "e": str(e),
                    "error_msg": traceback.format_exc(),
                    "server": "new server"
                }
            )
            # Every query in this class goes through ``lam_client``; reporting
            # success here would leave callers working with a None client.
            return False

        return True

    def get_bad_articles(self, read_times_on_avg_threshold, discovery_times_threshold) -> list[str]:
        """
        Find low-quality titles that should be moved to the exit status.

        A title qualifies when it has at least ``discovery_times_threshold``
        rows (position > 2, fans > 10000) and its best read_rate is still
        below ``read_times_on_avg_threshold``.

        :param read_times_on_avg_threshold: exclusive upper bound on max(read_rate)
        :param discovery_times_threshold: minimum number of matching rows
        :return: list of matching titles
        """
        # NOTE(review): both thresholds are interpolated straight into the SQL
        # text. They come from configuration in this file; switch to bound
        # parameters if ``select`` supports them.
        sql = f"""
            SELECT
                title, max(read_rate) as max_rate, count(1) as title_count
            FROM
                datastat_sort_strategy
            WHERE position > 2 and fans > 10000
            GROUP BY title
            HAVING title_count >= {discovery_times_threshold} and max_rate < {read_times_on_avg_threshold};
        """
        rows = self.lam_client.select(sql)
        return [row[0] for row in rows]

    def get_bad_articles_v2(self, publish_date_threshold, discovery_times_threshold) -> list[str]:
        """
        Find titles whose first publication is older than the date threshold
        and that have been published at least the given number of times.

        :param publish_date_threshold: exclusive upper bound on min(date_str);
            ``main`` passes a ``%Y%m%d`` formatted string
        :param discovery_times_threshold: minimum number of matching rows
        :return: list of matching titles
        """
        # NOTE(review): the date threshold is interpolated unquoted, so it
        # reaches the database as a numeric literal — confirm that date_str
        # is stored in a comparable YYYYMMDD form.
        sql = f"""
            SELECT
                title, count(1) as title_count, min(date_str) as min_date
            FROM
                datastat_sort_strategy
            WHERE position > 2 and fans > 10000
            GROUP BY title
            HAVING title_count >= {discovery_times_threshold} and min_date < {publish_date_threshold};
        """
        rows = self.lam_client.select(sql)
        return [row[0] for row in rows]

    def save_titles(self, title_list, status) -> int:
        """
        Write ``status`` for every title in ``title_list``.

        Each title is inserted first. If the insert raises, the method falls
        back to updating the existing row, but only when that row still holds
        ``INIT_STATUS``. Titles for which the fallback also raises are logged
        and reported through a single bot alert.

        :param title_list: iterable of titles to store
        :param status: status value to write
        :return: number of successful inserts, or -1 when at least one title
            failed both the insert and the fallback update
        """
        fail_list = []
        insert_count = 0
        for title in title_list:
            try:
                self.lam_client.update(
                    sql=self.INSERT_TITLE_SQL,
                    params=(title, status)
                )
                insert_count += 1
            except Exception:
                # The insert failed; try to update the existing row instead.
                try:
                    self.lam_client.update(
                        sql=self.UPDATE_TITLE_SQL,
                        params=(status, title, self.INIT_STATUS)
                    )
                except Exception as update_error:
                    error_msg = traceback.format_exc()
                    log(
                        task="article_exit_with_title",
                        function="save_titles",
                        status="fail",
                        data={
                            "e": str(update_error),
                            "error_msg": error_msg,
                        }
                    )
                    fail_list.append(title)

        if fail_list:
            bot(
                title="冷启动文章标题退场,sql操作失败",
                detail=fail_list
            )
            return -1
        return insert_count


def main():
    """
    Run the promotion pass and both exit strategies, then report the counts.

    :return: None
    """
    UP_LEVEL_STATUS = 1
    ARTICLE_EXIT_STATUS = -1

    # Strategy 1: titles explored often enough whose best read rate stays low.
    read_times_on_avg_threshold = article_exit_threshold['strategy_1']['read_times_on_avg']
    explore_times_threshold = article_exit_threshold['strategy_1']['explore_times_threshold']

    # Strategy 2: titles first published long ago and published many times.
    publish_times_threshold = article_exit_threshold['strategy_2']['publish_times_threshold']
    days_threshold = article_exit_threshold['strategy_2']['days_threshold']
    first_publish_date_threshold = (datetime.now() - timedelta(days=days_threshold)).strftime('%Y%m%d')

    article_title_manager = ArticleTitleStatusManager()
    if not article_title_manager.init_database():
        # init_database has already sent the alert; without a working
        # connection every step below would fail.
        return

    # Promote titles.
    up_level_title = get_level_up_articles()
    up_level_success_count = article_title_manager.save_titles(
        title_list=up_level_title,
        status=UP_LEVEL_STATUS
    )

    # Exit titles, strategy 1.
    exit_article_list = article_title_manager.get_bad_articles(
        read_times_on_avg_threshold=read_times_on_avg_threshold,
        discovery_times_threshold=explore_times_threshold
    )
    exit_success_count = article_title_manager.save_titles(
        title_list=exit_article_list,
        status=ARTICLE_EXIT_STATUS
    )

    # Exit titles, strategy 2.
    exit_article_list_v2 = article_title_manager.get_bad_articles_v2(
        publish_date_threshold=first_publish_date_threshold,
        discovery_times_threshold=publish_times_threshold
    )
    exit_success_count_v2 = article_title_manager.save_titles(
        title_list=exit_article_list_v2,
        status=ARTICLE_EXIT_STATUS
    )

    bot(
        title="冷启动文章晋级/退场完成",
        detail={
            "晋级文章数量": up_level_success_count,
            "策略1:退场文章数量": exit_success_count,
            "策略2:退场文章数量": exit_success_count_v2,
            "策略1:阅读均值倍数阈值": read_times_on_avg_threshold,
            "策略1:探索次数阈值": explore_times_threshold,
            "策略2:发布次数阈值": publish_times_threshold,
            "策略2:发布天数阈值": days_threshold
        },
        mention=False
    )


if __name__ == '__main__':
    main()