"""
@author: luojunhui
"""
import json
import time
from typing import AnyStr, List, Dict, Optional

from tqdm import tqdm

from applications import longArticlesMySQL, Functions, WeixinSpider
from applications.const import ArticleAssociationTaskConst

functions = Functions()
db_client = longArticlesMySQL()
spider = WeixinSpider()
const = ArticleAssociationTaskConst()

# Plain (non-f) strings: the only placeholders are the driver-level ``%s``
# parameters, so no Python-side interpolation is wanted here.
_INSERT_META_ARTICLE_SQL = """
    INSERT INTO crawler_meta_article
    (platform, mode, category, out_account_id, article_index, title, link,
     read_cnt, like_cnt, publish_time, crawler_time, status, unique_index,
     source_article_title, source_account)
    VALUES
    (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

_UPDATE_META_ARTICLE_SQL = """
    UPDATE crawler_meta_article
    SET
        read_cnt = %s,
        like_cnt = %s,
        source_article_title = %s,
        source_account = %s
    WHERE unique_index = %s and category = %s;
"""


def get_good_articles() -> List[Dict]:
    """
    Fetch the well-performing articles used as seeds.

    Selects rows from ``datastat_sort_strategy`` with ``type = 9``,
    ``position = 1``, ``date_str > '20241101'``, ``fans > 300000``,
    ``view_count > 5000`` and ``read_rate > 1.1``.

    :return: whatever ``db_client.select_json`` yields for the query; callers
        in this module index each row by ``account_name``, ``link`` and
        ``title``.
    """
    sql = """
        SELECT account_name, gh_id, view_count, read_rate, link, title
        FROM datastat_sort_strategy
        WHERE
            type = 9
            and position = 1
            and date_str > '20241101'
            and fans > 300000
            and view_count > 5000
            and read_rate > 1.1;
    """
    article_list = db_client.select_json(sql)
    return article_list


def get_recommend_article_list_for_each_article(
    account_name: AnyStr, article_url: AnyStr, title: AnyStr
) -> List[Dict]:
    """
    Fetch the articles the spider reports as recommended for one seed article.

    :param account_name: name of the account that published the seed article;
        recommendations whose ``nickname`` equals it are dropped.
    :param article_url: link of the seed article, passed to the spider as
        ``content_link``.
    :param title: title of the seed article, copied into every result row.
    :return: one dict per kept recommendation (``seed_*`` and ``recommend_*``
        keys), or an empty list when the spider response code is not
        ``const.SPIDER_API_SUCCESS_CODE``.
    """
    recommend_response = spider.get_recommend_articles(content_link=article_url)
    if recommend_response['code'] != const.SPIDER_API_SUCCESS_CODE:
        return []

    recommend_article_list = recommend_response['data']['data']['list']
    filter_recommend_article_list = [
        {
            "seed_account_name": account_name,
            "seed_url": article_url,
            "seed_title": title,
            "recommend_title": recommend_article['title'],
            "recommend_account_name": recommend_article['nickname'],
            "recommend_gh_id": recommend_article['username'],
            "recommend_url": recommend_article['url'],
            "recommend_send_timestamp": recommend_article['send_time'],
            "recommend_read": recommend_article['read_num'],
            "recommend_like": recommend_article['old_like_num'],
            "recommend_index": recommend_article['idx'],
            "recommend_time": int(time.time())
        }
        for recommend_article in recommend_article_list
        # skip recommendations published by the seed account itself
        if recommend_article['nickname'] != account_name
    ]
    return filter_recommend_article_list


def get_recommend_article_list_task(article_limit: Optional[int] = 1) -> None:
    """
    Crawl recommendations for the seed articles and store them.

    :param article_limit: how many seed articles (taken from the front of the
        ``get_good_articles`` result) to process. Defaults to ``1``, which is
        the limit this task previously hard-coded; pass ``None`` to process
        every seed article.
        NOTE(review): the default of 1 looks like a debugging leftover —
        confirm whether production runs should pass ``None``.
    :return: None
    """
    article_list = get_good_articles()
    # ``seq[:None]`` is the full sequence, so None naturally means "no limit".
    for article_detail in tqdm(article_list[:article_limit], desc="article list"):
        account_name = article_detail['account_name']
        url = article_detail['link']
        title = article_detail['title']
        recommend_article_list = get_recommend_article_list_for_each_article(
            account_name=account_name,
            article_url=url,
            title=title
        )
        insert_recommend_list_into_meta(recommend_article_list)


def insert_recommend_list_into_meta(recommend_article_list: List[Dict]) -> None:
    """
    Write recommended articles into ``crawler_meta_article``.

    For each row an INSERT is attempted first. If it raises for any reason,
    the error is printed and an UPDATE of the read/like counters and the
    source article fields is attempted instead, matched on ``unique_index``
    and ``category``. A failing UPDATE is printed and the loop moves on, so
    one bad row never aborts the batch.

    :param recommend_article_list: rows shaped like the output of
        ``get_recommend_article_list_for_each_article``.
    :return: None
    """
    if not recommend_article_list:
        return

    for recommend_obj in recommend_article_list:
        try:
            db_client.update(
                _INSERT_META_ARTICLE_SQL,
                params=(
                    "weixin",
                    "association",
                    "article_association",
                    recommend_obj['recommend_gh_id'],
                    recommend_obj['recommend_index'],
                    recommend_obj['recommend_title'],
                    recommend_obj['recommend_url'],
                    recommend_obj['recommend_read'],
                    recommend_obj['recommend_like'],
                    recommend_obj['recommend_send_timestamp'],
                    int(time.time()),
                    1,  # status; meaning is defined by the table, not visible here
                    functions.generateGzhId(url=recommend_obj['recommend_url']),
                    recommend_obj['seed_title'],
                    recommend_obj['seed_account_name'],
                )
            )
        except Exception as e:
            # Presumably the expected failure is an already-existing
            # unique_index — TODO confirm against the table's constraints.
            print("insert error", e)
            try:
                db_client.update(
                    _UPDATE_META_ARTICLE_SQL,
                    params=(
                        recommend_obj['recommend_read'],
                        recommend_obj['recommend_like'],
                        recommend_obj['seed_title'],
                        recommend_obj['seed_account_name'],
                        functions.generateGzhId(url=recommend_obj['recommend_url']),
                        "article_association",
                    )
                )
            except Exception as e:
                print("update error", e)


def main():
    """
    Entry point: run the recommendation crawl task with its default limit.

    :return: None
    """
    get_recommend_article_list_task()