123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160 |
- """
- @author: luojunhui
- """
- import json
- import time
- from typing import AnyStr, List, Dict
- from tqdm import tqdm
- from applications import longArticlesMySQL, Functions, WeixinSpider
- from applications.const import ArticleAssociationTaskConst
# Module-level singletons shared by every function in this file.
functions = Functions()  # helper collection; used below for generateGzhId
db_client = longArticlesMySQL()  # database client; used below for select_json / update
spider = WeixinSpider()  # crawler client; used below for get_recommend_articles
const = ArticleAssociationTaskConst()  # task constants; used below for SPIDER_API_SUCCESS_CODE
def get_good_articles() -> List[Dict]:
    """
    Fetch the well-performing articles used as seeds.

    Runs one fixed query against ``datastat_sort_strategy`` that keeps rows
    with ``type = 9``, ``position = 1``, ``date_str > '20241101'``,
    ``fans > 300000``, ``view_count > 5000`` and ``read_rate > 1.1``.

    :return: the result of ``db_client.select_json`` for that query; the
        selected columns are account_name, gh_id, view_count, read_rate,
        link and title.
    """
    # The statement has no interpolated values, so a plain string is enough.
    query = """
        SELECT account_name, gh_id, view_count, read_rate, link, title
        FROM datastat_sort_strategy
        WHERE
            type = 9
            and position = 1
            and date_str > '20241101'
            and fans > 300000
            and view_count > 5000
            and read_rate > 1.1;
    """
    return db_client.select_json(query)
def get_recommend_article_list_for_each_article(account_name: AnyStr, article_url: AnyStr, title: AnyStr) -> List[Dict]:
    """
    Fetch the articles recommended alongside one seed article.

    Calls ``spider.get_recommend_articles`` with the seed link and converts
    every returned recommendation into a flat record that also carries the
    seed article's account name, url and title. Recommendations published by
    the seed account itself (same ``nickname``) are dropped.

    :param account_name: account name of the seed article; also used to
        filter out recommendations coming from that same account.
    :param article_url: link of the seed article, sent to the spider as
        ``content_link``.
    :param title: title of the seed article, stored as ``seed_title``.
    :return: one dict per kept recommendation; an empty list when the spider
        reports a non-success code or the response cannot be unpacked.
    :raises KeyError: if an individual recommendation lacks one of the
        fields read below.
    """
    recommend_response = spider.get_recommend_articles(content_link=article_url)

    # EAFP: a None response or a missing key must not abort the whole task,
    # because the calling loop has no exception handling of its own.
    try:
        if recommend_response['code'] != const.SPIDER_API_SUCCESS_CODE:
            return []
        recommend_article_list = recommend_response['data']['data']['list'] or []
    except (KeyError, TypeError) as e:
        print("recommend response error", e)
        return []

    # One timestamp for the whole batch: the value is loop-invariant, and
    # records fetched by the same request should share the same crawl time.
    recommend_time = int(time.time())

    return [
        {
            "seed_account_name": account_name,
            "seed_url": article_url,
            "seed_title": title,
            "recommend_title": recommend_article['title'],
            "recommend_account_name": recommend_article['nickname'],
            "recommend_gh_id": recommend_article['username'],
            "recommend_url": recommend_article['url'],
            "recommend_send_timestamp": recommend_article['send_time'],
            "recommend_read": recommend_article['read_num'],
            "recommend_like": recommend_article['old_like_num'],
            "recommend_index": recommend_article['idx'],
            "recommend_time": recommend_time,
        }
        for recommend_article in recommend_article_list
        if recommend_article['nickname'] != account_name
    ]
def get_recommend_article_list_task() -> None:
    """
    Collect recommendations for the seed articles and store them.

    For each seed article returned by ``get_good_articles`` the recommended
    articles are fetched and handed to ``insert_recommend_list_into_meta``.

    :return: None
    """
    # NOTE(review): the [:1] slice limits the run to the first seed article.
    # It looks like a debugging limit - confirm whether it is intentional.
    seed_articles = get_good_articles()[:1]
    for seed_article in tqdm(seed_articles, desc="article list"):
        recommendations = get_recommend_article_list_for_each_article(
            account_name=seed_article['account_name'],
            article_url=seed_article['link'],
            title=seed_article['title'],
        )
        insert_recommend_list_into_meta(recommendations)
def insert_recommend_list_into_meta(recommend_article_list: List[Dict]) -> None:
    """
    Store recommended-article records in ``crawler_meta_article``.

    Each record is first inserted as a new row. When the insert raises, the
    error is printed and the existing row identified by ``unique_index`` and
    ``category`` is updated instead (read count, like count, source article
    title and source account). A failing update is printed as well, and the
    loop continues with the next record, so this function never raises for
    a single bad record.

    :param recommend_article_list: records carrying the ``recommend_*`` and
        ``seed_*`` keys read below; ``None`` or an empty list is a no-op.
    :return: None
    """
    if not recommend_article_list:
        return

    # Both statements are loop-invariant and contain no interpolated values,
    # so they are built once as plain strings; values go through params.
    insert_sql = """
        INSERT INTO crawler_meta_article
            (platform, mode, category, out_account_id, article_index, title, link, read_cnt, like_cnt, publish_time, crawler_time, status, unique_index, source_article_title, source_account)
        VALUES
            (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    update_sql = """
        UPDATE crawler_meta_article
        SET
            read_cnt = %s,
            like_cnt = %s,
            source_article_title = %s,
            source_account = %s
        WHERE
            unique_index = %s and category = %s;
    """

    for recommend_obj in recommend_article_list:
        try:
            db_client.update(
                insert_sql,
                params=(
                    "weixin",
                    "association",
                    "article_association",
                    recommend_obj['recommend_gh_id'],
                    recommend_obj['recommend_index'],
                    recommend_obj['recommend_title'],
                    recommend_obj['recommend_url'],
                    recommend_obj['recommend_read'],
                    recommend_obj['recommend_like'],
                    recommend_obj['recommend_send_timestamp'],
                    int(time.time()),
                    1,
                    functions.generateGzhId(url=recommend_obj['recommend_url']),
                    recommend_obj['seed_title'],
                    recommend_obj['seed_account_name'],
                )
            )
        except Exception as insert_error:
            # NOTE(review): every insert failure, not only an already-existing
            # row, falls through to the update below.
            print("insert error", insert_error)
            try:
                db_client.update(
                    update_sql,
                    params=(
                        recommend_obj['recommend_read'],
                        recommend_obj['recommend_like'],
                        recommend_obj['seed_title'],
                        recommend_obj['seed_account_name'],
                        functions.generateGzhId(url=recommend_obj['recommend_url']),
                        "article_association",
                    )
                )
            except Exception as update_error:
                print("update error", update_error)
def main() -> None:
    """
    Entry point: run the recommend-article collection task once.

    :return: None
    """
    get_recommend_article_list_task()
|