@@ -0,0 +1,160 @@
+"""
+@author: luojunhui
+"""
+import time
+from typing import Dict, List
+
+from tqdm import tqdm
+
+from applications import longArticlesMySQL, Functions, WeixinSpider
+from applications.const import ArticleAssociationTaskConst
+
+
+# shared module-level helpers: utility functions, MySQL client, WeChat spider and task constants
+functions = Functions()
+db_client = longArticlesMySQL()
+spider = WeixinSpider()
+const = ArticleAssociationTaskConst()
+
+
+def get_good_articles() -> List[Dict]:
+    """
+    Fetch well-performing seed articles (large accounts with high views and read rate).
+    :return: list of article rows as dicts
+    """
+    sql = """
+        SELECT account_name, gh_id, view_count, read_rate, link, title
+        FROM datastat_sort_strategy
+        WHERE type = 9
+            AND position = 1
+            AND date_str > '20241101'
+            AND fans > 300000
+            AND view_count > 5000
+            AND read_rate > 1.1;
+    """
+    article_list = db_client.select_json(sql)
+    return article_list
+
+
+def get_recommend_article_list_for_each_article(account_name: str, article_url: str, title: str) -> List[Dict]:
+    """
+    Fetch the recommended articles shown alongside a seed article.
+    :param title: title of the seed article
+    :param account_name: name of the seed article's account
+    :param article_url: url of the seed article
+    :return: recommended articles, excluding those published by the seed account itself
+    """
+    recommend_response = spider.get_recommend_articles(content_link=article_url)
+    if recommend_response['code'] == const.SPIDER_API_SUCCESS_CODE:
+        recommend_article_list = recommend_response['data']['data']['list']
+        # keep only recommendations coming from other accounts
+        filter_recommend_article_list = [
+            {
+                "seed_account_name": account_name,
+                "seed_url": article_url,
+                "seed_title": title,
+                "recommend_title": recommend_article['title'],
+                "recommend_account_name": recommend_article['nickname'],
+                "recommend_gh_id": recommend_article['username'],
+                "recommend_url": recommend_article['url'],
+                "recommend_send_timestamp": recommend_article['send_time'],
+                "recommend_read": recommend_article['read_num'],
+                "recommend_like": recommend_article['old_like_num'],
+                "recommend_index": recommend_article['idx'],
+                "recommend_time": int(time.time())
+            }
+            for recommend_article in recommend_article_list
+            if recommend_article['nickname'] != account_name
+        ]
+        return filter_recommend_article_list
+    else:
+        return []
+
+
+def get_recommend_article_list_task() -> None:
+    """
+    Fetch recommended articles for each well-performing seed article and persist them.
+    :return:
+    """
+    article_list = get_good_articles()
+    # only the first seed article is processed per run (article_list[:1])
+    for article_detail_tuple in tqdm(article_list[:1], desc="article list"):
+        account_name = article_detail_tuple['account_name']
+        url = article_detail_tuple['link']
+        title = article_detail_tuple['title']
+        recommend_article_list = get_recommend_article_list_for_each_article(
+            account_name=account_name,
+            article_url=url,
+            title=title
+        )
+        insert_recommend_list_into_meta(recommend_article_list)
+
+
+def insert_recommend_list_into_meta(recommend_article_list: List[Dict]) -> None:
+    """
+    Insert recommended articles into the crawler meta table.
+    :param recommend_article_list: rows built by get_recommend_article_list_for_each_article
+    :return:
+    """
+    if not recommend_article_list:
+        return
+    for recommend_obj in recommend_article_list:
+        try:
+            insert_sql = """
+                INSERT INTO crawler_meta_article
+                (platform, mode, category, out_account_id, article_index, title, link, read_cnt, like_cnt, publish_time, crawler_time, status, unique_index, source_article_title, source_account)
+                VALUES
+                (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+            """
+            db_client.update(
+                insert_sql,
+                params=(
+                    "weixin",
+                    "association",
+                    "article_association",
+                    recommend_obj['recommend_gh_id'],
+                    recommend_obj['recommend_index'],
+                    recommend_obj['recommend_title'],
+                    recommend_obj['recommend_url'],
+                    recommend_obj['recommend_read'],
+                    recommend_obj['recommend_like'],
+                    recommend_obj['recommend_send_timestamp'],
+                    int(time.time()),
+                    1,
+                    functions.generateGzhId(url=recommend_obj['recommend_url']),
+                    recommend_obj['seed_title'],
+                    recommend_obj['seed_account_name'],
+                )
+            )
+        except Exception as e:
+            # insert failed (typically the row already exists); refresh its stats and seed info instead
+            print("insert error", e)
+            update_sql = """
+                UPDATE crawler_meta_article
+                SET
+                    read_cnt = %s,
+                    like_cnt = %s,
+                    source_article_title = %s,
+                    source_account = %s
+                WHERE unique_index = %s AND category = %s;
+            """
+            try:
+                db_client.update(
+                    update_sql,
+                    params=(
+                        recommend_obj['recommend_read'],
+                        recommend_obj['recommend_like'],
+                        recommend_obj['seed_title'],
+                        recommend_obj['seed_account_name'],
+                        functions.generateGzhId(url=recommend_obj['recommend_url']),
+                        "article_association",
+                    )
+                )
+            except Exception as e:
+                print("update error", e)
+
+
+def main():
+    """
+    Entry point: run the recommendation-association task.
+    :return:
+    """
+    get_recommend_article_list_task()
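+
+
+# A minimal entry-point guard, assumed here for direct execution; the diff itself does not show how main() is invoked.
+if __name__ == '__main__':
+    main()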