|
@@ -10,7 +10,6 @@ from tqdm import tqdm
|
|
|
from applications import longArticlesMySQL, Functions, WeixinSpider
|
|
|
from applications.const import ArticleAssociationTaskConst
|
|
|
|
|
|
-
|
|
|
functions = Functions()
|
|
|
db_client = longArticlesMySQL()
|
|
|
spider = WeixinSpider()
|
|
@@ -20,7 +19,7 @@ const = ArticleAssociationTaskConst()
|
|
|
def get_good_articles() -> List[Dict]:
|
|
|
"""
|
|
|
获取表现好的文章
|
|
|
- :return:
|
|
|
+ :return: List[Dict] 查询到的文章列表
|
|
|
"""
|
|
|
sql = f"""
|
|
|
SELECT account_name, gh_id, view_count, read_rate, link, title
|
|
@@ -31,7 +30,8 @@ def get_good_articles() -> List[Dict]:
|
|
|
and date_str > '20241101'
|
|
|
and fans > 300000
|
|
|
and view_count > 5000
|
|
|
- and read_rate > 1.1;
|
|
|
+ and read_rate > 1.1
|
|
|
+ and status = 1;
|
|
|
"""
|
|
|
article_list = db_client.select_json(sql)
|
|
|
return article_list
|
|
@@ -40,9 +40,9 @@ def get_good_articles() -> List[Dict]:
|
|
|
def get_recommend_article_list_for_each_article(account_name: AnyStr, article_url: AnyStr, title: AnyStr) -> List[Dict]:
|
|
|
"""
|
|
|
获取推荐文章
|
|
|
- :param title:
|
|
|
- :param account_name:
|
|
|
- :param article_url:
|
|
|
+ :param title: 种子标题
|
|
|
+ :param account_name: 种子账号
|
|
|
+ :param article_url: 种子文章链接
|
|
|
:return:
|
|
|
"""
|
|
|
recommend_response = spider.get_recommend_articles(content_link=article_url)
|
|
@@ -70,24 +70,6 @@ def get_recommend_article_list_for_each_article(account_name: AnyStr, article_ur
|
|
|
return []
|
|
|
|
|
|
|
|
|
-def get_recommend_article_list_task() -> None:
|
|
|
- """
|
|
|
- 获取推荐文章
|
|
|
- :return:
|
|
|
- """
|
|
|
- article_list = get_good_articles()
|
|
|
- for article_detail_tuple in tqdm(article_list[:1], desc="article list"):
|
|
|
- account_name = article_detail_tuple['account_name']
|
|
|
- url = article_detail_tuple['link']
|
|
|
- title = article_detail_tuple['title']
|
|
|
- recommend_article_list = get_recommend_article_list_for_each_article(
|
|
|
- account_name=account_name,
|
|
|
- article_url=url,
|
|
|
- title=title
|
|
|
- )
|
|
|
- insert_recommend_list_into_meta(recommend_article_list)
|
|
|
-
|
|
|
-
|
|
|
def insert_recommend_list_into_meta(recommend_article_list: List[Dict]) -> None:
|
|
|
"""
|
|
|
插入数据
|
|
@@ -152,9 +134,27 @@ def insert_recommend_list_into_meta(recommend_article_list: List[Dict]) -> None:
|
|
|
print("update error", e)
|
|
|
|
|
|
|
|
|
+def do_i2i_crawler_task() -> None:
|
|
|
+ """
|
|
|
+ 获取推荐文章
|
|
|
+ :return:
|
|
|
+ """
|
|
|
+ article_list = get_good_articles()
|
|
|
+ for article_detail_tuple in tqdm(article_list[:1], desc="article list"):
|
|
|
+ account_name = article_detail_tuple['account_name']
|
|
|
+ url = article_detail_tuple['link']
|
|
|
+ title = article_detail_tuple['title']
|
|
|
+ recommend_article_list = get_recommend_article_list_for_each_article(
|
|
|
+ account_name=account_name,
|
|
|
+ article_url=url,
|
|
|
+ title=title
|
|
|
+ )
|
|
|
+ insert_recommend_list_into_meta(recommend_article_list)
|
|
|
+
|
|
|
+
|
|
|
def main():
|
|
|
"""
|
|
|
主函数
|
|
|
:return:
|
|
|
"""
|
|
|
- get_recommend_article_list_task()
|
|
|
+ do_i2i_crawler_task()
|