luojunhui 4 miesięcy temu
rodzic
commit
106005f047
1 zmienionych plików z 25 dodań i 25 usunięć
  1. 25 25
      coldStartTasks/crawler/weixin_article_association.py

+ 25 - 25
coldStartTasks/crawler/weixin_article_association.py

@@ -10,7 +10,6 @@ from tqdm import tqdm
 from applications import longArticlesMySQL, Functions, WeixinSpider
 from applications.const import ArticleAssociationTaskConst
 
-
 functions = Functions()
 db_client = longArticlesMySQL()
 spider = WeixinSpider()
@@ -20,7 +19,7 @@ const = ArticleAssociationTaskConst()
 def get_good_articles() -> List[Dict]:
     """
     获取表现好的文章
-    :return:
+    :return: List[Dict] 查询到的文章列表
     """
     sql = f"""
         SELECT account_name, gh_id, view_count, read_rate, link, title
@@ -31,7 +30,8 @@ def get_good_articles() -> List[Dict]:
             and date_str > '20241101' 
             and fans > 300000 
             and view_count > 5000
-            and read_rate > 1.1;
+            and read_rate > 1.1
+            and status = 1;
     """
     article_list = db_client.select_json(sql)
     return article_list
@@ -40,9 +40,9 @@ def get_good_articles() -> List[Dict]:
 def get_recommend_article_list_for_each_article(account_name: AnyStr, article_url: AnyStr, title: AnyStr) -> List[Dict]:
     """
     获取推荐文章
-    :param title:
-    :param account_name:
-    :param article_url:
+    :param title: 种子标题
+    :param account_name: 种子账号
+    :param article_url: 种子文章链接
     :return:
     """
     recommend_response = spider.get_recommend_articles(content_link=article_url)
@@ -70,24 +70,6 @@ def get_recommend_article_list_for_each_article(account_name: AnyStr, article_ur
         return []
 
 
-def get_recommend_article_list_task() -> None:
-    """
-    获取推荐文章
-    :return:
-    """
-    article_list = get_good_articles()
-    for article_detail_tuple in tqdm(article_list[:1], desc="article list"):
-        account_name = article_detail_tuple['account_name']
-        url = article_detail_tuple['link']
-        title = article_detail_tuple['title']
-        recommend_article_list = get_recommend_article_list_for_each_article(
-            account_name=account_name,
-            article_url=url,
-            title=title
-        )
-        insert_recommend_list_into_meta(recommend_article_list)
-
-
 def insert_recommend_list_into_meta(recommend_article_list: List[Dict]) -> None:
     """
     插入数据
@@ -152,9 +134,27 @@ def insert_recommend_list_into_meta(recommend_article_list: List[Dict]) -> None:
                 print("update error", e)
 
 
+def do_i2i_crawler_task() -> None:
+    """
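+    Run the i2i crawl: fetch recommended articles for the first seed article from get_good_articles() (article_list[:1]) and insert them via insert_recommend_list_into_meta.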
+    :return:
+    """
+    article_list = get_good_articles()
+    for article_detail_tuple in tqdm(article_list[:1], desc="article list"):
+        account_name = article_detail_tuple['account_name']
+        url = article_detail_tuple['link']
+        title = article_detail_tuple['title']
+        recommend_article_list = get_recommend_article_list_for_each_article(
+            account_name=account_name,
+            article_url=url,
+            title=title
+        )
+        insert_recommend_list_into_meta(recommend_article_list)
+
+
 def main():
     """
     主函数
     :return:
     """
-    get_recommend_article_list_task()
+    do_i2i_crawler_task()