소스 검색

article_association_crawler

luojunhui 8 달 전
부모
커밋
9c949d1e79
2개의 파일 변경, 14개의 추가 및 4개의 삭제
  1. 10 0
      applications/wxSpiderApi.py
  2. 4 4
      coldStartTasks/crawler/wechat/article_association.py

+ 10 - 0
applications/wxSpiderApi.py

@@ -5,6 +5,7 @@ import json
 import time
 import requests
 
+from applications import log
 from applications.decoratorApi import retryOnNone
 
 
@@ -115,5 +116,14 @@ class WeixinSpider(object):
             }
         )
         response = requests.request("POST", url=url, headers=cls.headers, data=payload, timeout=120)
+        log(
+            task="article_association_crawler",
+            function="get_recommend_articles_v2",
+            message="获取推荐链接,付费接口",
+            data={
+                "content_link": content_link,
+                "response": response.json()
+            }
+        )
         time.sleep(3)
         return response.json()

+ 4 - 4
coldStartTasks/crawler/wechat/article_association.py

@@ -116,6 +116,10 @@ class ArticleAssociationCrawler(object):
         """
         insert recommend article
         """
+        # whether account inside
+        if obj['gh_id'] in self.inner_account_set:
+            return
+
         # whether article title exists
         title = obj['title']
         select_sql = "select article_id from crawler_meta_article where title = %s;"
@@ -123,10 +127,6 @@ class ArticleAssociationCrawler(object):
         if res:
             return
 
-        # whether account inside
-        if obj['gh_id'] in self.inner_account_set:
-            return
-
         # whether title sensitive
         title_sensitivity = const.TITLE_SENSITIVE if whether_title_sensitive(title) else const.TITLE_NOT_SENSITIVE