|
@@ -15,6 +15,7 @@ from applications import log
|
|
|
from applications import Functions
|
|
|
from applications.db import DatabaseConnector
|
|
|
from config import long_articles_config
|
|
|
+from coldStartTasks.filter import article_crawler_duplicate_filter
|
|
|
|
|
|
functions = Functions()
|
|
|
|
|
@@ -91,11 +92,19 @@ class ToutiaoRecommendCrawler(object):
|
|
|
:param category
|
|
|
:return:
|
|
|
"""
|
|
|
+ title = item['title']
|
|
|
+ if article_crawler_duplicate_filter(new_article_title=title, db_client=self.db_client):
|
|
|
+ log(
|
|
|
+ function='toutiao_recommend_crawler',
|
|
|
+ task='toutiao_recommend',
|
|
|
+ message='标题去重'
|
|
|
+ )
|
|
|
+ return
|
|
|
+
|
|
|
item_id = item.get('item_id')
|
|
|
article_url = item['article_url']
|
|
|
like_count = item['like_count']
|
|
|
read_count = item['read_count']
|
|
|
- title = item['title']
|
|
|
user_info = item['user_info']
|
|
|
user_id = user_info.get('user_id')
|
|
|
abstract = item['Abstract']
|
|
@@ -125,7 +134,7 @@ class ToutiaoRecommendCrawler(object):
|
|
|
)
|
|
|
)
|
|
|
|
|
|
- def process_recommendation(self, category, recommendation) -> Dict:
|
|
|
+ def process_recommendation(self, category, recommendation):
|
|
|
"""
|
|
|
处理推荐流文章
|
|
|
:param recommendation
|