Explorar o código

Merge branch 'master' into 2025-03-11-toutiao-related-recommendation

luojunhui hai 3 meses
pai
achega
45ec9e3c4c

+ 1 - 1
account_cold_start_daily.py

@@ -54,7 +54,7 @@ class AccountColdStartDailyTask(object):
             # After crawling completes, score the crawled titles for similarity
             cold_start_title_similarity_task = ColdStartTitleSimilarityTask()
             cold_start_title_similarity_task.init_database()
-            cold_start_title_similarity_task.run()
+            cold_start_title_similarity_task.run(meta_source='article')
 
             bot(
                 title="账号冷启动任务,抓取完成",

+ 3 - 1
coldStartTasks/crawler/weixin_video_crawler.py

@@ -87,7 +87,8 @@ class WeixinVideoCrawler(object):
         select_sql = f"""
             SELECT gh_id, account_name, latest_crawler_timestamp
             FROM weixin_account_for_videos
-            WHERE status = {const.ACCOUNT_CRAWL_STATUS};
+            WHERE status = {const.ACCOUNT_CRAWL_STATUS}
+            ORDER BY latest_crawler_timestamp;
         """
         response = self.db_client.select(select_sql, DictCursor)
         return response
@@ -158,6 +159,7 @@ class WeixinVideoCrawler(object):
                     url_unique = functions.generateGzhId(article_url)
                     # Check whether this video link has already been downloaded; if so, skip it
                     if self.is_downloaded(url_unique):
+                        print("url exists")
                         continue
 
                     title = article.get("Title", None)