luojunhui 3 ماه پیش
والد
کامیت
cb097e280a
1 فایل تغییر یافته به همراه 15 افزوده شده و 14 حذف شده
  1. 15 14
      coldStartTasks/crawler/weixinCategoryCrawler.py

+ 15 - 14
coldStartTasks/crawler/weixinCategoryCrawler.py

@@ -36,7 +36,8 @@ class weixinCategory(object):
         sql = f"""
         sql = f"""
             select gh_id, account_source, account_name, account_category, latest_update_time
             select gh_id, account_source, account_name, account_category, latest_update_time
             from long_articles_accounts 
             from long_articles_accounts 
-            where account_category = '{account_category}' and is_using = {ACCOUNT_GOOD_STATUS};
+            where account_category = '{account_category}' and is_using = {ACCOUNT_GOOD_STATUS}
+            and init_date = '2024-12-31';
             """
             """
         account_tuple = self.db_client_lam.select(sql)
         account_tuple = self.db_client_lam.select(sql)
         result = [
         result = [
@@ -120,23 +121,23 @@ class weixinCategory(object):
         response = self.spider.update_msg_list(ghId=gh_id, index=index)
         response = self.spider.update_msg_list(ghId=gh_id, index=index)
         msg_list = response.get("data", {}).get("data")
         msg_list = response.get("data", {}).get("data")
         if msg_list:
         if msg_list:
-            last_article_in_this_msg = msg_list[-1]
+            # last_article_in_this_msg = msg_list[-1]
             self.insert_data_into_db(
             self.insert_data_into_db(
                 gh_id=gh_id, category=category, article_list=msg_list
                 gh_id=gh_id, category=category, article_list=msg_list
             )
             )
-            last_time_stamp_in_this_msg = last_article_in_this_msg["AppMsg"]["BaseInfo"]["UpdateTime"]
-            if latest_time_stamp < last_time_stamp_in_this_msg:
-                next_cursor = response["data"]["next_cursor"]
-                return self.update_each_account(
-                    gh_id=gh_id,
-                    latest_time_stamp=latest_time_stamp,
-                    category=category,
-                    index=next_cursor,
-                )
-            else:
+            # last_time_stamp_in_this_msg = last_article_in_this_msg["AppMsg"]["BaseInfo"]["UpdateTime"]
+            # if latest_time_stamp < last_time_stamp_in_this_msg:
+            #     next_cursor = response["data"]["next_cursor"]
+            #     return self.update_each_account(
+            #         gh_id=gh_id,
+            #         latest_time_stamp=latest_time_stamp,
+            #         category=category,
+            #         index=next_cursor,
+            #     )
+            # else:
                 # 更新最近抓取时间
                 # 更新最近抓取时间
-                self.update_latest_account_timestamp(gh_id=gh_id)
-                print("账号时间更新成功")
+            self.update_latest_account_timestamp(gh_id=gh_id)
+            print("账号时间更新成功")
         else:
         else:
             print("No more data")
             print("No more data")