luojunhui 3 ay önce
ebeveyn
işleme
cb097e280a
1 değiştirilmiş dosya ile 15 ekleme ve 14 silme
  1. 15 14
      coldStartTasks/crawler/weixinCategoryCrawler.py

+ 15 - 14
coldStartTasks/crawler/weixinCategoryCrawler.py

@@ -36,7 +36,8 @@ class weixinCategory(object):
         sql = f"""
             select gh_id, account_source, account_name, account_category, latest_update_time
             from long_articles_accounts 
-            where account_category = '{account_category}' and is_using = {ACCOUNT_GOOD_STATUS};
+            where account_category = '{account_category}' and is_using = {ACCOUNT_GOOD_STATUS}
+            and init_date = '2024-12-31';
             """
         account_tuple = self.db_client_lam.select(sql)
         result = [
@@ -120,23 +121,23 @@ class weixinCategory(object):
         response = self.spider.update_msg_list(ghId=gh_id, index=index)
         msg_list = response.get("data", {}).get("data")
         if msg_list:
-            last_article_in_this_msg = msg_list[-1]
+            # last_article_in_this_msg = msg_list[-1]
             self.insert_data_into_db(
                 gh_id=gh_id, category=category, article_list=msg_list
             )
-            last_time_stamp_in_this_msg = last_article_in_this_msg["AppMsg"]["BaseInfo"]["UpdateTime"]
-            if latest_time_stamp < last_time_stamp_in_this_msg:
-                next_cursor = response["data"]["next_cursor"]
-                return self.update_each_account(
-                    gh_id=gh_id,
-                    latest_time_stamp=latest_time_stamp,
-                    category=category,
-                    index=next_cursor,
-                )
-            else:
+            # last_time_stamp_in_this_msg = last_article_in_this_msg["AppMsg"]["BaseInfo"]["UpdateTime"]
+            # if latest_time_stamp < last_time_stamp_in_this_msg:
+            #     next_cursor = response["data"]["next_cursor"]
+            #     return self.update_each_account(
+            #         gh_id=gh_id,
+            #         latest_time_stamp=latest_time_stamp,
+            #         category=category,
+            #         index=next_cursor,
+            #     )
+            # else:
                 # Update the account's latest crawl timestamp
-                self.update_latest_account_timestamp(gh_id=gh_id)
-                print("账号时间更新成功")
+            self.update_latest_account_timestamp(gh_id=gh_id)
+            print("账号时间更新成功")
         else:
             print("No more data")