luojunhui 9 месяцев назад
Родитель
Коммит
caa10421b8
2 изменённых файла: 15 добавлений и 16 удалений
  1. 1 1
      account_cold_start_daily.py
  2. 14 15
      coldStartTasks/crawler/weixinCategoryCrawler.py

+ 1 - 1
account_cold_start_daily.py

@@ -108,4 +108,4 @@ def main(category_list=None):
 
 
 if __name__ == '__main__':
-    main(['account_association'])
+    main()

+ 14 - 15
coldStartTasks/crawler/weixinCategoryCrawler.py

@@ -36,8 +36,7 @@ class weixinCategory(object):
         sql = f"""
             select gh_id, account_source, account_name, account_category, latest_update_time
             from long_articles_accounts 
-            where account_category = '{account_category}' and is_using = {ACCOUNT_GOOD_STATUS}
-            and init_date = '2024-12-31';
+            where account_category = '{account_category}' and is_using = {ACCOUNT_GOOD_STATUS};
             """
         account_tuple = self.db_client_lam.select(sql)
         result = [
@@ -121,23 +120,23 @@ class weixinCategory(object):
         response = self.spider.update_msg_list(ghId=gh_id, index=index)
         msg_list = response.get("data", {}).get("data")
         if msg_list:
-            # last_article_in_this_msg = msg_list[-1]
+            last_article_in_this_msg = msg_list[-1]
             self.insert_data_into_db(
                 gh_id=gh_id, category=category, article_list=msg_list
             )
-            # last_time_stamp_in_this_msg = last_article_in_this_msg["AppMsg"]["BaseInfo"]["UpdateTime"]
-            # if latest_time_stamp < last_time_stamp_in_this_msg:
-            #     next_cursor = response["data"]["next_cursor"]
-            #     return self.update_each_account(
-            #         gh_id=gh_id,
-            #         latest_time_stamp=latest_time_stamp,
-            #         category=category,
-            #         index=next_cursor,
-            #     )
-            # else:
+            last_time_stamp_in_this_msg = last_article_in_this_msg["AppMsg"]["BaseInfo"]["UpdateTime"]
+            if latest_time_stamp < last_time_stamp_in_this_msg:
+                next_cursor = response["data"]["next_cursor"]
+                return self.update_each_account(
+                    gh_id=gh_id,
+                    latest_time_stamp=latest_time_stamp,
+                    category=category,
+                    index=next_cursor,
+                )
+            else:
                 # 更新最近抓取时间
-            self.update_latest_account_timestamp(gh_id=gh_id)
-            print("账号时间更新成功")
+                self.update_latest_account_timestamp(gh_id=gh_id)
+                print("账号时间更新成功")
         else:
             print("No more data")