罗俊辉 1 рік тому
батько
коміт
8c945f30d2

+ 24 - 9
xiaoniangao/xiaoniangao_author/xiaoniangao_author_test.py

@@ -11,6 +11,7 @@ from common.mq import MQ
 sys.path.append(os.getcwd())
 sys.path.append(os.getcwd())
 from common.pipeline import PiaoQuanPipelineTest
 from common.pipeline import PiaoQuanPipelineTest
 from common.public import get_config_from_mysql, clean_title
 from common.public import get_config_from_mysql, clean_title
+from common.scheduling_db import MysqlHelper
 
 
 
 
 def tunnel_proxies():
 def tunnel_proxies():
@@ -44,13 +45,16 @@ class XiaoNianGaoAuthor:
         # 每轮只抓取定量的数据,到达数量后自己退出
         # 每轮只抓取定量的数据,到达数量后自己退出
         max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
         max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
         for user_dict in self.user_list:
         for user_dict in self.user_list:
-            if self.download_count <= max_count:
-                self.get_video_list(user_dict)
-                time.sleep(random.randint(1, 15))
-            else:
-                message = "本轮已经抓取足够数量的视频,已经自动退出"
-                print(message)
-                return
+            print(user_dict)
+            account_level = user_dict['account_level']
+            if account_level and account_level != "P3":
+                if self.download_count <= max_count:
+                    self.get_video_list(user_dict)
+                    time.sleep(random.randint(1, 15))
+                else:
+                    message = "本轮已经抓取足够数量的视频,已经自动退出"
+                    print(message)
+                    return
 
 
     def get_video_list(self, user_dict):
     def get_video_list(self, user_dict):
         next_t = -1
         next_t = -1
@@ -179,7 +183,12 @@ class XiaoNianGaoAuthor:
             item=video_dict,
             item=video_dict,
             trace_id=trace_id,
             trace_id=trace_id,
         )
         )
-        flag = pipeline.process_item()
+        account_level = user_dict['account_level']
+        if account_level == "P0" or account_level == "P1":
+            flag = True
+        else:
+            flag = pipeline.process_item()
+        # flag = pipeline.process_item()
         if flag:
         if flag:
             video_dict["width"] = video_dict["video_width"]
             video_dict["width"] = video_dict["video_width"]
             video_dict["height"] = video_dict["video_height"]
             video_dict["height"] = video_dict["video_height"]
@@ -195,11 +204,17 @@ class XiaoNianGaoAuthor:
 
 
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
+    select_user_sql = (
+        f"""select * from crawler_user_v3 where task_id=21"""
+    )
+    user_list = MysqlHelper.get_values(
+        "author", "xiaoniangao", select_user_sql, "prod", ""
+    )
     XNGA = XiaoNianGaoAuthor(
     XNGA = XiaoNianGaoAuthor(
         platform="xiaoniangao",
         platform="xiaoniangao",
         mode="author",
         mode="author",
         rule_dict={},
         rule_dict={},
         env="prod",
         env="prod",
-        user_list=[{"link": 295640510, "uid": "12334"}],
+        user_list=user_list
     )
     )
     XNGA.get_author_list()
     XNGA.get_author_list()

+ 28 - 23
xiaoniangao/xiaoniangao_author/xiaoniangao_author_v2.py

@@ -23,9 +23,9 @@ def tunnel_proxies():
     password = "5zqcjkmy"
     password = "5zqcjkmy"
     tunnel_proxies = {
     tunnel_proxies = {
         "http": "http://%(user)s:%(pwd)s@%(proxy)s/"
         "http": "http://%(user)s:%(pwd)s@%(proxy)s/"
-        % {"user": username, "pwd": password, "proxy": tunnel},
+                % {"user": username, "pwd": password, "proxy": tunnel},
         "https": "http://%(user)s:%(pwd)s@%(proxy)s/"
         "https": "http://%(user)s:%(pwd)s@%(proxy)s/"
-        % {"user": username, "pwd": password, "proxy": tunnel},
+                 % {"user": username, "pwd": password, "proxy": tunnel},
     }
     }
 
 
     return tunnel_proxies
     return tunnel_proxies
@@ -45,25 +45,27 @@ class XiaoNianGaoAuthor:
         # 每轮只抓取定量的数据,到达数量后自己退出
         # 每轮只抓取定量的数据,到达数量后自己退出
         max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
         max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
         for user_dict in self.user_list:
         for user_dict in self.user_list:
-            if self.download_count <= max_count:
-                self.get_video_list(user_dict)
-                # time.sleep(random.randint(1, 10))
-                time.sleep(1)
-            else:
-                AliyunLogger.logging(
-                    code="2000",
-                    platform=self.platform,
-                    mode=self.mode,
-                    env=self.env,
-                    message="本轮已经抓取足够数量的视频,已经自动退出",
-                )
-                Common.logging(
-                    log_type=self.mode,
-                    crawler=self.platform,
-                    env=self.env,
-                    message="本轮已经抓取足够数量的视频,已经自动退出",
-                )
-                return
+            account_level = user_dict['account_level']
+            if account_level and account_level != "P3":
+                if self.download_count <= max_count:
+                    self.get_video_list(user_dict)
+                    # time.sleep(random.randint(1, 10))
+                    time.sleep(1)
+                else:
+                    AliyunLogger.logging(
+                        code="2000",
+                        platform=self.platform,
+                        mode=self.mode,
+                        env=self.env,
+                        message="本轮已经抓取足够数量的视频,已经自动退出",
+                    )
+                    Common.logging(
+                        log_type=self.mode,
+                        crawler=self.platform,
+                        env=self.env,
+                        message="本轮已经抓取足够数量的视频,已经自动退出",
+                    )
+                    return
 
 
     def get_video_list(self, user_dict):
     def get_video_list(self, user_dict):
         next_t = -1
         next_t = -1
@@ -266,8 +268,11 @@ class XiaoNianGaoAuthor:
             item=video_dict,
             item=video_dict,
             trace_id=trace_id,
             trace_id=trace_id,
         )
         )
-        # try:
-        flag = pipeline.process_item()
+        account_level = user_dict['account_level']
+        if account_level == "P0" or account_level == "P1":
+            flag = True
+        else:
+            flag = pipeline.process_item()
         if flag:
         if flag:
             video_dict["width"] = video_dict["video_width"]
             video_dict["width"] = video_dict["video_width"]
             video_dict["height"] = video_dict["video_height"]
             video_dict["height"] = video_dict["video_height"]

+ 34 - 0
xiaoniangao/xiaoniangao_plus/change_xng_account.py

@@ -0,0 +1,34 @@
+
+import pandas as pd
+from tqdm import tqdm
+from common.db import MysqlHelper
+
+
+# Build a {uid: level} mapping from the Excel file at `path`.
+# For every row returned by pandas.read_excel, the cell at index 0 becomes the
+# key and the cell at index 6 becomes the value; when a key repeats, the later
+# row overwrites the earlier one. Returns the resulting dict.
+def read_excel(path):
+    result = {}
+    # .values.tolist() turns the DataFrame into one plain Python list per row.
+    data_list = pd.read_excel(path).values.tolist()
+    for item in data_list:
+        # NOTE(review): pandas presumably yields NaN for blank cells, which
+        # would be stored in the mapping unchanged - confirm the sheet has no
+        # empty uid/level cells.
+        uid, level = item[0], item[6]
+        result[uid] = level
+    return result
+
+
+# Issue one UPDATE statement against crawler_user_v3 per entry of `video_obj`,
+# setting account_level to the mapped value for the row whose uid matches.
+# `video_obj` is iterated as a mapping of uid -> level; each key must be
+# convertible with int().
+def update_level(video_obj):
+    # tqdm wraps the iteration so progress is displayed while the loop runs.
+    for key in tqdm(video_obj):
+        # int() normalises the key; a non-integer-like key raises here.
+        uid = int(key)
+        level = video_obj[key]
+        # NOTE(review): `level` is interpolated into the SQL text without any
+        # escaping - safe only if the source data is trusted; consider a
+        # parameterized query if MysqlHelper supports one (TODO confirm).
+        sql = f"""UPDATE crawler_user_v3 SET account_level = "{level}" where uid = "{uid}";"""
+        # print(sql)
+        # NOTE(review): env is hard-coded to "prod", so every run appears to
+        # target the production database - confirm this is intended.
+        MysqlHelper.update_values(
+            log_type="author",
+            crawler="xiaoniangao",
+            sql=sql,
+            env="prod",
+            machine=""
+        )
+
+
+# Script entry point: read the uid -> level mapping from the workbook and then
+# write every level to the database.
+if __name__ == '__main__':
+    # NOTE(review): absolute path on one developer's machine; the script only
+    # runs as-is where this file exists.
+    excel_path = r'/Users/luojunhui/Desktop/小年糕账号筛选.xlsx'
+    account_dict = read_excel(excel_path)
+    update_level(account_dict)