2 years ago · 8c945f30d2
--- a/xiaoniangao/xiaoniangao_author/xiaoniangao_author_test.py
+++ b/xiaoniangao/xiaoniangao_author/xiaoniangao_author_test.py
@@ -11,6 +11,7 @@ from common.mq import MQ
 
															 sys.path.append(os.getcwd())
														
 
															 from common.pipeline import PiaoQuanPipelineTest
														
 
															 from common.public import get_config_from_mysql, clean_title
														
 
															+from common.scheduling_db import MysqlHelper
														
 
															 def tunnel_proxies():
														
@@ -44,13 +45,16 @@ class XiaoNianGaoAuthor:
 
															         # 每轮只抓取定量的数据，到达数量后自己退出
														
 
															         max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
														
 
															         for user_dict in self.user_list:
														
 
															-            if self.download_count <= max_count:
														
 
															-                self.get_video_list(user_dict)
														
 
															-                time.sleep(random.randint(1, 15))
														
 
															-            else:
														
 
															-                message = "本轮已经抓取足够数量的视频，已经自动退出"
														
 
															-                print(message)
														
 
															-                return
														
 
															+            print(user_dict)
														
 
															+            account_level = user_dict['account_level']
														
 
															+            if account_level and account_level != "P3":
														
 
															+                if self.download_count <= max_count:
														
 
															+                    self.get_video_list(user_dict)
														
 
															+                    time.sleep(random.randint(1, 15))
														
 
															+                else:
														
 
															+                    message = "本轮已经抓取足够数量的视频，已经自动退出"
														
 
															+                    print(message)
														
 
															+                    return
														
 
															     def get_video_list(self, user_dict):
														
 
															         next_t = -1
														
@@ -179,7 +183,12 @@ class XiaoNianGaoAuthor:
 
															             item=video_dict,
														
 
															             trace_id=trace_id,
														
 
															         )
														
 
															-        flag = pipeline.process_item()
														
 
															+        account_level = user_dict['account_level']
														
 
															+        if account_level == "P0" or account_level == "P1":
														
 
															+            flag = True
														
 
															+        else:
														
 
															+            flag = pipeline.process_item()
														
 
															+        # flag = pipeline.process_item()
														
 
															         if flag:
														
 
															             video_dict["width"] = video_dict["video_width"]
														
 
															             video_dict["height"] = video_dict["video_height"]
														
@@ -195,11 +204,17 @@ class XiaoNianGaoAuthor:
 
															 if __name__ == "__main__":
														
 
															+    select_user_sql = (
														
 
															+        f"""select * from crawler_user_v3 where task_id=21"""
														
 
															+    )
														
 
															+    user_list = MysqlHelper.get_values(
														
 
															+        "author", "xiaoniangao", select_user_sql, "prod", ""
														
 
															+    )
														
 
															     XNGA = XiaoNianGaoAuthor(
														
 
															         platform="xiaoniangao",
														
 
															         mode="author",
														
 
															         rule_dict={},
														
 
															         env="prod",
														
 
															-        user_list=[{"link": 295640510, "uid": "12334"}],
														
 
															+        user_list=user_list
														
 
															     )
														
 
															     XNGA.get_author_list()
														
--- a/xiaoniangao/xiaoniangao_author/xiaoniangao_author_v2.py
+++ b/xiaoniangao/xiaoniangao_author/xiaoniangao_author_v2.py
@@ -23,9 +23,9 @@ def tunnel_proxies():
 
															     password = "5zqcjkmy"
														
 
															     tunnel_proxies = {
														
 
															         "http": "http://%(user)s:%(pwd)s@%(proxy)s/"
														
 
															-        % {"user": username, "pwd": password, "proxy": tunnel},
														
 
															+                % {"user": username, "pwd": password, "proxy": tunnel},
														
 
															         "https": "http://%(user)s:%(pwd)s@%(proxy)s/"
														
 
															-        % {"user": username, "pwd": password, "proxy": tunnel},
														
 
															+                 % {"user": username, "pwd": password, "proxy": tunnel},
														
 
															     }
														
 
															     return tunnel_proxies
														
@@ -45,25 +45,27 @@ class XiaoNianGaoAuthor:
 
															         # 每轮只抓取定量的数据，到达数量后自己退出
														
 
															         max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
														
 
															         for user_dict in self.user_list:
														
 
															-            if self.download_count <= max_count:
														
 
															-                self.get_video_list(user_dict)
														
 
															-                # time.sleep(random.randint(1, 10))
														
 
															-                time.sleep(1)
														
 
															-            else:
														
 
															-                AliyunLogger.logging(
														
 
															-                    code="2000",
														
 
															-                    platform=self.platform,
														
 
															-                    mode=self.mode,
														
 
															-                    env=self.env,
														
 
															-                    message="本轮已经抓取足够数量的视频，已经自动退出",
														
 
															-                )
														
 
															-                Common.logging(
														
 
															-                    log_type=self.mode,
														
 
															-                    crawler=self.platform,
														
 
															-                    env=self.env,
														
 
															-                    message="本轮已经抓取足够数量的视频，已经自动退出",
														
 
															-                )
														
 
															-                return
														
 
															+            account_level = user_dict['account_level']
														
 
															+            if account_level and account_level != "P3":
														
 
															+                if self.download_count <= max_count:
														
 
															+                    self.get_video_list(user_dict)
														
 
															+                    # time.sleep(random.randint(1, 10))
														
 
															+                    time.sleep(1)
														
 
															+                else:
														
 
															+                    AliyunLogger.logging(
														
 
															+                        code="2000",
														
 
															+                        platform=self.platform,
														
 
															+                        mode=self.mode,
														
 
															+                        env=self.env,
														
 
															+                        message="本轮已经抓取足够数量的视频，已经自动退出",
														
 
															+                    )
														
 
															+                    Common.logging(
														
 
															+                        log_type=self.mode,
														
 
															+                        crawler=self.platform,
														
 
															+                        env=self.env,
														
 
															+                        message="本轮已经抓取足够数量的视频，已经自动退出",
														
 
															+                    )
														
 
															+                    return
														
 
															     def get_video_list(self, user_dict):
														
 
															         next_t = -1
														
@@ -266,8 +268,11 @@ class XiaoNianGaoAuthor:
 
															             item=video_dict,
														
 
															             trace_id=trace_id,
														
 
															         )
														
 
															-        # try:
														
 
															-        flag = pipeline.process_item()
														
 
															+        account_level = user_dict['account_level']
														
 
															+        if account_level == "P0" or account_level == "P1":
														
 
															+            flag = True
														
 
															+        else:
														
 
															+            flag = pipeline.process_item()
														
 
															         if flag:
														
 
															             video_dict["width"] = video_dict["video_width"]
														
 
															             video_dict["height"] = video_dict["video_height"]
														
--- a/xiaoniangao/xiaoniangao_plus/change_xng_account.py
+++ b/xiaoniangao/xiaoniangao_plus/change_xng_account.py
@@ -0,0 +1,34 @@
 
															+
														
 
															+import pandas as pd
														
 
															+from tqdm import tqdm
														
 
															+from common.db import MysqlHelper
														
 
															+
														
 
															+
														
 
															+def read_excel(path, uid_col=0, level_col=6):
															+    """Load a uid -> account level mapping from an Excel sheet.
															+
															+    Args:
															+        path: Location of the workbook; handed to ``pandas.read_excel``.
															+        uid_col: Zero-based index of the column holding the uid.
															+        level_col: Zero-based index of the column holding the level.
															+
															+    Returns:
															+        dict mapping each row's uid cell to its level cell. When a uid
															+        appears in several rows, the last row wins.
															+    """
															+    result = {}
															+    for item in pd.read_excel(path).values.tolist():
															+        uid, level = item[uid_col], item[level_col]
															+        # Blank cells are read as NaN. A NaN uid cannot be turned into an
															+        # int by update_level, and a NaN level would be formatted into
															+        # the SQL as the literal text "nan", so incomplete rows are skipped.
															+        if pd.isna(uid) or pd.isna(level):
															+            continue
															+        result[uid] = level
															+    return result
														
 
															+
														
 
															+
														
 
															+def update_level(video_obj):
															+    """Write the account level of every uid in ``video_obj`` to crawler_user_v3.
															+
															+    One UPDATE statement is issued per entry through
															+    ``MysqlHelper.update_values``; progress is shown with ``tqdm``.
															+
															+    Args:
															+        video_obj: Mapping of uid -> account level. Every key must be
															+            convertible with ``int()``.
															+
															+    Raises:
															+        ValueError, TypeError: If a key cannot be converted to an int.
															+            Entries processed before the failing one have already been
															+            sent to the database.
															+    """
															+    for key, raw_level in tqdm(video_obj.items()):
															+        # int() guarantees that only digits reach the SQL text for uid.
															+        uid = int(key)
															+        # The level comes from a spreadsheet and is embedded in a
															+        # double-quoted SQL string literal, so backslashes and double
															+        # quotes are escaped to keep the value inside that literal.
															+        # NOTE(review): assumes the server treats backslash as the escape
															+        # character (NO_BACKSLASH_ESCAPES not set) - confirm. Prefer a
															+        # parameterized query if MysqlHelper offers one.
															+        level = str(raw_level).replace("\\", "\\\\").replace('"', '\\"')
															+        sql = f"""UPDATE crawler_user_v3 SET account_level = "{level}" where uid = "{uid}";"""
															+        MysqlHelper.update_values(
															+            log_type="author",
															+            crawler="xiaoniangao",
															+            sql=sql,
															+            env="prod",
															+            machine=""
															+        )
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
															+    import sys
															+
															+    # The workbook path may be passed as the first command-line argument;
															+    # without one, the original hard-coded location is used.
															+    default_excel_path = r'/Users/luojunhui/Desktop/小年糕账号筛选.xlsx'
															+    excel_path = sys.argv[1] if len(sys.argv) > 1 else default_excel_path
															+    # Read the uid -> level mapping, then push every level to the database.
															+    account_dict = read_excel(excel_path)
															+    update_level(account_dict)