Przeglądaj źródła

gzh
线下管理器
视频刷刷
西瓜

罗俊辉 1 rok temu
rodzic
commit
d2a5fc1638

+ 8 - 7
app/off_line_controler.py

@@ -50,10 +50,11 @@ class SpiderScheduler(object):
 
 if __name__ == "__main__":
     SC = SpiderScheduler()
-    # schedule.every().day.at("20:06").do(SC.run_xng_plus, hour=1)
-    schedule.every().day.at("20:30").do(SC.run_spss, hour=1)
-
-    schedule.every().day.at("18:30").do(SC.run_spss_id, hour=1)
-
-    while True:
-        schedule.run_pending()
+    SC.run_spss(hour=5)
+    # # schedule.every().day.at("20:06").do(SC.run_xng_plus, hour=1)
+    # schedule.every().day.at("20:30").do(SC.run_spss, hour=1)
+    #
+    # schedule.every().day.at("18:30").do(SC.run_spss_id, hour=1)
+    #
+    # while True:
+    #     schedule.run_pending()

+ 2 - 2
spider/crawler_offline/shipinshuashua.py

@@ -37,7 +37,7 @@ class SPSSRecommend:
         self.env = env
         self.rule_dict = rule_dict
         self.our_uid_list = our_uid
-        chromedriverExecutable = "/Users/luojunhui/chromedriver/chromedriver_v111/chromedriver"
+        chromedriverExecutable = "/Users/luojunhui/chromedriver/chromedriver_v116/chromedriver"
         self.aliyun_log = AliyunLogger(platform=crawler, mode=log_type, env=env)
         Local.logger(self.log_type, self.crawler).info("启动微信")
         # 微信的配置文件
@@ -46,7 +46,7 @@ class SPSSRecommend:
             "devicesName": "Android",
             "appPackage": "com.tencent.mm",
             "appActivity": ".ui.LauncherUI",
-            "autoGrantPermissions": "true",
+            "autoGrantPermissions": True,
             "noReset": True,
             "resetkeyboard": True,
             "unicodekeyboard": True,

+ 31 - 0
spider/crawler_online/gongzhonghao_author.py

@@ -0,0 +1,31 @@
+"""
+@author: Curry Luo
+@file: gongzhonghao_author.py
+@time: 2024/01/05
+"""
+
+
+class GongZhongHaoAuthor(object):
+    """
+    Crawler for official-account (gongzhonghao) authors.
+
+    As added here the class only stores its constructor arguments and
+    exposes two methods that do no real work yet (presumably placeholders).
+    """
+
+    def __init__(self, platform, mode, user_list, rule_dict, env="prod"):
+        # Store the constructor arguments unchanged; nothing else happens here.
+        self.platform = platform
+        self.mode = mode
+        self.user_list = user_list
+        self.rule_dict = rule_dict
+        self.env = env
+
+    def get_video_list(self):
+        """
+        Get the video list.
+
+        :return: currently always the constant 0.
+        """
+        return 0
+
+    def fake_id_manage(self):
+        """
+        Get the fake_id.
+
+        :return: None -- the method has no body beyond this docstring.
+        """

+ 187 - 0
spider/crawler_online/xigua_author.py

@@ -0,0 +1,187 @@
+"""
+西瓜视频——新规则抓取
+"""
+import os
+import sys
+import uuid
+import time
+import random
+import string
+import asyncio
+import requests
+
+sys.path.append(os.getcwd())
+
+from application.common.messageQueue import MQ
+from application.common.proxies import tunnel_proxies
+from application.common.log import AliyunLogger
+
+
+async def create_signature():
+    """
+    Randomly generate a 26-character signature string.
+
+    26 characters are sampled from digits, uppercase and lowercase letters
+    and shuffled. Characters 10..21 of the shuffled string are kept between
+    a fixed "AAAAAAAAAA" prefix and a fixed "AAAB" suffix, and the character
+    at index 18 is then replaced according to the mapping below.
+
+    NOTE(review): declared ``async`` but contains no ``await``.
+
+    :return: the generated signature string
+    """
+    src_digits = string.digits  # digit characters
+    src_uppercase = string.ascii_uppercase  # uppercase letters
+    src_lowercase = string.ascii_lowercase  # lowercase letters
+    # The three counts always add up to 26, each being at least 1.
+    digits_num = random.randint(1, 6)
+    uppercase_num = random.randint(1, 26 - digits_num - 1)
+    lowercase_num = 26 - (digits_num + uppercase_num)
+    password = (
+            random.sample(src_digits, digits_num)
+            + random.sample(src_uppercase, uppercase_num)
+            + random.sample(src_lowercase, lowercase_num)
+    )
+    random.shuffle(password)
+    # 10 fixed chars + 12 random chars + 4 fixed chars = 26 chars.
+    new_password = "AAAAAAAAAA" + "".join(password)[10:-4] + "AAAB"
+    # Split around index 18 so that single character can be substituted.
+    new_password_start = new_password[0:18]
+    new_password_end = new_password[-7:]
+    # Substitute index 18: "8" -> "w", "9" -> "x", "-" -> "y", "." -> "z",
+    # anything else -> "y".
+    # NOTE(review): "-" and "." are not in any of the sampled alphabets, so
+    # those two branches cannot match for strings built above.
+    if new_password[18] == "8":
+        new_password = new_password_start + "w" + new_password_end
+    elif new_password[18] == "9":
+        new_password = new_password_start + "x" + new_password_end
+    elif new_password[18] == "-":
+        new_password = new_password_start + "y" + new_password_end
+    elif new_password[18] == ".":
+        new_password = new_password_start + "z" + new_password_end
+    else:
+        new_password = new_password_start + "y" + new_password_end
+    return new_password
+
+
+class XiGuaAuthor(object):
+    """
+    Crawler object for Xigua (ixigua.com) author accounts.
+    """
+
+    def __init__(self, platform, mode, rule_dict, user_list, env):
+        # Plain configuration passed in by the caller.
+        self.platform = platform
+        self.mode = mode
+        self.rule_dict = rule_dict
+        self.user_list = user_list
+        self.env = env
+        # Counter and flag initialised here; not updated by the visible code.
+        self.download_cnt = 0
+        # Message queue whose topic name is suffixed with the environment.
+        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
+        self.expire_flag = False
+        self.aliyun_log = AliyunLogger(platform=self.platform, mode=self.mode)
+
+    async def process_author_list(self):
+        """
+        Crawl multiple accounts.
+
+        Awaits process_each_author for every entry of self.user_list, one
+        after another.
+        :return:
+        """
+        for user_account in self.user_list:
+            await self.process_each_author(user_account)
+
+    async def process_each_author(self, user_account):
+        """
+        Crawl the video list of a single account.
+
+        Pages through the author video-list endpoint 30 items at a time and
+        stops when the response is not usable, when a page has no videos,
+        or when date_flag returns a truthy value for a video.
+
+        :param user_account: account record; its "link" entry is read and the
+            "https://www.ixigua.com/home/" prefix removed to get the user id
+        :return:
+        """
+        off_set = 0
+        # One signature is generated per account and reused for every page.
+        signature = await create_signature()
+        url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
+        while True:
+            params = {
+                "to_user_id": str(
+                    user_account["link"].replace("https://www.ixigua.com/home/", "")
+                ),
+                "offset": str(off_set),
+                "limit": "30",
+                "maxBehotTime": "0",
+                "order": "new",
+                "isHome": "0",
+                "_signature": signature,
+            }
+            headers = {
+                "referer": f'https://www.ixigua.com/home/{user_account["link"].replace("https://www.ixigua.com/home/", "")}/video/?preActiveKey=hotsoon&list_entrance=userdetail',
+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41",
+            }
+            # NOTE(review): requests.get is a synchronous call made inside a
+            # coroutine; it is not awaited.
+            response = requests.get(
+                url=url,
+                headers=headers,
+                params=params,
+                proxies=tunnel_proxies(),
+                timeout=5
+            )
+            # Advance to the next page before inspecting this response.
+            off_set += 30
+            # Stop when the body lacks "data" or the status is not 200.
+            if "data" not in response.text or response.status_code != 200:
+                self.aliyun_log.logging(
+                    code="2000",
+                    message=f"get_videoList:{response.text}\n",
+                )
+                return
+            # Stop when the page contains no videos.
+            elif not response.json()["data"]["videoList"]:
+                self.aliyun_log.logging(
+                    code="2000",
+                    message=f"没有更多数据啦~\n",
+                )
+                return
+            else:
+                video_list = response.json()["data"]["videoList"]
+                for video in video_list:
+                    try:
+                        self.aliyun_log.logging(
+                            code="1001",
+                            data=video,
+                            message="扫描到一条视频"
+                        )
+                        # Check whether the time meets the requirement
+                        if self.date_flag(video, user_account):
+                            return
+                        else:
+                            # NOTE(review): process_video_obj is not defined
+                            # in the visible part of this file; any error
+                            # raised here is caught and logged below.
+                            self.process_video_obj(video, user_account)
+                    except Exception as e:
+                        self.aliyun_log.logging(
+                            code="3000",
+                            data=video,
+                            message="抓取单条视频异常, 报错原因是: {}".format(e)
+                        )
+
+    def rule_maker(self, account):
+        """
+        Generate different rules for different accounts.
+
+        Returns a fixed rule dict when the flag equals "V1", "V2" or "V3",
+        otherwise self.rule_dict.
+
+        :param account: input account info
+        {'play_cnt': {'min': 100000, 'max': 0}, 'period': {'min': 5, 'max': 5}}
+        """
+        # NOTE(review): str.split("") raises ValueError (empty separator), and
+        # split returns a list, which never equals "V1"/"V2"/"V3". The caller
+        # chain also passes the account record that process_each_author
+        # indexes with ["link"]. Intended field and separator need confirming.
+        flag = account.split("")
+        if flag == "V1":
+            rule_dict = {
+                "play_cnt": {"min": 50000, "max": 0},
+                'period': {"min": 15, "max": 15},
+                'special': True
+            }
+            return rule_dict
+        elif flag == "V2":
+            rule_dict = {
+                "play_cnt": {"min": 10000, "max": 0},
+                'period': {"min": 7, "max": 7},
+                'special': True
+            }
+            return rule_dict
+        elif flag == "V3":
+            rule_dict = {
+                "play_cnt": {"min": 5000, "max": 0},
+                'period': {"min": 3, "max": 3},
+                'special': True
+            }
+            return rule_dict
+        else:
+            return self.rule_dict
+
+    def date_flag(self, video, user_account):
+        """
+        Check whether the time satisfies the condition.
+
+        NOTE(review): the visible body only computes ``rule`` and then ends,
+        so the method implicitly returns None.
+
+        :param video: video info
+        :param user_account: user account
+        :return:
+        """
+        rule = self.rule_maker(user_account)
+
+
+
+
+
+
+