zhangyong 5 miesięcy temu
rodzic
commit
ca24738f41

+ 57 - 0
application/common/mysql/sql.py

@@ -0,0 +1,57 @@
+
+
+from datetime import datetime
+import os
+import sys
+
+from application.common.mysql import MysqlHelper
+
+class Sql:
+    """Query helpers for the ``xng_uid`` and ``crawler_user_v3`` tables.
+
+    Each method builds one statement, runs it through a fresh
+    ``MysqlHelper`` and reduces the helper's result to a bool.
+    """
+
+    # Backslash escapes for the characters that would terminate or corrupt
+    # a quoted SQL string literal. Names and avatar URLs come from scraped
+    # data, so they must not be pasted into the statement verbatim.
+    # NOTE(review): assumes the server is not running with
+    # NO_BACKSLASH_ESCAPES; bound parameters would be the better fix if
+    # MysqlHelper supports them -- confirm.
+    _ESCAPES = str.maketrans({
+        "\\": "\\\\",
+        "'": "\\'",
+        '"': '\\"',
+        "\0": "\\0",
+        "\n": "\\n",
+        "\r": "\\r",
+        "\x1a": "\\Z",
+    })
+
+    @staticmethod
+    def _quote(value):
+        """Return ``value`` as text with quotes and backslashes escaped."""
+        return str(value).translate(Sql._ESCAPES)
+
+    def update_name_url(self, mid, avatar_url, user_name):
+        """Update the avatar URL and user name stored for ``mid``.
+
+        Args:
+            mid: value matched against the ``uid`` column of ``xng_uid``.
+            avatar_url: new value for the ``avatar_url`` column.
+            user_name: new value for the ``user_name`` column.
+
+        Returns:
+            True when ``MysqlHelper.update`` returns a truthy value,
+            otherwise False.
+        """
+        quote = Sql._quote
+        sql = f""" update xng_uid set avatar_url = "{quote(avatar_url)}", user_name="{quote(user_name)}" where uid = "{quote(mid)}"; """
+        return bool(MysqlHelper().update(sql=sql))
+
+    def insert_name_url(self, uid, avatar_url, user_name):
+        """Insert a new ``xng_uid`` row stamped with the current local time.
+
+        Args:
+            uid: value for the ``uid`` column.
+            avatar_url: value for the ``avatar_url`` column.
+            user_name: value for the ``user_name`` column.
+
+        Returns:
+            True when ``MysqlHelper.update`` returns a truthy value,
+            otherwise False.
+        """
+        quote = Sql._quote
+        formatted_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        insert_sql = f"""INSERT INTO xng_uid (uid, avatar_url, user_name, data_time) values ('{quote(uid)}' ,'{quote(avatar_url)}','{quote(user_name)}', '{formatted_time}')"""
+        return bool(MysqlHelper().update(sql=insert_sql))
+
+    def select_id(self, uid):
+        """Return True when ``xng_uid`` already has a row for ``uid``."""
+        sql = f""" select uid from xng_uid where uid = "{Sql._quote(uid)}"; """
+        return bool(MysqlHelper().select(sql=sql))
+
+    def select_id_status(self, uid):
+        """Return True when ``uid`` has NOT been added to ``crawler_user_v3``.
+
+        The result is inverted relative to ``select_id``: a matching
+        ``link`` row yields False, no match yields True.
+        """
+        sql = f""" select uid from crawler_user_v3 where link = "{Sql._quote(uid)}"; """
+        return not MysqlHelper().select(sql=sql)

+ 0 - 1
application/common/proxies/fast_proxy.py

@@ -20,5 +20,4 @@ def haiwai_tunnel_proxies():
         "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
         "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
     }
-    print(f"代理地址:{proxies}")
     return proxies

+ 54 - 0
application/common/redis/xng_redis.py

@@ -0,0 +1,54 @@
+import json
+
+import redis
+
+
+
+class XNGSyncRedisHelper:
+    """Hand out redis clients that share one connection pool.
+
+    The pool is stored on the class, so constructing the helper repeatedly
+    reuses the same pool instead of building a new one each time.
+    """
+
+    # Pool shared by every instance; created the first time it is needed.
+    _pool: redis.ConnectionPool = None
+    # First helper that was constructed.
+    _instance = None
+
+    def __init__(self):
+        # Assign on the class, not on ``self``: an instance attribute would
+        # leave the class-level slots at None and defeat the sharing.
+        if XNGSyncRedisHelper._instance is None:
+            XNGSyncRedisHelper._pool = self._get_pool()
+            XNGSyncRedisHelper._instance = self
+
+    def _get_pool(self) -> redis.ConnectionPool:
+        """Return the shared connection pool, creating it on first use."""
+        if XNGSyncRedisHelper._pool is None:
+            # NOTE(review): host and password are hard-coded in source;
+            # consider loading them from configuration instead.
+            XNGSyncRedisHelper._pool = redis.ConnectionPool(
+                host="r-bp1mb0v08fqi4hjffupd.redis.rds.aliyuncs.com",  # public-network address
+                # host="r-bp1mb0v08fqi4hjffu.redis.rds.aliyuncs.com",  # internal-network address
+                port=6379,
+                db=0,
+                password="Wqsd@2019",
+                # password="Qingqu2019",
+            )
+        return XNGSyncRedisHelper._pool
+
+    def get_client(self) -> redis.Redis:
+        """Return a new ``redis.Redis`` client bound to the shared pool."""
+        return redis.Redis(connection_pool=self._get_pool())
+
+    def close(self):
+        """Disconnect the shared pool's connections, including in-use ones."""
+        if self._pool:
+            self._pool.disconnect(inuse_connections=True)
+
+
+
+
+def xng_get_video_data():
+    """Pop one entry from the tail of the ``task:xng_video_id`` list.
+
+    Returns:
+        None when the list is empty; otherwise the JSON-decoded entry, or
+        the raw redis value when the entry is not valid JSON.
+    """
+    task = "task:xng_video_id"
+    client = XNGSyncRedisHelper().get_client()
+    raw = client.rpop(task)
+    if raw is None:
+        return None
+    try:
+        # Entries are stored as JSON text so that dict payloads survive
+        # the round trip through redis.
+        return json.loads(raw)
+    except (TypeError, ValueError):
+        return raw
+
+
+def xng_in_video_data(ret):
+    """Append ``ret`` to the tail of the ``task:xng_video_id`` list.
+
+    Args:
+        ret: payload to queue. ``str``/``bytes`` are stored unchanged;
+            anything else (for example a dict) is serialized with
+            ``json.dumps`` first, because redis cannot store it directly.
+    """
+    task = "task:xng_video_id"
+    payload = ret if isinstance(ret, (bytes, str)) else json.dumps(ret)
+    client = XNGSyncRedisHelper().get_client()
+    client.rpush(task, payload)

+ 8 - 8
spider/crawler_offline/xiaoniangao_zhanghao.py

@@ -117,7 +117,7 @@ class XiaoNianGaoZH(object):
         with open(file_path, 'r') as file:
             content = file.read()
 
-        url = "https://admin.piaoquantv.com/manager/crawler/v3/task/save"
+        url = "https://admin.piaoquantv.com/manager/crawler/v3/task/save?muid=999"
         payload = {
             "taskName": "小年糕账号",
             "source": "xiaoniangao",
@@ -138,7 +138,7 @@ class XiaoNianGaoZH(object):
         headers = {
             'accept': 'application/json',
             'content-type': 'application/json;',
-            'cookie': 'SESSION=ZmYwMzBmOWItM2M5YS00ZGMyLTk3MjctMzE0YzE4MmUxNThh',
+            'cookie': 'SESSION=YjM4YmE5NDgtMjJmNi00NjA1LTgyNDUtYTNlZGVlOGNmODMy',
             'origin': 'https://admin.piaoquantv.com',
             'pragma': 'no-cache',
             'priority': 'u=1, i',
@@ -307,7 +307,7 @@ class XiaoNianGaoZH(object):
 
     def insert_number(self, mid, tag_id):
         for i in range(3):
-            url = "https://admin.piaoquantv.com/manager/crawler/v3/user/save"
+            url = "https://admin.piaoquantv.com/manager/crawler/v3/user/save?muid=999"
             payload = {
                 "source": "xiaoniangao",
                 "mode": "author",
@@ -323,7 +323,7 @@ class XiaoNianGaoZH(object):
 
             headers = {
                 'content-length': '0',
-                'cookie': 'SESSION=MWM4YzVlMTctNzdkNC00NjE3LWIxZTctOGQwYzgzYmVmN2Qw',
+                'cookie': 'SESSION=YjM4YmE5NDgtMjJmNi00NjA1LTgyNDUtYTNlZGVlOGNmODMy',
                 'origin': 'https://admin.piaoquantv.com',
                 'priority': 'u=1, i',
                 'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
@@ -337,7 +337,7 @@ class XiaoNianGaoZH(object):
             if code == 0:
                 print("添加账号成功")
                 time.sleep(1)
-                url = "https://admin.piaoquantv.com/manager/crawler/v3/user/list"
+                url = "https://admin.piaoquantv.com/manager/crawler/v3/user/list?muid=999"
                 payload = {
                     "pageNum": 1,
                     "pageSize": 20
@@ -367,12 +367,12 @@ class XiaoNianGaoZH(object):
 
     def get_tag_id(self, date_int):
         for i in range(3):
-            url = f"https://admin.piaoquantv.com/manager/user/up/searchUserTypeTag?keyword={date_int}&muid=7"
+            url = f"https://admin.piaoquantv.com/manager/user/up/searchUserTypeTag?keyword={date_int}&muid=7?muid=999"
 
             payload = {}
             headers = {
                 'content-length': '0',
-                'cookie': 'SESSION=MWM4YzVlMTctNzdkNC00NjE3LWIxZTctOGQwYzgzYmVmN2Qw',
+                'cookie': 'SESSION=YjM4YmE5NDgtMjJmNi00NjA1LTgyNDUtYTNlZGVlOGNmODMy',
                 'origin': 'https://admin.piaoquantv.com',
                 'priority': 'u=1, i',
                 'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
@@ -387,7 +387,7 @@ class XiaoNianGaoZH(object):
                 tagId = content[0]['tagId']
                 return tagId
             else:
-                url = f"https://admin.piaoquantv.com/manager/user/up/createUserTypeTag?tagName={date_int}&muid=7"
+                url = f"https://admin.piaoquantv.com/manager/user/up/createUserTypeTag?tagName={date_int}&muid=7?muid=999"
                 response = requests.request("POST", url, headers=headers, data=payload)
                 response = response.json()
                 content = response["content"]

+ 169 - 0
spider/crawler_offline/xng_zhanghao.py

@@ -0,0 +1,169 @@
+
+import os
+import random
+import subprocess
+import sys
+import time
+import uuid
+import requests
+from datetime import datetime, timedelta
+from appium import webdriver
+from appium.webdriver.extensions.android.nativekey import AndroidKey
+from appium.webdriver.common.touch_action import TouchAction
+from bs4 import BeautifulSoup
+from selenium.common.exceptions import NoSuchElementException
+from selenium.webdriver.common.by import By
+
+from application.common.redis.xng_redis import xng_get_video_data
+
+
+class XiaoNianGaoZH(object):
+    """Register queued xiaoniangao author ids with the piaoquantv admin API."""
+
+    def save_pq_uid(self, content):
+        """Save the crawler task with ``content`` as its uid list.
+
+        Args:
+            content: value sent (as ``str``) in the payload's ``uid`` field.
+
+        The task's ``startTime`` is five minutes from now, in milliseconds.
+        The HTTP response is not inspected.
+        """
+        start_at = datetime.now() + timedelta(minutes=5)
+        # Epoch timestamp converted from seconds to milliseconds.
+        start_ms = int(start_at.timestamp() * 1000)
+
+        url = "https://admin.piaoquantv.com/manager/crawler/v3/task/save?muid=999"
+        payload = {
+            "taskName": "小年糕账号",
+            "source": "xiaoniangao",
+            "mode": "author",
+            "modeValue": "0",
+            "modeBoard": "0",
+            "spiderName": "run_xng_author",
+            "startTime": start_ms,
+            "interval": 4800,
+            "uid": str(content),
+            "machine": "aliyun",
+            "rule": [
+                {"period": {"min": 15, "max": 3}},
+                {"duration": {"min": 50, "max": 0}},
+                {"share_cnt": {"min": 2, "max": 0}},
+                {"videos_cnt": {"min": 300, "max": 0}},
+            ],
+            "id": 21,
+        }
+        # NOTE(review): the session cookie is hard-coded in source.
+        headers = {
+            'accept': 'application/json',
+            'content-type': 'application/json;',
+            'cookie': 'SESSION=YjM4YmE5NDgtMjJmNi00NjA1LTgyNDUtYTNlZGVlOGNmODMy',
+            'origin': 'https://admin.piaoquantv.com',
+            'pragma': 'no-cache',
+            'priority': 'u=1, i',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
+        }
+
+        requests.request("POST", url, headers=headers, json=payload)
+
+    def get_tag_id(self, date_int):
+        """Return the id of the user-type tag named ``date_int``.
+
+        Searches for the tag and creates it when the search returns
+        nothing, trying up to three times.
+
+        Args:
+            date_int: tag keyword / name (the caller passes YYYYMMDD).
+
+        Returns:
+            The ``tagId`` from the response, or None when all three
+            attempts come back without content.
+        """
+        # NOTE(review): "&muid=7?muid=999" is not a well-formed query
+        # string (the second "?" becomes part of the muid value). Left
+        # unchanged because the value the server expects is not visible
+        # here -- confirm and fix.
+        search_url = f"https://admin.piaoquantv.com/manager/user/up/searchUserTypeTag?keyword={date_int}&muid=7?muid=999"
+        create_url = f"https://admin.piaoquantv.com/manager/user/up/createUserTypeTag?tagName={date_int}&muid=7?muid=999"
+        payload = {}
+        headers = {
+            'content-length': '0',
+            'cookie': 'SESSION=YjM4YmE5NDgtMjJmNi00NjA1LTgyNDUtYTNlZGVlOGNmODMy',
+            'origin': 'https://admin.piaoquantv.com',
+            'priority': 'u=1, i',
+            'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-platform': '"macOS"'
+        }
+
+        for _ in range(3):
+            found = requests.request("POST", search_url, headers=headers, data=payload).json()["content"]
+            if found:
+                # The search endpoint answers with a list of matches.
+                return found[0]['tagId']
+
+            created = requests.request("POST", create_url, headers=headers, data=payload).json()["content"]
+            if created:
+                # The create endpoint answers with a single object.
+                return created['tagId']
+        return None
+
+    def insert_number(self, mid, tag_id):
+        """Add author ``mid`` as a crawler user and return its in-site uid.
+
+        After a successful save (``code == 0``) the first entry of the user
+        list is fetched; its ``uid`` is returned when its ``link`` equals
+        ``mid``. The whole sequence is tried up to three times.
+
+        Args:
+            mid: author id, sent as the ``link`` field.
+            tag_id: extra tag id appended to the fixed tag list.
+
+        Returns:
+            The ``uid`` of the created user, or None when it could not be
+            confirmed.
+        """
+        save_url = "https://admin.piaoquantv.com/manager/crawler/v3/user/save?muid=999"
+        list_url = "https://admin.piaoquantv.com/manager/crawler/v3/user/list?muid=999"
+        headers = {
+            'content-length': '0',
+            'cookie': 'SESSION=YjM4YmE5NDgtMjJmNi00NjA1LTgyNDUtYTNlZGVlOGNmODMy',
+            'origin': 'https://admin.piaoquantv.com',
+            'priority': 'u=1, i',
+            'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-platform': '"macOS"'
+        }
+
+        for _ in range(3):
+            payload = {
+                "source": "xiaoniangao",
+                "mode": "author",
+                "modeValue": "",
+                "modeBoard": "",
+                "recomStatus": -6,
+                "appRecomStatus": -6,
+                "autoAuditStatus": 0,
+                "tag": f"459,454,106,8240,{int(tag_id)}",
+                "contentCategory": 0,
+                "link": str(mid)
+            }
+            saved = requests.request("POST", save_url, headers=headers, json=payload).json()
+            if saved["code"] != 0:
+                continue
+
+            print("添加账号成功")
+            time.sleep(1)
+            page = {
+                "pageNum": 1,
+                "pageSize": 20
+            }
+            listing = requests.request("POST", list_url, headers=headers, json=page).json()
+            users = listing["content"]['list']
+            # Guard against an empty page before looking at the first entry.
+            if users and users[0]["link"] == str(mid):
+                print("获取站内账号ID成功")
+                return users[0]["uid"]
+        return None
+
+    def get_video_info_2(self):
+        """Drain the redis queue and register every queued author id.
+
+        Each distinct ``mid`` is added through ``insert_number``; every uid
+        obtained is appended to the comma-separated ``xng_zh.txt`` file. If
+        at least one uid was added, the file's full content is sent to
+        ``save_pq_uid``.
+        """
+        import json  # local import: used only to decode raw queue entries
+
+        mid_list = []
+        while True:
+            entry = xng_get_video_data()
+            if not entry:
+                break
+            if isinstance(entry, (bytes, str)):
+                # The queue may hand back the raw JSON text.
+                entry = json.loads(entry)
+            mid_list.append(entry["mid"])
+        if not mid_list:
+            return
+
+        date_int = int(datetime.now().strftime("%Y%m%d"))
+        tag_id = self.get_tag_id(date_int)
+        file_path = '/root/automatic_crawler/spider/crawler_offline/xng_zh.txt'
+        content = None
+        for m_id in set(mid_list):
+            time.sleep(2)
+            print(tag_id)
+            # Add the account.
+            pq_uid = self.insert_number(m_id, tag_id)
+            if pq_uid:
+                with open(file_path, 'r') as file:
+                    content = file.read()
+                uid = str(pq_uid)
+                # Keep the file a comma-separated list.
+                if content and content[-1] != ',':
+                    uid = ',' + uid
+                with open(file_path, 'a') as file:
+                    file.write(uid)
+                with open(file_path, 'r') as file:
+                    content = file.read()
+            time.sleep(1)
+        # Nothing was appended, so there is no new uid list to save.
+        if content is not None:
+            self.save_pq_uid(content)
+
+if __name__ == '__main__':
+    # get_video_info_2 is an instance method, so it needs an instance.
+    XiaoNianGaoZH().get_video_info_2()

+ 41 - 2
spider/crawler_online/xiaoniangaotuijianliu.py

@@ -5,12 +5,12 @@ import time
 import uuid
 import json
 
-from datetime import datetime
 
 import cv2
 import requests
 
-from application.common import Feishu
+from application.common.mysql.sql import Sql
+from application.common.redis.xng_redis import xng_in_video_data
 
 sys.path.append(os.getcwd())
 
@@ -96,6 +96,25 @@ class XNGTJLRecommend(object):
         trace_id = self.platform + str(uuid.uuid1())
         our_user = random.choice(self.user_list)
         item = VideoItem()
+        # Best-effort bookkeeping of the video's author: any failure is
+        # printed and the video itself is still processed.
+        try:
+            mid = int(video_obj['user']['mid'])
+            print(f"id:{mid}")
+            user_name = video_obj['user']['nick']
+            avatar_url = video_obj['user']['hurl']
+            sql = Sql()
+            if sql.select_id(mid):
+                # Author already in xng_uid: refresh name and avatar.
+                sql.update_name_url(mid, avatar_url, user_name)
+            else:
+                time.sleep(1)
+                # True when crawler_user_v3 has no row for this author yet.
+                if sql.select_id_status(mid):
+                    sql.insert_name_url(mid, avatar_url, user_name)
+                    # Queue the author id itself, not the lookup result.
+                    print(f"开始写入{mid}")
+                    xng_in_video_data({"mid": mid})
+        except Exception as e:
+            print(f"写入异常{e}")
         url = video_obj["v_url"]
         duration = self.get_video_duration(url)
         item.add_video_info("video_id", video_obj["id"])
@@ -133,7 +152,27 @@ class XNGTJLRecommend(object):
             ):
                 self.limit_flag = True
 
+    def select_id(self, uid):
+        """Return True when ``xng_uid`` already has a row for ``uid``.
+
+        Delegates to ``Sql.select_id`` so the query lives in one place.
+        """
+        return Sql().select_id(uid)
 
+    def select_id_status(self, uid):
+        """Return True when ``uid`` has NOT been added to ``crawler_user_v3``.
+
+        Delegates to ``Sql.select_id_status`` so the query lives in one
+        place.
+        """
+        return Sql().select_id_status(uid)
 
     def run(self):
         self.get_recommend_list()

+ 21 - 2
spider/crawler_online/zhufuquanzituijianliu.py

@@ -5,12 +5,12 @@ import time
 import uuid
 import json
 
-from datetime import datetime
 
 import cv2
 import requests
 
-from application.common import Feishu
+from application.common.mysql.sql import Sql
+from application.common.redis.xng_redis import xng_in_video_data
 
 sys.path.append(os.getcwd())
 
@@ -95,6 +95,25 @@ class ZFQZTJLRecommend(object):
         trace_id = self.platform + str(uuid.uuid1())
         our_user = random.choice(self.user_list)
         item = VideoItem()
+        # Best-effort bookkeeping of the video's author: any failure is
+        # printed and the video itself is still processed.
+        try:
+            mid = int(video_obj['user']['mid'])
+            print(f"id:{mid}")
+            user_name = video_obj['user']['nick']
+            avatar_url = video_obj['user']['hurl']
+            sql = Sql()
+            if sql.select_id(mid):
+                # Author already in xng_uid: refresh name and avatar.
+                sql.update_name_url(mid, avatar_url, user_name)
+            else:
+                time.sleep(1)
+                # True when crawler_user_v3 has no row for this author yet.
+                if sql.select_id_status(mid):
+                    sql.insert_name_url(mid, avatar_url, user_name)
+                    # Queue the author id itself, not the lookup result.
+                    print(f"开始写入{mid}")
+                    xng_in_video_data({"mid": mid})
+        except Exception as e:
+            print(f"写入异常{e}")
         url =  video_obj["v_url"]
         duration = self.get_video_duration(url)
         item.add_video_info("video_id", video_obj["id"])