Ver Fonte

新增快手渠道

zhangyong há 1 ano atrás
pai
commit
54ba027bf2

+ 3 - 2
common/aliyun_oss_uploading.py

@@ -6,10 +6,10 @@ from typing import Dict, Any,  Optional
 import oss2
 import requests
 
-OSS_BUCKET_PATH = "douyin"
+# OSS_BUCKET_PATH = "douyin"
 OSS_ACCESS_KEY_ID = "LTAIP6x1l3DXfSxm"
 OSS_ACCESS_KEY_SECRET = "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon"
-OSS_BUCKET_ENDPOINT = "oss-cn-hangzhou-internal.aliyuncs.com"# 外网地址上到服务器需要更改为内网地址
+OSS_BUCKET_ENDPOINT = "oss-cn-hangzhou-internal.aliyuncs.com"# 内网地址
 OSS_BUCKET_NAME = "art-crawler"
 class Oss():
     # 抓取视频上传到art-crawler
@@ -17,6 +17,7 @@ class Oss():
     def video_sync_upload_oss(cls, src_url: str,
                         video_id: str,
                         account_id: str,
+                        OSS_BUCKET_PATH: str,
                         referer: Optional[str] = None) -> Dict[str, Any]:
         headers = {
             'Accept': '*/*',

+ 5 - 1
common/feishu.py

@@ -337,7 +337,11 @@ class Feishu:
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/CPDNs06R2hux6SthZ1wcQmkAnYg?sheet=OpE35G"
                 users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangxueke")) + "></at> <at id=" + str(
                     cls.get_userid(log_type, crawler, "muxinyi")) + "></at>\n"
-
+            elif crawler == "快手":
+                content = "快手cookie过期"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/CPDNs06R2hux6SthZ1wcQmkAnYg?sheet=OpE35G"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangxueke")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "muxinyi")) + "></at>\n"
 
             data = json.dumps({
                 "msg_type": "interactive",

+ 58 - 17
common/material.py

@@ -12,7 +12,7 @@ from common.feishu import Feishu
 
 class Material():
 
-    # 获取视频链接 存入数据库
+    # 获取抖音视频链接 存入数据库
     @classmethod
     def insert_user(cls):
         # 获取抖音视频链接
@@ -32,6 +32,26 @@ class Material():
                     machine="",
                 )
 
+    @classmethod
+    def insert_kuaishou_user(cls):
+        """Copy Kuaishou accounts from the Feishu sheet into ``video_user_id``.
+
+        Reads sheet ``MLVd0q``, skips its first two rows, and for every row
+        whose first cell is ``'快手'`` inserts one record built from the
+        account name (third cell) and the id that follows ``profile/`` in the
+        account link (fourth cell).
+
+        Rows that are too short, or whose link is not a string containing
+        ``profile/``, are skipped so one malformed row cannot abort the sync.
+
+        NOTE(review): nothing here checks whether the account is already
+        stored; confirm the table or ``MysqlHelper`` de-duplicates, since this
+        method runs on every crawl.
+        """
+        channel = '快手'
+
+        def escape(value):
+            # The statement is assembled as text, so neutralise the characters
+            # that could terminate a single-quoted MySQL string literal.
+            return str(value).replace("\\", "\\\\").replace("'", "\\'")
+
+        sheet_rows = Feishu.get_values_batch("prod", "succinct", "MLVd0q")
+        for row in (sheet_rows or [])[2:]:
+            if not row or len(row) < 4 or row[0] != channel:
+                continue
+            account_name = row[2]
+            account_link = row[3]
+            if not isinstance(account_link, str) or "profile/" not in account_link:
+                continue
+            # Keep only the id segment: drop any query string, fragment or
+            # trailing slash that follows it in the profile URL.
+            user_id = account_link.split("profile/", 1)[1]
+            user_id = user_id.split("?", 1)[0].split("#", 1)[0].strip().strip("/")
+            if not user_id:
+                continue
+            insert_sql = f"""INSERT INTO video_user_id (name, user_id, channel) values ('{escape(account_name)}', '{escape(user_id)}', '{channel}')"""
+            MysqlHelper.update_values(
+                sql=insert_sql,
+                env="prod",
+                machine="",
+            )
+
     # 随机获取标题
     @classmethod
     def get_title(cls):
@@ -63,6 +83,14 @@ class Material():
             if item[0] == '抖音':
                 return item[1]
 
+    @classmethod
+    def get_kuaishou_cookie(cls):
+        """Return the Kuaishou cookie kept in Feishu sheet ``OpE35G``.
+
+        The value is the second cell of the first row whose first cell is
+        ``'快手'``; ``None`` is returned when no row matches.
+        """
+        sheet_rows = Feishu.get_values_batch("prod", "succinct", "OpE35G")
+        matching_cookies = (cells[1] for cells in sheet_rows if cells[0] == '快手')
+        return next(matching_cookies, None)
+
     @classmethod
     def get_houtai_cookie(cls):
         douyin_token = Feishu.get_values_batch("prod", "succinct", "OpE35G")
@@ -72,7 +100,7 @@ class Material():
 
     # 获取音频类型
     @classmethod
-    def get_audio_type(cls, video_type):
+    def get_audio_type(cls, video_type, count):
         list = []
         title_list = []
         if video_type == "口播--美文类":
@@ -90,21 +118,34 @@ class Material():
             srt = list['text']
             return audio_id, srt, title_list
         else:
-            audio_type = [{"audio": "音画美文--美文类", "type": "6VXm7q"}, {"audio": "音画美文--通用类", "type": "aSNFl8"}]
-            audio_type = random.choice(audio_type)
-            type = audio_type['type']
-            audio_type = Feishu.get_values_batch("prod", "succinct", type)
-            for row in audio_type[1:]:
-                audio_id = row[0]
-                text = row[1]
-                title = row[2]
-                number = {"audio_id": audio_id, "text": text}
-                list.append(number)
-                title_list.append(title)
-            list = random.choice(list)
-            audio_id = list['audio_id']
-            srt = list['text']
-            return audio_id, srt, title_list
+            if count == 0 or count == 1:
+                audio_type = Feishu.get_values_batch("prod", "succinct", "6VXm7q")
+                for row in audio_type[1:]:
+                    audio_id = row[0]
+                    text = row[1]
+                    title = row[2]
+                    number = {"audio_id": audio_id, "text": text}
+                    list.append(number)
+                    title_list.append(title)
+                audio_id = list[count]['audio_id']
+                srt = list[count]['text']
+                return audio_id, srt, title_list
+            else:
+                audio_type = [{"audio": "音画美文--美文类", "type": "6VXm7q"}, {"audio": "音画美文--通用类", "type": "aSNFl8"}]
+                audio_type = random.choice(audio_type)
+                type = audio_type['type']
+                audio_type = Feishu.get_values_batch("prod", "succinct", type)
+                for row in audio_type[1:]:
+                    audio_id = row[0]
+                    text = row[1]
+                    title = row[2]
+                    number = {"audio_id": audio_id, "text": text}
+                    list.append(number)
+                    title_list.append(title)
+                list = random.choice(list)
+                audio_id = list['audio_id']
+                srt = list['text']
+                return audio_id, srt, title_list
 
 
 

+ 2 - 2
main.py

@@ -45,11 +45,11 @@ def job_video_stitching():
     if int(yinmei_count) < 10:
         Common.logger().info("开始执行")
         video_type = "音画美文"
-        VideoStitching.video_stitching(video_type)
+        VideoStitching.video_stitching(video_type, yinmei_count)
     elif int(koubo_count) < 10:
         Common.logger().info("开始执行")
         video_type = "口播--美文类"
-        VideoStitching.video_stitching(video_type)
+        VideoStitching.video_stitching(video_type, koubo_count)
 
 def job_douyin_data():
     douyinAuthor.get_videoList()

+ 4 - 3
video_capture/douyin/douyin_author/douyin_author.py

@@ -21,12 +21,13 @@ from requests.adapters import HTTPAdapter
 from video_capture.douyin.douyin_author.douyin_author_help import DouYinHelper
 
 class douyinAuthor():
+
     """
-    获取用户主页id
+    获取抖音用户主页id
     """
     @classmethod
     def get_videoUserId(cls):
-        select_user_sql = f"""select user_id from video_user_id;"""
+        select_user_sql = f"""select user_id from video_user_id where channel = "抖音";"""
         user_list = MysqlHelper.get_values(select_user_sql, "prod")
         return user_list
 
@@ -146,7 +147,7 @@ class douyinAuthor():
                                         break
                                     continue
                                 video_url = data[i].get('video').get('play_addr').get('url_list')[0]  # 视频链接
-                                oss_object_key = Oss.video_sync_upload_oss(video_url, video_id, account_id)
+                                oss_object_key = Oss.video_sync_upload_oss(video_url, video_id, account_id, "douyin")
                                 status = oss_object_key.get("status")
                                 # 发送 oss
                                 oss_object_key = oss_object_key.get("oss_object_key")

+ 0 - 0
video_capture/kuaishou/__init__.py


+ 0 - 0
video_capture/kuaishou/kuaishou_author/__init__.py


+ 159 - 0
video_capture/kuaishou/kuaishou_author/kuaishou_author.py

@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+# @Time: 2024/01/18
+import datetime
+import os
+import random
+import sys
+import time
+from datetime import datetime
+import requests
+import json
+import urllib3
+sys.path.append(os.getcwd())
+from common.aliyun_oss_uploading import Oss
+from common.common import Common
+from common.material import Material
+from common.feishu import Feishu
+from common.db import MysqlHelper
+from requests.adapters import HTTPAdapter
+
+
+class kuaishouAuthor():
+    """Crawl Kuaishou author profile pages and copy unseen videos to OSS.
+
+    Every method is a classmethod or staticmethod; the class keeps no
+    instance state.
+    """
+
+    # Stop paging an account after this many consecutive failed page requests
+    # so an expired cookie or an empty profile cannot stall the whole crawl.
+    _MAX_PAGE_FAILURES = 3
+    # NOTE(review): assumed to be the cursor value the API returns on the last
+    # page - confirm against a live response. If the assumption is wrong the
+    # failure cap above still terminates the loop.
+    _END_CURSOR = "no_more"
+    _GRAPHQL_URL = "https://www.kuaishou.com/graphql"
+    _PROFILE_QUERY = "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
+
+    @staticmethod
+    def _sql_escape(value):
+        """Escape backslashes and quotes so *value* can sit inside a quoted SQL literal."""
+        return str(value).replace("\\", "\\\\").replace('"', '\\"').replace("'", "\\'")
+
+    @classmethod
+    def insert_videoUrl(cls, video_id, account_id, oss_object_key):
+        """Record an uploaded video in ``video_url``.
+
+        Stores the video id, the account id, the OSS object key and the
+        current local time formatted as ``YYYY-mm-dd HH:MM``.
+        """
+        formatted_time = datetime.now().strftime("%Y-%m-%d %H:%M")
+        insert_sql = (
+            f"""INSERT INTO video_url (video_id, account_id, oss_object_key, time) values """
+            f"""("{cls._sql_escape(video_id)}", "{cls._sql_escape(account_id)}", """
+            f""""{cls._sql_escape(oss_object_key)}", "{formatted_time}")"""
+        )
+        MysqlHelper.update_values(
+            sql=insert_sql,
+            env="prod",
+            machine="",
+        )
+
+    @classmethod
+    def get_kuaishou_videoUserId(cls):
+        """Return the ``user_id`` rows of ``video_user_id`` whose channel is 快手."""
+        select_user_sql = """select user_id from video_user_id where channel = "快手";"""
+        return MysqlHelper.get_values(select_user_sql, "prod")
+
+    @classmethod
+    def select_videoUrl_id(cls, video_id):
+        """Return ``True`` when *video_id* is already present in ``video_url``.
+
+        The id is quoted and escaped: ``insert_videoUrl`` stores it as a
+        quoted string, so the lookup has to compare it the same way.
+        """
+        select_user_sql = f"""select video_id from video_url where video_id="{cls._sql_escape(video_id)}" ;"""
+        return bool(MysqlHelper.get_values(select_user_sql, "prod"))
+
+    @staticmethod
+    def _extract_video_id(photo):
+        """Return the video id from ``photo['videoResource']``, trying h264 then hevc; ``""`` if absent."""
+        resource = photo.get("videoResource") or {}
+        if not isinstance(resource, dict):
+            return ""
+        for codec in ("h264", "hevc"):
+            video_id = (resource.get(codec) or {}).get("videoId")
+            if video_id:
+                return video_id
+        return ""
+
+    @classmethod
+    def _fetch_page(cls, session, account_id, pcursor, cookie):
+        """Request one profile page.
+
+        Returns ``(feeds, next_cursor, status_code)``. ``feeds`` is an empty
+        list when the request fails, the status is not 200, or the payload
+        has no feeds; ``status_code`` is ``None`` when no response arrived.
+        """
+        payload = json.dumps({
+            "operationName": "visionProfilePhotoList",
+            "variables": {
+                "userId": account_id,
+                "pcursor": pcursor,
+                "page": "profile"
+            },
+            "query": cls._PROFILE_QUERY
+        })
+        # Content-Length is not set by hand: requests derives it from the body.
+        headers = {
+            'Accept': '*/*',
+            'Content-Type': 'application/json',
+            'Origin': 'https://www.kuaishou.com',
+            'Cookie': cookie,
+            'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
+            'Host': 'www.kuaishou.com',
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
+            'Referer': f'https://www.kuaishou.com/profile/{account_id}',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'Connection': 'keep-alive'
+        }
+        try:
+            # NOTE(review): verify=False disables TLS certificate checks; kept
+            # from the original behaviour - confirm it is still required.
+            response = session.post(url=cls._GRAPHQL_URL, headers=headers, data=payload,
+                                    verify=False, timeout=10)
+        except requests.RequestException as e:
+            Common.logger().warning(f"快手接口请求异常:{e}\n")
+            return [], "", None
+        response.close()
+        status_code = response.status_code
+        if status_code != 200:
+            return [], "", status_code
+        try:
+            body = response.json()
+        except ValueError:
+            return [], "", status_code
+        data = body.get("data") if isinstance(body, dict) else None
+        result = (data or {}).get("visionProfilePhotoList") or {}
+        return result.get("feeds") or [], result.get("pcursor") or "", status_code
+
+    @classmethod
+    def _store_feeds(cls, feeds, account_id):
+        """Upload the unseen videos of one page and record them.
+
+        Returns ``True`` when a known video is met after the fifth entry of
+        the page, which signals that paging this account should stop.
+        """
+        count = 0
+        for feed in feeds:
+            try:
+                photo = feed.get("photo") or {}
+                video_id = cls._extract_video_id(photo)
+                video_url = photo.get('photoUrl', "")
+                count += 1
+                if not video_id or not video_url:
+                    Common.logger().warning(f"视频缺少id或链接,跳过,用户主页id:{account_id}")
+                    continue
+                if cls.select_videoUrl_id(video_id):
+                    if count > 5:
+                        Common.logger().info(
+                            f"重复视频不在抓取该用户,用户主页id:{account_id}")
+                        return True
+                    continue
+                upload = Oss.video_sync_upload_oss(video_url, video_id, account_id, "kuaishou")
+                status = upload.get("status")
+                oss_object_key = upload.get("oss_object_key")
+                # Only report and persist uploads that actually succeeded.
+                if status == 200:
+                    Common.logger().info(f"快手视频链接oss发送成功,oss地址:{oss_object_key}")
+                    cls.insert_videoUrl(video_id, account_id, oss_object_key)
+                    Common.logger().info(f"视频地址插入数据库成功,视频id:{video_id},用户主页id:{account_id},视频储存地址:{oss_object_key}")
+                else:
+                    Common.logger().warning(f"快手视频链接oss发送失败,视频id:{video_id},状态:{status}")
+            except Exception as e:
+                Common.logger().warning(f"抓取单条视频异常:{e}\n")
+                continue
+        return False
+
+    @classmethod
+    def _crawl_account(cls, session, account_id, cookie):
+        """Page through one account until it is exhausted; return the cookie last used."""
+        pcursor = ""
+        failures = 0
+        while True:
+            time.sleep(random.randint(5, 10))
+            feeds, next_cursor, status_code = cls._fetch_page(session, account_id, pcursor, cookie)
+            if not feeds:
+                if next_cursor == cls._END_CURSOR:
+                    # An empty page carrying the end cursor is the end of the list.
+                    return cookie
+                failures += 1
+                Common.logger().info(
+                    f"接口请求失败,请更换cookie,{status_code}")
+                Feishu.bot('recommend', '快手', '快手cookie失效,请及时更换~')
+                if failures >= cls._MAX_PAGE_FAILURES:
+                    Common.logger().warning(f"连续请求失败,跳过该用户,用户主页id:{account_id}")
+                    return cookie
+                # Wait 600-1200 seconds, then re-read the cookie so that a
+                # replacement entered in Feishu is picked up by the retry.
+                time.sleep(random.randint(600, 1200))
+                cookie = Material.get_kuaishou_cookie() or cookie
+                continue
+            failures = 0
+            if cls._store_feeds(feeds, account_id):
+                return cookie
+            if not next_cursor or next_cursor == cls._END_CURSOR:
+                return cookie
+            pcursor = next_cursor
+
+    @classmethod
+    def get_kuaishou_videoList(cls):
+        """Crawl every stored Kuaishou account and upload its new videos to OSS.
+
+        Reads the cookie from Feishu, refreshes the account table from the
+        Feishu sheet, then pages through each account's profile feed. Any
+        unexpected exception is logged and ends the run without raising.
+        """
+        try:
+            cookie = Material.get_kuaishou_cookie()
+            # Refresh the stored account ids from the Feishu sheet first.
+            Material.insert_kuaishou_user()
+            user_list = cls.get_kuaishou_videoUserId()
+            if not user_list:
+                return
+            urllib3.disable_warnings()
+            # One session for the whole run; each adapter retries up to 3 times.
+            session = requests.session()
+            session.mount('http://', HTTPAdapter(max_retries=3))
+            session.mount('https://', HTTPAdapter(max_retries=3))
+            for row in user_list:
+                account_id = row[0].replace('(', '').replace(')', '').replace(',', '')
+                Common.logger().info(f"用户主页ID:{account_id}")
+                cookie = cls._crawl_account(session, account_id, cookie)
+        except Exception as e:
+            Common.logger().warning(f"抓取异常:{e}\n")
+            return

+ 2 - 2
video_stitching/video_stitching.py

@@ -318,10 +318,10 @@ class VideoStitching():
             return ""
 
     @classmethod
-    def video_stitching(cls, video_type):
+    def video_stitching(cls, video_type, count):
         cookie = Material.get_houtai_cookie()
         # 获取音频类型+字幕
-        audio_id, srt, title_list = Material.get_audio_type(video_type)
+        audio_id, srt, title_list = Material.get_audio_type(video_type, count)
         if video_type == "口播--美文类":
             # 获取已入库的口播视频
             audio_list = cls.get_audio_list()