1 year ago · 54ba027bf2
--- a/common/aliyun_oss_uploading.py
+++ b/common/aliyun_oss_uploading.py
@@ -6,10 +6,10 @@ from typing import Dict, Any,  Optional
 
				 import oss2
			
 
				 import requests
			
 
				 
			
 
				-OSS_BUCKET_PATH = "douyin"
			
 
				+# OSS_BUCKET_PATH = "douyin"
			
 
				 OSS_ACCESS_KEY_ID = "LTAIP6x1l3DXfSxm"
			
 
				 OSS_ACCESS_KEY_SECRET = "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon"
			
 
				-OSS_BUCKET_ENDPOINT = "oss-cn-hangzhou-internal.aliyuncs.com"# 外网地址上到服务器需要更改为内网地址
			
 
				+OSS_BUCKET_ENDPOINT = "oss-cn-hangzhou-internal.aliyuncs.com"# 内网地址
			
 
				 OSS_BUCKET_NAME = "art-crawler"
			
 
				 class Oss():
			
 
				     # 抓取视频上传到art-crawler
			
@@ -17,6 +17,7 @@ class Oss():
 
				     def video_sync_upload_oss(cls, src_url: str,
			
 
				                         video_id: str,
			
 
				                         account_id: str,
			
 
				+                        OSS_BUCKET_PATH: str,
			
 
				                         referer: Optional[str] = None) -> Dict[str, Any]:
			
 
				         headers = {
			
 
				             'Accept': '*/*',
			
--- a/common/feishu.py
+++ b/common/feishu.py
@@ -337,7 +337,11 @@ class Feishu:
 
				                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/CPDNs06R2hux6SthZ1wcQmkAnYg?sheet=OpE35G"
			
 
				                 users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangxueke")) + "></at> <at id=" + str(
			
 
				                     cls.get_userid(log_type, crawler, "muxinyi")) + "></at>\n"
			
 
				-
			
 
				+            elif crawler == "快手":
			
 
				+                content = "快手cookie过期"
			
 
				+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/CPDNs06R2hux6SthZ1wcQmkAnYg?sheet=OpE35G"
			
 
				+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangxueke")) + "></at> <at id=" + str(
			
 
				+                    cls.get_userid(log_type, crawler, "muxinyi")) + "></at>\n"
			
 
				 
			
 
				             data = json.dumps({
			
 
				                 "msg_type": "interactive",
			
--- a/common/material.py
+++ b/common/material.py
@@ -12,7 +12,7 @@ from common.feishu import Feishu
 
				 
			
 
				 class Material():
			
 
				 
			
 
				-    # 获取视频链接 存入数据库
			
 
				+    # 获取抖音视频链接 存入数据库
			
 
				     @classmethod
			
 
				     def insert_user(cls):
			
 
				         # 获取抖音视频链接
			
@@ -32,6 +32,26 @@ class Material():
 
				                     machine="",
			
 
				                 )
			
 
				 
			
 
    # Fetch Kuaishou account links and store them in the database
    @classmethod
    def insert_kuaishou_user(cls):
        """Copy Kuaishou accounts from the Feishu sheet into ``video_user_id``.

        Reads sheet ``MLVd0q`` via ``Feishu.get_values_batch`` and skips the
        first two rows. For every row whose first cell is ``'快手'`` it takes
        the account name (column 2) and the profile link (column 3), derives
        the user id from the part of the link after ``profile/``, and inserts
        ``(name, user_id, channel)`` into ``video_user_id``.

        Rows that are empty, too short, or whose link is not a string
        containing ``profile/`` are skipped instead of aborting the sync.
        Accounts already present for this channel are not inserted again.
        """

        def _escape(value):
            # Minimal escaping for a single-quoted SQL string literal.
            # NOTE(review): assumes MySQL's default sql_mode (backslash is an
            # escape character); prefer bound parameters if MysqlHelper
            # supports them.
            return str(value).replace("\\", "\\\\").replace("'", "''")

        rows = Feishu.get_values_batch("prod", "succinct", "MLVd0q")
        channel = '快手'
        for row in (rows or [])[2:]:
            if not row or len(row) < 4 or row[0] != channel:
                continue
            account_name, account_link = row[2], row[3]
            if not account_name or not isinstance(account_link, str):
                continue
            if "profile/" not in account_link:
                continue
            # Keep only the id segment: drop any trailing path, query string
            # or fragment that follows it in the profile URL.
            user_id = account_link.split("profile/", 1)[1]
            for separator in "/?#":
                user_id = user_id.partition(separator)[0]
            user_id = user_id.strip()
            if not user_id:
                continue

            # This sync runs on every crawl, so skip accounts already stored
            # to avoid crawling the same profile several times.
            exists_sql = (
                f"""select user_id from video_user_id """
                f"""where user_id = '{_escape(user_id)}' and channel = '{_escape(channel)}';"""
            )
            if MysqlHelper.get_values(exists_sql, "prod"):
                continue

            insert_sql = (
                f"""INSERT INTO video_user_id (name, user_id, channel) """
                f"""values ('{_escape(account_name)}', '{_escape(user_id)}', '{_escape(channel)}')"""
            )
            MysqlHelper.update_values(
                sql=insert_sql,
                env="prod",
                machine="",
            )
			
 
				+
			
 
				     # 随机获取标题
			
 
				     @classmethod
			
 
				     def get_title(cls):
			
@@ -63,6 +83,14 @@ class Material():
 
				             if item[0] == '抖音':
			
 
				                 return item[1]
			
 
				 
			
 
    # Fetch the Kuaishou cookie
    @classmethod
    def get_kuaishou_cookie(cls):
        """Return the Kuaishou cookie stored in Feishu sheet ``OpE35G``.

        The sheet is read through ``Feishu.get_values_batch``; the second cell
        of the first row whose first cell equals ``'快手'`` is returned.
        ``None`` is returned when no row matches.
        """
        sheet_rows = Feishu.get_values_batch("prod", "succinct", "OpE35G")
        matches = (entry[1] for entry in sheet_rows if entry[0] == '快手')
        return next(matches, None)
			
 
				+
			
 
				     @classmethod
			
 
				     def get_houtai_cookie(cls):
			
 
				         douyin_token = Feishu.get_values_batch("prod", "succinct", "OpE35G")
			
@@ -72,7 +100,7 @@ class Material():
 
				 
			
 
				     # 获取音频类型
			
 
				     @classmethod
			
 
				-    def get_audio_type(cls, video_type):
			
 
				+    def get_audio_type(cls, video_type, count):
			
 
				         list = []
			
 
				         title_list = []
			
 
				         if video_type == "口播--美文类":
			
@@ -90,21 +118,34 @@ class Material():
 
				             srt = list['text']
			
 
				             return audio_id, srt, title_list
			
 
				         else:
			
 
				-            audio_type = [{"audio": "音画美文--美文类", "type": "6VXm7q"}, {"audio": "音画美文--通用类", "type": "aSNFl8"}]
			
 
				-            audio_type = random.choice(audio_type)
			
 
				-            type = audio_type['type']
			
 
				-            audio_type = Feishu.get_values_batch("prod", "succinct", type)
			
 
				-            for row in audio_type[1:]:
			
 
				-                audio_id = row[0]
			
 
				-                text = row[1]
			
 
				-                title = row[2]
			
 
				-                number = {"audio_id": audio_id, "text": text}
			
 
				-                list.append(number)
			
 
				-                title_list.append(title)
			
 
				-            list = random.choice(list)
			
 
				-            audio_id = list['audio_id']
			
 
				-            srt = list['text']
			
 
				-            return audio_id, srt, title_list
			
 
				+            if count == 0 or count == 1:
			
 
				+                audio_type = Feishu.get_values_batch("prod", "succinct", "6VXm7q")
			
 
				+                for row in audio_type[1:]:
			
 
				+                    audio_id = row[0]
			
 
				+                    text = row[1]
			
 
				+                    title = row[2]
			
 
				+                    number = {"audio_id": audio_id, "text": text}
			
 
				+                    list.append(number)
			
 
				+                    title_list.append(title)
			
 
				+                audio_id = list[count]['audio_id']
			
 
				+                srt = list[count]['text']
			
 
				+                return audio_id, srt, title_list
			
 
				+            else:
			
 
				+                audio_type = [{"audio": "音画美文--美文类", "type": "6VXm7q"}, {"audio": "音画美文--通用类", "type": "aSNFl8"}]
			
 
				+                audio_type = random.choice(audio_type)
			
 
				+                type = audio_type['type']
			
 
				+                audio_type = Feishu.get_values_batch("prod", "succinct", type)
			
 
				+                for row in audio_type[1:]:
			
 
				+                    audio_id = row[0]
			
 
				+                    text = row[1]
			
 
				+                    title = row[2]
			
 
				+                    number = {"audio_id": audio_id, "text": text}
			
 
				+                    list.append(number)
			
 
				+                    title_list.append(title)
			
 
				+                list = random.choice(list)
			
 
				+                audio_id = list['audio_id']
			
 
				+                srt = list['text']
			
 
				+                return audio_id, srt, title_list
			
 
				 
			
 
				 
			
 
				 
			
--- a/main.py
+++ b/main.py
@@ -45,11 +45,11 @@ def job_video_stitching():
 
				     if int(yinmei_count) < 10:
			
 
				         Common.logger().info("开始执行")
			
 
				         video_type = "音画美文"
			
 
				-        VideoStitching.video_stitching(video_type)
			
 
				+        VideoStitching.video_stitching(video_type, yinmei_count)
			
 
				     elif int(koubo_count) < 10:
			
 
				         Common.logger().info("开始执行")
			
 
				         video_type = "口播--美文类"
			
 
				-        VideoStitching.video_stitching(video_type)
			
 
				+        VideoStitching.video_stitching(video_type, koubo_count)
			
 
				 
			
 
				 def job_douyin_data():
			
 
				     douyinAuthor.get_videoList()
			
--- a/video_capture/douyin/douyin_author/douyin_author.py
+++ b/video_capture/douyin/douyin_author/douyin_author.py
@@ -21,12 +21,13 @@ from requests.adapters import HTTPAdapter
 
				 from video_capture.douyin.douyin_author.douyin_author_help import DouYinHelper
			
 
				 
			
 
				 class douyinAuthor():
			
 
				+
			
 
				     """
			
 
				-    获取用户主页id
			
 
				+    获取抖音用户主页id
			
 
				     """
			
 
				     @classmethod
			
 
				     def get_videoUserId(cls):
			
 
				-        select_user_sql = f"""select user_id from video_user_id;"""
			
 
				+        select_user_sql = f"""select user_id from video_user_id where channel = "抖音";"""
			
 
				         user_list = MysqlHelper.get_values(select_user_sql, "prod")
			
 
				         return user_list
			
 
				 
			
@@ -146,7 +147,7 @@ class douyinAuthor():
 
				                                         break
			
 
				                                     continue
			
 
				                                 video_url = data[i].get('video').get('play_addr').get('url_list')[0]  # 视频链接
			
 
				-                                oss_object_key = Oss.video_sync_upload_oss(video_url, video_id, account_id)
			
 
				+                                oss_object_key = Oss.video_sync_upload_oss(video_url, video_id, account_id, "douyin")
			
 
				                                 status = oss_object_key.get("status")
			
 
				                                 # 发送 oss
			
 
				                                 oss_object_key = oss_object_key.get("oss_object_key")
			
--- a/video_capture/kuaishou/__init__.py
+++ b/video_capture/kuaishou/__init__.py
--- a/video_capture/kuaishou/kuaishou_author/__init__.py
+++ b/video_capture/kuaishou/kuaishou_author/__init__.py
--- a/video_capture/kuaishou/kuaishou_author/kuaishou_author.py
+++ b/video_capture/kuaishou/kuaishou_author/kuaishou_author.py
@@ -0,0 +1,159 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Time: 2024/01/18
			
 
				+import datetime
			
 
				+import os
			
 
				+import random
			
 
				+import sys
			
 
				+import time
			
 
				+from datetime import datetime
			
 
				+import requests
			
 
				+import json
			
 
				+import urllib3
			
 
				+sys.path.append(os.getcwd())
			
 
				+from common.aliyun_oss_uploading import Oss
			
 
				+from common.common import Common
			
 
				+from common.material import Material
			
 
				+from common.feishu import Feishu
			
 
				+from common.db import MysqlHelper
			
 
				+from requests.adapters import HTTPAdapter
			
 
				+
			
 
				+
			
 
				+class kuaishouAuthor():
			
 
				+
			
 
				+    """
			
 
				+    oss视频地址 存入数据库
			
 
				+    """
			
 
				+    @classmethod
			
 
				+    def insert_videoUrl(cls, video_id, account_id, oss_object_key):
			
 
				+        current_time = datetime.now()
			
 
				+        formatted_time = current_time.strftime("%Y-%m-%d %H:%M")
			
 
				+        insert_sql = f"""INSERT INTO video_url (video_id, account_id, oss_object_key, time) values ("{video_id}", "{account_id}", "{oss_object_key}", "{formatted_time}")"""
			
 
				+        MysqlHelper.update_values(
			
 
				+            sql=insert_sql,
			
 
				+            env="prod",
			
 
				+            machine="",
			
 
				+        )
			
 
				+
			
 
				+    """
			
 
				+    获取快手用户主页id
			
 
				+    """
			
 
				+    @classmethod
			
 
				+    def get_kuaishou_videoUserId(cls):
			
 
				+        select_user_sql = f"""select user_id from video_user_id where channel = "快手";"""
			
 
				+        user_list = MysqlHelper.get_values(select_user_sql, "prod")
			
 
				+        return user_list
			
 
				+
			
 
				+    """
			
 
				+    查询该video_id是否在数据库存在
			
 
				+    """
			
 
				+    @classmethod
			
 
				+    def select_videoUrl_id(cls, video_id):
			
 
				+        select_user_sql = f"""select video_id from video_url where video_id={video_id} ;"""
			
 
				+        user_list = MysqlHelper.get_values(select_user_sql, "prod")
			
 
				+        if user_list:
			
 
				+            return True
			
 
				+        else:
			
 
				+            return False
			
 
				+
			
 
				+    """快手读取数据 将数据存储到oss上"""
			
 
				+    @classmethod
			
 
				+    def get_kuaishou_videoList(cls):
			
 
				+        try:
			
 
				+            cookie = Material.get_kuaishou_cookie()
			
 
				+            # 读取飞书表格,更新数据库用户主页id
			
 
				+            Material.insert_kuaishou_user()
			
 
				+            # 获取 用户主页id
			
 
				+            user_list = cls.get_kuaishou_videoUserId()
			
 
				+            if len(user_list) == 0:
			
 
				+                return
			
 
				+            for i in user_list:
			
 
				+                account_id = i[0].replace('(', '').replace(')', '').replace(',', '')
			
 
				+                Common.logger().info(f"用户主页ID：{account_id}")
			
 
				+                pcursor = ""
			
 
				+                while True:
			
 
				+                    time.sleep(random.randint(5, 10))
			
 
				+                    url = "https://www.kuaishou.com/graphql"
			
 
				+                    payload = json.dumps({
			
 
				+                        "operationName": "visionProfilePhotoList",
			
 
				+                        "variables": {
			
 
				+                            "userId": account_id,
			
 
				+                            "pcursor": pcursor,
			
 
				+                            "page": "profile"
			
 
				+                        },
			
 
				+                        "query": "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
			
 
				+                    })
			
 
				+                    headers = {
			
 
				+                        'Accept': '*/*',
			
 
				+                        'Content-Type': 'application/json',
			
 
				+                        'Origin': 'https://www.kuaishou.com',
			
 
				+                        'Cookie': cookie,
			
 
				+                        'Content-Length': '1260',
			
 
				+                        'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
			
 
				+                        'Host': 'www.kuaishou.com',
			
 
				+                        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
			
 
				+                        'Referer': f'https://www.kuaishou.com/profile/{account_id}',
			
 
				+                        'Accept-Encoding': 'gzip, deflate, br',
			
 
				+                        'Connection': 'keep-alive'
			
 
				+                    }
			
 
				+                    urllib3.disable_warnings()
			
 
				+                    s = requests.session()
			
 
				+                    # max_retries=3 重试3次
			
 
				+                    s.mount('http://', HTTPAdapter(max_retries=3))
			
 
				+                    s.mount('https://', HTTPAdapter(max_retries=3))
			
 
				+                    response = s.post(url=url, headers=headers, data=payload, verify=False,
			
 
				+                                      timeout=10)
			
 
				+                    response.close()
			
 
				+                    if response.status_code != 200:
			
 
				+                        Common.logger().info(
			
 
				+                            f"接口请求失败，请更换cookie，{response.status_code}")
			
 
				+                        Feishu.bot('recommend', '快手', '快手cookie失效，请及时更换～')
			
 
				+                        # 如果返回空信息，则随机睡眠 600, 1200 秒
			
 
				+                        time.sleep(random.randint(600, 1200))
			
 
				+                        continue
			
 
				+                    elif "feeds" not in response.json()["data"]["visionProfilePhotoList"]:
			
 
				+                        Common.logger().info(
			
 
				+                            f"接口请求失败，请更换cookie，{response.status_code}")
			
 
				+                        Feishu.bot('recommend', '快手', '快手cookie失效，请及时更换～')
			
 
				+                        # 如果返回空信息，则随机睡眠 600, 1200 秒
			
 
				+                        time.sleep(random.randint(600, 1200))
			
 
				+                        continue
			
 
				+                    elif len(response.json()["data"]["visionProfilePhotoList"]["feeds"]) == 0:
			
 
				+                        Common.logger().info(
			
 
				+                            f"接口请求失败，请更换cookie，{response.status_code}")
			
 
				+                        Feishu.bot('recommend', '快手', '快手cookie失效，请及时更换～')
			
 
				+                        # 如果返回空信息，则随机睡眠 600, 1200 秒
			
 
				+                        time.sleep(random.randint(600, 1200))
			
 
				+                        continue
			
 
				+                    pcursor = response.json()['data']['visionProfilePhotoList']['pcursor']
			
 
				+                    feeds = response.json()['data']['visionProfilePhotoList']['feeds']
			
 
				+                    count = 0
			
 
				+                    for i in range(len(feeds)):
			
 
				+                        try:
			
 
				+                            try:
			
 
				+                                video_id = feeds[i].get("photo", {}).get("videoResource").get("h264", {}).get("videoId", "")
			
 
				+                            except KeyError:
			
 
				+                                video_id = feeds[i].get("photo", {}).get("videoResource").get("hevc", {}).get("videoId", "")
			
 
				+                            video_url = feeds[i].get('photo', {}).get('photoUrl', "")
			
 
				+                            count += 1
			
 
				+                            id = cls.select_videoUrl_id(video_id)
			
 
				+                            if id:
			
 
				+                                if count > 5:
			
 
				+                                    Common.logger().info(
			
 
				+                                        f"重复视频不在抓取该用户，用户主页id：{account_id}")
			
 
				+                                    break
			
 
				+                                continue
			
 
				+                            oss_object_key = Oss.video_sync_upload_oss(video_url, video_id, account_id, "kuaishou")
			
 
				+                            status = oss_object_key.get("status")
			
 
				+                            # 发送 oss
			
 
				+                            oss_object_key = oss_object_key.get("oss_object_key")
			
 
				+                            Common.logger().info(f"抖音视频链接oss发送成功，oss地址：{oss_object_key}")
			
 
				+                            # 发送成功 存入数据库
			
 
				+                            if status == 200:
			
 
				+                                cls.insert_videoUrl(video_id, account_id, oss_object_key)
			
 
				+                                Common.logger().info(f"视频地址插入数据库成功，视频id：{video_id},用户主页id：{account_id},视频储存地址：{oss_object_key}")
			
 
				+                        except Exception as e:
			
 
				+                            Common.logger().warning(f"抓取单条视频异常:{e}\n")
			
 
				+                            continue
			
 
				+        except Exception as e:
			
 
				+            Common.logger().warning(f"抓取异常:{e}\n")
			
 
				+            return
			
--- a/video_stitching/video_stitching.py
+++ b/video_stitching/video_stitching.py
@@ -318,10 +318,10 @@ class VideoStitching():
 
				             return ""
			
 
				 
			
 
				     @classmethod
			
 
				-    def video_stitching(cls, video_type):
			
 
				+    def video_stitching(cls, video_type, count):
			
 
				         cookie = Material.get_houtai_cookie()
			
 
				         # 获取音频类型+字幕
			
 
				-        audio_id, srt, title_list = Material.get_audio_type(video_type)
			
 
				+        audio_id, srt, title_list = Material.get_audio_type(video_type, count)
			
 
				         if video_type == "口播--美文类":
			
 
				             # 获取已入库的口播视频
			
 
				             audio_list = cls.get_audio_list()