zhangyong 3 달 전
부모
커밋
821ce7b8d2
3개의 변경된 파일에 243개의 추가작업 그리고 2개의 삭제
  1. 117 0
      data_channel/dy.py
  2. 122 0
      data_channel/ks.py
  3. 4 2
      video_rewriting/video_processor.py

+ 117 - 0
data_channel/dy.py

@@ -0,0 +1,117 @@
+import random
+import time
+
+import requests
+import json
+
+from common import Feishu, AliyunLogger, Material
+from common.sql_help import sqlCollect
+
+
class DYX:
    """Crawler for Douyin (抖音) blogger videos via the internal crawler service."""

    @staticmethod
    def _sanitize_title(desc):
        """Strip whitespace/newlines and characters unsafe in titles or file names."""
        return desc.strip().replace("\n", "") \
            .replace("/", "").replace("\\", "").replace("\r", "") \
            .replace(":", "").replace("*", "").replace("?", "") \
            .replace("?", "").replace('"', "").replace("<", "") \
            .replace(">", "").replace("|", "").replace(" ", "") \
            .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
            .replace("'", "").replace("#", "").replace("Merge", "")

    @classmethod
    def get_dy_list(cls, task_mark, url_id, number, mark, channel_id, name):
        """
        Fetch up to `number` videos from a Douyin blogger that pass the filter rules.

        :param task_mark: task identifier (unused here; kept for a uniform channel API)
        :param url_id: Douyin account id to crawl
        :param number: how many qualifying videos to collect
        :param mark: de-duplication mark passed to sqlCollect
        :param channel_id: channel name used for logging and dedup scoping
        :param name: operator name, for logging only
        :return: list of dicts {"video_id", "cover", "video_url", "rule", "old_title"}
        """
        url = "http://8.217.192.46:8889/crawler/dou_yin/blogger"
        video_list = []
        next_cursor = ''
        if not url_id or not url_id.strip():
            return video_list
        # Page through the blogger feed, at most 5 pages, threading the cursor.
        # (The original returned unconditionally after page 1, so pagination
        # never actually advanced — fixed here.)
        for _ in range(5):
            try:
                payload = json.dumps({
                    "account_id": url_id,
                    "source": "app",
                    "sort_type": "最新",
                    "cursor": next_cursor
                })
                headers = {
                    'Content-Type': 'application/json'
                }
                response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
                time.sleep(random.randint(1, 5))  # polite crawl pacing
                response = response.json()
                if response['code'] != 0:
                    return video_list
                data_list = response['data']
                next_cursor = str(data_list['next_cursor'])
                for item in data_list['data']:
                    video_id = item.get('aweme_id')  # 文章id
                    day_count = Material.get_count_restrict(channel_id)
                    if day_count:
                        status = sqlCollect.is_used_days(video_id, mark, channel_id, day_count)
                    else:
                        status = sqlCollect.is_used(video_id, mark, channel_id)
                    # 视频链接
                    video_url = item.get('video', {}).get('play_addr', {}).get('url_list', [None])[0]
                    digg_count = int(item.get('statistics').get('digg_count'))    # 点赞
                    share_count = int(item.get('statistics').get('share_count'))  # 转发
                    duration = item.get('duration') / 1000  # API reports milliseconds
                    old_title = cls._sanitize_title(item.get('desc', ""))
                    log_data = f"user:{url_id},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,share_count:{share_count},,digg_count:{digg_count},,duration:{duration}"
                    AliyunLogger.logging(channel_id, name, url_id, video_id, "扫描到一条视频", "2001", log_data)

                    if status:
                        AliyunLogger.logging(channel_id, name, url_id, video_id, "该视频已改造过", "2002", log_data)
                        continue
                    if share_count < 200:
                        # Log message now matches the actual threshold (was "小于500").
                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享小于200", "2003", log_data)
                        continue
                    # Guard digg_count == 0 to avoid ZeroDivisionError aborting the page.
                    video_percent = '%.2f' % (share_count / digg_count) if digg_count else '0.00'
                    if float(video_percent) < 0.15:
                        # Log message now matches the actual threshold (was "小于0.25").
                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享/点赞小于0.15", "2003", log_data)
                        continue
                    if int(duration) < 30 or int(duration) > 720:
                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
                        continue
                    cover_url = item.get('video').get('cover').get('url_list')[0]  # 视频封面
                    video_list.append({"video_id": video_id, "cover": cover_url, "video_url": video_url,
                                       "rule": video_percent, "old_title": old_title})
                    AliyunLogger.logging(channel_id, name, url_id, video_id, "符合规则等待改造", "2004", log_data)
                    if len(video_list) == int(number):
                        return video_list
                # Stop when the API has no further pages.
                # NOTE(review): original compared a str cursor to False (always
                # falsy); assuming an empty/"0" cursor is the sentinel — confirm.
                if not next_cursor or next_cursor == "0":
                    return video_list
            except Exception:
                # Best-effort: return whatever was collected so far.
                return video_list
        return video_list

    @classmethod
    def get_video(cls, video_id):
        """
        Resolve a video id to (video_url, cover_image_url).

        Retries up to 3 times when the service reports rate limiting
        (code 10000); returns (None, None) if all attempts are rate limited.
        """
        url = "http://8.217.192.46:8889/crawler/dou_yin/detail"
        payload = json.dumps({
            "content_id": str(video_id)
        })
        headers = {
            'Content-Type': 'application/json'
        }
        for _ in range(3):
            response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
            response = response.json()
            if response["code"] == 10000:
                # Rate limited: wait and retry instead of parsing a bad payload
                # (the original slept, then indexed the error response anyway).
                time.sleep(60)
                continue
            data = response["data"]["data"]
            video_url = data["video_url_list"][0]["video_url"]
            image_url = data["image_url_list"][0]["image_url"]
            return video_url, image_url
        return None, None
+
+
+
if __name__ == '__main__':
    # Ad-hoc manual smoke test: scan one video from a sample blogger account.
    DYX.get_dy_list("1", "MS4wLjABAAAA2QEvnEb7cQDAg6vZXq3j8_LlbO_DiturnV7VeybFKY4", 1, "1", '', "")

+ 122 - 0
data_channel/ks.py

@@ -0,0 +1,122 @@
+import random
+import time
+import requests
+import json
+from common import Feishu, AliyunLogger, Material
+from common.sql_help import sqlCollect
+
class KSX:
    """Crawler for Kuaishou (快手) app blogger videos via the internal crawler service."""

    @classmethod
    def get_ks_list(cls, task_mark, url_id, number, mark, channel_id, name):
        """
        Fetch up to `number` videos from a Kuaishou blogger that pass the filter rules.

        :param task_mark: task identifier (unused here; kept for a uniform channel API)
        :param url_id: Kuaishou account id to crawl
        :param number: how many qualifying videos to collect
        :param mark: de-duplication mark passed to sqlCollect
        :param channel_id: channel name used for logging and dedup scoping
        :param name: operator name, for logging only
        :return: list of dicts {"video_id", "cover", "video_url", "rule", "old_title"}
        """
        video_list = []
        url = "http://8.217.192.46:8889/crawler/kuai_shou/blogger"
        next_cursor = ""
        try:
            if not url_id or not url_id.strip():
                return video_list
            # Page through the blogger feed, at most 5 pages, threading the
            # cursor. (The original returned unconditionally at the end of the
            # first iteration, so pagination never advanced — fixed here.)
            for _ in range(5):
                payload = json.dumps({
                    "account_id": url_id,
                    "sort_type": "最新",
                    "cursor": next_cursor
                })
                headers = {
                    'Content-Type': 'application/json'
                }
                time.sleep(random.randint(1, 5))  # polite crawl pacing
                response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
                response = response.json()

                data_all_list = response["data"]
                if not data_all_list:
                    # Alert on expired token; code 27006 per crawler service.
                    # (Original read response["cdoe"] — a typo that always raised
                    # KeyError, so the alert never fired.)
                    try:
                        if int(response["code"]) == 27006:
                            Feishu.finish_bot("kuai_shou/blogger接口" + response["msg"],
                                              "https://open.feishu.cn/open-apis/bot/v2/hook/575ca6a1-84b4-4a2f-983b-1d178e7b16eb", "【快手 Token 使用提示 】")
                    except Exception:
                        pass
                    # Nothing to parse on this page; stop cleanly instead of
                    # crashing on data_all_list["has_more"] below.
                    return video_list

                has_more = data_all_list["has_more"]
                next_cursor = str(data_all_list["next_cursor"])

                for data in data_all_list["data"]:
                    photo_id = data["photo_id"]
                    day_count = Material.get_count_restrict(channel_id)
                    if day_count:
                        status = sqlCollect.is_used_days(photo_id, mark, channel_id, day_count)
                    else:
                        status = sqlCollect.is_used(photo_id, mark, channel_id)

                    view_count = data["view_count"]
                    share_count = data["share_count"]
                    old_title = data["caption"]  # 标题
                    duration = int(data["duration"]) / 1000  # API reports milliseconds
                    log_data = f"user:{url_id},,video_id:{photo_id},,video_url:'',original_title:{old_title},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"

                    AliyunLogger.logging(channel_id, name, url_id, photo_id, "扫描到一条视频", "2001", log_data)
                    if status:
                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "该视频已改造过", "2002", log_data)
                        continue
                    # Guard view_count == 0 to avoid ZeroDivisionError aborting the run.
                    video_percent = '%.4f' % (int(share_count) / view_count) if view_count else '0.0000'
                    if float(video_percent) < 0.0001:
                        # Log message now matches the actual threshold (was "小于0.0005").
                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "不符合规则:分享/浏览小于0.0001", "2003", log_data)
                        continue
                    if int(share_count) < 500:
                        # Log message now matches the actual threshold (was "小于100").
                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "不符合规则:分享小于500", "2003", log_data)
                        continue
                    if int(duration) < 30 or duration > 720:
                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
                        continue
                    video_url, image_url = cls.get_video(photo_id)
                    if not video_url:
                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "无法获取到视频链接", "2003", log_data)
                        continue
                    log_data = f"user:{url_id},,video_id:{photo_id},,video_url:{video_url},,original_title:{old_title},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"
                    video_list.append({"video_id": photo_id, "cover": image_url, "video_url": video_url,
                                       "rule": video_percent,
                                       "old_title": old_title})
                    AliyunLogger.logging(channel_id, name, url_id, photo_id, "符合规则等待改造", "2004", log_data)
                    if len(video_list) == int(number):
                        return video_list
                if not has_more:
                    return video_list
            return video_list
        except Exception:
            # Best-effort: return whatever was collected so far.
            return video_list

    @classmethod
    def get_video(cls, video_id):
        """
        Resolve a photo id to (video_url, cover_image_url).

        Single request; exceptions (network/parse) propagate to the caller.
        """
        url = "http://8.217.192.46:8889/crawler/kuai_shou/detail"

        payload = json.dumps({
            "content_id": str(video_id)
        })
        headers = {
            'Content-Type': 'application/json'
        }
        time.sleep(random.uniform(1, 10))  # polite crawl pacing

        response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
        response = response.json()
        data = response["data"]["data"]
        video_url = data["video_url_list"][0]["video_url"]
        image_url = data["image_url_list"][0]["image_url"]
        return video_url, image_url
+
+
+
if __name__ == '__main__':
    # Ad-hoc manual smoke test: scan one video from a sample blogger account.
    KSX.get_ks_list("1", "3xzicxg2nandemc", 1, "1", '', '')

+ 4 - 2
video_rewriting/video_processor.py

@@ -16,8 +16,10 @@ from common.tts_help import TTS
 from common import Material, Feishu, Oss, AliyunLogger
 from common.ffmpeg import FFmpeg
 from data_channel.douyin import DY
+from data_channel.dy import DYX
 from data_channel.dy_keyword import DyKeyword
 from data_channel.dy_ls import DYLS
+from data_channel.ks import KSX
 from data_channel.ks_feed import KSFeed
 from data_channel.ks_keyword import KsKeyword
 from data_channel.ks_ls import KSLS
@@ -515,13 +517,13 @@ class VideoProcessor:
         根据渠道ID获取数据列表
         """
         if channel_id == "抖音":
-            return DY.get_dy_url(task_mark, url, number, mark, feishu_id, cookie_sheet, channel_id, name)
+            return DYX.get_dy_list(task_mark, url, number, mark, channel_id, name)
         elif channel_id == "票圈":
             return PQ.get_pq_url(task_mark, url, number, mark, channel_id, name)
         elif channel_id == "视频号":
             return SPH.get_sph_url(task_mark, url, number, mark, channel_id, name)
         elif channel_id == "快手":
-            return KS.get_ks_url(task_mark, url, number, mark, feishu_id, cookie_sheet, channel_id, name)
+            return KSX.get_ks_list(task_mark, url, number, mark, channel_id, name)
         # elif channel_id == "快手创作者版":
         #     return KsFeedVideo.get_data(channel_id, name)
         elif channel_id == "单点视频":