zhangyong vor 4 Monaten
Ursprung
Commit
6fd9efcb91

+ 5 - 2
carry_video/carry_video.py

@@ -49,6 +49,8 @@ class CarryViode:
                     location = response.headers.get('Location', None)
                     video_id = re.search(r'/video/(\d+)/?', location.split('?')[0] if location else url).group(1)
                 url = "http://8.217.192.46:8889/crawler/dou_yin/detail"
+                if not video_id or video_id == "":
+                    return None, None, None
                 payload = json.dumps({
                     "content_id": str(video_id)
                 })
@@ -89,7 +91,8 @@ class CarryViode:
             video_id = re.search(r'/(f|photo|short-video|long-video)/(.*)/?',
                                  location.split('?')[0] if location else url).group(2)
             url = "http://8.217.192.46:8889/crawler/kuai_shou/detail"
-
+            if not video_id or video_id == "":
+                return None, None, None
             payload = json.dumps({
                 "content_id": str(video_id)
             })
@@ -109,7 +112,7 @@ class CarryViode:
                 original_title = data["title"]
                 return video_url, original_title, video_id
             elif code == 27006:
-                if "作品不存在" in response['msg']:
+                if "作品不存在" in response['msg'] or "内容不存在" in response['msg'] or "私密作品" in response['msg']:
                     return "作品不存在", None, None
             time.sleep(3)
         except Exception as e:

+ 0 - 23
data_channel/data_help.py

@@ -1,23 +0,0 @@
-import cv2
-
-
-class dataHelp():
-    """
-    获取视频时长
-    """
-    @classmethod
-    def video_duration(cls, filename):
-        cap = cv2.VideoCapture(filename)
-        if cap.isOpened():
-            rate = cap.get(5)
-            frame_num = cap.get(7)
-            duration = frame_num / rate
-            return duration
-        return 0
-
-
-
-if __name__ == '__main__':
-
-    a = dataHelp.video_duration("/Users/tzld/Desktop/video_rewriting/path/aiyuepw_video.mp4")
-    print(a)

+ 0 - 128
data_channel/douyin.py

@@ -1,128 +0,0 @@
-import json
-import random
-import time
-
-import requests
-import urllib3
-from requests.adapters import HTTPAdapter
-from common import Material, Common, Feishu, AliyunLogger
-from common.sql_help import sqlCollect
-from data_channel.data_help import dataHelp
-from data_channel.douyin_help import DouYinHelper
-
-
-class DY:
-
-    @classmethod
-    def get_dy_url(cls, task_mark, url_id, number, mark, feishu_id, cookie_sheet, channel_id, name):
-        list = []
-        next_cursor = 0
-        try:
-            for i in range(6):
-                cookie = Material.get_cookie_data(feishu_id, cookie_sheet, channel_id)
-                time.sleep(random.randint(1, 5))
-                url = 'https://www.douyin.com/aweme/v1/web/aweme/post/'
-                headers = {
-                    'Accept': 'application/json, text/plain, */*',
-                    'Accept-Language': 'zh-CN,zh;q=0.9',
-                    'Cache-Control': 'no-cache',
-                    'Cookie': cookie,
-                    'Pragma': 'no-cache',
-                    'Referer': f'https://www.douyin.com/user/{url_id}',
-                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
-                                  'Chrome/118.0.0.0 Safari/537.36',
-                }
-                query = DouYinHelper.get_full_query(ua=headers['User-Agent'], extra_data={
-                    'sec_user_id': url_id,
-                    'max_cursor': next_cursor,
-                    'locate_query': 'false',
-                    'show_live_replay_strategy': '1',
-                    'need_time_list': '1',
-                    'time_list_query': '0',
-                    'whale_cut_token': '',
-                    'cut_version': '1',
-                    'count': '18',
-                    'publish_video_strategy_type': '2',
-                })
-                urllib3.disable_warnings()
-                s = requests.session()
-                s.mount('http://', HTTPAdapter(max_retries=3))
-                s.mount('https://', HTTPAdapter(max_retries=3))
-                response = requests.request(method='GET', url=url, headers=headers, params=query)
-                body = response.content.decode()
-                obj = json.loads(body)
-                has_more = True if obj.get('has_more', 0) == 1 else False
-                next_cursor = str(obj.get('max_cursor')) if has_more else None
-                data = obj.get('aweme_list', [])
-                if data == [] and len(data) == 0:
-                    if name == '抖音品类账号' or name == '抖音品类账号-1':
-                        Feishu.bot("wangxueke", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '王雪珂')
-                        Feishu.bot("xinxin", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '信欣')
-                        Feishu.bot("liuzhaoheng", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '刘兆恒')
-
-                    else:
-                        Feishu.bot(mark, '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', name)
-                    return list
-                response.close()
-                for i in range(len(data)):
-                    entity_type = data[i].get('media_type')
-                    if entity_type == 4:
-                        # is_top = data[i].get('is_top')  # 是否置顶
-                        video_id = data[i].get('aweme_id')  # 文章id
-                        status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
-
-                        video_uri = data[i].get('video', {}).get('play_addr', {}).get('uri')
-                        ratio = f'{data[i].get("video", {}).get("height")}p'
-                        video_url = f'https://www.iesdouyin.com/aweme/v1/play/?video_id={video_uri}&ratio={ratio}&line=0'  # 视频链接
-                        digg_count = int(data[i].get('statistics').get('digg_count'))  # 点赞
-                        # comment_count = int(data[i].get('statistics').get('comment_count'))  # 评论
-                        share_count = int(data[i].get('statistics').get('share_count'))  # 转发
-                        old_title = data[i].get('desc', "").strip().replace("\n", "") \
-                            .replace("/", "").replace("\\", "").replace("\r", "") \
-                            .replace(":", "").replace("*", "").replace("?", "") \
-                            .replace("?", "").replace('"', "").replace("<", "") \
-                            .replace(">", "").replace("|", "").replace(" ", "") \
-                            .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
-                            .replace("'", "").replace("#", "").replace("Merge", "")
-                        duration = dataHelp.video_duration(video_url)
-                        Common.logger("dy").info(
-                            f"扫描:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count}")
-                        log_data = f"user:{url_id},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,share_count:{share_count},,digg_count:{digg_count},,duration:{duration}"
-                        AliyunLogger.logging(channel_id, name, url_id, video_id, "扫描到一条视频", "2001", log_data)
-                        if status:
-                            AliyunLogger.logging(channel_id, name, url_id, video_id, "该视频已改造过", "2002", log_data)
-                            continue
-                        if share_count < 200:
-                            AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享小于200", "2003", log_data)
-                            Common.logger("dy").info(
-                                f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count}")
-                            continue
-                        video_percent = '%.2f' % (share_count / digg_count)
-                        special = float(0.15)
-                        if float(video_percent) < special:
-                            AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享/点赞小于0.15", "2003", log_data)
-                            Common.logger("dy").info(
-                                f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ")
-                            continue
-
-                        if int(duration) < 30 or int(duration) > 720:
-                            AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
-                            Common.logger("dy").info(
-                                f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{duration} ")
-                            continue
-                        cover_url = data[i].get('video').get('cover').get('url_list')[0]  # 视频封面
-                        all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url,
-                                    "rule": video_percent, "old_title": old_title}
-                        list.append(all_data)
-                        AliyunLogger.logging(channel_id, name, url_id, video_id, "符合规则等待改造", "2004", log_data)
-                        if len(list) == int(number):
-                            Common.logger(mark).info(f"获取抖音视频总数:{len(list)}\n")
-                            return list
-
-            return list
-        except Exception as exc:
-            # Feishu.bot("liuzhaoheng", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '刘兆恒')
-            # Feishu.bot("wangxueke", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '王雪珂')
-            # Feishu.bot("xinxin", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '信欣')
-            Common.logger("dy").info(f"抖音历史数据获取失败:{exc}\n")
-            return list

+ 0 - 138
data_channel/douyin_help.py

@@ -1,138 +0,0 @@
-import json
-import time
-from base64 import b64encode
-from functools import reduce
-from hashlib import md5
-from random import choice, randint
-from typing import Any, Dict, List, Optional
-from urllib.parse import urlencode
-
-
-class DouYinHelper(object):
-    ttwid_list = [
-        '1|G3wy_-RdLJnfG5P9zAcP54OM8_nTLZVrJxNi1lPzdmg|1693558867|5e43c47a424e939aaf7193b096e3c6f2274982ee64e9608c99c54d2a43982aca'
-    ]
-
-    @classmethod
-    def _0x30492c(cls, x: bytes, y: bytes, f: Optional[List[int]] = None) -> bytes:
-        """RC4加密, 可以用Crypto.Cipher.ARC4替代"""
-        c = 0
-        d = [i for i in range(256)]
-        for b in range(256):
-            c = (c + d[b] + x[b % len(x)]) % 256
-            e = d[b]
-            d[b] = d[c]
-            d[c] = e
-        t, c = 0, 0
-
-        if not f:
-            f = []
-        for i in range(len(y)):
-            t = (t + 1) % 256
-            c = (c + d[t]) % 256
-            e = d[t]
-            d[t] = d[c]
-            d[c] = e
-            f.append(y[i] ^ d[(d[t] + d[c]) % 256])
-        return bytes(f)
-
-    @classmethod
-    def _0x485470(cls, a: str) -> List[int]:
-        _0x583e81 = [0] * 103
-        for i in range(10):
-            _0x583e81[i + 48] = i
-        for j in range(10, 16):
-            _0x583e81[j + 87] = j
-
-        b = len(a) >> 1
-        e = b << 1
-        d = [0] * b
-        c = 0
-        for f in range(0, e, 2):
-            d[c] = _0x583e81[ord(a[f])] << 4 | _0x583e81[ord(a[f + 1])]
-            c += 1
-        return d
-
-    @classmethod
-    def calc_x_bogus(cls, ua: str, query: str, data: Optional[Dict[str, Any]] = None) -> str:
-        """计算X_Bogus参数"""
-        query = query.encode()
-        for _ in range(2):
-            query = md5(query).hexdigest()
-            query = bytes([int(query[i:i + 2], 16) for i in range(0, len(query), 2)])
-
-        data = json.dumps(data, separators=(',', ':'), ensure_ascii=False).encode() if data else b''
-        for _ in range(2):
-            data = md5(data).hexdigest()
-            data = bytes([int(data[i:i + 2], 16) for i in range(0, len(data), 2)])
-
-        a = b'\x00\x01\x0e'
-        ua = b64encode(cls._0x30492c(a, ua.encode())).decode()
-        ua = md5(ua.encode()).hexdigest()
-        ua = cls._0x485470(ua)
-
-        t = int(time.time())
-        fp = 2421646185  # 真实的canvas指纹
-        arr1 = [
-            64,
-            1 / 256,
-            1 % 256,
-            14,
-            query[14],
-            query[15],
-            data[14],
-            data[15],
-            ua[14],
-            ua[15],
-            t >> 24 & 255,
-            t >> 16 & 255,
-            t >> 8 & 255,
-            t >> 0 & 255,
-            fp >> 24 & 255,
-            fp >> 16 & 255,
-            fp >> 8 & 255,
-            fp >> 0 & 255,
-        ]
-        reduce_num = reduce(lambda x, y: int(x) ^ int(y), arr1)
-        arr1.append(reduce_num)
-        arr2 = [int(arr1[i]) for i in range(len(arr1))]
-
-        garble = cls._0x30492c(b'\xff', bytes(arr2), [2, 255])
-        m = 'Dkdpgh4ZKsQB80/Mfvw36XI1R25-WUAlEi7NLboqYTOPuzmFjJnryx9HVGcaStCe='
-        xb = ''
-        for i in range(0, len(garble), 3):
-            a, b, c = garble[i], garble[i + 1], garble[i + 2]
-            base_num = c | b << 8 | a << 16
-            c1 = m[(base_num & 16515072) >> 18]
-            c2 = m[(base_num & 258048) >> 12]
-            c3 = m[(base_num & 4032) >> 6]
-            c4 = m[(base_num & 63)]
-            xb += ''.join([c1, c2, c3, c4])
-        return xb
-
-    @classmethod
-    def get_full_query(cls, ua: str, extra_data: Dict[str, Any]) -> Dict[str, Any]:
-        ms_token = b64encode(bytes([randint(0, 255) for _ in range(94)])).decode()
-        ms_token = ms_token.replace('+', '-').replace('/', '_').rstrip('=')
-
-        data = {
-            'device_platform': 'webapp',
-            'aid': '6383',
-            'channel': 'channel_pc_web',
-            'pc_client_type': '1',
-            'version_code': '190500',
-            'version_name': '19.5.0',
-            'cookie_enabled': 'true',
-            'platform': 'PC',
-            'msToken': ms_token,
-        }
-        data.update(extra_data)
-        query = urlencode(data, safe='=')
-        x_bogus = cls.calc_x_bogus(ua=ua, query=query, data=None)
-        data.update({'X-Bogus': x_bogus})
-        return data
-
-    @classmethod
-    def get_cookie(cls):
-        ttwid = choice(cls.ttwid_list)
-        return f'ttwid={ttwid}'

+ 0 - 109
data_channel/dy_keyword.py

@@ -1,109 +0,0 @@
-import requests
-import json
-
-from common import Common, AliyunLogger
-from common.sql_help import sqlCollect
-
-
-class DyKeyword:
-    @classmethod
-    def get_key_word(cls, keyword, task_mark, mark, channel_id, name, task):
-        combo = task['combo']
-        content_type = combo[0]
-        publish_time = combo[1]
-        duration = combo[2]
-        share_count_rule = 0
-        special = 0
-        short_duration_rule = 0
-        url = "http://47.236.68.175:8889/crawler/dou_yin/keyword"
-        list = []
-        payload = json.dumps({
-            "keyword": keyword,
-            "content_type": "视频",
-            "sort_type": content_type,
-            "publish_time": publish_time,
-            "duration": duration,
-            "cursor": ""
-        })
-        headers = {
-            'Content-Type': 'application/json'
-        }
-        if " 不限" == publish_time:
-            share_count_rule = 200
-            special = 0.15
-            short_duration_rule = 30
-        elif "一天内" == publish_time:
-            share_count_rule = 0
-            special = 0.10
-            short_duration_rule = 25
-        elif "一周内" == publish_time:
-            share_count_rule = 100
-            special = 0.15
-            short_duration_rule = 25
-        elif "半年内" == publish_time:
-            share_count_rule = 200
-            special = 0.15
-            short_duration_rule = 25
-        try:
-            response = requests.request("POST", url, headers=headers, data=payload)
-            response = response.json()
-            code = response['code']
-            if code != 0:
-                Common.logger("dy-key-word").info(f"抖音搜索词数据获取失败,接口为/dou_yin/keyword\n")
-                return list
-            data = response['data']['data']
-            for i in range(len(data)):
-
-                video_id = data[i].get('aweme_id')  # 文章id
-                status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
-                video_uri = data[i].get('video', {}).get('play_addr', {}).get('uri')
-                ratio = f'{data[i].get("video", {}).get("height")}p'
-                video_url = f'https://www.iesdouyin.com/aweme/v1/play/?video_id={video_uri}&ratio={ratio}&line=0'  # 视频链接
-                old_title = data[i].get('desc', "").strip().replace("\n", "") \
-                    .replace("/", "").replace("\\", "").replace("\r", "") \
-                    .replace(":", "").replace("*", "").replace("?", "") \
-                    .replace("?", "").replace('"', "").replace("<", "") \
-                    .replace(">", "").replace("|", "").replace(" ", "") \
-                    .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
-                    .replace("'", "").replace("#", "").replace("Merge", "")
-                digg_count = int(data[i].get('statistics').get('digg_count'))  # 点赞
-                share_count = int(data[i].get('statistics').get('share_count'))  # 转发
-                duration = data[i].get('duration')
-                duration = duration / 1000
-                log_data = f"user:{keyword},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,share_count:{share_count},,digg_count:{digg_count},,duration:{duration}"
-                AliyunLogger.logging(channel_id, name, keyword, video_id, "扫描到一条视频", "2001", log_data)
-                Common.logger("dy-key-word").info(
-                    f"扫描:{task_mark},搜索词:{keyword},视频id{video_id} ,分享:{share_count},点赞{digg_count}")
-                if status:
-                    AliyunLogger.logging(channel_id, name, keyword, video_id, "该视频已改造过", "2002", log_data)
-                    continue
-                video_percent = '%.2f' % (int(share_count) / int(digg_count))
-                if int(share_count) < share_count_rule:
-                    AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:分享小于{share_count_rule}", "2003", log_data)
-                    Common.logger("dy-key-word").info(
-                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{int(duration)} ")
-                    continue
-                if float(video_percent) < special:
-                    AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:分享/点赞小于{special}", "2003", log_data)
-                    Common.logger("dy-key-word").info(
-                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{int(duration)} ")
-                    continue
-                if int(duration) < short_duration_rule or int(duration) > 720:
-                    AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:时长不符合规则大于720秒/小于{short_duration_rule}秒", "2003", log_data)
-                    Common.logger("dy-key-word").info(
-                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{int(duration)} ")
-                    continue
-                cover_url = data[i].get('video').get('cover').get('url_list')[0]  # 视频封面
-                all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": video_percent,
-                            "old_title": old_title}
-                list.append(all_data)
-                AliyunLogger.logging(channel_id, name, keyword, video_id, "符合规则等待改造", "2004", log_data)
-            return list
-        except Exception as exc:
-            Common.logger("dy-key-word").info(f"抖音搜索词{keyword}获取失败{exc}\n")
-            return list
-
-
-if __name__ == '__main__':
-
-    DyKeyword.get_key_word('keyword', 'sort_type', 'publish_time', 'duration', 'task_mark', 'mark')

+ 0 - 192
data_channel/dy_ls.py

@@ -1,192 +0,0 @@
-import random
-import time
-
-import requests
-import json
-
-from common import Common, Feishu, AliyunLogger
-from common.sql_help import sqlCollect
-
-
-class DYLS:
-    @classmethod
-    def get_dy_zr_list(cls, task_mark, url_id, number, mark, channel_id, name):
-        url = "http://47.236.68.175:8889/crawler/dou_yin/blogger"
-        list = []
-        next_cursor = ''
-        for i in range(20):
-            try:
-                payload = json.dumps({
-                    "account_id": url_id,
-                    "source": "app",
-                    "sort": "最热",
-                    "cursor": next_cursor
-                })
-                headers = {
-                    'Content-Type': 'application/json'
-                }
-
-                response = requests.request("POST", url, headers=headers, data=payload)
-                time.sleep(random.randint(1, 5))
-                response = response.json()
-                code = response['code']
-                if code != 0:
-                    Common.logger("dy-ls").info(f"抖音历史数据获取失败,接口为/dou_yin/blogge\n")
-                    return list
-                data_list = response['data']
-                next_cursor = str(data_list['next_cursor'])
-                data = data_list['data']
-                for i in range(len(data)):
-                    video_id = data[i].get('aweme_id')  # 文章id
-                    # status = sqlCollect.is_used(task_mark, video_id, mark, "抖音")
-                    # if status:
-                    status = sqlCollect.is_used(task_mark, video_id, mark, "抖音历史")
-
-                    video_uri = data[i].get('video', {}).get('play_addr', {}).get('uri')
-                    ratio = f'{data[i].get("video", {}).get("height")}p'
-                    video_url = f'https://www.iesdouyin.com/aweme/v1/play/?video_id={video_uri}&ratio={ratio}&line=0'  # 视频链接
-                    digg_count = int(data[i].get('statistics').get('digg_count'))  # 点赞
-                    share_count = int(data[i].get('statistics').get('share_count'))  # 转发
-                    duration = data[i].get('duration')
-                    duration = duration / 1000
-                    old_title = data[i].get('desc', "").strip().replace("\n", "") \
-                        .replace("/", "").replace("\\", "").replace("\r", "") \
-                        .replace(":", "").replace("*", "").replace("?", "") \
-                        .replace("?", "").replace('"', "").replace("<", "") \
-                        .replace(">", "").replace("|", "").replace(" ", "") \
-                        .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
-                        .replace("'", "").replace("#", "").replace("Merge", "")
-                    log_data = f"user:{url_id},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,share_count:{share_count},,digg_count:{digg_count},,duration:{duration}"
-                    AliyunLogger.logging(channel_id, name, url_id, video_id, "扫描到一条视频", "2001", log_data)
-                    Common.logger("dy-ls").info(
-                        f"扫描:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count}")
-                    if status:
-                        AliyunLogger.logging(channel_id, name, url_id, video_id, "该视频已改造过", "2002", log_data)
-                        continue
-                    video_percent = '%.2f' % (int(share_count) / int(digg_count))
-                    special = float(0.25)
-                    if int(share_count) < 500:
-                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享小于500", "2003", log_data)
-                        Common.logger("dy-ls").info(
-                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{int(duration)} ")
-                        continue
-                    if float(video_percent) < special:
-                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享/点赞小于0.25", "2003", log_data)
-                        Common.logger("dy-ls").info(
-                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{int(duration)} ")
-                        continue
-                    if int(duration) < 30 or int(duration) > 720:
-                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
-                        Common.logger("dy-ls").info(
-                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{int(duration)} ")
-                        continue
-                    cover_url = data[i].get('video').get('cover').get('url_list')[0]  # 视频封面
-                    all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": video_percent,
-                                "old_title": old_title}
-                    list.append(all_data)
-                    AliyunLogger.logging(channel_id, name, url_id, video_id, "符合规则等待改造", "2004", log_data)
-                    if len(list) == int(number):
-                        Common.logger("dy-ls").info(f"获取抖音历史视频总数:{len(list)}\n")
-                        return list
-
-                if next_cursor == False:
-                    return list
-            except Exception as exc:
-                Common.logger("dy-ls").info(f"抖音历史数据获取失败:{exc}\n")
-                return list
-            return list
-        return list
-
-
-    @classmethod
-    def get_dyls_list(cls, task_mark, url_id, number, mark):
-        next_cursor = ""
-        for i in range(10):
-            list = []
-            try:
-                #  抖查查
-                url = "http://47.236.68.175:8889/crawler/dou_yin/blogger"
-                payload = json.dumps({
-                    "account_id": url_id,
-                    "source": "抖查查",
-                    "cursor": next_cursor
-                })
-                headers = {
-                    'Content-Type': 'application/json'
-                }
-                time.sleep(random.randint(1, 5))
-                response = requests.request("POST", url, headers=headers, data=payload)
-                response = response.json()
-
-                data_all_list = response["data"]
-                has_more = data_all_list["has_more"]
-                next_cursor = str(data_all_list["next_cursor"])
-                data_list = data_all_list["data"]
-                for data in data_list:
-                    # comment_count = data["comment_count"]
-                    # download_count = data["download_count"]
-                    share_count = data["share_count"]
-                    good_count = data["good_count"]
-                    # collect_count = data["collect_count"]
-                    duration = data["duration"]
-                    video_id = data["video_id"]
-                    old_title = data["video_desc"]
-                    status = sqlCollect.is_used(video_id, mark, "抖音")
-                    if status:
-                        status = sqlCollect.is_used(video_id, mark, "抖音历史")
-                        if status == False:
-                            continue
-
-                        video_percent = '%.2f' % (int(share_count) / int(good_count))
-                        special = float(0.25)
-                        duration = duration / 1000
-                        if int(share_count) < 500 or float(video_percent) < special or int(duration) < 30 or int(duration) > 720:
-                            Common.logger("dy-ls").info(
-                                f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{good_count} ,时长:{int(duration)} ")
-                            continue
-                        video_url, image_url = cls.get_video(video_id)
-                        if video_url:
-                            all_data = {"video_id": video_id, "cover": image_url, "video_url": video_url, "rule": video_percent,
-                                        "old_title": old_title}
-                            list.append(all_data)
-                            if len(list) == int(number):
-                                Common.logger("dy-ls").info(f"获取抖音历史视频总数:{len(list)}\n")
-                                return list
-                        else:
-                            Common.logger("dy-ls").info(f"抖音历史获取url失败")
-                            Feishu.finish_bot("dou_yin/detail接口无法获取到视频链接",
-                                              "https://open.feishu.cn/open-apis/bot/v2/hook/575ca6a1-84b4-4a2f-983b-1d178e7b16eb",
-                                              "【抖音异常提示 】")
-                if has_more == False:
-                    return list
-            except Exception as exc:
-                Common.logger("dy-ls").info(f"抖音历史数据获取失败:{exc}\n")
-                return list
-
-    @classmethod
-    def get_video(cls, video_id):
-        url = "http://47.236.68.175:8889/crawler/dou_yin/detail"
-        for i in range(3):
-            payload = json.dumps({
-                "content_id": str(video_id)
-            })
-            headers = {
-                'Content-Type': 'application/json'
-            }
-
-            response = requests.request("POST", url, headers=headers, data=payload)
-            response = response.json()
-            code = response["code"]
-            if code == 10000:
-                time.sleep(60)
-            data = response["data"]["data"]
-            video_url = data["video_url_list"][0]["video_url"]
-            image_url = data["image_url_list"][0]["image_url"]
-            return video_url, image_url
-        return None, None
-
-
-
-if __name__ == '__main__':
-    DYLS.get_dy_zr_list(1,2,1,3)
-    # DYLS.get_dyls_list("1","MS4wLjABAAAA2QEvnEb7cQDAg6vZXq3j8_LlbO_DiturnV7VeybFKY4",1,"1")

+ 0 - 34
data_channel/ks_feed.py

@@ -1,34 +0,0 @@
-from common import AliyunLogger
-from common.sql_help import sqlCollect
-
-
-class KSFeed:
-    @classmethod
-    def get_feed_date(cls):
-        try:
-            data_list = sqlCollect.get_feed_data("快手")
-            list = []
-            if data_list:
-                for data in data_list:
-                    cover_url = data[3]
-                    video_url = data[2]
-                    if video_url and cover_url:
-                        video_id = data[0]
-                        channel = data[1]
-                        title = data[4]
-                        log_data = f"user:{channel},,video_id:{video_id},,video_url:{video_url},,original_title:{title}"
-                        AliyunLogger.logging(channel, channel, video_url, video_id, "扫描到一条视频",
-                                             "2001", log_data)
-                        AliyunLogger.logging(channel, channel, video_url, video_id, "符合规则等待改造",
-                                             "2004", log_data)
-                        all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "old_title": title, "rule":''}
-                        list.append(all_data)
-                return list
-            else:
-                return list
-        except Exception:
-            return list
-
-
-if __name__ == '__main__':
-    KSFeed.get_feed_date()

+ 0 - 146
data_channel/ks_keyword.py

@@ -1,146 +0,0 @@
-import time
-
-import requests
-import json
-
-from common import Common, AliyunLogger, Feishu
-from common.sql_help import sqlCollect
-
-
-class KsKeyword:
-    @classmethod
-    def get_key_word(cls, keyword, task_mark, mark, channel_id, name, task):
-        combo = task['combo']
-        content_type = combo[0]
-        publish_time = combo[1]
-        duration = combo[2]
-        share_count_rule = 0
-        special = 0
-        short_duration_rule = 0
-
-        url = "http://47.236.68.175:8889/crawler/kuai_shou/keyword"
-        list = []
-
-        payload = json.dumps({
-            "keyword": keyword,
-            "content_type": "综合",
-            "sort_type": content_type,
-            "publish_time": publish_time,
-            "duration": duration,
-            "cursor": ""
-        })
-        headers = {
-            'Content-Type': 'application/json'
-        }
-
-        if " 不限" == publish_time:
-            share_count_rule = 100
-            special = 0.0005
-            short_duration_rule = 25
-        elif "近1日" == publish_time:
-            share_count_rule = 0
-            special = 0.0003
-            short_duration_rule = 25
-        elif "近7日" == publish_time:
-            share_count_rule = 50
-            special = 0.0005
-            short_duration_rule = 25
-        elif "近1月" == publish_time:
-            share_count_rule = 100
-            special = 0.0005
-            short_duration_rule = 25
-        try:
-            time.sleep(3)
-            response = requests.request("POST", url, headers=headers, data=payload)
-            response = response.json()
-            code = response['code']
-            if code != 0:
-                if code == 27006 and response['msg'] == '快手内容已被删除或无法访问':
-                    Feishu.finish_bot(f"kuai_shou/keyword {response['msg']},cookie 过期需要更换",
-                                      "https://open.feishu.cn/open-apis/bot/v2/hook/575ca6a1-84b4-4a2f-983b-1d178e7b16eb",
-                                      "【快手搜索接口使用提示】")
-                    Common.logger("ks-key-word").info(f"快手搜索词数据获取失败,{response['msg']}\n")
-                    return list
-                Common.logger("ks-key-word").info(f"快手搜索词数据获取失败,接口为kuai_shou/keyword\n")
-                return list
-            data_list = response['data']['data']
-            for data in data_list:
-                data = data['feed']
-                photo_id = data["photo_id"]
-                status = sqlCollect.is_used(task_mark, photo_id, mark, channel_id)
-
-                view_count = data["view_count"]
-                share_count = data["share_count"]
-                old_title = data["caption"]  # 标题
-
-                video_percent = '%.4f' % (int(share_count) / int(view_count))
-                duration = data["duration"]
-                duration = int(duration) / 1000
-                log_data = f"user:{keyword},,video_id:{photo_id},,video_url:'',original_title:{old_title},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"
-
-                AliyunLogger.logging(channel_id, name, keyword, photo_id, "扫描到一条视频", "2001", log_data)
-                if status:
-                    AliyunLogger.logging(channel_id, name, keyword, photo_id, "该视频已改造过", "2002", log_data)
-                    continue
-                if float(video_percent) < special:
-                    AliyunLogger.logging(channel_id, name, keyword, photo_id, f"不符合规则:分享/浏览{special}", "2003", log_data)
-
-                    Common.logger("ks-key-word").info(
-                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{photo_id} ,分享:{share_count},浏览{view_count} ,时长:{int(duration)} ")
-                    continue
-                if int(share_count) < share_count_rule:
-                    AliyunLogger.logging(channel_id, name, keyword, photo_id, f"不符合规则:分享小于{share_count_rule}", "2003", log_data)
-
-                    Common.logger("ks-key-word").info(
-                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{photo_id} ,分享:{share_count},浏览{view_count} ,时长:{int(duration)} ")
-                    continue
-                if int(duration) < short_duration_rule or int(duration) > 600:
-                    AliyunLogger.logging(channel_id, name, keyword, photo_id, f"不符合规则:时长不符合规则大于600秒/小于{short_duration_rule}", "2003",
-                                         log_data)
-
-                    Common.logger("ks-key-word").info(
-                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{photo_id} ,分享:{share_count},浏览{view_count} ,时长:{int(duration)} ")
-                    continue
-                video_url, image_url = cls.get_video(photo_id)
-                if video_url:
-                    log_data = f"user:{keyword},,video_id:{photo_id},,video_url:{video_url},,original_title:{old_title},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"
-                    all_data = {"video_id": photo_id, "cover": image_url, "video_url": video_url,
-                                "rule": video_percent,
-                                "old_title": old_title}
-                    list.append(all_data)
-                    AliyunLogger.logging(channel_id, name, keyword, photo_id, "符合规则等待改造", "2004", log_data)
-                else:
-                    AliyunLogger.logging(channel_id, name, photo_id, photo_id, "无法获取到视频链接", "2003")
-                    continue
-            return list
-        except Exception as exc:
-            Common.logger("ks-key-word").info(f"快手搜索词{keyword}获取失败{exc}\n")
-            return list
-
-    @classmethod
-    def get_video(cls, video_id):
-        url = "http://47.236.68.175:8889/crawler/kuai_shou/detail"
-
-        payload = json.dumps({
-            "content_id": str(video_id)
-        })
-        headers = {
-            'Content-Type': 'application/json'
-        }
-
-        response = requests.request("POST", url, headers=headers, data=payload)
-        response = response.json()
-        data = response["data"]["data"]
-        video_url = data["video_url_list"][0]["video_url"]
-        image_url = data["image_url_list"][0]["image_url"]
-        return video_url, image_url
-
-
-if __name__ == '__main__':
-    keyword = '毛主席故居'
-    task_mark = '1'
-    mark = 'pl-gjc'
-    channel_id = '快手搜索'
-    name = '1'
-    task = {'combo': ['最新发布', '近1日', '1分钟内']}
-    KsKeyword.get_key_word(keyword, task_mark, mark, channel_id, name, task)

+ 0 - 124
data_channel/ks_ls.py

@@ -1,124 +0,0 @@
-import random
-import time
-import requests
-import json
-from common import Common, Feishu, AliyunLogger
-from common.sql_help import sqlCollect
-
-class KSLS:
-
-    @classmethod
-    def get_ksls_list(cls, task_mark, url_id, number, mark, channel_id, name):
-        #  快手app
-        url = "http://47.236.68.175:8889/crawler/kuai_shou/blogger"
-        next_cursor = ""
-        try:
-            for i in range(20):
-                payload = json.dumps({
-                    "account_id": url_id,
-                    "sort_type": "最热",
-                    "cursor": next_cursor
-                })
-                headers = {
-                    'Content-Type': 'application/json'
-                }
-                time.sleep(random.randint(1, 5))
-                response = requests.request("POST", url, headers=headers, data=payload)
-                response = response.json()
-                list = []
-                data_all_list = response["data"]
-                if data_all_list == None or len(data_all_list) == 0:
-                    try:
-                        if int(response["cdoe"]) == 27006:
-                            Feishu.finish_bot("kuai_shou/blogger接口"+response["msg"],
-                                              "https://open.feishu.cn/open-apis/bot/v2/hook/575ca6a1-84b4-4a2f-983b-1d178e7b16eb", "【快手 Token 使用提示 】")
-                    except Exception as exc:
-                        return list
-
-                has_more = data_all_list["has_more"]
-                next_cursor = str(data_all_list["next_cursor"])
-
-                data_list = data_all_list["data"]
-                for data in data_list:
-                    photo_id = data["photo_id"]
-                    status = sqlCollect.is_used(task_mark, photo_id, mark, "快手历史")
-
-                    view_count = data["view_count"]
-                    share_count = data["share_count"]
-                    old_title = data["caption"]  # 标题
-
-                    video_percent = '%.4f' % (int(share_count) / (view_count))
-                    duration = data["duration"]
-                    duration = int(duration)/1000
-                    special = float(0.0005)
-                    log_data = f"user:{url_id},,video_id:{photo_id},,video_url:'',original_title:{old_title},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"
-
-                    AliyunLogger.logging(channel_id, name, url_id, photo_id, "扫描到一条视频", "2001", log_data)
-                    if status:
-                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "该视频已改造过", "2002", log_data)
-                        continue
-                    if float(video_percent) < special:
-                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "不符合规则:分享/浏览小于0.0005", "2003", log_data)
-
-                        Common.logger("ks-ls").info(
-                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{photo_id} ,分享:{share_count},浏览{view_count} ,时长:{int(duration)} ")
-                        continue
-                    if int(share_count) < 100:
-                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "不符合规则:分享小于100", "2003", log_data)
-
-                        Common.logger("ks-ls").info(
-                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{photo_id} ,分享:{share_count},浏览{view_count} ,时长:{int(duration)} ")
-                        continue
-                    if int(duration) < 30 or (duration) > 720:
-                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
-
-                        Common.logger("ks-ls").info(
-                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{photo_id} ,分享:{share_count},浏览{view_count} ,时长:{int(duration)} ")
-                        continue
-                    video_url, image_url = cls.get_video(photo_id)
-                    if video_url:
-                        log_data = f"user:{url_id},,video_id:{photo_id},,video_url:{video_url},,original_title:{old_title},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"
-                        all_data = {"video_id": photo_id, "cover": image_url, "video_url": video_url,
-                                    "rule": video_percent,
-                                    "old_title": old_title}
-                        list.append(all_data)
-                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "符合规则等待改造", "2004", log_data)
-
-                        if len(list) == int(number):
-                            Common.logger("ks-ls").info(f"获取快手历史视频总数:{len(list)}\n")
-                            return list
-                    else:
-                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "无法获取到视频链接", "2003", log_data)
-                        continue
-                if has_more == False:
-                    return list
-                return list
-        except Exception as exc:
-            Common.logger("ks-ls").info(f"快手历史数据获取失败:{exc}\n")
-            return list
-
-    @classmethod
-    def get_video(cls, video_id):
-        url = "http://47.236.68.175:8889/crawler/kuai_shou/detail"
-
-        payload = json.dumps({
-            "content_id": str(video_id)
-        })
-        headers = {
-            'Content-Type': 'application/json'
-        }
-
-        response = requests.request("POST", url, headers=headers, data=payload)
-        response = response.json()
-        data = response["data"]["data"]
-        video_url = data["video_url_list"][0]["video_url"]
-        image_url = data["image_url_list"][0]["image_url"]
-        return video_url, image_url
-
-
-
-if __name__ == '__main__':
-    # Feishu.finish_bot('测试',
-    #                   "https://open.feishu.cn/open-apis/bot/v2/hook/575ca6a1-84b4-4a2f-983b-1d178e7b16eb", "【 Token 使用提示 】")
-    # DYLS.get_video("7314923922602954022")
-    KSLS.get_ksls_list("1","3xzicxg2nandemc",1,"1")

+ 0 - 121
data_channel/ks_pc_keyword.py

@@ -1,121 +0,0 @@
-import time
-
-import requests
-import json
-
-from common import Common, AliyunLogger, Feishu, Material
-from common.sql_help import sqlCollect
-from common.userAgent import get_random_user_agent
-
-
-class KsPcKeyword:
-    @classmethod
-    def get_key_word(cls, keyword, task_mark, mark, channel_id, name, task):
-        list = []
-        url = "https://www.kuaishou.com/graphql"
-
-        payload = json.dumps({
-            "operationName": "visionSearchPhoto",
-            "variables": {
-                "keyword": keyword,
-                "pcursor": "",
-                "page": "search"
-            },
-            "query": "fragment photoContent on PhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  riskTagContent\n  riskTagUrl\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  riskTagContent\n  riskTagUrl\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    ...recoPhotoFragment\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n  visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    searchSessionId\n    pcursor\n    aladdinBanner {\n      imgUrl\n      link\n      __typename\n    }\n    __typename\n  }\n}\n"
-        })
-        cookie = Material.get_cookie_data("KsoMsyP2ghleM9tzBfmcEEXBnXg", "U1gySe", "快手搜索-cookie")
-        headers = {
-            'Accept-Language': 'zh-CN,zh;q=0.9',
-            'Cache-Control': 'no-cache',
-            'Connection': 'keep-alive',
-            'Origin': 'https://www.kuaishou.com',
-            'Pragma': 'no-cache',
-            'User-Agent': get_random_user_agent("pc"),
-            'accept': '*/*',
-            'content-type': 'application/json',
-            'Cookie': cookie
-        }
-        try:
-            time.sleep(3)
-            # 代理信息
-            proxy = "http://spkbt3wnzw:cx6R=v5mQuBgqsQ4o7@cn.visitxiangtan.com:30000"
-            proxies = {
-                "http": proxy,
-                "https": proxy
-            }
-            response = requests.request("POST", url, headers=headers, data=payload, proxies=proxies)
-            text = response.text
-            if text:
-                response_dict = json.loads(text)
-                result = response_dict.get('result', None)
-                if result:
-                    log_type = ['liukunyu', 'wangxueke', 'xinxin']
-                    mark_name = ['刘坤宇', '王雪珂', '信欣']
-                    Feishu.bot(log_type, '快手关键词搜索', f'快手关键词搜索cookie过期,请及时更换', mark_name)
-                    time.sleep(10)
-                    return list
-            response = response.json()
-            data_list = response['data']['visionSearchPhoto']['feeds']
-            for data in data_list:
-                data = data['photo']
-                photo_id = data["id"]
-                status = sqlCollect.is_used(task_mark, photo_id, mark, channel_id)
-
-                view_count = data["viewCount"] if "viewCount" in data and data["viewCount"] else 0
-                like_count = data["likeCount"] if "likeCount" in data and data["likeCount"] else 0
-                like_count = cls.convert_to_number(like_count)
-                video_percent = '%.4f' % (int(like_count) / int(view_count))
-                special = 0.015
-                old_title = data["caption"]  # 标题
-                duration = data["duration"]
-                duration = int(duration) / 1000
-                video_url = data["photoUrl"]
-                image_url = data["coverUrl"]
-                log_data = f"user:{keyword},,video_id:{photo_id},,video_url:{video_url},original_title:{old_title},,like_count:{like_count},,view_count:{view_count},,duration:{duration}"
-                AliyunLogger.logging(channel_id, name, keyword, photo_id, "扫描到一条视频", "2001", log_data)
-                if status:
-                    AliyunLogger.logging(channel_id, name, keyword, photo_id, "该视频已改造过", "2001", log_data)
-                    continue
-                if int(view_count) < 1000:
-                    AliyunLogger.logging(channel_id, name, keyword, photo_id, f"不符合规则:浏览小于1000", "2003", log_data)
-                    Common.logger("ks-key-word").info(
-                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{photo_id} ,浏览:{view_count},浏览{view_count} ,时长:{int(duration)} ")
-                    continue
-                if float(video_percent) < special:
-                    AliyunLogger.logging(channel_id, name, keyword, photo_id, f"不符合规则:点赞/浏览{special}", "2003", log_data)
-                    Common.logger("ks-key-word").info(
-                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{photo_id} ,浏览:{view_count},浏览{view_count} ,时长:{int(duration)} ")
-                    continue
-                if int(duration) < 30 or int(duration) > 600:
-                    AliyunLogger.logging(channel_id, name, keyword, photo_id,
-                                         f"不符合规则:时长不符合规则大于600秒/小于30秒", "2003",
-                                         log_data)
-
-                    Common.logger("ks-key-word").info(
-                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{photo_id} ,浏览:{view_count},浏览{view_count} ,时长:{int(duration)} ")
-                    continue
-                AliyunLogger.logging(channel_id, name, keyword, photo_id, "符合规则等待改造", "2004", log_data)
-                all_data = {"video_id": photo_id, "cover": image_url, "video_url": video_url,
-                            "rule": '',
-                            "old_title": old_title}
-                list.append(all_data)
-            return list
-        except Exception as exc:
-            Common.logger("ks-key-word").info(f"快手搜索词{keyword}获取失败{exc}\n")
-            return list
-
-    @classmethod
-    def convert_to_number(cls, value):
-        if value.endswith("万"):
-            return float(value[:-1]) * 10000  # 去掉“万”并乘以 10000
-        return int(value)  # 处理其他格式
-
-
-if __name__ == '__main__':
-    keyword = '毛主席故居'
-    task_mark = '1'
-    mark = 'pl-gjc'
-    channel_id = '快手搜索'
-    name = '1'
-    task = {'combo': ['最新发布', '近1日', '1分钟内']}
-    KsPcKeyword.get_key_word(keyword, task_mark, mark, channel_id, name, task)

+ 0 - 164
data_channel/kuaishou.py

@@ -1,164 +0,0 @@
-import random
-import time
-import requests
-import json
-import urllib3
-from requests.adapters import HTTPAdapter
-
-from common import Feishu, Material, Common, AliyunLogger
-from common.sql_help import sqlCollect
-from data_channel.data_help import dataHelp
-
-
-class KS:
-
-    @classmethod
-    def get_share_count(cls, v_id):
-        url = "http://47.236.68.175:8889/crawler/kuai_shou/detail"
-
-        payload = json.dumps({
-            "content_id": v_id
-        })
-        headers = {
-            'Content-Type': 'application/json'
-        }
-        for i in range(5):
-            try:
-                time.sleep(2)
-                response = requests.request("POST", url, headers=headers, data=payload)
-                response = response.json()
-                if response["code"] == 0:
-                    data = response["data"]["data"]
-                    share_count = data.get("share_count")
-                    return int(share_count)
-            except KeyError as e:
-                Common.logger("ks").info(f"获取分享数据失败:{e}\n")
-        return 0
-
-    @classmethod
-    def get_ks_url(cls, task_mark, url_id, number, mark, feishu_id, cookie_sheet, channel_id, name):
-        list = []
-        pcursor = ""
-        url = "https://www.kuaishou.com/graphql"
-
-        for i in range(3):
-            cookie = Material.get_cookie_data(feishu_id, cookie_sheet, channel_id)
-            time.sleep(random.randint(1, 5))
-            payload = json.dumps({
-                "operationName": "visionProfilePhotoList",
-                "variables": {
-                    "userId": url_id,
-                    "pcursor": pcursor,
-                    "page": "profile"
-                },
-                "query": "fragment photoContent on PhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  riskTagContent\n  riskTagUrl\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  riskTagContent\n  riskTagUrl\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    ...recoPhotoFragment\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
-            })
-            headers = {
-                'accept': '*/*',
-                'content-type': 'application/json',
-                'Origin': 'https://www.kuaishou.com',
-                'Cookie': cookie,
-                'Accept-Language': 'zh-CN,zh;q=0.9',
-                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
-                'Referer': f'https://www.kuaishou.com/profile/{url_id}',
-                'Accept-Encoding': 'gzip, deflate, br',
-                'Connection': 'keep-alive'
-            }
-            urllib3.disable_warnings()
-            s = requests.session()
-            s.mount('http://', HTTPAdapter(max_retries=3))
-            s.mount('https://', HTTPAdapter(max_retries=3))
-            # response = requests.request("POST", url, headers=headers, data=payload, timeout=10)
-            try:
-                response = s.post(url=url, headers=headers, data=payload, verify=False, timeout=10)
-                response.close()
-                if response.status_code != 200:
-                    return list
-                elif "visionProfilePhotoList" not in response.json()["data"]:
-                    if name == '快手品类账号':
-                        Feishu.bot("wangxueke", '机器自动改造消息通知', f'快手-{name}cookie过期,请及时更换', 'wangxueke')
-                        Feishu.bot("liuzhaoheng", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '刘兆恒')
-
-                    else:
-                        Feishu.bot(mark, '机器自动改造消息通知', f'快手-{name}cookie过期,请及时更换', name)
-                    time.sleep(900)
-                    continue
-                elif "feeds" not in response.json()["data"]["visionProfilePhotoList"]:
-                    if name == '快手品类账号':
-                        Feishu.bot("wangxueke", '机器自动改造消息通知', f'快手-{name}cookie过期,请及时更换', 'wangxueke')
-                        Feishu.bot("liuzhaoheng", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '刘兆恒')
-
-                    else:
-                        Feishu.bot(mark, '机器自动改造消息通知', f'快手-{name}cookie过期,请及时更换', name)
-                    time.sleep(900)
-                    continue
-                elif len(response.json()["data"]["visionProfilePhotoList"]["feeds"]) == 0:
-                    if name == '快手品类账号':
-                        Feishu.bot("wangxueke", '机器自动改造消息通知', f'快手-{name}cookie过期,请及时更换', 'wangxueke')
-                        Feishu.bot("liuzhaoheng", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '刘兆恒')
-
-                    else:
-                        Feishu.bot(mark, '机器自动改造消息通知', f'快手-{name}cookie使用频繁无法获取到数据,请及时更换', name)
-                    time.sleep(900)
-                    continue
-                pcursor = response.json()['data']['visionProfilePhotoList']['pcursor']
-                feeds = response.json()['data']['visionProfilePhotoList']['feeds']
-
-                for i in range(len(feeds)):
-                    # try:
-                    #     video_id = feeds[i].get("photo", {}).get("videoResource").get("h264", {}).get("videoId", "")
-                    # except KeyError:
-                    #     video_id = feeds[i].get("photo", {}).get("videoResource").get("hevc", {}).get("videoId", "")
-                    # status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
-                    # if status:
-                    #     continue
-                    video_id = feeds[i].get("photo", {}).get("id", "")
-                    status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
-
-                    share_count = cls.get_share_count(video_id)
-                    old_title = feeds[i].get("photo", {}).get("caption")
-                    cover_url = feeds[i].get('photo', {}).get('coverUrl', "")
-                    video_url = feeds[i].get('photo', {}).get('photoUrl', "")
-                    view_count = int(feeds[i].get('photo', {}).get('viewCount', 0))
-                    realLikeCount = int(feeds[i].get('photo', {}).get('realLikeCount', 0))
-                    video_percent = '%.4f' % (share_count / view_count)
-
-                    duration = dataHelp.video_duration(video_url)
-                    log_data = f"user:{url_id},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"
-                    # log_data = f"user:{url_id},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,view_count:{view_count},,duration:{duration}"
-
-                    AliyunLogger.logging(channel_id, name, url_id, video_id, "扫描到一条视频", "2001", log_data)
-                    Common.logger("ks").info(
-                        f"扫描:{task_mark},用户主页id:{url_id},视频id{video_id} ,播放数:{view_count} ,分享数:{share_count},时长:{duration} ")
-                    if status:
-                        AliyunLogger.logging(channel_id, name, url_id, video_id, "该视频已改造过", "2002", log_data)
-                        continue
-                    special = float(0.001)
-                    if float(video_percent) < special:
-                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享/浏览小于0.001", "2003", log_data)
-                        Common.logger("ks").info(
-                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,播放数:{view_count} ,分享数:{share_count},时长:{duration} ")
-                        continue
-                    if share_count < 500:
-                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享小于500", "2003", log_data)
-                        Common.logger("ks").info(
-                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,播放数:{view_count} ,分享数:{share_count},时长:{duration} ")
-                        continue
-                    if duration < 30 or duration > 720:
-                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
-                        Common.logger("ks").info(
-                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,播放数:{view_count} ,分享数:{share_count},时长:{duration} ")
-                        continue
-                    all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": video_percent, "old_title": old_title}
-                    list.append(all_data)
-                    AliyunLogger.logging(channel_id, name, url_id, video_id, "符合规则等待改造", "2004", log_data)
-                    if len(list) == int(number):
-                        Common.logger(mark).info(f"获取快手视频总数:{len(list)}\n")
-                        return list
-            except Exception as exc:
-                Common.logger("ks").warning(f"{name}的快手获取数据失败:{exc}\n")
-                return list
-        return list
-
-if __name__ == '__main__':
-    KS.get_share_count("5188428384967044201")

+ 0 - 325
data_channel/kuaishouchuangzuozhe.py

@@ -1,325 +0,0 @@
-import json
-import os
-from hashlib import md5
-import requests
-import time
-from urllib.parse import urlencode
-from datetime import datetime, timedelta
-
-from common import Oss, Feishu, Common, AliyunLogger
-from common.sql_help import sqlCollect
-
-headers = {
-    'Accept-Language': 'zh-cn',
-    'Connection': 'keep-alive',
-    'Content-Type': 'application/x-www-form-urlencoded',
-    'Host': 'creator-app.kuaishou.com',
-    'User-Agent': 'kwai-android aegon/3.12.1',
-}
-class KsFeedVideo:
-    CATEGORY_IDS = {
-        1: "生活",
-        2: "才艺",
-        # 3: "时尚",
-        # 4: "宠物",
-        5: "读书",
-        # 6: "二次元",
-        7: "家居",
-        # 8: "数码",
-        9: "搞笑",
-        10: "健康",
-        11: "旅游",
-        12: "美食",
-        # 13: "美妆",
-        # 14: "汽车",
-        15: "亲子",
-        16: "情感",
-        # 17: "三农",
-        # 18: "摄影",
-        # 19: "舞蹈",
-        # 20: "颜值",
-        # 21: "音乐",
-        # 22: "影视",
-        # 23: "短剧",
-        # 24: "游戏",
-        25: "运动",
-        26: "资讯",
-        27: "人文"
-    }
-    current_category_index = 0
-
-    @staticmethod
-    def calculate_sig(data):
-        src = ''.join([f'{key}={data[key]}' for key in sorted(data.keys())])
-        salt = '08d8eece8e83'
-        return md5(f'{src}{salt}'.encode()).hexdigest()
-
-
-    """
-    切换品类
-    """
-    @classmethod
-    def switch_category(cls):
-        if cls.current_category_index >= len(cls.CATEGORY_IDS):
-            cls.current_category_index = 0
-        category_id = list(cls.CATEGORY_IDS.keys())[cls.current_category_index]
-        url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/category/confirm/optimize'
-        data = {
-            'isRecommendChange': False,
-            'categoryId': category_id,
-            # 'kuaishou.api_st': "Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB",
-            'kuaishou.api_st': 'Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB;region_ticket=RT_FAC86448E713714136C088FFCC4431455D1FA7E05A6D25DAD4E4B8CC011FB6E8294169DD9',
-            'client_key': '214c9979',
-        }
-        sig = cls.calculate_sig(data)
-        data['sig'] = sig
-        response = requests.post(url=url, headers=headers, data=data)
-        body = response.content.decode()
-        cls.current_category_index += 1
-        return body
-
-    """
-    获取feed流信息
-    """
-    @classmethod
-    def get_feed_list(cls):
-        cls.switch_category()
-        url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/feed'
-        data = {
-            'cs': False,
-            'kuaishou.api_st': 'Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB;region_ticket=RT_FAC86448E713714136C088FFCC4431455D1FA7E05A6D25DAD4E4B8CC011FB6E8294169DD9',
-            # 'kuaishou.api_st': "Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB",
-            'client_key': '214c9979',
-        }
-        sig = cls.calculate_sig(data)
-        data['sig'] = sig
-        response = requests.post(url=url, headers=headers, data=data)
-        body = response.content.decode()
-        return body
-
-    """
-    获取观众画像
-    """
-    @classmethod
-    def analyze_photo(cls, photo_id):
-        url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/photo/analysis'
-
-        headers = {
-            'Accept-Language': 'zh-cn',
-            'Connection': 'keep-alive',
-            'Content-Type': 'application/x-www-form-urlencoded',
-            'Host': 'creator-app.kuaishou.com',
-        }
-        data = {
-            'photoId': photo_id,
-            'client_key': '214c9979',
-        }
-        sig = cls.calculate_sig(data)
-        data['sig'] = sig
-        response = requests.post(url=url, headers=headers, data=data)
-        body = response.content.decode()
-        json_body = json.loads(body)
-        user_range = json_body['data']['play']['userRange']
-        if len(user_range) == 0:
-            return False, "无画像"
-        age_range = user_range['ageRange']
-        value = age_range[5]['value']
-        value = int(value.strip('%'))
-        if value >= 40:
-            return False, value
-        else:
-            return True, value
-
-    """
-    视频时长转换成秒
-    """
-    @classmethod
-    def milliseconds_to_seconds(cls, milliseconds):
-        seconds = milliseconds / 1000
-        return int(seconds)
-
-    """
-    判断当前视频是否在90天内
-    """
-    @classmethod
-    def get_video_data(cls, timestamp_str):
-        timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
-        # 获取当前时间
-        current_time = datetime.now()
-        difference = current_time - timestamp
-        if difference <= timedelta(days=90):
-            return False
-        else:
-            return True
-    """
-    获取票圈ID
-    """
-    @classmethod
-    def get_id_by_category(cls, category_name):
-        category_list = [
-            {"id": 71502003, "category": "生活"},
-            {"id": 71502004, "category": "才艺"},
-            {"id": 71502005, "category": "时尚"},
-            {"id": 71502006, "category": "宠物"},
-            {"id": 71502007, "category": "读书"},
-            {"id": 71502008, "category": "二次元"},
-            {"id": 71502009, "category": "家居"},
-            {"id": 71502010, "category": "数码"},
-            {"id": 71502011, "category": "搞笑"},
-            {"id": 71502012, "category": "健康"},
-            {"id": 71502013, "category": "旅游"},
-            {"id": 71502014, "category": "美食"},
-            {"id": 71502015, "category": "美妆"},
-            {"id": 71502016, "category": "汽车"},
-            {"id": 71502018, "category": "亲子"},
-            {"id": 71502019, "category": "情感"},
-            {"id": 71502020, "category": "三农"},
-            {"id": 71502021, "category": "摄影"},
-            {"id": 71502022, "category": "舞蹈"},
-            {"id": 71502023, "category": "颜值"},
-            {"id": 71502024, "category": "音乐"},
-            {"id": 71502025, "category": "影视"},
-            {"id": 71502026, "category": "短剧"},
-            {"id": 71502027, "category": "游戏"},
-            {"id": 71502028, "category": "运动"},
-            {"id": 71502029, "category": "资讯"},
-            {"id": 71502030, "category": "人文"}
-        ]
-        for category in category_list:
-            if category['category'] == category_name:
-                return category['id']
-        return None
-
-    """
-    新生成视频上传到对应账号下
-    """
-    @classmethod
-    def insert_piaoquantv(cls, new_video_path, new_title, n_id, cover):
-
-        url = "https://vlogapi.piaoquantv.com/longvideoapi/crawler/video/send"
-        headers = {
-            'User-Agent': 'PQSpeed/486 CFNetwork/1410.1 Darwin/22.6.0',
-            'cookie': 'JSESSIONID=4DEA2B5173BB9A9E82DB772C0ACDBC9F; JSESSIONID=D02C334150025222A0B824A98B539B78',
-            'referer': 'http://appspeed.piaoquantv.com',
-            'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
-            'accept-language': 'zh-CN,zh-Hans;q=0.9',
-            'Content-Type': 'application/x-www-form-urlencoded'
-        }
-        payload = {
-            'coverImgPath': cover,
-            'deviceToken': '9ef064f2f7869b3fd67d6141f8a899175dddc91240971172f1f2a662ef891408',
-            'fileExtensions': 'MP4',
-            'loginUid': n_id,
-            'networkType': 'Wi-Fi',
-            'platform': 'iOS',
-            'requestId': 'fb972cbd4f390afcfd3da1869cd7d001',
-            'sessionId': '362290597725ce1fa870d7be4f46dcc2',
-            'subSessionId': '362290597725ce1fa870d7be4f46dcc2',
-            'title': new_title,
-            'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
-            'uid': n_id,
-            'versionCode': '486',
-            'versionName': '3.4.12',
-            'videoFromScene': '1',
-            'videoPath': new_video_path,
-            'viewStatus': '1'
-        }
-        encoded_payload = urlencode(payload)
-        response = requests.request("POST", url, headers=headers, data=encoded_payload)
-        data = response.json()
-        code = data["code"]
-        if code == 0:
-            new_video_id = data["data"]["id"]
-            return new_video_id
-        else:
-            return None
-
-    @classmethod
-    def get_data(cls, channel_id, name):
-        number = 1
-        list = []
-        for category_id, category_name in cls.CATEGORY_IDS.items():
-            try:
-                feed_data = cls.get_feed_list()
-                feed_data = json.loads(feed_data)
-                feeds = feed_data['feeds']
-                for feed in feeds:
-                    photo_id = feed["photo_id"]  # 视频ID
-                    status = sqlCollect.ks_is_used(photo_id)
-
-                    user_name = feed["user_name"]  # 用户名
-                    user_sex = feed["user_sex"]  # 性别 F为女,U为男
-                    time_data = feed["time"]  # 发布时间
-                    caption = feed["caption"]  # 标题
-                    view_count = feed["view_count"]  # 浏览数
-                    like_count = feed["like_count"]  # 点赞数
-                    share_count = feed["share_count"]  # 分享数
-                    duration = feed["duration"]  # 时长/秒
-                    duration = cls.milliseconds_to_seconds(duration)
-                    main_mv_url = feed["main_mv_url"]  # 视频链接
-                    thumbnail_url = feed["thumbnail_url"]  # 视频封面
-                    user_id = feed["user_id"]  # 用户id非用户主页id
-                    log_data = f"user:{user_name},,video_id:{photo_id},,video_url:{main_mv_url},,original_title:{caption},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"
-                    AliyunLogger.logging(channel_id, name, user_name, photo_id, "扫描到一条视频", "2001", log_data)
-                    value, age = cls.analyze_photo(photo_id)
-                    if status:
-                        AliyunLogger.logging(channel_id, name, user_name, photo_id, "该视频已改造过", "2001", log_data)
-                        continue
-                    if value:
-                        AliyunLogger.logging(channel_id, name, user_name, photo_id, f"不符合规则:50+年龄占比小于40%,实际占比{age}", "2003", log_data)
-                        sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count,
-                                                  share_count, duration, main_mv_url, thumbnail_url, user_id, '1',
-                                                  photo_id, category_name, age, oss_object=None, video_uid=None)
-
-                        continue
-                    video_percent = '%.4f' % (share_count / view_count)
-                    special = float(0.0005)
-                    if float(video_percent) < special:
-                        AliyunLogger.logging(channel_id, name, user_name, photo_id, "不符合规则:分享/浏览小于0.0005", "2003", log_data)
-
-                        sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count, share_count, duration, main_mv_url, thumbnail_url, user_id, '1', photo_id, category_name, age, oss_object=None, video_uid=None)
-                        continue
-                    if share_count < 100:
-                        AliyunLogger.logging(channel_id, name, user_name, photo_id, "不符合规则:分享小于100", "2003", log_data)
-
-                        sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count,
-                                                  share_count, duration, main_mv_url, thumbnail_url, user_id, '1',
-                                                  photo_id, category_name, age, oss_object=None, video_uid=None)
-                        continue
-                    if duration < 30 or duration > 720:
-                        AliyunLogger.logging(channel_id, name, user_name, photo_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
-                        sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count,
-                                                  share_count, duration, main_mv_url, thumbnail_url, user_id, '1',
-                                                  photo_id, category_name, age, oss_object=None, video_uid=None)
-                        continue
-
-                    sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count,
-                                              like_count, share_count, duration, main_mv_url, thumbnail_url,
-                                              user_id, '0', photo_id, category_name, age, oss_object=None, video_uid=None)
-                    all_data = {"video_id": photo_id, "cover": thumbnail_url, "video_url": main_mv_url, "rule": video_percent,
-                                "old_title": caption}
-                    AliyunLogger.logging(channel_id, name, user_name, photo_id, "符合规则等待改造", "2004", log_data)
-
-                    list.append(all_data)
-                    current_time = datetime.now()
-                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
-                    values = [
-                        [category_name, user_name, photo_id, user_sex, caption, view_count, like_count, share_count, duration,
-                         main_mv_url, thumbnail_url, user_id, age, '', '', time_data, formatted_time]]
-                    Feishu.insert_columns("PlcisKhObhzmBothRutc65sJnph", "8fQxFv", "ROWS", 2, 3)
-                    time.sleep(0.5)
-                    Feishu.update_values("PlcisKhObhzmBothRutc65sJnph", "8fQxFv", "A3:Z3", values)
-                    if len(list) == int(number):
-                        Common.logger("ks-czz").info(f"获取快手创作者视频总数:{len(list)}\n")
-                        return list
-                time.sleep(5)
-            except Exception as exc:
-                print(f"异常信息: {exc}")
-                return list
-        return list
-
-
-
-# Example usage:
-if __name__ == "__main__":
-    KsFeedVideo.get_data(1)

+ 0 - 136
data_channel/shipinhao.py

@@ -1,136 +0,0 @@
-import json
-import random
-import time
-
-import requests
-
-from common import Common, AliyunLogger, Feishu
-from common.sql_help import sqlCollect
-from data_channel.data_help import dataHelp
-
-
-class SPH:
-
-    @classmethod
-    def find_target_user(cls, name, user_list):
-        """
-        在搜索到到账号列表中找目标列表
-        """
-        for obj in user_list:
-            if obj['contact']["nickname"] == name:
-                return obj
-            else:
-                continue
-        return False
-
-    @classmethod
-    def get_account_id(cls, account_name):
-        channel = 'shipinhao'
-        history_id = sqlCollect.get_history_id(channel, account_name)
-        if history_id:
-            return history_id
-        else:
-            url = "http://47.236.68.175:8889/crawler/wei_xin/shi_pin_hao/account_info"
-            payload = json.dumps({
-                "account_name": account_name
-            })
-            headers = {
-                'Content-Type': 'application/json'
-            }
-
-            response = requests.request("POST", url, headers=headers, data=payload)
-            response = response.json()
-            if response['code'] == 0:
-                data = response['data']['data']
-
-                channel_account_id = data['channel_account_id']
-                if channel_account_id:
-                    sqlCollect.insert_history_id(account_name, channel_account_id, channel)
-
-
-                    return channel_account_id
-                else:
-                    return False
-            else:
-                Feishu.finish_bot("shi_pin_hao/account_info接口获取失败",
-                                  "https://open.feishu.cn/open-apis/bot/v2/hook/575ca6a1-84b4-4a2f-983b-1d178e7b16eb",
-                                  "【视频号接口异常提示 】")
-                return False
-
-    @classmethod
-    def get_sph_url(cls, task_mark, url_id, number, mark, channel_id, name):
-        account_id = cls.get_account_id(url_id)
-        if account_id:
-            url = "http://47.236.68.175:8889/crawler/wei_xin/shi_pin_hao/blogger"
-            next_cursor = ""
-            list = []
-            # for i in range(10):
-            headers = {
-                'Content-Type': 'application/json'
-            }
-            payload = json.dumps({
-                "account_id": account_id,
-                "cursor": next_cursor
-            })
-            try:
-                response = requests.request("POST", url, headers=headers, data=payload)
-                time.sleep(random.randint(1, 5))
-                res_json = response.json()
-                if res_json['code'] == 0:
-                    # next_cursor = res_json['data']['next_cursor']
-                    data_lsit = res_json['data']['data']
-                    if data_lsit == None:
-                        return list
-                    for obj in data_lsit:
-                        objectId = obj['id']
-                        status = sqlCollect.is_used(task_mark, objectId, mark, "视频号")
-
-                        old_title = obj['objectDesc']['description']
-                        url_p = obj['objectDesc']['media'][0]['Url']
-                        url_token = obj['objectDesc']['media'][0]['urlToken']
-                        video_url = f"{url_p}{url_token}"
-                        decode_key = obj['objectDesc']['media'][0]['decodeKey']
-                        cover = obj['objectDesc']['media'][0]['coverUrl']
-
-                        share_cnt = int(obj['forwardCount'])  # 分享
-                        like_cnt = int(obj['likeCount'])  # 点赞
-                        duration = int(obj['objectDesc']['media'][0]['VideoPlayLen'])
-                        # duration = int(duration_ms) / 1000
-
-                        log_data = f"user:{url_id},,video_id:{objectId},,video_url:{video_url},,original_title:{old_title},,share_count:{share_cnt},,like_count:{like_cnt},,duration:{duration}"
-                        AliyunLogger.logging(channel_id, name, url_id, objectId, "扫描到一条视频", "2001", log_data)
-                        Common.logger("sph").info(
-                            f"扫描:{task_mark},用户主页id:{url_id},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt}")
-                        if status:
-                            AliyunLogger.logging(channel_id, name, url_id, objectId, "该视频已改造过", "2002", log_data)
-                            continue
-                        video_percent = '%.2f' % (share_cnt / like_cnt)
-                        special = float(0.25)
-                        if like_cnt >= 30000 or like_cnt >= 50000 or (share_cnt >= 300 and float(video_percent) >= special):
-                            if int(duration) < 30 or int(duration) > 720:
-                                Common.logger("sph").info(
-                                    f"任务:{task_mark},用户主页id:{url_id},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration} ")
-                                AliyunLogger.logging(channel_id, name, url, objectId, "不符合规则:时长不符合规则大于720秒/小于30秒",
-                                                     "2003", log_data)
-
-                                continue
-                            all_data = {"video_id": objectId, "cover": cover, "video_url": video_url, "rule": video_percent, "old_title": old_title, "decode_key": decode_key}
-                            list.append(all_data)
-                            AliyunLogger.logging(channel_id, name, url_id, objectId, "符合规则等待改造", "2004", log_data)
-                            if len(list) == int(number):
-                                Common.logger(mark).info(f"获取视频号视频总数:{len(list)}\n")
-                                return list
-                        else:
-                            AliyunLogger.logging(channel_id, name, url_id, objectId, "不符合规则:点赞小于30000/50000 或 分享/点赞小于0.25和分享小于300", "2003", log_data)
-
-                            Common.logger("sph").info(
-                                     f"不符合规则:{task_mark},用户主页id:{url_id},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt}")
-                            continue
-                    return list
-            except Exception as e:
-                print(e)
-                return list
-
-
-if __name__ == '__main__':
-    SPH.get_sph_url('1',"人民日报",'10','2',"视频号",'视频号品类账号')

+ 0 - 48
data_channel/shipinhaodandian.py

@@ -1,48 +0,0 @@
-from common import AliyunLogger
-from common.sql_help import sqlCollect
-
-
-class SPHDD:
-
-    @classmethod
-    def get_sphdd_data(cls, url, channel_id, name):
-        list = []
-        try:
-            data_list = sqlCollect.get_shp_dd_data(url)
-
-            if data_list:
-                for data in data_list:
-                    cover_url = data[4]
-                    video_url = data[5]
-                    if video_url and cover_url:
-                        video_id = data[0]
-                        old_title = data[1]
-                        # author_id = data[2]
-                        author_name = data[3]
-                        video_duration = data[6]
-                        from_user_id = data[7]
-                        from_user_name = data[8]
-                        from_group_id = data[9]
-                        from_group_name = data[10]
-                        source = data[11]
-                        wx_msg = data[12]
-                        is_encrypted = data[13]
-                        decode_key = data[14]
-                        log_data = f"user:{url},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title}"
-                        AliyunLogger.logging(f"{channel_id}-{source}", name, url, video_id, "扫描到一条视频", "2001", log_data)
-                        AliyunLogger.logging(f"{channel_id}-{source}", name, url, video_id, "符合规则等待改造", "2004", log_data)
-                        all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": author_name,
-                                    "old_title": old_title, "from_user_name": from_user_name, "from_group_name": from_group_name, "source": source, "wx_msg": wx_msg, "is_encrypted": is_encrypted, "decode_key": decode_key}
-                        list.append(all_data)
-                return list
-            else:
-                return list
-        except Exception:
-            return list
-
-
-
-
-if __name__ == '__main__':
-
-    SPHDD.get_sphdd_data("刘坤宇", '', '')

+ 0 - 89
data_channel/sph_keyword.py

@@ -1,89 +0,0 @@
-import re
-import time
-
-import requests
-import json
-
-from common import Common, Feishu, AliyunLogger
-from common.sql_help import sqlCollect
-
-
-class SphKeyword:
-
-    @classmethod
-    def time_str_to_seconds(cls, time_str):
-        # 分钟和秒用 ":" 分隔
-        minutes, seconds = map(int, time_str.split(":"))
-        # 转换为秒
-        total_seconds = minutes * 60 + seconds
-        return total_seconds
-
-    @classmethod
-    def get_key_word(cls, keyword, task_mark, mark, channel_id, name):
-        url = "http://47.236.68.175:8889/crawler/wei_xin/shi_pin_hao/keyword"
-        list = []
-        payload = json.dumps({
-            "keyword": keyword,
-            "sort": "不限",
-            "cursor": ""
-        })
-        headers = {
-            'Content-Type': 'application/json'
-        }
-        try:
-            time.sleep(1)
-            response = requests.request("POST", url, headers=headers, data=payload)
-            response = response.json()
-            code = response['code']
-            if code != 0:
-                Feishu.finish_bot(f"shi_pin_hao/keyword {response['msg']}",
-                                  "https://open.feishu.cn/open-apis/bot/v2/hook/575ca6a1-84b4-4a2f-983b-1d178e7b16eb",
-                                  "【视频号搜索接口使用提示】")
-                Common.logger("sph-key-word").info(f"快手搜索词数据获取失败,{response['msg']}\n")
-                return list
-            data_list = response['data']['data']
-            for data in data_list:
-                items = data['items'][0]
-                video_id = data["boxID"]
-                duration = items["duration"]
-                if duration == '' or duration == None:
-                    duration = "00:01"
-                duration = cls.time_str_to_seconds(duration)
-                digg_count = items.get('likeNum', "0")
-                if digg_count == '10万+':
-                    digg_count = '100000'
-                old_title =items.get('title', "")
-                old_title = re.sub(r'<em.*?>.*?</em>', '', old_title)
-                cover_url = items["image"]
-                video_url = items["videoUrl"]
-                log_data = f"user:{keyword},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,digg_count:{digg_count},,duration:{duration}"
-                AliyunLogger.logging(channel_id, name, keyword, video_id, "扫描到一条视频", "2001", log_data)
-                Common.logger("sph-key-word").info(
-                    f"扫描:{task_mark},搜索词:{keyword},视频id{video_id},点赞{digg_count}")
-                status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
-                if status:
-                    AliyunLogger.logging(channel_id, name, keyword, video_id, "该视频已改造过", "2002", log_data)
-                    continue
-                if int(digg_count) < 2000:
-                    AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:点赞小于2000", "2003",
-                                         log_data)
-                    Common.logger("sph-key-word").info(
-                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} ,点赞{digg_count} ,时长:{int(duration)} ")
-                    continue
-                if int(duration) < 30 or int(duration) > 900:
-                    AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:时长不符合规则大于900秒/小于30秒", "2003", log_data)
-                    Common.logger("sph-key-word").info(
-                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} 点赞{digg_count} ,时长:{int(duration)} ")
-                    continue
-                all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": '',
-                            "old_title": old_title}
-                list.append(all_data)
-                AliyunLogger.logging(channel_id, name, keyword, video_id, "符合规则等待改造", "2004", log_data)
-            return list
-        except Exception as exc:
-            Common.logger("sph-key-word").info(f"视频号搜索词{keyword}获取失败{exc}\n")
-            return list
-
-
-if __name__ == '__main__':
-    SphKeyword.get_key_word('最有钱的地方', '', '', '', '')

+ 0 - 66
data_channel/sph_ls.py

@@ -1,66 +0,0 @@
-from common import Common, AliyunLogger
-from common.sql_help import sqlCollect
-
-
-class SPHLS:
-
-    @classmethod
-    def get_sphls_data(cls, task_mark, url, number, mark, channel_id, name):
-        data_list = sqlCollect.sph_data_info_list(url)
-        list = []
-        if data_list:
-            for data in data_list:
-                video_id = data[0]
-                status = sqlCollect.is_used(task_mark,video_id, mark, "视频号历史")
-
-                old_title = data[1]
-                share_cnt = int(data[2])
-                like_cnt = int(data[3])
-                oss_url = data[4]
-                oss_cover = data[5]
-                duration = int(float(data[6]))
-                Common.logger("sph-ls").info(
-                    f"扫描:{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt},时长:{duration}")
-                log_data = f"user:{url},,video_id:{video_id},,video_url:{oss_url},,original_title:{old_title},,share_count:{share_cnt},,like_count:{like_cnt},,duration:{duration}"
-                AliyunLogger.logging(channel_id, name, url, video_id, "扫描到一条视频", "2001", log_data)
-
-                if status:
-                    AliyunLogger.logging(channel_id, name, url, video_id, "该视频已改造过", "2002", log_data)
-                    continue
-                if share_cnt < 300:
-                    AliyunLogger.logging(channel_id, name, url, video_id, "不符合规则:分享小于300", "2003", log_data)
-
-                    Common.logger("sph-ls").info(
-                        f"任务:{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration} ")
-                    continue
-                if share_cnt < like_cnt:
-                    AliyunLogger.logging(channel_id, name, url, video_id, "不符合规则:分享小于点赞", "2003", log_data)
-
-                    Common.logger("sph-ls").info(
-                        f"任务:{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration} ")
-                    continue
-                if duration < 30 or duration > 720:
-                    AliyunLogger.logging(channel_id, name, url, video_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003",
-                                         log_data)
-                    Common.logger("sph-ls").info(
-                        f"任务:{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration} ")
-                    continue
-                all_data = {"video_id": video_id, "cover": oss_cover, "video_url": oss_url, "rule": '',
-                            "old_title": old_title}
-                list.append(all_data)
-                AliyunLogger.logging(channel_id, name, url, video_id, "符合规则等待改造", "2004", log_data)
-                if len(list) == int(number):
-                    Common.logger("sph-ls").info(f"获取视频号视频总数:{len(list)}\n")
-                    return list
-            return list
-        else:
-            Common.logger("sph-ls").info(f"{url}无数据\n")
-            return list
-
-
-
-
-
-
-if __name__ == '__main__':
-    SPHLS.get_sphls_data(1,2,3,4)