
Add Aliyun logging (增加阿里云日志)

zhangyong, 7 months ago
parent commit 069dd8448c

+ 2 - 1
common/__init__.py

@@ -3,4 +3,5 @@ from .aliyun_oss import Oss
 from .feishu_form import Material
 from .feishu_utils import Feishu
 from .mysql_db import MysqlHelper
-from .redis import SyncRedisHelper
+from .redis import SyncRedisHelper
+from .aliyun_log import AliyunLogger

+ 73 - 0
common/aliyun_log.py

@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+"""
+公共方法,包含:生成log / 删除log
+"""
+import json
+from datetime import date, timedelta
+from datetime import datetime
+from typing import Optional
+
+from aliyun.log import PutLogsRequest, LogClient, LogItem
+
+proxies = {"http": None, "https": None}
+
+
+class AliyunLogger:
+    # 统一获取当前时间 <class 'datetime.datetime'>  2022-04-14 20:13:51.244472
+    now = datetime.now()
+    # 昨天 <class 'str'>  2022-04-13
+    yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
+    # 今天 <class 'datetime.date'>  2022-04-14
+    today = date.today()
+    # 明天 <class 'str'>  2022-04-15
+    tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+
+    # 写入阿里云日志
+    @staticmethod
+    def logging(channel: str,
+            principal: str,
+            channel_user: str,
+            channel_video_id: str,
+            message: str,
+            code: str,
+            data: Optional[str] = None,
+            pq_video_id: Optional[str] = None):
+        """
+        写入阿里云日志
+        测试库: https://sls.console.aliyun.com/lognext/project/crawler-log-dev/logsearch/crawler-log-dev
+        正式库: https://sls.console.aliyun.com/lognext/project/crawler-log-prod/logsearch/crawler-log-prod
+        """
+        accessKeyId = "LTAIWYUujJAm7CbH"
+        accessKey = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P"
+
+        project = "crawler-log-prod"
+        logstore = "rewriting-log"
+        endpoint = "cn-hangzhou.log.aliyuncs.com"
+        if data:
+            data = dict(item.split(":", 1) for item in data.split(","))
+        contents = [
+            ("principal", principal),
+            ("channel", channel),
+            ("channel_user", str(channel_user) if channel_user is not None else ""),
+            ("channel_video_id", str(channel_video_id) if channel_video_id is not None else ""),
+            ("message", str(message) if message is not None else ""),
+            ("code", str(code) if code is not None else ""),
+            ("data", json.dumps(data, ensure_ascii=False) if data else ""),
+            ("pq_video_id", pq_video_id if pq_video_id else "")
+        ]
+        # 创建 LogClient 实例
+        client = LogClient(endpoint, accessKeyId, accessKey)
+        log_group = []
+        log_item = LogItem()
+        log_item.set_contents(contents)
+        log_group.append(log_item)
+        # 写入日志
+        request = PutLogsRequest(
+            project=project,
+            logstore=logstore,
+            topic="",
+            source="",
+            logitems=log_group,
+            compress=False,
+        )
+        client.put_logs(request)
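
A minimal usage sketch of the new logger, with all values hypothetical: `logging` is a static method, so it is called directly on the class, and the optional `data` argument is a comma-separated `key:value` string that the method splits into a dict and serialises into the SLS record as JSON.

```python
# Hypothetical call, shaped like the crawler call sites in the files below.
from common import AliyunLogger

log_data = "user:example_uid,video_id:1234567890,share_count:321,digg_count:1200,duration:65"
AliyunLogger.logging(
    channel="抖音",
    principal="example_owner",   # 负责人; placeholder value
    channel_user="example_uid",
    channel_video_id="1234567890",
    message="扫描到一条视频",
    code="2001",
    data=log_data,  # parsed with dict(item.split(":", 1) for item in data.split(","))
)
```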

+ 2 - 2
common/sql_help.py

@@ -36,8 +36,8 @@ class sqlCollect():
         """
         data = MysqlHelper.get_values(sql, (str(video_id), task_mark, mark_name, channel))
         if len(data) == 0 or data == ():
-            return True
-        return False
+            return False
+        return True
 
     @classmethod
     def get_history_id(cls, channel, url):
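
The return values of `is_used` are inverted here, so its meaning flips for every caller: it now answers "was this video already processed?" rather than "is it safe to use?". A short illustration of the new caller contract (the loop and names are illustrative, not code from the repo):

```python
from common.sql_help import sqlCollect

def filter_new_videos(task_mark, mark, channel, video_ids):
    fresh = []
    for video_id in video_ids:
        # True now means a matching row exists -> already rewritten -> skip,
        # which the crawlers below log as code "2001" / "该视频已改造过".
        if sqlCollect.is_used(task_mark, video_id, mark, channel):
            continue
        fresh.append(video_id)
    return fresh
```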

+ 47 - 37
data_channel/douyin.py

@@ -5,7 +5,7 @@ import time
 import requests
 import urllib3
 from requests.adapters import HTTPAdapter
-from common import Material, Common, Feishu
+from common import Material, Common, Feishu, AliyunLogger
 from common.sql_help import sqlCollect
 from data_channel.data_help import dataHelp
 from data_channel.douyin_help import DouYinHelper
@@ -55,7 +55,7 @@ class DY:
             data = obj.get('aweme_list', [])
             if data == [] and len(data) == 0:
                 if name == '抖音品类账号' or name == '抖音品类账号-1':
-                    Feishu.bot("wangxueke", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', 'wangxueke')
+                    Feishu.bot("liukunyu", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', 'liukunyu')
                 else:
                     Feishu.bot(mark, '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', name)
                 return list
@@ -66,41 +66,51 @@ class DY:
                     # is_top = data[i].get('is_top')  # 是否置顶
                     video_id = data[i].get('aweme_id')  # 文章id
                     status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
+
+                    video_uri = data[i].get('video', {}).get('play_addr', {}).get('uri')
+                    ratio = f'{data[i].get("video", {}).get("height")}p'
+                    video_url = f'https://www.iesdouyin.com/aweme/v1/play/?video_id={video_uri}&ratio={ratio}&line=0'  # 视频链接
+                    digg_count = int(data[i].get('statistics').get('digg_count'))  # 点赞
+                    # comment_count = int(data[i].get('statistics').get('comment_count'))  # 评论
+                    share_count = int(data[i].get('statistics').get('share_count'))  # 转发
+                    old_title = data[i].get('desc', "").strip().replace("\n", "") \
+                        .replace("/", "").replace("\\", "").replace("\r", "") \
+                        .replace(":", "").replace("*", "").replace("?", "") \
+                        .replace("?", "").replace('"', "").replace("<", "") \
+                        .replace(">", "").replace("|", "").replace(" ", "") \
+                        .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
+                        .replace("'", "").replace("#", "").replace("Merge", "")
+                    duration = dataHelp.video_duration(video_url)
+                    Common.logger("dy").info(
+                        f"扫描:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count}")
+                    log_data = f"user:{url_id},video_id:{video_id},video_url:{video_url},original_title:{old_title},share_count:{share_count},digg_count:{digg_count},duration:{duration}"
+                    AliyunLogger.logging(channel_id, name, url_id, video_id, "扫描到一条视频", "2001", log_data)
                     if status:
-                        video_uri = data[i].get('video', {}).get('play_addr', {}).get('uri')
-                        ratio = f'{data[i].get("video", {}).get("height")}p'
-                        video_url = f'https://www.iesdouyin.com/aweme/v1/play/?video_id={video_uri}&ratio={ratio}&line=0'  # 视频链接
-                        digg_count = int(data[i].get('statistics').get('digg_count'))  # 点赞
-                        # comment_count = int(data[i].get('statistics').get('comment_count'))  # 评论
-                        share_count = int(data[i].get('statistics').get('share_count'))  # 转发
-                        old_title = data[i].get('desc', "").strip().replace("\n", "") \
-                            .replace("/", "").replace("\\", "").replace("\r", "") \
-                            .replace(":", "").replace("*", "").replace("?", "") \
-                            .replace("?", "").replace('"', "").replace("<", "") \
-                            .replace(">", "").replace("|", "").replace(" ", "") \
-                            .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
-                            .replace("'", "").replace("#", "").replace("Merge", "")
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "该视频已改造过", "2001", log_data)
+                        continue
+                    if share_count < 200:
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享小于200", "2003", log_data)
+                        Common.logger("dy").info(
+                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count}")
+                        continue
+                    video_percent = '%.2f' % (share_count / digg_count)
+                    special = float(0.15)
+                    if float(video_percent) < special:
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享/点赞小于0.15", "2003", log_data)
+                        Common.logger("dy").info(
+                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ")
+                        continue
+
+                    if int(duration) < 30 or int(duration) > 720:
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
                         Common.logger("dy").info(
-                            f"扫描:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count}")
-                        if share_count < 200:
-                            Common.logger("dy").info(
-                                f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count}")
-                            continue
-                        video_percent = '%.2f' % (share_count / digg_count)
-                        special = float(0.15)
-                        if float(video_percent) < special:
-                            Common.logger("dy").info(
-                                f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ")
-                            continue
-                        duration = dataHelp.video_duration(video_url)
-                        if int(duration) < 30 or int(duration) > 720:
-                            Common.logger("dy").info(
-                                f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{duration} ")
-                            continue
-                        cover_url = data[i].get('video').get('cover').get('url_list')[0]  # 视频封面
-                        all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": video_percent, "old_title": old_title}
-                        list.append(all_data)
-                        if len(list) == int(number):
-                            Common.logger(mark).info(f"获取抖音视频总数:{len(list)}\n")
-                            return list
+                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{duration} ")
+                        continue
+                    AliyunLogger.logging(channel_id, name, url_id, video_id, "符合规则等待改造", "2004", log_data)
+                    cover_url = data[i].get('video').get('cover').get('url_list')[0]  # 视频封面
+                    all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": video_percent, "old_title": old_title}
+                    list.append(all_data)
+                    if len(list) == int(number):
+                        Common.logger(mark).info(f"获取抖音视频总数:{len(list)}\n")
+                        return list
         return list
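
The former compound filter is unpacked into one check per threshold so that each rejection can be written to SLS with its own "2003" reason; the acceptance criteria themselves are unchanged. A condensed sketch of the rule as one predicate (thresholds and rounding copied from the code above; the zero-like guard is an addition for this sketch):

```python
def dy_video_ok(share_count: int, digg_count: int, duration: float) -> bool:
    """抖音 rule: share >= 200, share/digg >= 0.15 (rounded to 2 decimals), 30s <= duration <= 720s."""
    if share_count < 200:
        return False
    if digg_count == 0:  # guard added here; the code above divides directly
        return False
    if float('%.2f' % (share_count / digg_count)) < 0.15:
        return False
    return 30 <= int(duration) <= 720
```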

+ 22 - 7
data_channel/dy_ls.py

@@ -4,13 +4,13 @@ import time
 import requests
 import json
 
-from common import Common, Feishu
+from common import Common, Feishu, AliyunLogger
 from common.sql_help import sqlCollect
 
 
 class DYLS:
     @classmethod
-    def get_dy_zr_list(cls, task_mark, url_id, number, mark):
+    def get_dy_zr_list(cls, task_mark, url_id, number, mark, channel_id, name):
         url = "http://8.217.190.241:8888/crawler/dou_yin/blogger"
         list = []
         next_cursor = ''
@@ -41,14 +41,14 @@ class DYLS:
                     # status = sqlCollect.is_used(task_mark, video_id, mark, "抖音")
                     # if status:
                     status = sqlCollect.is_used(task_mark, video_id, mark, "抖音历史")
-                    if status == False:
-                        continue
+
                     video_uri = data[i].get('video', {}).get('play_addr', {}).get('uri')
                     ratio = f'{data[i].get("video", {}).get("height")}p'
                     video_url = f'https://www.iesdouyin.com/aweme/v1/play/?video_id={video_uri}&ratio={ratio}&line=0'  # 视频链接
                     digg_count = int(data[i].get('statistics').get('digg_count'))  # 点赞
                     share_count = int(data[i].get('statistics').get('share_count'))  # 转发
                     duration = data[i].get('duration')
+                    duration = duration / 1000
                     old_title = data[i].get('desc', "").strip().replace("\n", "") \
                         .replace("/", "").replace("\\", "").replace("\r", "") \
                         .replace(":", "").replace("*", "").replace("?", "") \
@@ -56,16 +56,31 @@ class DYLS:
                         .replace(">", "").replace("|", "").replace(" ", "") \
                         .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
                         .replace("'", "").replace("#", "").replace("Merge", "")
+                    log_data = f"user:{url_id},video_id:{video_id},video_url:{video_url},original_title:{old_title},share_count:{share_count},digg_count:{digg_count},duration:{duration}"
+                    AliyunLogger.logging(channel_id, name, url_id, video_id, "扫描到一条视频", "2001", log_data)
                     Common.logger("dy-ls").info(
                         f"扫描:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count}")
+                    if status:
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "该视频已改造过", "2001", log_data)
+                        continue
                     video_percent = '%.2f' % (int(share_count) / int(digg_count))
                     special = float(0.25)
-                    duration = duration / 1000
-                    if int(share_count) < 500 or float(video_percent) < special or int(duration) < 30 or int(
-                            duration) > 720:
+                    if int(share_count) < 500:
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享小于500", "2003", log_data)
+                        Common.logger("dy-ls").info(
+                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{int(duration)} ")
+                        continue
+                    if float(video_percent) < special:
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享/点赞小于0.25", "2003", log_data)
+                        Common.logger("dy-ls").info(
+                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{int(duration)} ")
+                        continue
+                    if int(duration) < 30 or int(duration) > 720:
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
                         Common.logger("dy-ls").info(
                             f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{int(duration)} ")
                         continue
+                    AliyunLogger.logging(channel_id, name, url_id, video_id, "符合规则等待改造", "2004", log_data)
                     cover_url = data[i].get('video').get('cover').get('url_list')[0]  # 视频封面
                     all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": video_percent,
                                 "old_title": old_title}

+ 26 - 8
data_channel/ks_ls.py

@@ -2,13 +2,13 @@ import random
 import time
 import requests
 import json
-from common import Common, Feishu
+from common import Common, Feishu, AliyunLogger
 from common.sql_help import sqlCollect
 
 class KSLS:
 
     @classmethod
-    def get_ksls_list(cls, task_mark, url_id, number, mark):
+    def get_ksls_list(cls, task_mark, url_id, number, mark, channel_id, name):
         #  快手app
         url = "http://8.217.190.241:8888/crawler/kuai_shou/blogger"
         next_cursor = ""
@@ -40,12 +40,8 @@ class KSLS:
                 data_list = data_all_list["data"]
                 for data in data_list:
                     photo_id = data["photo_id"]
-                    # status = sqlCollect.is_used( photo_id, mark, "快手")
-                    # if status == False:
-                    #     continue
                     status = sqlCollect.is_used(task_mark, photo_id, mark, "快手历史")
-                    if status == False:
-                        continue
+
                     view_count = data["view_count"]
                     share_count = data["share_count"]
                     old_title = data["caption"]  # 标题
@@ -54,12 +50,34 @@ class KSLS:
                     duration = data["duration"]
                     duration = int(duration)/1000
                     special = float(0.0005)
-                    if float(video_percent) < special or int(share_count) < 100 or int(duration) < 30 or (duration) > 720:
+                    log_data = f"user:{url_id},video_id:{photo_id},video_url:'',original_title:{old_title},share_count:{share_count},view_count:{view_count},duration:{duration}"
+
+                    AliyunLogger.logging(channel_id, name, url_id, photo_id, "扫描到一条视频", "2001", log_data)
+                    if status:
+                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "该视频已改造过", "2001", log_data)
+                        continue
+                    if float(video_percent) < special:
+                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "不符合规则:分享/浏览小于0.0005", "2003", log_data)
+
+                        Common.logger("ks-ls").info(
+                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{photo_id} ,分享:{share_count},浏览{view_count} ,时长:{int(duration)} ")
+                        continue
+                    if int(share_count) < 100:
+                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "不符合规则:分享小于100", "2003", log_data)
+
+                        Common.logger("ks-ls").info(
+                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{photo_id} ,分享:{share_count},浏览{view_count} ,时长:{int(duration)} ")
+                        continue
+                    if int(duration) < 30 or (duration) > 720:
+                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
+
                         Common.logger("ks-ls").info(
                             f"不符合规则:{task_mark},用户主页id:{url_id},视频id{photo_id} ,分享:{share_count},浏览{view_count} ,时长:{int(duration)} ")
                         continue
                     video_url, image_url = cls.get_video(photo_id)
                     if video_url:
+                        log_data = f"user:{url_id},video_id:{photo_id},video_url:{video_url},original_title:{old_title},share_count:{share_count},view_count:{view_count},duration:{duration}"
+                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "符合规则等待改造", "2004", log_data)
                         all_data = {"video_id": photo_id, "cover": image_url, "video_url": video_url,
                                     "rule": video_percent,
                                     "old_title": old_title}

+ 46 - 28
data_channel/kuaishou.py

@@ -5,7 +5,7 @@ import json
 import urllib3
 from requests.adapters import HTTPAdapter
 
-from common import Feishu, Material, Common
+from common import Feishu, Material, Common, AliyunLogger
 from common.sql_help import sqlCollect
 from data_channel.data_help import dataHelp
 
@@ -99,35 +99,53 @@ class KS:
             feeds = response.json()['data']['visionProfilePhotoList']['feeds']
             try:
                 for i in range(len(feeds)):
-                    try:
-                        video_id = feeds[i].get("photo", {}).get("videoResource").get("h264", {}).get("videoId", "")
-                    except KeyError:
-                        video_id = feeds[i].get("photo", {}).get("videoResource").get("hevc", {}).get("videoId", "")
+                    # try:
+                    #     video_id = feeds[i].get("photo", {}).get("videoResource").get("h264", {}).get("videoId", "")
+                    # except KeyError:
+                    #     video_id = feeds[i].get("photo", {}).get("videoResource").get("hevc", {}).get("videoId", "")
+                    # status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
+                    # if status:
+                    #     continue
+                    video_id = feeds[i].get("photo", {}).get("id", "")
                     status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
+
+                    share_count = cls.get_share_count(video_id)
+                    old_title = feeds[i].get("photo", {}).get("caption")
+                    cover_url = feeds[i].get('photo', {}).get('coverUrl', "")
+                    video_url = feeds[i].get('photo', {}).get('photoUrl', "")
+                    view_count = int(feeds[i].get('photo', {}).get('viewCount', 0))
+                    realLikeCount = int(feeds[i].get('photo', {}).get('realLikeCount', 0))
+                    video_percent = '%.4f' % (share_count / view_count)
+                    duration = dataHelp.video_duration(video_url)
+                    log_data = f"user:{url_id},video_id:{video_id},video_url:{video_url},original_title:{old_title},share_count:{share_count},view_count:{view_count},duration:{duration}"
+                    AliyunLogger.logging(channel_id, name, url_id, video_id, "扫描到一条视频", "2001", log_data)
+                    Common.logger("ks").info(
+                        f"扫描:{task_mark},用户主页id:{url_id},视频id{video_id} ,播放数:{view_count} ,分享数:{share_count},时长:{duration} ")
                     if status:
-                        video_id = feeds[i].get("photo", {}).get("id", "")
-                        status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
-                        if status:
-                            share_count = cls.get_share_count(video_id)
-                            old_title = feeds[i].get("photo", {}).get("caption")
-                            cover_url = feeds[i].get('photo', {}).get('coverUrl', "")
-                            video_url = feeds[i].get('photo', {}).get('photoUrl', "")
-                            view_count = int(feeds[i].get('photo', {}).get('viewCount', 0))
-                            realLikeCount = int(feeds[i].get('photo', {}).get('realLikeCount', 0))
-                            video_percent = '%.4f' % (share_count / view_count)
-                            duration = dataHelp.video_duration(video_url)
-                            Common.logger("ks").info(
-                                f"扫描:{task_mark},用户主页id:{url_id},视频id{video_id} ,播放数:{view_count} ,分享数:{share_count},时长:{duration} ")
-                            special = float(0.001)
-                            if float(video_percent) < special or share_count < 500 or duration < 30 or duration > 720:
-                                Common.logger("ks").info(
-                                    f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,播放数:{view_count} ,分享数:{share_count},时长:{duration} ")
-                                continue
-                            all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": video_percent, "old_title": old_title}
-                            list.append(all_data)
-                            if len(list) == int(number):
-                                Common.logger(mark).info(f"获取快手视频总数:{len(list)}\n")
-                                return list
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "该视频已改造过", "2001", log_data)
+                        continue
+                    special = float(0.001)
+                    if float(video_percent) < special:
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享/浏览小于0.001", "2003", log_data)
+                        Common.logger("ks").info(
+                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,播放数:{view_count} ,分享数:{share_count},时长:{duration} ")
+                        continue
+                    if share_count < 500:
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享小于500", "2003", log_data)
+                        Common.logger("ks").info(
+                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,播放数:{view_count} ,分享数:{share_count},时长:{duration} ")
+                        continue
+                    if duration < 30 or duration > 720:
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
+                        Common.logger("ks").info(
+                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,播放数:{view_count} ,分享数:{share_count},时长:{duration} ")
+                        continue
+                    AliyunLogger.logging(channel_id, name, url_id, video_id, "符合规则等待改造", "2004", log_data)
+                    all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": video_percent, "old_title": old_title}
+                    list.append(all_data)
+                    if len(list) == int(number):
+                        Common.logger(mark).info(f"获取快手视频总数:{len(list)}\n")
+                        return list
             except Exception as exc:
                 Common.logger("ks").warning(f"{name}的快手获取数据失败:{exc}\n")
                 return list
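
Besides the per-reason "2003" logging, the dedup key changes: the video id is now read from the top-level `photo.id` instead of the `videoResource` h264/hevc `videoId`, so `is_used` and the SLS records share one identifier. A minimal sketch of the new extraction (field names as in the feed objects handled above):

```python
def extract_photo_id(feed: dict) -> str:
    # Previously: feed["photo"]["videoResource"]["h264"]["videoId"], falling
    # back to "hevc" on KeyError; now the stable photo id is used everywhere.
    return feed.get("photo", {}).get("id", "")
```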

+ 27 - 5
data_channel/kuaishouchuangzuozhe.py

@@ -6,7 +6,7 @@ import time
 from urllib.parse import urlencode
 from datetime import datetime, timedelta
 
-from common import Oss, Feishu, Common
+from common import Oss, Feishu, Common, AliyunLogger
 from common.sql_help import sqlCollect
 
 headers = {
@@ -235,7 +235,7 @@ class KsFeedVideo:
             return None
 
     @classmethod
-    def get_data(cls):
+    def get_data(cls, channel_id, name):
         number = 1
         list = []
         for category_id, category_name in cls.CATEGORY_IDS.items():
@@ -246,8 +246,7 @@ class KsFeedVideo:
                 for feed in feeds:
                     photo_id = feed["photo_id"]  # 视频ID
                     status = sqlCollect.ks_is_used(photo_id)
-                    if status:
-                        continue
+
                     user_name = feed["user_name"]  # 用户名
                     user_sex = feed["user_sex"]  # 性别 F为女,U为男
                     time_data = feed["time"]  # 发布时间
@@ -260,8 +259,14 @@ class KsFeedVideo:
                     main_mv_url = feed["main_mv_url"]  # 视频链接
                     thumbnail_url = feed["thumbnail_url"]  # 视频封面
                     user_id = feed["user_id"]  # 用户id非用户主页id
+                    log_data = f"user:{user_name},video_id:{photo_id},video_url:{main_mv_url},original_title:{caption},share_count:{share_count},view_count:{view_count},duration:{duration}"
+                    AliyunLogger.logging(channel_id, name, user_name, photo_id, "扫描到一条视频", "2001", log_data)
                     value, age = cls.analyze_photo(photo_id)
+                    if status:
+                        AliyunLogger.logging(channel_id, name, user_name, photo_id, "该视频已改造过", "2001", log_data)
+                        continue
                     if value:
+                        AliyunLogger.logging(channel_id, name, user_name, photo_id, f"不符合规则:50+年龄占比小于40%,实际占比{age}", "2003", log_data)
                         sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count,
                                                   share_count, duration, main_mv_url, thumbnail_url, user_id, '1',
                                                   photo_id, category_name, age, oss_object=None, video_uid=None)
@@ -269,15 +274,32 @@ class KsFeedVideo:
                         continue
                     video_percent = '%.4f' % (share_count / view_count)
                     special = float(0.0005)
-                    if float(video_percent) < special or share_count < 100 or duration < 30 or duration > 720:
+                    if float(video_percent) < special:
+                        AliyunLogger.logging(channel_id, name, user_name, photo_id, "不符合规则:分享/浏览小于0.0005", "2003", log_data)
+
                         sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count, share_count, duration, main_mv_url, thumbnail_url, user_id, '1', photo_id, category_name, age, oss_object=None, video_uid=None)
                         continue
+                    if share_count < 100:
+                        AliyunLogger.logging(channel_id, name, user_name, photo_id, "不符合规则:分享小于100", "2003", log_data)
+
+                        sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count,
+                                                  share_count, duration, main_mv_url, thumbnail_url, user_id, '1',
+                                                  photo_id, category_name, age, oss_object=None, video_uid=None)
+                        continue
+                    if duration < 30 or duration > 720:
+                        AliyunLogger.logging(channel_id, name, user_name, photo_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
+                        sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count,
+                                                  share_count, duration, main_mv_url, thumbnail_url, user_id, '1',
+                                                  photo_id, category_name, age, oss_object=None, video_uid=None)
+                        continue
 
                     sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count,
                                               like_count, share_count, duration, main_mv_url, thumbnail_url,
                                               user_id, '0', photo_id, category_name, age, oss_object=None, video_uid=None)
                     all_data = {"video_id": photo_id, "cover": thumbnail_url, "video_url": main_mv_url, "rule": video_percent,
                                 "old_title": caption}
+                    AliyunLogger.logging(channel_id, name, user_name, photo_id, "符合规则等待改造", "2004", log_data)
+
                     list.append(all_data)
                     current_time = datetime.now()
                     formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
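
Every scanned feed item is still written to the table; the difference between a rejected and an accepted item is the flag passed to `insert_ks_data` ('1' for rows filtered out with a "2003" reason, '0' for rows queued for rewriting). A sketch that consolidates the repeated calls above into one helper (the helper name and the boolean parameter are hypothetical):

```python
from common.sql_help import sqlCollect

def persist_feed_item(rejected: bool, user_name, user_sex, time_data, caption,
                      view_count, like_count, share_count, duration,
                      main_mv_url, thumbnail_url, user_id, photo_id,
                      category_name, age):
    flag = '1' if rejected else '0'   # '1' = rejected (2003), '0' = 符合规则等待改造 (2004)
    sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption,
                              view_count, like_count, share_count, duration,
                              main_mv_url, thumbnail_url, user_id, flag,
                              photo_id, category_name, age,
                              oss_object=None, video_uid=None)
```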

+ 17 - 11
data_channel/piaoquan.py

@@ -4,7 +4,7 @@ import json
 import requests
 from urllib.parse import urlencode
 
-from common import Common
+from common import Common, AliyunLogger
 from common.sql_help import sqlCollect
 
 
@@ -99,7 +99,7 @@ class PQ:
     获取用户下的所有视频
     """
     @classmethod
-    def get_pq_url(cls, task_mark, user_id, number, mark):
+    def get_pq_url(cls, task_mark, user_id, number, mark, channel_id, name):
         url = f"https://admin.piaoquantv.com/manager/video/page?uid={user_id}&pageNum=1&pageSize=100"
 
         payload = {}
@@ -117,16 +117,22 @@ class PQ:
             for url in content:
                 video_id = url["id"]
                 status = sqlCollect.is_used(task_mark, video_id, mark, "票圈")
-                if status:
-                    cover = url["coverImgPath"]
-                    video_url = url["transedVideoPath"]
-                    old_title = url["title"]
 
-                    all_data = {"video_id": video_id, "cover": cover, "video_url": video_url, "rule": "无", "old_title": old_title}
-                    list.append(all_data)
-                    if len(list) == int(number):
-                        Common.logger("pq").info(f"获取视频总数:{len(list)}\n")
-                        return list
+                cover = url["coverImgPath"]
+                video_url = url["transedVideoPath"]
+                old_title = url["title"]
+                log_data = f"user:{user_id},video_id:{video_id},video_url:{video_url},original_title:{old_title}"
+                AliyunLogger.logging(channel_id, name, user_id, video_id, "扫描到一条视频", "2001", log_data)
+
+                if status:
+                    AliyunLogger.logging(channel_id, name, user_id, video_id, "该视频已改造过", "2001", log_data)
+                    continue
+                AliyunLogger.logging(channel_id, name, user_id, video_id, "符合规则等待改造", "2004", log_data)
+                all_data = {"video_id": video_id, "cover": cover, "video_url": video_url, "rule": "无", "old_title": old_title}
+                list.append(all_data)
+                if len(list) == int(number):
+                    Common.logger("pq").info(f"获取视频总数:{len(list)}\n")
+                    return list
             Common.logger("pq").info(f"获取票圈视频总数:{len(list)}\n")
             return list
         except Exception as e:

+ 49 - 47
data_channel/shipinhao.py

@@ -4,7 +4,7 @@ import time
 
 import requests
 
-from common import Common
+from common import Common, AliyunLogger
 from common.sql_help import sqlCollect
 from data_channel.data_help import dataHelp
 
@@ -53,8 +53,8 @@ class SPH:
                 return False
 
     @classmethod
-    def get_sph_url(cls, task_mark, url, number, mark):
-        account_id = cls.get_account_id(url)
+    def get_sph_url(cls, task_mark, url_id, number, mark, channel_id, name):
+        account_id = cls.get_account_id(url_id)
         if account_id:
             url = "http://61.48.133.26:30001/FinderGetUpMasterNextPage"
             last_buffer = ""
@@ -87,52 +87,54 @@ class SPH:
                     for obj in res_json["UpMasterHomePage"]:
                         objectId = obj['objectId']
                         status = sqlCollect.is_used(task_mark, objectId, mark, "视频号")
+                        objectNonceId = obj['objectNonceId']
+                        url1 = "http://61.48.133.26:30001/GetFinderDownloadAddress"
+                        payload = json.dumps({
+                            "objectId": objectId,
+                            "objectNonceId": objectNonceId
+                        })
+                        headers = {
+                            'Content-Type': 'text/plain'
+                        }
+                        response = requests.request("POST", url1, headers=headers, data=payload)
+                        time.sleep(random.randint(0, 1))
+                        video_obj = response.json()
+                        video_url = video_obj.get('DownloadAddress')
+                        share_cnt = int(obj['forward_count'])  # 分享
+                        like_cnt = int(obj['like_count'])  # 点赞
+                        old_title = video_obj.get('title').split("\n")[0].split("#")[0]
+                        duration = dataHelp.video_duration(video_url)
+                        log_data = f"user:{url_id},video_id:{objectId},video_url:{video_url},original_title:{old_title},share_count:{share_cnt},like_count:{like_cnt},duration:{duration}"
+                        AliyunLogger.logging(channel_id, name, url_id, objectId, "扫描到一条视频", "2001", log_data)
+
+                        Common.logger("sph").info(
+                            f"扫描:{task_mark},用户主页id:{url_id},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt}")
                         if status:
-                            objectNonceId = obj['objectNonceId']
-                            url = "http://61.48.133.26:30001/GetFinderDownloadAddress"
-                            payload = json.dumps({
-                                "objectId": objectId,
-                                "objectNonceId": objectNonceId
-                            })
-                            headers = {
-                                'Content-Type': 'text/plain'
-                            }
-                            response = requests.request("POST", url, headers=headers, data=payload)
-                            time.sleep(random.randint(0, 1))
-                            video_obj = response.json()
-                            video_url = video_obj.get('DownloadAddress')
-                            share_cnt = int(obj['forward_count'])  # 分享
-                            like_cnt = int(obj['like_count'])  # 点赞
-                            old_title = video_obj.get('title').split("\n")[0].split("#")[0]
-                            Common.logger("sph").info(
-                                f"扫描:{task_mark},用户主页id:{url},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt}")
-                            # if share_cnt < 500:
-                            #     Common.logger("sph").info(
-                            #         f"不符合规则:{task_mark},用户主页id:{url},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt}")
-                            #     continue
-                            video_percent = '%.2f' % (share_cnt / like_cnt)
-                            special = float(0.25)
-                            if like_cnt >= 30000 or like_cnt >= 50000 or (share_cnt >= 300 and float(video_percent) >= special):
-                            
-                            # if float(video_percent) < special:
-                            #     Common.logger("sph").info(
-                            #         f"不符合规则:{task_mark},用户主页id:{url},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt}")
-                            #     continue
-                                duration = dataHelp.video_duration(video_url)
-                                if int(duration) < 30 or int(duration) > 720:
-                                    Common.logger("sph").info(
-                                        f"任务:{task_mark},用户主页id:{url},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration} ")
-                                    continue
-                                cover = video_obj.get('thumb_url')
-                                all_data = {"video_id": objectId, "cover": cover, "video_url": video_url, "rule": video_percent, "old_title": old_title}
-                                list.append(all_data)
-                                if len(list) == int(number):
-                                    Common.logger(mark).info(f"获取视频号视频总数:{len(list)}\n")
-                                    return list
-                            else:
+                            AliyunLogger.logging(channel_id, name, url_id, objectId, "该视频已改造过", "2001", log_data)
+                            continue
+                        video_percent = '%.2f' % (share_cnt / like_cnt)
+                        special = float(0.25)
+                        if like_cnt >= 30000 or like_cnt >= 50000 or (share_cnt >= 300 and float(video_percent) >= special):
+                            if int(duration) < 30 or int(duration) > 720:
                                 Common.logger("sph").info(
-                                         f"不符合规则:{task_mark},用户主页id:{url},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt}")
+                                    f"任务:{task_mark},用户主页id:{url_id},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration} ")
+                                AliyunLogger.logging(channel_id, name, url_id, objectId, "不符合规则:时长不符合规则大于720秒/小于30秒",
+                                                     "2003", log_data)
+
                                 continue
+                            cover = video_obj.get('thumb_url')
+                            AliyunLogger.logging(channel_id, name, url_id, objectId, "符合规则等待改造", "2004", log_data)
+                            all_data = {"video_id": objectId, "cover": cover, "video_url": video_url, "rule": video_percent, "old_title": old_title}
+                            list.append(all_data)
+                            if len(list) == int(number):
+                                Common.logger(mark).info(f"获取视频号视频总数:{len(list)}\n")
+                                return list
+                        else:
+                            AliyunLogger.logging(channel_id, name, url_id, objectId, "不符合规则:点赞小于30000/50000 或 分享/点赞小于0.25和分享小于300", "2003", log_data)
+
+                            Common.logger("sph").info(
+                                     f"不符合规则:{task_mark},用户主页id:{url_id},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt}")
+                            continue
                                 
 
             return list
@@ -140,4 +142,4 @@ class SPH:
 
 
 if __name__ == '__main__':
-    SPH.get_sph_url('1',"霖霖觅影",'10','2')
+    SPH.get_sph_url('1',"霖霖觅影",'10','2',"视频号",'视频号品类账号')
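
视频号 keeps a compound acceptance rule rather than sequential thresholds: high like counts pass outright, otherwise a video needs at least 300 shares with a share/like ratio of 0.25 or more, and every accepted video must still fit the 30-720 second window. A condensed predicate (the zero-like guard is an addition for this sketch; the duplicate `like_cnt >= 50000` clause is kept as in the code above):

```python
def sph_candidate(share_cnt: int, like_cnt: int, duration: float) -> bool:
    video_percent = float('%.2f' % (share_cnt / like_cnt)) if like_cnt else 0.0
    if like_cnt >= 30000 or like_cnt >= 50000 or (share_cnt >= 300 and video_percent >= 0.25):
        return 30 <= int(duration) <= 720
    return False
```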

+ 5 - 1
data_channel/shipinhaodandian.py

@@ -1,10 +1,11 @@
+from common import AliyunLogger
 from common.sql_help import sqlCollect
 
 
 class SPHDD:
 
     @classmethod
-    def get_sphdd_data(cls, url):
+    def get_sphdd_data(cls, url, channel_id, name):
         data_list = sqlCollect.get_shp_dd_data(url)
         list = []
         if data_list:
@@ -22,6 +23,9 @@ class SPHDD:
                     from_group_id = data[9]
                     from_group_name = data[10]
                     source = data[11]
+                    log_data = f"user:{url},video_id:{video_id},video_url:{video_url},original_title:{old_title}"
+                    AliyunLogger.logging(f"{channel_id}-{source}", name, url, video_id, "扫描到一条视频", "2001", log_data)
+                    AliyunLogger.logging(f"{channel_id}-{source}", name, url, video_id, "符合规则等待改造", "2004", log_data)
                     all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": author_name,
                                 "old_title": old_title, "from_user_name": from_user_name, "from_group_name": from_group_name, "source": source}
                     list.append(all_data)

+ 40 - 20
data_channel/sph_ls.py

@@ -1,37 +1,57 @@
-from common import Common
+from common import Common, AliyunLogger
 from common.sql_help import sqlCollect
 
 
 class SPHLS:
 
     @classmethod
-    def get_sphls_data(cls, task_mark, url, number, mark):
+    def get_sphls_data(cls, task_mark, url, number, mark, channel_id, name):
         data_list = sqlCollect.sph_data_info_list(url)
         list = []
         if data_list:
             for data in data_list:
                 video_id = data[0]
                 status = sqlCollect.is_used(task_mark,video_id, mark, "视频号历史")
+
+                old_title = data[1]
+                share_cnt = int(data[2])
+                like_cnt = int(data[3])
+                oss_url = data[4]
+                oss_cover = data[5]
+                duration = int(float(data[6]))
+                Common.logger("sph-ls").info(
+                    f"扫描:{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt},时长:{duration}")
+                log_data = f"user:{url},video_id:{video_id},video_url:{oss_url},original_title:{old_title},share_count:{share_cnt},like_count:{like_cnt},duration:{duration}"
+                AliyunLogger.logging(channel_id, name, url, video_id, "扫描到一条视频", "2001", log_data)
+
                 if status:
-                    old_title = data[1]
-                    share_cnt = int(data[2])
-                    like_cnt = int(data[3])
-                    oss_url = data[4]
-                    oss_cover = data[5]
-                    duration = int(float(data[6]))
+                    AliyunLogger.logging(channel_id, name, url, video_id, "该视频已改造过", "2001", log_data)
+                    continue
+                if share_cnt < 300:
+                    AliyunLogger.logging(channel_id, name, url, video_id, "不符合规则:分享小于300", "2003", log_data)
+
+                    Common.logger("sph-ls").info(
+                        f"任务:{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration} ")
+                    continue
+                if share_cnt < like_cnt:
+                    AliyunLogger.logging(channel_id, name, url, video_id, "不符合规则:分享小于点赞", "2003", log_data)
+
+                    Common.logger("sph-ls").info(
+                        f"任务:{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration} ")
+                    continue
+                if duration < 30 or duration > 720:
+                    AliyunLogger.logging(channel_id, name, url, video_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003",
+                                         log_data)
                     Common.logger("sph-ls").info(
-                        f"扫描:{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt},时长:{duration}")
-                    if share_cnt < 300 or share_cnt < like_cnt or duration < 30 or duration > 720:
-                        Common.logger("sph-ls").info(
-                            f"任务:{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration} ")
-                        continue
-
-                    all_data = {"video_id": video_id, "cover": oss_cover, "video_url": oss_url, "rule": '',
-                                "old_title": old_title}
-                    list.append(all_data)
-                    if len(list) == int(number):
-                        Common.logger("sph-ls").info(f"获取视频号视频总数:{len(list)}\n")
-                        return list
+                        f"任务:{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration} ")
+                    continue
+                AliyunLogger.logging(channel_id, name, url, video_id, "符合规则等待改造", "2004", log_data)
+                all_data = {"video_id": video_id, "cover": oss_cover, "video_url": oss_url, "rule": '',
+                            "old_title": old_title}
+                list.append(all_data)
+                if len(list) == int(number):
+                    Common.logger("sph-ls").info(f"获取视频号视频总数:{len(list)}\n")
+                    return list
             return list
         else:
             Common.logger("sph-ls").info(f"{url}无数据\n")

+ 15 - 8
video_rewriting/video_processor.py

@@ -11,7 +11,7 @@ import concurrent.futures
 
 from common.redis import get_data, get_redis_video_data
 from common.tts_help import TTS
-from common import Material, Feishu, Common, Oss
+from common import Material, Feishu, Common, Oss, AliyunLogger
 from common.ffmpeg import FFmpeg
 from common.gpt4o_help import GPT4o
 from data_channel.douyin import DY
@@ -90,6 +90,7 @@ class VideoProcessor:
         Common.logger(mark).info(f"{name}的{task_mark}下{channel_id}的用户:{url}开始获取视频")
         data_list = cls.get_data_list(channel_id, task_mark, url, number, mark, feishu_id, cookie_sheet, name)
         if not data_list:
+            AliyunLogger.logging(channel_id, name, url, "", "无改造视频", "4000")
             Common.logger(mark).info(f"{name}的{task_mark}下{channel_id}的视频ID{url} 已经改造过了")
             text = (
                 f"**通知类型**: 没有改造的视频\n"
@@ -132,6 +133,7 @@ class VideoProcessor:
                 new_video_path = cls.download_and_process_video(channel_id, video_url, video_path_url, v_id,
                                                                 crop_total, gg_duration_total, pw_random_id, new_title, mark)
                 if not os.path.isfile(new_video_path):
+                    AliyunLogger.logging(channel_id, name, url, v_id, "视频下载失败", "3002", f"video_url:{video_url}")
                     text = (
                         f"**通知类型**: 视频下载失败\n"
                         f"**负责人**: {name}\n"
@@ -153,6 +155,8 @@ class VideoProcessor:
                             # new_video_path = FFmpeg.single_video(new_video_path, video_path_url, zm)
 
                     if not os.path.isfile(new_video_path):
+                        log_data = f"user:{url},video_id:{v_id},video_url:{video_url},ai_title:{new_title}"
+                        AliyunLogger.logging(channel_id, name, url, v_id, "视频改造失败", "3001", f"video_url:{log_data}")
                         text = (
                             f"**通知类型**: 视频改造失败\n"
                             f"**负责人**: {name}\n"
@@ -196,6 +200,9 @@ class VideoProcessor:
                         f"**视频主页ID**: {url}\n"
                         f"**视频Video_id**: {v_id}\n"
                     )
+                    log_data = f"user:{url},video_id:{v_id},video_url:{video_url},ai_title:{new_title}"
+                    AliyunLogger.logging(channel_id, name, url, v_id, "视频改造成功", "1000", log_data, code)
+
                     Feishu.finish_bot(text,
                                       "https://open.feishu.cn/open-apis/bot/v2/hook/e7697dc6-5254-4411-8b59-3cd0742bf703",
                                       "【 机器改造通知 】")
@@ -269,21 +276,21 @@ class VideoProcessor:
         if channel_id == "抖音":
             return DY.get_dy_url(task_mark, url, number, mark, feishu_id, cookie_sheet, channel_id, name)
         elif channel_id == "票圈":
-            return PQ.get_pq_url(task_mark, url, number, mark)
+            return PQ.get_pq_url(task_mark, url, number, mark, channel_id, name)
         elif channel_id == "视频号":
-            return SPH.get_sph_url(task_mark, url, number, mark)
+            return SPH.get_sph_url(task_mark, url, number, mark, channel_id, name)
         elif channel_id == "快手":
             return KS.get_ks_url(task_mark, url, number, mark, feishu_id, cookie_sheet, channel_id, name)
         elif channel_id == "快手创作者版":
-            return KsFeedVideo.get_data()
+            return KsFeedVideo.get_data(channel_id, name)
         elif channel_id == "单点视频":
-            return SPHDD.get_sphdd_data(url)
+            return SPHDD.get_sphdd_data(url, channel_id, name)
         elif channel_id == "抖音历史":
-            return DYLS.get_dy_zr_list(task_mark, url, number, mark)
+            return DYLS.get_dy_zr_list(task_mark, url, number, mark, channel_id, name)
         elif channel_id == "快手历史":
-            return KSLS.get_ksls_list(task_mark, url, number, mark)
+            return KSLS.get_ksls_list(task_mark, url, number, mark, channel_id, name)
         elif channel_id == "视频号历史":
-            return SPHLS.get_sphls_data(task_mark, url, number, mark)
+            return SPHLS.get_sphls_data(task_mark, url, number, mark, channel_id, name)
 
     @classmethod
     def generate_title(cls, video, title):
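
The string codes passed to `AliyunLogger.logging` throughout this commit form a small vocabulary; collected in one place for reference (covering only the codes used above):

```python
# Status codes written to the rewriting-log logstore by this commit.
ALIYUN_LOG_CODES = {
    "1000": "视频改造成功",
    "2001": "扫描到一条视频 / 该视频已改造过",
    "2003": "不符合规则(分享数、分享比例、时长等)",
    "2004": "符合规则等待改造",
    "3001": "视频改造失败",
    "3002": "视频下载失败",
    "4000": "无改造视频",
}
```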