Jelajahi Sumber

增加搜索视频号

zhangyong 1 tahun lalu
induk
melakukan
a5b8eaed9a

+ 4 - 4
common/redis.py

@@ -44,23 +44,23 @@ def get_data(name, feishu_id, feishu_sheet):
         acquire_lock = client.set(lock, 1, ex=60, nx=True)
         if not acquire_lock:
             return None
-        if name == 'dy-pl-gjc' or name == 'ks-pl-gjc':
+        if name == 'dy-pl-gjc' or name == 'ks-pl-gjc' or name == 'sph-pl-gjc':
             data = Material.get_keyword_data(feishu_id, feishu_sheet)
         else:
             data = Material.get_task_data(feishu_id, feishu_sheet)
         client.rpush(task, *data)
     ret = client.lpop(task)
-    if name == 'dy-pl-gjc' or name == 'dd-sp' or name == 'ks-pl-gjc':
+    if name == 'dy-pl-gjc' or name == 'dd-sp' or name == 'ks-pl-gjc' or name == 'sph-pl-gjc':
         client.rpush(task, ret)
     return ret
 
-"""抖音搜索计数插入"""
+"""搜索计数插入"""
 def increment_key(mark_count):
     helper = SyncRedisHelper()
     client = helper.get_client()
     client.incrby(mark_count, 1)
 
-"""抖音搜索计数获取"""
+"""搜索计数获取"""
 def get_first_value_with_prefix(mark_count):
     helper = SyncRedisHelper()
     client = helper.get_client()

+ 4 - 4
data_channel/dy_keyword.py

@@ -80,17 +80,17 @@ class DyKeyword:
                 video_percent = '%.2f' % (int(share_count) / int(digg_count))
                 if int(share_count) < share_count_rule:
                     AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:分享小于{share_count_rule}", "2003", log_data)
-                    Common.logger("dy-ls").info(
+                    Common.logger("dy-key-word").info(
                         f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{int(duration)} ")
                     continue
                 if float(video_percent) < special:
                     AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:分享/点赞小于{special}", "2003", log_data)
-                    Common.logger("dy-ls").info(
+                    Common.logger("dy-key-word").info(
                         f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{int(duration)} ")
                     continue
                 if int(duration) < short_duration_rule or int(duration) > 720:
                     AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:时长不符合规则大于720秒/小于{short_duration_rule}秒", "2003", log_data)
-                    Common.logger("dy-ls").info(
+                    Common.logger("dy-key-word").info(
                         f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{int(duration)} ")
                     continue
                 AliyunLogger.logging(channel_id, name, keyword, video_id, "符合规则等待改造", "2004", log_data)
@@ -106,4 +106,4 @@ class DyKeyword:
 
 if __name__ == '__main__':
 
-    DyKeyword.get_key_word('keyword', 'sort_type', 'publish_time', 'duration', 'task_mark', 'mark', 'channel_id', 'name')
+    DyKeyword.get_key_word('keyword', 'sort_type', 'publish_time', 'duration', 'task_mark', 'mark')

+ 56 - 5
data_channel/sph_keyword.py

@@ -1,12 +1,25 @@
+import re
+import time
+
 import requests
 import json
 
-from common import Common, Feishu
+from common import Common, Feishu, AliyunLogger
+from common.sql_help import sqlCollect
+
 
+class SphKeyword:
+
+    @classmethod
+    def time_str_to_seconds(cls, time_str):
+        """Convert a colon-separated duration string ("SS", "MM:SS" or "HH:MM:SS") to total seconds (int); raises ValueError on a non-numeric field."""
+        total_seconds = 0
+        for field in time_str.split(":"):
+            total_seconds = total_seconds * 60 + int(field)  # each field is worth 60x the one to its right
+        return total_seconds
 
-class KsKeyword:
     @classmethod
-    def get_key_word(cls, keyword, task_mark, mark, channel_id, name, task):
+    def get_key_word(cls, keyword, task_mark, mark, channel_id, name):
         url = "http://8.217.190.241:8888/crawler/wei_xin/shi_pin_hao/keyword"
         list = []
         payload = json.dumps({
@@ -18,6 +31,7 @@ class KsKeyword:
             'Content-Type': 'application/json'
         }
         try:
+            time.sleep(1)
             response = requests.request("POST", url, headers=headers, data=payload)
             response = response.json()
             code = response['code']
@@ -27,8 +41,45 @@ class KsKeyword:
                                   "【视频号搜索接口使用提示】")
                 Common.logger("sph-key-word").info(f"快手搜索词数据获取失败,{response['msg']}\n")
                 return list
-            data = response['data']['data']
-
+            data_list = response['data']['data']
+            for data in data_list:
+                items = data['items'][0]
+                video_id = data["'boxID'"]
+                duration = items["duration"]
+                duration = cls.time_str_to_seconds(duration)
+                digg_count = items.get('likeNum', "0")
+                old_title =items.get('title', "")
+                old_title = re.sub(r'<em.*?>.*?</em>', '', old_title)
+                cover_url = items["image"]
+                video_url = items["videoUrl"]
+                log_data = f"user:{keyword},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,digg_count:{digg_count},,duration:{duration}"
+                AliyunLogger.logging(channel_id, name, keyword, video_id, "扫描到一条视频", "2001", log_data)
+                Common.logger("sph-key-word").info(
+                    f"扫描:{task_mark},搜索词:{keyword},视频id{video_id},点赞{digg_count}")
+                status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
+                if status:
+                    AliyunLogger.logging(channel_id, name, keyword, video_id, "该视频已改造过", "2001", log_data)
+                    continue
+                if int(digg_count) < 2000:
+                    AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:点赞小于2000", "2003",
+                                         log_data)
+                    Common.logger("sph-key-word").info(
+                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} ,点赞{digg_count} ,时长:{int(duration)} ")
+                    continue
+                if int(duration) < 30 or int(duration) > 900:
+                    AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:时长不符合规则大于900秒/小于30秒", "2003", log_data)
+                    Common.logger("sph-key-word").info(
+                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} 点赞{digg_count} ,时长:{int(duration)} ")
+                    continue
+                AliyunLogger.logging(channel_id, name, keyword, video_id, "符合规则等待改造", "2004", log_data)
+                all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": '',
+                            "old_title": old_title}
+                list.append(all_data)
+            return list
         except Exception as exc:
             Common.logger("sph-key-word").info(f"视频号搜索词{keyword}获取失败{exc}\n")
             return list
+
+
+if __name__ == '__main__':
+    SphKeyword.get_key_word('iphone手机', '', '', '', '')

+ 21 - 0
job_keyword_sph.py

@@ -0,0 +1,21 @@
+
+import time
+from common import Material
+
+from video_rewriting.video_processor import VideoProcessor
+def video_task_start():
+    """Run VideoProcessor.main in an endless loop on one Feishu task entry, sleeping 10 s after a truthy result or an exception and 120 s after a falsy result; never returns."""
+    data = Material.feishu_list()[16]  # fixed entry 16 of the list; read once, before the loop starts
+    while True:
+        try:
+            print("开始执行任务")
+            mark = VideoProcessor.main(data)
+            print(f"返回用户名: {mark}")
+            time.sleep(10 if mark else 120)  # short pause when mark is truthy, long pause when it is empty
+        except Exception as e:
+            print("处理任务时出现异常:", e)
+            time.sleep(10)  # pause before the next attempt after a failure
+            continue
+if __name__ == '__main__':
+    video_task_start()
+

+ 6 - 1
video_rewriting/video_processor.py

@@ -26,6 +26,7 @@ from data_channel.shipinhao import SPH
 
 # 读取配置文件
 from data_channel.shipinhaodandian import SPHDD
+from data_channel.sph_keyword import SphKeyword
 from data_channel.sph_ls import SPHLS
 
 config = configparser.ConfigParser()
@@ -246,6 +247,8 @@ class VideoProcessor:
                             tag_channel = "来源_抖音关键词"
                         elif channel_id == "快手搜索":
                             tag_channel = "来源_快手关键词"
+                        elif channel_id == "视频号搜索":
+                            tag_channel = "来源_视频号关键词"
                         tag = f"{tag_first},{tag_keyword},{tag_channel}"
                         tag_status = Tag.video_tag(code, tag)
                         if tag_status == 0:
@@ -371,6 +374,8 @@ class VideoProcessor:
             return DyKeyword.get_key_word(url, task_mark, mark, channel_id, name, task)
         elif channel_id == '快手搜索':
             return KsKeyword.get_key_word(url, task_mark, mark, channel_id, name, task)
+        elif channel_id == '视频号搜索':
+            return SphKeyword.get_key_word(url, task_mark, mark, channel_id, name)
 
 
     @classmethod
@@ -417,7 +422,7 @@ class VideoProcessor:
             if new_video_path == None:
                 return None
             Common.logger(mark).info(f"{channel_id}视频下载成功: {new_video_path}")
-        elif channel_id == "票圈" or channel_id == "快手创作者版":
+        elif channel_id == "票圈" or channel_id == "快手创作者版" or channel_id == '视频号搜索':
             new_video_path = PQ.download_video(video_url, video_path_url, v_id)
             if new_video_path == None:
                 return None