import json
import re
import time

import requests

from common import Common, Feishu, AliyunLogger
from common.sql_help import sqlCollect


class SphKeyword:
    """Keyword search against the wei_xin/shi_pin_hao crawler endpoint.

    Fetches search results for a keyword, logs every result, and returns the
    videos that pass the de-duplication, like-count and duration filters.
    """

    # Crawler endpoint queried by get_key_word.
    _SEARCH_URL = "http://47.236.68.175:8889/crawler/wei_xin/shi_pin_hao/keyword"
    # Feishu bot webhook notified when the endpoint returns a non-zero code.
    _ALERT_WEBHOOK = "https://open.feishu.cn/open-apis/bot/v2/hook/575ca6a1-84b4-4a2f-983b-1d178e7b16eb"
    # Matches markup tags embedded in titles.
    # NOTE(review): the previous pattern was a bare `.*?`, which on
    # Python 3.7+ matches every character and therefore erased the whole
    # title. Presumably the tag delimiters were lost and the intent is to
    # strip markup from the title - confirm against a real API response.
    _TAG_RE = re.compile(r'<.*?>')

    @classmethod
    def time_str_to_seconds(cls, time_str):
        """Convert a colon-separated duration string to total seconds.

        Accepts "SS", "MM:SS" and "HH:MM:SS": every field is worth 60 times
        the field to its right.

        Args:
            time_str: duration text such as "03:25".

        Returns:
            The duration in seconds as an int.

        Raises:
            ValueError: if any field is not an integer.
        """
        total_seconds = 0
        for field in time_str.split(":"):
            total_seconds = total_seconds * 60 + int(field)
        return total_seconds

    @classmethod
    def _parse_like_count(cls, raw):
        """Convert the like count reported by the endpoint to an int.

        Handles numbers, numeric strings, missing/empty values (treated as 0)
        and the "<n>万" / "<n>万+" notation (万 = 10,000), e.g. '10万+' -> 100000.
        NOTE(review): only the literal '10万+' is known to occur; other
        "<n>万" values are assumed to follow the same notation.

        Raises:
            ValueError: if the value cannot be interpreted as a number.
        """
        if raw is None or raw == "":
            return 0
        if isinstance(raw, (int, float)):
            return int(raw)
        text = str(raw).strip().rstrip("+")
        if text.endswith("万"):
            # round() guards against float artefacts such as 2.3 * 10000.
            return int(round(float(text[:-1]) * 10000))
        return int(text)

    @classmethod
    def get_key_word(cls, keyword, task_mark, mark, channel_id, name):
        """Search videos by keyword and return those that pass the filters.

        A result is dropped when sqlCollect.is_used reports it as already
        used, when it has fewer than 2000 likes, or when its duration is
        shorter than 30 s or longer than 900 s. Every result and every
        decision is reported through AliyunLogger and Common.logger.

        Args:
            keyword: search term sent to the endpoint.
            task_mark: forwarded to sqlCollect.is_used and used in log lines.
            mark: forwarded to sqlCollect.is_used.
            channel_id: forwarded to sqlCollect.is_used and AliyunLogger.
            name: forwarded to AliyunLogger.

        Returns:
            A list of dicts with the keys "video_id", "cover", "video_url",
            "rule" and "old_title". If the endpoint reports an error the list
            is empty; if an exception occurs it holds whatever was collected
            before the failure.
        """
        videos = []
        payload = json.dumps({
            "keyword": keyword,
            "sort": "不限",
            "cursor": ""
        })
        headers = {
            'Content-Type': 'application/json'
        }
        try:
            # Pause before each request (presumably to throttle calls to the
            # crawler service - confirm).
            time.sleep(1)
            response = requests.request("POST", cls._SEARCH_URL, headers=headers, data=payload, timeout=30)
            body = response.json()
            if body['code'] != 0:
                Feishu.finish_bot(f"shi_pin_hao/keyword {body['msg']}",
                                  cls._ALERT_WEBHOOK,
                                  "【视频号搜索接口使用提示】")
                Common.logger("sph-key-word").info(f"视频号搜索词数据获取失败,{body['msg']}\n")
                return videos

            for data in body['data']['data']:
                entries = data.get('items')
                if not entries:
                    # Nothing to inspect for this result; move on instead of
                    # letting an IndexError abort the whole batch.
                    continue
                items = entries[0]
                video_id = data["boxID"]

                duration = items["duration"]
                if duration is None or duration == '':
                    # Missing duration falls back to one second, which then
                    # fails the minimum-duration rule below.
                    duration = "00:01"
                duration = cls.time_str_to_seconds(duration)

                digg_count = cls._parse_like_count(items.get('likeNum', "0"))
                old_title = cls._TAG_RE.sub('', items.get('title') or "")
                cover_url = items["image"]
                video_url = items["videoUrl"]

                log_data = f"user:{keyword},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,digg_count:{digg_count},,duration:{duration}"
                AliyunLogger.logging(channel_id, name, keyword, video_id, "扫描到一条视频", "2001", log_data)
                Common.logger("sph-key-word").info(
                    f"扫描:{task_mark},搜索词:{keyword},视频id{video_id},点赞{digg_count}")

                if sqlCollect.is_used(task_mark, video_id, mark, channel_id):
                    AliyunLogger.logging(channel_id, name, keyword, video_id, "该视频已改造过", "2002", log_data)
                    continue

                if digg_count < 2000:
                    AliyunLogger.logging(channel_id, name, keyword, video_id, "不符合规则:点赞小于2000", "2003", log_data)
                    Common.logger("sph-key-word").info(
                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} ,点赞{digg_count} ,时长:{duration} ")
                    continue

                if duration < 30 or duration > 900:
                    AliyunLogger.logging(channel_id, name, keyword, video_id, "不符合规则:时长不符合规则大于900秒/小于30秒", "2003", log_data)
                    Common.logger("sph-key-word").info(
                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} 点赞{digg_count} ,时长:{duration} ")
                    continue

                videos.append({"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": '', "old_title": old_title})
                AliyunLogger.logging(channel_id, name, keyword, video_id, "符合规则等待改造", "2004", log_data)
            return videos
        except Exception as exc:
            # Best-effort crawl: log the failure and hand back whatever was
            # collected before it happened.
            Common.logger("sph-key-word").info(f"视频号搜索词{keyword}获取失败{exc}\n")
            return videos


if __name__ == '__main__':
    SphKeyword.get_key_word('最有钱的地方', '', '', '', '')