|
@@ -1,12 +1,25 @@
|
|
|
+import re
|
|
|
+import time
|
|
|
+
|
|
|
import requests
|
|
|
import json
|
|
|
|
|
|
-from common import Common, Feishu
|
|
|
+from common import Common, Feishu, AliyunLogger
|
|
|
+from common.sql_help import sqlCollect
|
|
|
+
|
|
|
|
|
|
+class SphKeyword:
|
|
|
+
|
|
|
+ @classmethod
|
|
|
+ def time_str_to_seconds(cls, time_str):
|
|
|
+ # 分钟和秒用 ":" 分隔
|
|
|
+ minutes, seconds = map(int, time_str.split(":"))
|
|
|
+ # 转换为秒
|
|
|
+ total_seconds = minutes * 60 + seconds
|
|
|
+ return total_seconds
|
|
|
|
|
|
-class KsKeyword:
|
|
|
@classmethod
|
|
|
- def get_key_word(cls, keyword, task_mark, mark, channel_id, name, task):
|
|
|
+ def get_key_word(cls, keyword, task_mark, mark, channel_id, name):
|
|
|
url = "http://8.217.190.241:8888/crawler/wei_xin/shi_pin_hao/keyword"
|
|
|
list = []
|
|
|
payload = json.dumps({
|
|
@@ -18,6 +31,7 @@ class KsKeyword:
|
|
|
'Content-Type': 'application/json'
|
|
|
}
|
|
|
try:
|
|
|
+ time.sleep(1)
|
|
|
response = requests.request("POST", url, headers=headers, data=payload)
|
|
|
response = response.json()
|
|
|
code = response['code']
|
|
@@ -27,8 +41,45 @@ class KsKeyword:
|
|
|
"【视频号搜索接口使用提示】")
|
|
|
Common.logger("sph-key-word").info(f"快手搜索词数据获取失败,{response['msg']}\n")
|
|
|
return list
|
|
|
- data = response['data']['data']
|
|
|
-
|
|
|
+ data_list = response['data']['data']
|
|
|
+ for data in data_list:
|
|
|
+ items = data['items'][0]
|
|
|
+ video_id = data["'boxID'"]
|
|
|
+ duration = items["duration"]
|
|
|
+ duration = cls.time_str_to_seconds(duration)
|
|
|
+ digg_count = items.get('likeNum', "0")
|
|
|
+ old_title =items.get('title', "")
|
|
|
+ old_title = re.sub(r'<em.*?>.*?</em>', '', old_title)
|
|
|
+ cover_url = items["image"]
|
|
|
+ video_url = items["videoUrl"]
|
|
|
+ log_data = f"user:{keyword},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,digg_count:{digg_count},,duration:{duration}"
|
|
|
+ AliyunLogger.logging(channel_id, name, keyword, video_id, "扫描到一条视频", "2001", log_data)
|
|
|
+ Common.logger("sph-key-word").info(
|
|
|
+ f"扫描:{task_mark},搜索词:{keyword},视频id{video_id},点赞{digg_count}")
|
|
|
+ status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
|
|
|
+ if status:
|
|
|
+ AliyunLogger.logging(channel_id, name, keyword, video_id, "该视频已改造过", "2001", log_data)
|
|
|
+ continue
|
|
|
+ if int(digg_count) < 2000:
|
|
|
+ AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:点赞小于2000", "2003",
|
|
|
+ log_data)
|
|
|
+ Common.logger("sph-key-word").info(
|
|
|
+ f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} ,点赞{digg_count} ,时长:{int(duration)} ")
|
|
|
+ continue
|
|
|
+ if int(duration) < 30 or int(duration) > 900:
|
|
|
+ AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:时长不符合规则大于900秒/小于30秒", "2003", log_data)
|
|
|
+ Common.logger("sph-key-word").info(
|
|
|
+ f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} 点赞{digg_count} ,时长:{int(duration)} ")
|
|
|
+ continue
|
|
|
+ AliyunLogger.logging(channel_id, name, keyword, video_id, "符合规则等待改造", "2004", log_data)
|
|
|
+ all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": '',
|
|
|
+ "old_title": old_title}
|
|
|
+ list.append(all_data)
|
|
|
+ return list
|
|
|
except Exception as exc:
|
|
|
Common.logger("sph-key-word").info(f"视频号搜索词{keyword}获取失败{exc}\n")
|
|
|
return list
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Manual smoke run: search one sample keyword with empty task metadata.
    SphKeyword.get_key_word(
        keyword='iphone手机',
        task_mark='',
        mark='',
        channel_id='',
        name='',
    )
|