|  | @@ -1,12 +1,25 @@
 | 
											
												
													
														|  | 
 |  | +import re
 | 
											
												
													
														|  | 
 |  | +import time
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  |  import requests
 |  |  import requests
 | 
											
												
													
														|  |  import json
 |  |  import json
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -from common import Common, Feishu
 |  | 
 | 
											
												
													
														|  | 
 |  | +from common import Common, Feishu, AliyunLogger
 | 
											
												
													
														|  | 
 |  | +from common.sql_help import sqlCollect
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | 
 |  | +class SphKeyword:
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    @classmethod
 | 
											
												
													
														|  | 
 |  | +    def time_str_to_seconds(cls, time_str):
 | 
											
												
													
														|  | 
 |  | +        # 分钟和秒用 ":" 分隔
 | 
											
												
													
														|  | 
 |  | +        minutes, seconds = map(int, time_str.split(":"))
 | 
											
												
													
														|  | 
 |  | +        # 转换为秒
 | 
											
												
													
														|  | 
 |  | +        total_seconds = minutes * 60 + seconds
 | 
											
												
													
														|  | 
 |  | +        return total_seconds
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -class KsKeyword:
 |  | 
 | 
											
												
													
														|  |      @classmethod
 |  |      @classmethod
 | 
											
												
													
														|  | -    def get_key_word(cls, keyword, task_mark, mark, channel_id, name, task):
 |  | 
 | 
											
												
													
														|  | 
 |  | +    def get_key_word(cls, keyword, task_mark, mark, channel_id, name):
 | 
											
												
													
														|  |          url = "http://8.217.190.241:8888/crawler/wei_xin/shi_pin_hao/keyword"
 |  |          url = "http://8.217.190.241:8888/crawler/wei_xin/shi_pin_hao/keyword"
 | 
											
												
													
														|  |          list = []
 |  |          list = []
 | 
											
												
													
														|  |          payload = json.dumps({
 |  |          payload = json.dumps({
 | 
											
										
											
												
													
														|  | @@ -18,6 +31,7 @@ class KsKeyword:
 | 
											
												
													
														|  |              'Content-Type': 'application/json'
 |  |              'Content-Type': 'application/json'
 | 
											
												
													
														|  |          }
 |  |          }
 | 
											
												
													
														|  |          try:
 |  |          try:
 | 
											
												
													
														|  | 
 |  | +            time.sleep(1)
 | 
											
												
													
														|  |              response = requests.request("POST", url, headers=headers, data=payload)
 |  |              response = requests.request("POST", url, headers=headers, data=payload)
 | 
											
												
													
														|  |              response = response.json()
 |  |              response = response.json()
 | 
											
												
													
														|  |              code = response['code']
 |  |              code = response['code']
 | 
											
										
											
												
													
														|  | @@ -27,8 +41,45 @@ class KsKeyword:
 | 
											
												
													
														|  |                                    "【视频号搜索接口使用提示】")
 |  |                                    "【视频号搜索接口使用提示】")
 | 
											
												
													
														|  |                  Common.logger("sph-key-word").info(f"快手搜索词数据获取失败,{response['msg']}\n")
 |  |                  Common.logger("sph-key-word").info(f"快手搜索词数据获取失败,{response['msg']}\n")
 | 
											
												
													
														|  |                  return list
 |  |                  return list
 | 
											
												
													
														|  | -            data = response['data']['data']
 |  | 
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  | 
 |  | +            data_list = response['data']['data']
 | 
											
												
													
														|  | 
 |  | +            for data in data_list:
 | 
											
												
													
														|  | 
 |  | +                items = data['items'][0]
 | 
											
												
													
														|  | 
 |  | +                video_id = data["'boxID'"]
 | 
											
												
													
														|  | 
 |  | +                duration = items["duration"]
 | 
											
												
													
														|  | 
 |  | +                duration = cls.time_str_to_seconds(duration)
 | 
											
												
													
														|  | 
 |  | +                digg_count = items.get('likeNum', "0")
 | 
											
												
													
														|  | 
 |  | +                old_title =items.get('title', "")
 | 
											
												
													
														|  | 
 |  | +                old_title = re.sub(r'<em.*?>.*?</em>', '', old_title)
 | 
											
												
													
														|  | 
 |  | +                cover_url = items["image"]
 | 
											
												
													
														|  | 
 |  | +                video_url = items["videoUrl"]
 | 
											
												
													
														|  | 
 |  | +                log_data = f"user:{keyword},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,digg_count:{digg_count},,duration:{duration}"
 | 
											
												
													
														|  | 
 |  | +                AliyunLogger.logging(channel_id, name, keyword, video_id, "扫描到一条视频", "2001", log_data)
 | 
											
												
													
														|  | 
 |  | +                Common.logger("sph-key-word").info(
 | 
											
												
													
														|  | 
 |  | +                    f"扫描:{task_mark},搜索词:{keyword},视频id{video_id},点赞{digg_count}")
 | 
											
												
													
														|  | 
 |  | +                status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
 | 
											
												
													
														|  | 
 |  | +                if status:
 | 
											
												
													
														|  | 
 |  | +                    AliyunLogger.logging(channel_id, name, keyword, video_id, "该视频已改造过", "2001", log_data)
 | 
											
												
													
														|  | 
 |  | +                    continue
 | 
											
												
													
														|  | 
 |  | +                if int(digg_count) < 2000:
 | 
											
												
													
														|  | 
 |  | +                    AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:点赞小于2000", "2003",
 | 
											
												
													
														|  | 
 |  | +                                         log_data)
 | 
											
												
													
														|  | 
 |  | +                    Common.logger("sph-key-word").info(
 | 
											
												
													
														|  | 
 |  | +                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} ,点赞{digg_count} ,时长:{int(duration)} ")
 | 
											
												
													
														|  | 
 |  | +                    continue
 | 
											
												
													
														|  | 
 |  | +                if int(duration) < 30 or int(duration) > 900:
 | 
											
												
													
														|  | 
 |  | +                    AliyunLogger.logging(channel_id, name, keyword, video_id, f"不符合规则:时长不符合规则大于900秒/小于30秒", "2003", log_data)
 | 
											
												
													
														|  | 
 |  | +                    Common.logger("sph-key-word").info(
 | 
											
												
													
														|  | 
 |  | +                        f"不符合规则:{task_mark},用户主页id:{keyword},视频id{video_id} 点赞{digg_count} ,时长:{int(duration)} ")
 | 
											
												
													
														|  | 
 |  | +                    continue
 | 
											
												
													
														|  | 
 |  | +                AliyunLogger.logging(channel_id, name, keyword, video_id, "符合规则等待改造", "2004", log_data)
 | 
											
												
													
														|  | 
 |  | +                all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url, "rule": '',
 | 
											
												
													
														|  | 
 |  | +                            "old_title": old_title}
 | 
											
												
													
														|  | 
 |  | +                list.append(all_data)
 | 
											
												
													
														|  | 
 |  | +            return list
 | 
											
												
													
														|  |          except Exception as exc:
 |  |          except Exception as exc:
 | 
											
												
													
														|  |              Common.logger("sph-key-word").info(f"视频号搜索词{keyword}获取失败{exc}\n")
 |  |              Common.logger("sph-key-word").info(f"视频号搜索词{keyword}获取失败{exc}\n")
 | 
											
												
													
														|  |              return list
 |  |              return list
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +if __name__ == '__main__':
 | 
											
												
													
														|  | 
 |  | +    SphKeyword.get_key_word('iphone手机', '', '', '', '')
 |