zhangyong 4 miesiące temu
rodzic
commit
a3b93c3918
1 zmieniony plik z 53 dodaniami i 49 usunięciami
  1. 53 49
      data_channel/ks_keyword.py

+ 53 - 49
data_channel/ks_keyword.py

@@ -10,45 +10,46 @@ from common.sql_help import sqlCollect
 class KsKeyword:
     @classmethod
     def get_key_word(cls, keyword, task_mark, mark, channel_id, name, task):
-        combo = task['combo']
-        content_type = combo[0]
-        publish_time = combo[1]
-        duration = combo[2]
-        share_count_rule = 0
-        special = 0
-        short_duration_rule = 0
+        # combo = task['combo']
+        # content_type = combo[0]
+        # publish_time = combo[1]
+        # duration = combo[2]
+        # share_count_rule = 0
+        # special = 0
+        # short_duration_rule = 0
 
         url = "http://8.217.192.46:8889/crawler/kuai_shou/keyword"
         list = []
 
         payload = json.dumps({
             "keyword": keyword,
-            "content_type": "综合",
-            "sort_type": content_type,
-            "publish_time": publish_time,
-            "duration": duration,
+            "content_type": "",
+            "sort_type": "",
+            "publish_time": "",
+            "duration": "",
             "cursor": ""
         })
         headers = {
             'Content-Type': 'application/json'
         }
-
-        if " 不限" == publish_time:
-            share_count_rule = 100
-            special = 0.0005
-            short_duration_rule = 25
-        elif "近1日" == publish_time:
-            share_count_rule = 0
-            special = 0.0003
-            short_duration_rule = 25
-        elif "近7日" == publish_time:
-            share_count_rule = 50
-            special = 0.0005
-            short_duration_rule = 25
-        elif "近1月" == publish_time:
-            share_count_rule = 100
-            special = 0.0005
-            short_duration_rule = 25
+        share_count_rule = 100
+        special = 0.0005
+        short_duration_rule = 30
+
+        # if " 不限" == publish_time:
+        #
+        # elif "近1日" == publish_time:
+        #     share_count_rule = 0
+        #     special = 0.0003
+        #     short_duration_rule = 25
+        # elif "近7日" == publish_time:
+        #     share_count_rule = 50
+        #     special = 0.0005
+        #     short_duration_rule = 25
+        # elif "近1月" == publish_time:
+        #     share_count_rule = 100
+        #     special = 0.0005
+        #     short_duration_rule = 25
         try:
             time.sleep(3)
             response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
@@ -65,17 +66,23 @@ class KsKeyword:
                 return list
             data_list = response['data']['data']
             for data in data_list:
-                data = data['feed']
-                photo_id = data["photo_id"]
+                type = int(data['type'])
+                if type != 1:
+                    continue
+                photo_type = data['photoType']
+                if photo_type != "VIDEO":
+                    continue
+                photo_id =  data['photoId']
                 status = sqlCollect.is_used(task_mark, photo_id, mark, channel_id)
 
-                view_count = data["view_count"]
-                share_count = data["share_count"]
-                old_title = data["caption"]  # 标题
+                image_url = data['webpCoverUrls'][0]['url']
+                video_url = data['mainMvUrls'][0]['url']
+                view_count = data.get('viewCount', 0)
+                share_count = data.get('shareCount', 0)
+                old_title = data['caption']  # 标题
 
                 video_percent = '%.4f' % (int(share_count) / int(view_count))
-                duration = data["duration"]
-                duration = int(duration) / 1000
+                duration = int(int(data["duration"]) / 1000)
                 log_data = f"user:{keyword},,video_id:{photo_id},,video_url:'',original_title:{old_title},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"
 
                 AliyunLogger.logging(channel_id, name, keyword, photo_id, "扫描到一条视频", "2001", log_data)
@@ -94,24 +101,21 @@ class KsKeyword:
                     Common.logger("ks-key-word").info(
                         f"不符合规则:{task_mark},用户主页id:{keyword},视频id{photo_id} ,分享:{share_count},浏览{view_count} ,时长:{int(duration)} ")
                     continue
-                if int(duration) < short_duration_rule or int(duration) > 600:
-                    AliyunLogger.logging(channel_id, name, keyword, photo_id, f"不符合规则:时长不符合规则大于600秒/小于{short_duration_rule}", "2003",
+                if int(duration) < short_duration_rule or int(duration) > 720:
+                    AliyunLogger.logging(channel_id, name, keyword, photo_id, f"不符合规则:时长不符合规则大于720秒/小于{short_duration_rule}", "2003",
                                          log_data)
 
                     Common.logger("ks-key-word").info(
                         f"不符合规则:{task_mark},用户主页id:{keyword},视频id{photo_id} ,分享:{share_count},浏览{view_count} ,时长:{int(duration)} ")
                     continue
-                video_url, image_url = cls.get_video(photo_id)
-                if video_url:
-                    log_data = f"user:{keyword},,video_id:{photo_id},,video_url:{video_url},,original_title:{old_title},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"
-                    all_data = {"video_id": photo_id, "cover": image_url, "video_url": video_url,
-                                "rule": video_percent,
-                                "old_title": old_title}
-                    list.append(all_data)
-                    AliyunLogger.logging(channel_id, name, keyword, photo_id, "符合规则等待改造", "2004", log_data)
-                else:
-                    AliyunLogger.logging(channel_id, name, photo_id, photo_id, "无法获取到视频链接", "2003")
-                    continue
+
+                log_data = f"user:{keyword},,video_id:{photo_id},,video_url:{video_url},,original_title:{old_title},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"
+                all_data = {"video_id": photo_id, "cover": image_url, "video_url": video_url,
+                            "rule": video_percent,
+                            "old_title": old_title}
+                list.append(all_data)
+                AliyunLogger.logging(channel_id, name, keyword, photo_id, "符合规则等待改造", "2004", log_data)
+
             return list
         except Exception as exc:
             Common.logger("ks-key-word").info(f"快手搜索词{keyword}获取失败{exc}\n")
@@ -137,7 +141,7 @@ class KsKeyword:
 
 
 if __name__ == '__main__':
-    keyword = '毛主席故居'
+    keyword = '新闻联播'
     task_mark = '1'
     mark = 'pl-gjc'
     channel_id = '快手搜索'