Browse Source

Merge remote-tracking branch 'origin/master'

zhangyong 1 year ago
parent
commit
78ea79c0fb
3 changed files with 106 additions and 43 deletions
  1. 73 22
      common/video_item.py
  2. 2 10
      haokanshipin/haokanshipin_author/hksp_author.py
  3. 31 11
      xigua/xigua_author/xigua_author.py

+ 73 - 22
common/video_item.py

@@ -1,45 +1,96 @@
+import time
+
 from common.public import clean_title
 from .aliyun_log import AliyunLogger
 
 
-class VideoItem:
+class VideoItem(object):
     """
     function: 当扫描进一条视频的时候,对该视频的基本信息进行处理,保证发送给 pipeline和 etl 的 video_dict 是正确的
     __init__: 初始化空json 对象,用来存储视频信息
     add_video_info: 把视频信息存储到 item 对象中
     check_item: 检查 item 对象中的各个元素以及处理
     """
+
     def __init__(self):
         self.item = {}
 
     def add_video_info(self, key, value):
         self.item[key] = value
 
-    # 判断视频格式, 做兼容
     def check_item(self):
-        # video_title
+        """
+        判断item 里面的字段,是否符合要求
+        字段分为 3 类:
+        1. 必须存在数据的字段: ["video_id", "user_id", "user_name", "out_user_id", "out_video_id", "session", "video_url", "cover_url", "platform", "strategy"]
+        2. 不存在默认为 0 的字段 :["duration", "play_cnt", "like_cnt", "comment_cnt", "share_cnt", "width", "height"]
+        3. 需要后处理的字段: video_title, publish_time
+        """
         if self.item.get("video_title"):
-            self.item['video_title'] = clean_title(self.item['video_title'])
+            self.item["video_title"] = clean_title(self.item["video_title"])
         else:
-            self.item['video_title'] = "No title"
-        # video_id
-
-        # video_time, publish_time_str, publish_time_stamp, update_time_stamp
-
-        # play_cnt, like_cnt, comment_cnt, share_cnt
-
-        # width, height, video_width, video_height
-
-        # user_name, user_id, out_user_name, out_user_id
-
-        # profile_id, profile_mid
-
-        # session
+            return False
+        if self.item.get("publish_time_stamp"):
+            publish_time_str = time.strftime(
+                "%Y-%m-%d %H:%M:%S", time.localtime(self.item["publish_time_stamp"])
+            )
+            self.add_video_info("publish_time_str", publish_time_str)
+        else:
+            publish_time_stamp = int(time.time())
+            publish_time_str = time.strftime(
+                "%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp)
+            )
+            self.add_video_info("publish_time_stamp", publish_time_stamp)
+            self.add_video_info("publish_time_str", publish_time_str)
+        self.add_video_info("publish_time", publish_time_str)
+        if not self.item.get("update_time_stamp"):
+            self.add_video_info("update_time_stamp", int(time.time()))
 
-        # video_url
+        # 如果不存在,默认值为 0
+        config_keys = [
+            "duration",
+            "play_cnt",
+            "like_cnt",
+            "comment_cnt",
+            "share_cnt",
+            "width",
+            "height",
+        ]
+        for config_key in config_keys:
+            if self.item.get(config_key):
+                continue
+            else:
+                self.add_video_info(config_key, 0)
 
-        # cover_url
+        # 必须存在的元素,若不存在则会报错
+        must_keys = [
+            "video_id",
+            "user_id",
+            "user_name",
+            "out_video_id",
+            "session",
+            "video_url",
+            "cover_url",
+            "platform",
+            "strategy",
+        ]
+        """
+        video_id, out_video_id 均为站外视频 id
+        user_id: 站内用户 id
+        out_user_id: 站外用户 id
+        user_name: 站外用户名称
+        """
+        for m_key in must_keys:
+            if self.item.get(m_key):
+                continue
+            else:
+                # print(m_key)
+                return False
+        return True
 
     def produce_item(self):
-        self.check_item()
-        return self.item
+        flag = self.check_item()
+        if flag:
+            return self.item
+        else:
+            return False

File diff suppressed because it is too large
+ 2 - 10
haokanshipin/haokanshipin_author/hksp_author.py


+ 31 - 11
xigua/xigua_author/xigua_author.py

@@ -726,6 +726,7 @@ class XiGuaAuthor:
                             platform=self.platform,
                             mode=self.mode,
                             env=self.env,
+                            data=video_obj,
                             message="扫描到一条视频",
                         )
                         date_flag = self.process_video_obj(video_obj, user_dict)
@@ -807,19 +808,38 @@ class XiGuaAuthor:
                     trace_id=trace_id,
                     message="成功发送 MQ 至 ETL",
                 )
+                return True
             else:
-                if float(video_dict['like_cnt']) / float(video_dict['play_cnt']) >= 0.04:
-                    self.mq.send_msg(video_dict)
-                    self.download_count += 1
-                    AliyunLogger.logging(
-                        code="1002",
-                        platform=self.platform,
-                        mode=self.mode,
-                        env=self.env,
-                        data=video_dict,
-                        trace_id=trace_id,
-                        message="成功发送 MQ 至 ETL",
+                AliyunLogger.logging(
+                    code="2008",
+                    platform=self.platform,
+                    mode=self.mode,
+                    env=self.env,
+                    message="不满足特殊规则, 播放量",
+                    data=video_dict
+                )
+            if float(video_dict['like_cnt']) / float(video_dict['play_cnt']) >= 0.04:
+                self.mq.send_msg(video_dict)
+                self.download_count += 1
+                AliyunLogger.logging(
+                    code="1002",
+                    platform=self.platform,
+                    mode=self.mode,
+                    env=self.env,
+                    data=video_dict,
+                    trace_id=trace_id,
+                    message="成功发送 MQ 至 ETL",
                     )
+                return True
+            else:
+                AliyunLogger.logging(
+                    code="2008",
+                    platform=self.platform,
+                    mode=self.mode,
+                    env=self.env,
+                    message="不满足特殊规则, 点赞量/播放量",
+                    data=video_dict
+                )
         return True
 
     def get_video_info(self, item_id, trace_id):

Some files were not shown because too many files changed in this diff