import time

from common.public import clean_title
from .aliyun_log import AliyunLogger


				|  |  | -class VideoItem:
 | 
	
		
			
				|  |  | +class VideoItem(object):
 | 
	
		
			
				|  |  |      """
 | 
	
		
			
				|  |  |      function: 当扫描进一条视频的时候,对该视频的基本信息进行处理,保证发送给 pipeline和 etl 的 video_dict 是正确的
 | 
	
		
			
				|  |  |      __init__: 初始化空json 对象,用来存储视频信息
 | 
	
		
			
				|  |  |      add_video_info: 把视频信息存储到 item 对象中
 | 
	
		
			
				|  |  |      check_item: 检查 item 对象中的各个元素以及处理
 | 
	
		
			
				|  |  |      """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      def __init__(self):
 | 
	
		
			
				|  |  |          self.item = {}
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def add_video_info(self, key, value):
 | 
	
		
			
				|  |  |          self.item[key] = value
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    # 判断视频格式, 做兼容
 | 
	
		
			
				|  |  |      def check_item(self):
 | 
	
		
			
				|  |  | -        # video_title
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        判断item 里面的字段,是否符合要求
 | 
	
		
			
				|  |  | +        字段分为 3 类:
 | 
	
		
			
				|  |  | +        1. 必须存在数据的字段: ["video_id", "user_id", "user_name", "out_user_id", "out_video_id", "session", "video_url", "cover_url", "platform", "strategy"]
 | 
	
		
			
				|  |  | +        2. 不存在默认为 0 的字段 :["duration", "play_cnt", "like_cnt", "comment_cnt", "share_cnt", "width", "height"]
 | 
	
		
			
				|  |  | +        3. 需要后出理的字段: video_title, publish_time
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  |          if self.item.get("video_title"):
 | 
	
		
			
				|  |  | -            self.item['video_title'] = clean_title(self.item['video_title'])
 | 
	
		
			
				|  |  | +            self.item["video_title"] = clean_title(self.item["video_title"])
 | 
	
		
			
				|  |  |          else:
 | 
	
		
			
				|  |  | -            self.item['video_title'] = "No title"
 | 
	
		
			
				|  |  | -        # video_id
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        # video_time, publish_time_str, publish_time_stamp, update_time_stamp
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        # play_cnt, like_cnt, comment_cnt, share_cnt
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        # width, height, video_width, video_height
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        # user_name, user_id, out_user_name, out_user_id
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        # profile_id, profile_mid
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        # session
 | 
	
		
			
				|  |  | +            return False
 | 
	
		
			
				|  |  | +        if self.item.get("publish_time_stamp"):
 | 
	
		
			
				|  |  | +            publish_time_str = time.strftime(
 | 
	
		
			
				|  |  | +                "%Y-%m-%d %H:%M:%S", time.localtime(self.item["publish_time_stamp"])
 | 
	
		
			
				|  |  | +            )
 | 
	
		
			
				|  |  | +            self.add_video_info("publish_time_str", publish_time_str)
 | 
	
		
			
				|  |  | +        else:
 | 
	
		
			
				|  |  | +            publish_time_stamp = int(time.time())
 | 
	
		
			
				|  |  | +            publish_time_str = time.strftime(
 | 
	
		
			
				|  |  | +                "%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp)
 | 
	
		
			
				|  |  | +            )
 | 
	
		
			
				|  |  | +            self.add_video_info("publish_time_stamp", publish_time_stamp)
 | 
	
		
			
				|  |  | +            self.add_video_info("publish_time_str", publish_time_str)
 | 
	
		
			
				|  |  | +        self.add_video_info("publish_time", publish_time_str)
 | 
	
		
			
				|  |  | +        if not self.item.get("update_time_stamp"):
 | 
	
		
			
				|  |  | +            self.add_video_info("update_time_stamp", int(time.time()))
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -        # video_url
 | 
	
		
			
				|  |  | +        # 如果不存在,默认值为 0
 | 
	
		
			
				|  |  | +        config_keys = [
 | 
	
		
			
				|  |  | +            "duration",
 | 
	
		
			
				|  |  | +            "play_cnt",
 | 
	
		
			
				|  |  | +            "like_cnt",
 | 
	
		
			
				|  |  | +            "comment_cnt",
 | 
	
		
			
				|  |  | +            "share_cnt",
 | 
	
		
			
				|  |  | +            "width",
 | 
	
		
			
				|  |  | +            "height",
 | 
	
		
			
				|  |  | +        ]
 | 
	
		
			
				|  |  | +        for config_key in config_keys:
 | 
	
		
			
				|  |  | +            if self.item.get(config_key):
 | 
	
		
			
				|  |  | +                continue
 | 
	
		
			
				|  |  | +            else:
 | 
	
		
			
				|  |  | +                self.add_video_info(config_key, 0)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -        # cover_url
 | 
	
		
			
				|  |  | +        # 必须存在的元素,若不存在则会报错
 | 
	
		
			
				|  |  | +        must_keys = [
 | 
	
		
			
				|  |  | +            "video_id",
 | 
	
		
			
				|  |  | +            "user_id",
 | 
	
		
			
				|  |  | +            "user_name",
 | 
	
		
			
				|  |  | +            "out_video_id",
 | 
	
		
			
				|  |  | +            "session",
 | 
	
		
			
				|  |  | +            "video_url",
 | 
	
		
			
				|  |  | +            "cover_url",
 | 
	
		
			
				|  |  | +            "platform",
 | 
	
		
			
				|  |  | +            "strategy",
 | 
	
		
			
				|  |  | +        ]
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        video_id, out_video_id 均为站外视频 id
 | 
	
		
			
				|  |  | +        usr_id: 站内用户 id
 | 
	
		
			
				|  |  | +        out_user_id: 站外用户 id
 | 
	
		
			
				|  |  | +        user_name: 站外用户名称
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        for m_key in must_keys:
 | 
	
		
			
				|  |  | +            if self.item.get(m_key):
 | 
	
		
			
				|  |  | +                continue
 | 
	
		
			
				|  |  | +            else:
 | 
	
		
			
				|  |  | +                # print(m_key)
 | 
	
		
			
				|  |  | +                return False
 | 
	
		
			
				|  |  | +        return True
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def produce_item(self):
 | 
	
		
			
				|  |  | -        self.check_item()
 | 
	
		
			
				|  |  | -        return self.item
 | 
	
		
			
				|  |  | +        flag = self.check_item()
 | 
	
		
			
				|  |  | +        if flag:
 | 
	
		
			
				|  |  | +            return self.item
 | 
	
		
			
				|  |  | +        else:
 | 
	
		
			
				|  |  | +            return False
 |