item.py 3.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. """
  2. @author: luojunhui
  3. """
  4. import time
  5. from applications.functions.common import Functions
  6. class VideoItem(object):
  7. """
  8. function: 当扫描进一条视频的时候,对该视频的基本信息进行处理,保证发送给 pipeline和 etl 的 video_dict 是正确的
  9. __init__: 初始化空json 对象,用来存储视频信息
  10. add_video_info: 把视频信息存储到 item 对象中
  11. check_item: 检查 item 对象中的各个元素以及处理
  12. """
  13. def __init__(self):
  14. self.item = {}
  15. def add_video_info(self, key, value):
  16. self.item[key] = value
  17. def check_item(self):
  18. """
  19. 判断item 里面的字段,是否符合要求
  20. 字段分为 3 类:
  21. 1. 必须存在数据的字段: ["video_id", "user_id", "user_name", "out_user_id", "out_video_id", "session", "video_url", "cover_url", "platform", "strategy"]
  22. 2. 不存在默认为 0 的字段 :["duration", "play_cnt", "like_cnt", "comment_cnt", "share_cnt", "width", "height"]
  23. 3. 需要后出理的字段: video_title, publish_time
  24. """
  25. if self.item.get("video_title"):
  26. self.item["video_title"] = Functions().clean_title(self.item["video_title"])
  27. else:
  28. return False
  29. if self.item.get("publish_time_stamp"):
  30. publish_time_str = time.strftime(
  31. "%Y-%m-%d %H:%M:%S", time.localtime(self.item["publish_time_stamp"])
  32. )
  33. self.add_video_info("publish_time_str", publish_time_str)
  34. else:
  35. publish_time_stamp = int(time.time())
  36. publish_time_str = time.strftime(
  37. "%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp)
  38. )
  39. self.add_video_info("publish_time_stamp", publish_time_stamp)
  40. self.add_video_info("publish_time_str", publish_time_str)
  41. self.add_video_info("publish_time", publish_time_str)
  42. if not self.item.get("update_time_stamp"):
  43. self.add_video_info("update_time_stamp", int(time.time()))
  44. # 如果不存在,默认值为 0
  45. config_keys = [
  46. "duration",
  47. "play_cnt",
  48. "like_cnt",
  49. "comment_cnt",
  50. "share_cnt",
  51. "width",
  52. "height",
  53. ]
  54. for config_key in config_keys:
  55. if self.item.get(config_key):
  56. continue
  57. else:
  58. self.add_video_info(config_key, 0)
  59. # 必须存在的元素,若不存在则会报错
  60. must_keys = [
  61. "video_id",
  62. "user_id",
  63. "user_name",
  64. "out_video_id",
  65. "session",
  66. "video_url",
  67. "cover_url",
  68. "platform",
  69. "strategy",
  70. ]
  71. """
  72. video_id, out_video_id 均为站外视频 id
  73. usr_id: 站内用户 id
  74. out_user_id: 站外用户 id
  75. user_name: 站外用户名称
  76. """
  77. for m_key in must_keys:
  78. if self.item.get(m_key):
  79. continue
  80. else:
  81. # print(m_key)
  82. return False
  83. return True
  84. def produce_item(self):
  85. flag = self.check_item()
  86. if flag:
  87. return self.item
  88. else:
  89. return False