item.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. """
  2. @author: luojunhui
  3. """
  4. import time
  5. default_single_video_table_fields = {
  6. "platform": "gzh",
  7. "article_title": None,
  8. "content_trace_id": None,
  9. "read_cnt": 0,
  10. "article_index": None,
  11. "out_account_name": None,
  12. "article_url": None,
  13. "url_unique_md5": None,
  14. "category": None,
  15. "publish_timestamp": None,
  16. "out_account_id": None,
  17. "cover_url": None,
  18. "crawler_timestamp": int(time.time()),
  19. "source_account": 1,
  20. "article_publish_type": None,
  21. "like_cnt": 0,
  22. "bad_status": 0,
  23. "tags": None,
  24. "video_oss_path": None,
  25. }
  26. default_account_table_fields = {
  27. "platform": 'Not NULL',
  28. "account_id": 'Not NULL',
  29. "account_name": 'Not NULL',
  30. "max_cursor": None,
  31. "account_init_date": None,
  32. "status": 0,
  33. "priority": 0,
  34. }
  35. default_association_table_fields = {
  36. "account_name": 'Not NULL',
  37. "account_id": 'Not NULL',
  38. "recommend_video_id": 'Not NULL',
  39. "title": 'Not NULL',
  40. "read_cnt": 0,
  41. "duration": 0,
  42. "seed_account": 'Not NULL',
  43. "seed_title": 'Not NULL',
  44. "recommend_date": 'Not NULL',
  45. "platform": 'Not NULL'
  46. }
  47. class Item(object):
  48. """
  49. format save to article meta table or single video source table
  50. """
  51. def __init__(self):
  52. self.item = {}
  53. def add(self, key, value):
  54. """
  55. add key value to item
  56. """
  57. self.item[key] = value
  58. def check_video_item(self):
  59. """
  60. check video item
  61. """
  62. fields = list(default_single_video_table_fields.keys())
  63. for field in fields:
  64. if self.item.get(field, None) is not None:
  65. continue
  66. else:
  67. self.item[field] = default_single_video_table_fields[field]
  68. def check_article_item(self):
  69. """
  70. check article item
  71. """
  72. return
  73. def check_account_item(self):
  74. """
  75. check account item
  76. """
  77. fields = list(default_account_table_fields.keys())
  78. for key in fields:
  79. if self.item.get(key, None) is not None:
  80. continue
  81. elif default_account_table_fields[key] == 'Not NULL':
  82. raise ValueError(f"{key} is not None, please check your account item")
  83. else:
  84. self.item[key] = default_account_table_fields[key]
  85. def check_association_item(self):
  86. """
  87. check association item
  88. """
  89. fields = list(default_association_table_fields.keys())
  90. for field in fields:
  91. if self.item.get(field, None) is not None:
  92. continue
  93. elif default_account_table_fields[field] == 'Not NULL':
  94. raise ValueError(f"{field} is not None, please check your account item")
  95. else:
  96. self.item[field] = default_association_table_fields[field]
  97. def check(self, source):
  98. """
  99. check item
  100. """
  101. match source:
  102. case "video":
  103. self.check_video_item()
  104. case "article":
  105. self.check_article_item()
  106. case "account":
  107. self.check_account_item()
  108. case "association":
  109. self.check_association_item()