item.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. """
  2. @author: luojunhui
  3. """
  4. import time
  5. default_single_video_table_fields = {
  6. "platform": "gzh",
  7. "article_title": None,
  8. "content_trace_id": None,
  9. "read_cnt": 0,
  10. "article_index": None,
  11. "out_account_name": None,
  12. "article_url": None,
  13. "url_unique_md5": None,
  14. "category": None,
  15. "publish_timestamp": None,
  16. "out_account_id": None,
  17. "cover_url": None,
  18. "crawler_timestamp": int(time.time()),
  19. "source_account": 1,
  20. "article_publish_type": None,
  21. "like_cnt": 0,
  22. "bad_status": 0,
  23. "tags": None,
  24. "video_oss_path": None,
  25. "audit_status": 0,
  26. "category_status": 0,
  27. "audit_video_id": None,
  28. "mini_program_title": None
  29. }
  30. default_account_table_fields = {
  31. "platform": 'Not NULL',
  32. "account_id": 'Not NULL',
  33. "account_name": 'Not NULL',
  34. "max_cursor": None,
  35. "account_init_date": None,
  36. "status": 0,
  37. "priority": 0,
  38. }
  39. default_candidate_account_table_fields = {
  40. "platform": 'Not NULL',
  41. "account_id": 'Not NULL',
  42. "account_name": 'Not NULL',
  43. "crawler_date": 'Not NULL',
  44. "title_list": "[]"
  45. }
  46. class Item(object):
  47. """
  48. format save to article meta table or single video source table
  49. """
  50. def __init__(self):
  51. self.item = {}
  52. def add(self, key, value):
  53. """
  54. add key value to item
  55. """
  56. self.item[key] = value
  57. def check_video_item(self):
  58. """
  59. check video item
  60. """
  61. fields = list(default_single_video_table_fields.keys())
  62. for field in fields:
  63. if self.item.get(field, None) is not None:
  64. continue
  65. else:
  66. self.item[field] = default_single_video_table_fields[field]
  67. def check_article_item(self):
  68. """
  69. check article item
  70. """
  71. return
  72. def check_account_item(self):
  73. """
  74. check account item
  75. """
  76. fields = list(default_account_table_fields.keys())
  77. for key in fields:
  78. if self.item.get(key, None) is not None:
  79. continue
  80. elif default_account_table_fields[key] == 'Not NULL':
  81. raise ValueError(f"{key} is not None, please check your account item")
  82. else:
  83. self.item[key] = default_account_table_fields[key]
  84. def check_candidate_account_item(self):
  85. """
  86. check association item
  87. """
  88. fields = list(default_candidate_account_table_fields.keys())
  89. for field in fields:
  90. if self.item.get(field, None) is not None:
  91. continue
  92. elif default_candidate_account_table_fields[field] == 'Not NULL':
  93. raise ValueError(f"{field} is not None, please check your account item")
  94. else:
  95. self.item[field] = default_candidate_account_table_fields[field]
  96. def check(self, source):
  97. """
  98. check item
  99. """
  100. match source:
  101. case "video":
  102. self.check_video_item()
  103. case "article":
  104. self.check_article_item()
  105. case "account":
  106. self.check_account_item()
  107. case "candidate_account":
  108. self.check_candidate_account_item()