# item.py (2.4 KB)

  1. """
  2. @author: luojunhui
  3. """
  4. import time
  5. from pydantic import BaseModel
  6. default_single_video_table_fields = {
  7. "platform": "gzh",
  8. "article_title": None,
  9. "content_trace_id": None,
  10. "read_cnt": 0,
  11. "article_index": None,
  12. "out_account_name": None,
  13. "article_url": None,
  14. "url_unique_md5": None,
  15. "category": None,
  16. "publish_timestamp": None,
  17. "out_account_id": None,
  18. "cover_url": None,
  19. "crawler_timestamp": int(time.time()),
  20. "source_account": 1,
  21. "article_publish_type": None,
  22. "like_cnt": 0,
  23. "bad_status": 0,
  24. "tags": None,
  25. "video_oss_path": None,
  26. "audit_status": 0,
  27. "category_status": 0,
  28. "audit_video_id": None,
  29. "mini_program_title": None
  30. }
  31. class MetaArticleItem(BaseModel):
  32. platform: str | None = None
  33. mode: str | None = None
  34. category: str | None = None
  35. out_account_id: str | None = None
  36. article_index: int | None = None
  37. title: str | None = None
  38. link: str | None = None
  39. read_cnt: int = 0
  40. like_cnt: int = 0
  41. description: str | None = None
  42. publish_time: int | None = None
  43. crawler_time: int | None = None
  44. status: str | None = None
  45. channel_content_id: str | None = None
  46. unique_index: str | None = None
  47. source_article_title: str | None = None
  48. source_account: str | None = None
  49. llm_sensitivity: float | None = None
  50. title_sensitivity: float | None = None
  51. class Item(object):
  52. """
  53. format save to article meta table or single video source table
  54. """
  55. def __init__(self):
  56. self.item = {}
  57. def add(self, key, value):
  58. """
  59. add key value to item
  60. """
  61. self.item[key] = value
  62. def check_video_item(self):
  63. """
  64. check video item
  65. """
  66. fields = list(default_single_video_table_fields.keys())
  67. for field in fields:
  68. if self.item.get(field, None) is not None:
  69. continue
  70. else:
  71. self.item[field] = default_single_video_table_fields[field]
  72. def check_article_item(self):
  73. """
  74. check article item
  75. """
  76. article_item = MetaArticleItem(**self.item)
  77. return article_item
  78. def check(self, source):
  79. """
  80. check item
  81. """
  82. match source:
  83. case "video":
  84. self.check_video_item()
  85. case "article":
  86. return self.check_article_item()
  87. return None