12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697 |
- """
- @author: luojunhui
- """
- import time
- from pydantic import BaseModel
# Fallback values for the single video source table, keyed by field name.
# Counters and status flags fall back to 0; fields with no sensible fallback
# are None. Key order is significant only in that it is the order in which
# missing fields get filled in.
default_single_video_table_fields = {
    "platform": "gzh",
    "article_title": None,
    "content_trace_id": None,
    "read_cnt": 0,
    "article_index": None,
    "out_account_name": None,
    "article_url": None,
    "url_unique_md5": None,
    "category": None,
    "publish_timestamp": None,
    "out_account_id": None,
    "cover_url": None,
    # NOTE: evaluated once, when this module is imported. The value stored
    # here is therefore the import time, not a per-item timestamp.
    "crawler_timestamp": int(time.time()),
    "source_account": 1,
    "article_publish_type": None,
    "like_cnt": 0,
    "bad_status": 0,
    "tags": None,
    "video_oss_path": None,
    "audit_status": 0,
    "category_status": 0,
    "audit_video_id": None,
    "mini_program_title": None,
}
class MetaArticleItem(BaseModel):
    """
    Validated shape of one article record.

    Every field is optional: each one defaults to ``None`` except the two
    counters ``read_cnt`` and ``like_cnt``, which default to ``0``.
    """

    # Where the article comes from.
    platform: str | None = None
    mode: str | None = None
    category: str | None = None
    out_account_id: str | None = None
    article_index: int | None = None

    # The article itself.
    title: str | None = None
    link: str | None = None
    read_cnt: int = 0
    like_cnt: int = 0
    description: str | None = None

    # Integer timestamps (unit not visible here -- presumably epoch seconds;
    # verify against the producer).
    publish_time: int | None = None
    crawler_time: int | None = None

    # Bookkeeping and identifiers.
    status: str | None = None
    channel_content_id: str | None = None
    unique_index: str | None = None
    source_article_title: str | None = None
    source_account: str | None = None

    # Sensitivity scores.
    llm_sensitivity: float | None = None
    title_sensitivity: float | None = None
class Item(object):
    """
    Collect key/value pairs for one record and normalise them before they
    are saved to the article meta table or the single video source table.
    """

    def __init__(self):
        # Raw key/value pairs gathered through ``add``.
        self.item = {}

    def add(self, key, value):
        """
        Store ``value`` under ``key``, overwriting any earlier value.
        """
        self.item[key] = value

    def check_video_item(self):
        """
        Fill in every video-table field that is absent or ``None``.

        Fields that already hold a non-``None`` value are left untouched;
        keys that are not video-table fields are kept as they are. The
        update happens in place on ``self.item``; nothing is returned.
        """
        for field, default in default_single_video_table_fields.items():
            if self.item.get(field) is not None:
                continue
            if field == "crawler_timestamp":
                # The module-level default is computed once at import time,
                # which would stamp every item with the process start time.
                # Use the current time instead.
                default = int(time.time())
            self.item[field] = default

    def check_article_item(self):
        """
        Validate the collected pairs as an article record.

        :return: a ``MetaArticleItem`` built from ``self.item``; whatever
            ``MetaArticleItem`` raises on invalid data propagates.
        """
        return MetaArticleItem(**self.item)

    def check(self, source):
        """
        Normalise the item for the given kind of source.

        :param source: ``"video"`` fills the video defaults in place;
            ``"article"`` validates the item as an article.
        :return: the ``MetaArticleItem`` for ``"article"``; ``None`` for
            ``"video"`` (the result lives in ``self.item``) and for any
            other value, which is ignored.
        """
        if source == "video":
            self.check_video_item()
        elif source == "article":
            return self.check_article_item()
        return None
|