# model.py
from typing import Optional

from pydantic import BaseModel, Field


  3. class CrawlerMetaArticle(BaseModel):
  4. article_id: Optional[int] = Field(description="unique id")
  5. platform: Optional[str] = Field(None, max_length=64, description="抓取平台")
  6. mode: Optional[str] = Field(None, max_length=16, description="抓取模式")
  7. category: Optional[str] = Field(None, max_length=32, description="品类,搜索类型")
  8. out_account_id: Optional[str] = Field(None, max_length=256, description="外站账号id")
  9. article_index: Optional[int] = Field(None, description="文章位置")
  10. title: Optional[str] = Field(None, max_length=128, description="文章标题")
  11. link: Optional[str] = Field(None, description="文章链接")
  12. read_cnt: Optional[int] = Field(None, description="阅读量")
  13. like_cnt: Optional[int] = Field(None, description="点赞量")
  14. description: Optional[str] = Field(None, description="文章描述")
  15. publish_time: Optional[int] = Field(None, description="文章发布时间")
  16. crawler_time: Optional[int] = Field(None, description="文章抓取时间")
  17. score: Optional[float] = Field(None, description="相关性分数")
  18. status: Optional[int] = Field(None, description="状态, 1表示正常,2表示已经进入待发布层, 0表示相关性低文章")
  19. channel_content_id: Optional[str] = Field(None, max_length=128, description="channel_content_id by pw")
  20. unique_index: Optional[str] = Field(None, max_length=64, description="通过公众号信息生成的唯一字符id")
  21. source_article_title: Optional[str] = Field(None, max_length=255, description="来源文章标题")
  22. source_account: Optional[str] = Field(None, max_length=64, description="来源账号")
  23. llm_sensitivity: Optional[int] = Field(None, description="大模型判断标题敏感性: 0-不敏感, 1-敏感")
  24. title_sensitivity: int = Field(0, description="匹配敏感词库: 0敏感, 1:不敏感")
  25. category_by_ai: Optional[str] = Field(None, max_length=32, description="通过大模型生成的品类")
  26. category_status: int = Field(0, description="0: 初始,1:处理中 2: 成功 99:失败")
  27. category_status_update_ts: Optional[int] = Field(None, description="品类状态修改时间戳")