1234567891011121314151617181920212223242526272829 |
from typing import Optional

from pydantic import BaseModel, Field


class CrawlerMetaArticle(BaseModel):
    """Pydantic schema for the metadata of one crawled article.

    Apart from ``title_sensitivity`` and ``category_status`` (which default
    to ``0``), every field is nullable.  The ``description`` strings are
    runtime data (they appear in the generated schema) and are kept verbatim;
    the ``#`` comments give an English reading of each one.
    """

    # NOTE(review): unlike every other Optional field, no default is given
    # here.  Depending on the pydantic major version this may mean
    # "required but nullable" rather than "defaults to None" -- confirm
    # which one is intended.
    article_id: Optional[int] = Field(description="unique id")

    # --- Crawl origin ---
    # Platform the article was crawled from.
    platform: Optional[str] = Field(None, max_length=64, description="抓取平台")
    # Crawl mode.
    mode: Optional[str] = Field(None, max_length=16, description="抓取模式")
    # Category / search type.
    category: Optional[str] = Field(None, max_length=32, description="品类,搜索类型")
    # Account id on the external site.
    out_account_id: Optional[str] = Field(
        None, max_length=256, description="外站账号id"
    )
    # Position of the article.
    article_index: Optional[int] = Field(None, description="文章位置")

    # --- Article content and statistics ---
    # Article title.
    title: Optional[str] = Field(None, max_length=128, description="文章标题")
    # Article link.
    link: Optional[str] = Field(None, description="文章链接")
    # Read count.
    read_cnt: Optional[int] = Field(None, description="阅读量")
    # Like count.
    like_cnt: Optional[int] = Field(None, description="点赞量")
    # Article description.
    description: Optional[str] = Field(None, description="文章描述")
    # Publish time, stored as an int (unit not stated here -- TODO confirm).
    publish_time: Optional[int] = Field(None, description="文章发布时间")
    # Crawl time, stored as an int (unit not stated here -- TODO confirm).
    crawler_time: Optional[int] = Field(None, description="文章抓取时间")

    # --- Relevance and processing state ---
    # Relevance score.
    score: Optional[float] = Field(None, description="相关性分数")
    # 1 = normal, 2 = already moved to the pending-publish layer,
    # 0 = low-relevance article.
    status: Optional[int] = Field(
        None,
        description="状态, 1表示正常,2表示已经进入待发布层, 0表示相关性低文章",
    )
    channel_content_id: Optional[str] = Field(
        None, max_length=128, description="channel_content_id by pw"
    )
    # Unique string id generated from the official-account information.
    unique_index: Optional[str] = Field(
        None, max_length=64, description="通过公众号信息生成的唯一字符id"
    )

    # --- Source article ---
    # Title of the source article.
    source_article_title: Optional[str] = Field(
        None, max_length=255, description="来源文章标题"
    )
    # Source account.
    source_account: Optional[str] = Field(
        None, max_length=64, description="来源账号"
    )

    # --- Sensitivity checks ---
    # Title sensitivity as judged by an LLM: 0 = not sensitive, 1 = sensitive.
    llm_sensitivity: Optional[int] = Field(
        None, description="大模型判断标题敏感性: 0-不敏感, 1-敏感"
    )
    # Sensitive-word lexicon match.  The description states 0 = sensitive,
    # 1 = not sensitive.
    # NOTE(review): that is the opposite polarity to ``llm_sensitivity`` and
    # the default is 0 -- confirm the description is not inverted.
    title_sensitivity: int = Field(0, description="匹配敏感词库: 0敏感, 1:不敏感")

    # --- AI categorisation ---
    # Category generated by an LLM.
    category_by_ai: Optional[str] = Field(
        None, max_length=32, description="通过大模型生成的品类"
    )
    # 0 = initial, 1 = processing, 2 = success, 99 = failed.
    category_status: int = Field(
        0, description="0: 初始,1:处理中 2: 成功 99:失败"
    )
    # Timestamp of the last category-status change.
    category_status_update_ts: Optional[int] = Field(
        None, description="品类状态修改时间戳"
    )
|