12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- """
- @author: luojunhui
- """
- import datetime
- from pydantic import BaseModel, Field
- from typing import Optional
class CrawlerMetaArticle(BaseModel):
    """Validated record describing the metadata of one crawled article.

    Fields declared with ``default=...`` are required. Fields whose default
    is ``None`` are annotated ``Optional[...]`` so that the declared type
    agrees with the default and an explicit ``None`` passes validation.

    The ``description=`` strings are runtime schema metadata and are kept
    verbatim; the ``#`` comments next to each field are English renderings
    of those strings.
    """

    # Platform the article was crawled from.
    platform: str = Field(default=..., description="抓取平台")
    # Crawl mode.
    mode: str = Field(default=..., description="抓取模式")
    # Crawl type. Per the original note: the initial design was poor and is
    # now hard to change; the value is actually unrelated to content category.
    category: str = Field(
        default=..., description="抓取类型:最初设计不合理,积重难返,实际与品类无关"
    )
    # Id of the account that was crawled.
    out_account_id: str = Field(default=..., description="抓取账号账号id")
    # Position of the article within a mass-sent push (common for WeChat
    # official accounts); None when not provided.
    article_index: Optional[int] = Field(
        default=None, description="群发发文位置,常见于微信公众号"
    )
    # Article title.
    title: str = Field(default=..., description="文章标题")
    # Article link.
    link: str = Field(default=..., description="文章链接")
    # Read count.
    read_cnt: int = Field(default=0, description="阅读量")
    # Like count.
    like_cnt: int = Field(default=0, description="点赞量")
    # Short summary of the article.
    description: Optional[str] = Field(default=None, description="文章简介")
    # Article publish time (integer; presumably a Unix timestamp - TODO confirm).
    publish_time: Optional[int] = Field(default=None, description="文章发布时间")
    # Crawl time (integer; presumably a Unix timestamp - TODO confirm).
    crawler_time: Optional[int] = Field(default=None, description="抓取时间")
    # Similarity score.
    score: Optional[float] = Field(default=None, description="相似度分")
    # Article status; the meaning of each value is not defined in this model.
    status: int = Field(default=1, description="文章状态")
    # Unique index of the article.
    unique_index: str = Field(default=..., description="文章唯一index")
    # Seed article this article was associated from.
    source_article_title: Optional[str] = Field(
        default=None, description="文章联想的种子文章"
    )
    # Seed account used for account association.
    source_account: Optional[str] = Field(
        default=None, description="账号联想种子账号"
    )
    # Whether the title is sensitive.
    title_sensitivity: int = Field(default=0, description="标题是否敏感")
    # Category processing status: 0 init, 1 processing, 2 success, 99 fail.
    category_status: int = Field(
        default=0,
        description="品类处理状态 0: init; 1: processing; 2: successfully; 99: fail",
    )
    # Embedded-video processing status: 0 init, 1 processing, 2 success,
    # 3 bad article link, 99 fail.
    has_video: int = Field(
        default=0,
        description="文章内嵌套视频状态 0: init; 1: processing; 2: successfully; 3:article link bad ;99: fail",
    )
    # Id of the task that created this record.
    trace_id: Optional[str] = Field(default=None, description="创建该条记录的任务ID")
class CrawlerMetaAccount(BaseModel):
    """Validated record describing the metadata of one crawled account.

    ``account_name`` and ``account_id`` are required, non-empty strings.
    Fields whose default is ``None`` are annotated ``Optional[...]`` so that
    the declared type agrees with the default and an explicit ``None``
    passes validation.

    The ``description=`` strings are runtime schema metadata and are kept
    verbatim; the ``#`` comments next to each field are English renderings
    of those strings.
    """

    # Account name (required, at least one character).
    account_name: str = Field(..., description="账号名称", min_length=1)
    # Account id (required, at least one character).
    account_id: str = Field(..., description="账号id", min_length=1)
    # Titles from the first page of the account's homepage, stored as a
    # string (serialization format not shown here - TODO confirm).
    title_list: Optional[str] = Field(
        default=None, description="账号主页第一页标题list"
    )
    # LLM-produced fit scores for the titles in title_list, stored as a string.
    score_list: Optional[str] = Field(
        default=None, description="账号主页第一页标题list契合得分(By LLM)"
    )
    # Average of score_list.
    avg_score: Optional[float] = Field(default=None, description="score_list 的平均分")
    # Analysis status: 0 init, 1 processing, 2 success, 99 fail.
    status: int = Field(
        default=0,
        description="分析状态,0: init, 1: processing, 2: successfully, 99: fail",
    )
    # External platform the account comes from.
    platform: Optional[str] = Field(
        default=None, description="账号来源于哪个外部平台"
    )
    # Date the account was crawled, formatted as "YYYY-MM-DD".
    crawler_date: Optional[datetime.date] = Field(
        default=None, description="账号抓取日期,格式为“YYYY-MM-DD”"
    )
    # Account status: 0 init, 1 processing, 2 success, 99 fail.
    using_status: int = Field(
        default=0,
        description="账号状态, 0: init, 1: processing, 2: successfully, 99: fail",
    )
    # Account category processing status: 0 init, 1 processing, 2 success,
    # 99 fail.
    category_status: int = Field(
        default=0,
        description="账号品类处理状态, 0: init, 1: processing, 2: successfully, 99: fail",
    )
    # Category of the account.
    category: Optional[str] = Field(default=None, description="账号的品类")
    # Crawl modality: 1 article, 2 video.
    media_type: int = Field(default=2, description="账号抓取模态 1: 文章 2:视频")
|