# models1.py
  1. from datetime import datetime
  2. from sqlalchemy import Column, BigInteger, String, Text, Integer, Float, DateTime, Boolean, Enum as SQLEnum, JSON, Index
  3. from sqlalchemy.dialects.mysql import LONGTEXT
  4. from sqlalchemy.ext.declarative import declarative_base
# Shared declarative base class; every ORM model class in this module
# inherits from it.
# NOTE(review): in SQLAlchemy 1.4+ `declarative_base` is exposed from
# `sqlalchemy.orm` and the `sqlalchemy.ext.declarative` import path is
# deprecated - confirm the pinned SQLAlchemy version before changing the import.
Base = declarative_base()
  6. class Post(Base):
  7. """帖子表 - 存储帖子基础信息"""
  8. __tablename__ = 'post'
  9. id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
  10. post_id = Column(String(100), nullable=False, unique=True, comment='帖子ID(平台原始ID)')
  11. title = Column(String(500), comment='帖子标题')
  12. body_text = Column(Text, comment='正文内容')
  13. merge_leve1 = Column(String(100), comment='一级品类')
  14. merge_leve2 = Column(String(100), comment='二级品类')
  15. gmt_create = Column(DateTime, comment='帖子创建时间')
  16. view_all = Column(Integer, default=0, comment='总阅读量(20230601后)')
  17. share_all = Column(Integer, default=0, comment='总分享量')
  18. return_all = Column(Integer, default=0, comment='总回流量')
  19. view_recent = Column(Integer, default=0, comment='近30天阅读量')
  20. share_recent = Column(Integer, default=0, comment='近30天分享量')
  21. return_recent = Column(Integer, default=0, comment='近30天回流量')
  22. like_count = Column(Integer, default=0, comment='点赞数')
  23. comment_count = Column(Integer, default=0, comment='评论数')
  24. collect_count = Column(Integer, default=0, comment='收藏数')
  25. images = Column(JSON, comment='图片/视频URL列表')
  26. platform = Column(String(50), nullable=False, comment='平台标识')
  27. platform_account_id = Column(String(100), comment='平台作者ID')
  28. platform_account_name = Column(String(200), comment='平台作者名称')
  29. publish_timestamp = Column(BigInteger, comment='发布时间戳(ms)')
  30. import_date = Column(DateTime, nullable=False, default=datetime.now, comment='导入日期')
  31. def __repr__(self):
  32. return f"<Post(id={self.id}, post_id={self.post_id}, title={self.title})>"
  33. class PostDecodeResult(Base):
  34. """帖子解构结果表 - 存储帖子的最终选题结果"""
  35. __tablename__ = 'post_decode_result'
  36. id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
  37. post_id = Column(String(100), nullable=False, comment='关联帖子ID')
  38. topic = Column(Text, comment='最终选题结果')
  39. topic_type = Column(String(255), comment='选题类型')
  40. core_attraction = Column(Text, comment='核心吸引力')
  41. main_purpose = Column(Text, comment='主要目的')
  42. confidence = Column(String(50), comment='置信度')
  43. import_date = Column(DateTime, nullable=False, default=datetime.now, comment='导入日期')
  44. __table_args__ = (
  45. Index('idx_post_decode_result_post_id', 'post_id'),
  46. )
  47. def __repr__(self):
  48. return f"<PostDecodeResult(id={self.id}, post_id={self.post_id})>"
  49. class PostDecodeTopicPoint(Base):
  50. """选题点表 - 存储解构结果中的选题点"""
  51. __tablename__ = 'post_decode_topic_point'
  52. id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
  53. post_decode_result_id = Column(BigInteger, nullable=False, comment='关联解构结果ID')
  54. post_id = Column(String(100), nullable=False, comment='关联帖子ID')
  55. topic_point_type = Column(SQLEnum('灵感点', '目的点', '关键点', name='topic_point_type_enum'),
  56. nullable=False, comment='选题点类型:灵感点/目的点/关键点')
  57. topic_point_result = Column(String(500), nullable=False, comment='选题点')
  58. topic_point_description = Column(Text, comment='选题点描述信息')
  59. __table_args__ = (
  60. Index('idx_topic_point_post_decode_result_id', 'post_decode_result_id'),
  61. Index('idx_topic_point_post_id', 'post_id'),
  62. )
  63. def __repr__(self):
  64. return f"<PostDecodeTopicPoint(id={self.id}, type={self.topic_point_type}, result={self.topic_point_result})>"
  65. class PostDecodeTopicPointElement(Base):
  66. """选题点元素表 - 存储解构结果中选题点的分类结果"""
  67. __tablename__ = 'post_decode_topic_point_element'
  68. id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
  69. post_decode_result_id = Column(BigInteger, nullable=False, comment='关联解构结果ID')
  70. post_id = Column(String(100), nullable=False, comment='关联帖子ID')
  71. topic_point_id = Column(BigInteger, nullable=False, comment='关联选题点ID')
  72. element_type = Column(SQLEnum('实质', '形式', '意图', name='element_type_enum'),
  73. nullable=False, comment='元素类型:实质/形式/意图')
  74. element_sub_type = Column(String(100), comment='元素子类型(如:具体元素/具象概念/抽象概念/整体形式等)')
  75. element_name = Column(String(500), nullable=False, comment='元素名称')
  76. element_description = Column(Text, comment='元素说明')
  77. element_source = Column(Text, comment='元素来源')
  78. element_reason = Column(Text, comment='分类理由')
  79. __table_args__ = (
  80. Index('idx_topic_point_element_topic_point_id', 'topic_point_id'),
  81. Index('idx_topic_point_element_post_id', 'post_id'),
  82. Index('idx_topic_point_element_type', 'element_type'),
  83. )
  84. def __repr__(self):
  85. return f"<PostDecodeTopicPointElement(id={self.id}, type={self.element_type}, name={self.element_name})>"
# ============================================================================
# Global classification library V2 - time-versioned models
# ============================================================================
  89. class ClassifyExecution(Base):
  90. """分类执行记录"""
  91. __tablename__ = 'classify_execution'
  92. id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
  93. execution_type = Column(String(50), nullable=False, default='classify',
  94. comment='执行类型: classify/optimize/rollback')
  95. source_type = Column(String(50), comment='元素类型: 实质/形式/意图')
  96. based_execution_id = Column(BigInteger, default=0, comment='基于哪次执行(执行链),0=初始')
  97. status = Column(String(50), nullable=False, default='running',
  98. comment='状态: running/success/failed/rolled_back')
  99. batch_info = Column(JSON, comment='批次信息: {"batch_id": "...", "element_count": N, "unique_count": M}')
  100. model_name = Column(String(100), comment='模型名称')
  101. trigger_context = Column(Text, comment='触发上下文')
  102. execution_summary = Column(Text, comment='Agent执行总结')
  103. input_tokens = Column(Integer, default=0, comment='输入Token数')
  104. output_tokens = Column(Integer, default=0, comment='输出Token数')
  105. cost_usd = Column(Float, default=0.0, comment='费用(USD)')
  106. start_time = Column(DateTime, nullable=False, default=datetime.now, comment='开始时间')
  107. end_time = Column(DateTime, comment='结束时间')
  108. error_message = Column(Text, comment='错误信息')
  109. __table_args__ = (
  110. Index('idx_classify_exec_source_type', 'source_type'),
  111. Index('idx_classify_exec_status', 'status'),
  112. Index('idx_classify_exec_based_id', 'based_execution_id'),
  113. )
  114. def __repr__(self):
  115. return f"<ClassifyExecution(id={self.id}, type={self.execution_type}, status={self.status})>"
  116. class GlobalCategory(Base):
  117. """全局分类库 - 时间版本标记"""
  118. __tablename__ = 'global_category'
  119. id = Column(BigInteger, primary_key=True, autoincrement=True, comment='行级主键,每次修改产生新行')
  120. stable_id = Column(BigInteger, nullable=False, comment='逻辑稳定ID,同一分类跨版本保持不变')
  121. name = Column(String(500), nullable=False, comment='分类名称')
  122. description = Column(Text, comment='分类说明')
  123. parent_stable_id = Column(BigInteger, comment='父分类的stable_id,NULL=根节点')
  124. source_type = Column(String(50), nullable=False, comment='元素类型: 实质/形式/意图')
  125. category_nature = Column(String(50), comment='分类性质: 内容/维度/领域')
  126. level = Column(Integer, default=1, comment='层级深度')
  127. path = Column(String(1000), comment='完整路径,如 /主体/角色类型/人物角色')
  128. # 版本生命周期
  129. created_at_execution_id = Column(BigInteger, nullable=False,
  130. comment='创建此行的执行ID')
  131. retired_at_execution_id = Column(BigInteger, comment='废弃此行的执行ID,NULL=当前有效')
  132. # 元数据
  133. create_reason = Column(Text, comment='创建/修改原因')
  134. __table_args__ = (
  135. Index('idx_gc_stable_id', 'stable_id'),
  136. Index('idx_gc_parent_stable_id', 'parent_stable_id'),
  137. Index('idx_gc_source_type', 'source_type'),
  138. Index('idx_gc_retired', 'retired_at_execution_id'),
  139. Index('idx_gc_created_retired', 'created_at_execution_id', 'retired_at_execution_id'),
  140. )
  141. def __repr__(self):
  142. return f"<GlobalCategory(id={self.id}, stable_id={self.stable_id}, name={self.name})>"
  143. class GlobalElement(Base):
  144. """全局库中的标准化元素 - 时间版本标记"""
  145. __tablename__ = 'global_element'
  146. id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
  147. name = Column(String(500), nullable=False, comment='标准化元素名称')
  148. description = Column(Text, comment='元素描述')
  149. belong_category_stable_id = Column(BigInteger, nullable=False,
  150. comment='所属分类的stable_id')
  151. source_type = Column(String(50), nullable=False, comment='元素类型: 实质/形式/意图')
  152. element_sub_type = Column(String(100), comment='具体元素/具象概念/抽象概念等')
  153. occurrence_count = Column(Integer, default=1, comment='出现次数(跨帖子去重计数)')
  154. # 版本生命周期
  155. created_at_execution_id = Column(BigInteger, nullable=False, comment='创建此行的执行ID')
  156. retired_at_execution_id = Column(BigInteger, comment='废弃此行的执行ID,NULL=当前有效')
  157. __table_args__ = (
  158. Index('idx_ge_category_stable_id', 'belong_category_stable_id'),
  159. Index('idx_ge_source_type', 'source_type'),
  160. Index('idx_ge_retired', 'retired_at_execution_id'),
  161. Index('idx_ge_name', 'name'),
  162. )
  163. def __repr__(self):
  164. return f"<GlobalElement(id={self.id}, name={self.name})>"
  165. class ElementClassificationMapping(Base):
  166. """PostDecodeTopicPointElement → GlobalCategory/GlobalElement 映射"""
  167. __tablename__ = 'element_classification_mapping'
  168. id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
  169. post_decode_topic_point_element_id = Column(BigInteger, nullable=False,
  170. comment='FK → PostDecodeTopicPointElement.id')
  171. post_id = Column(String(100), comment='冗余帖子ID,便于查询')
  172. element_name = Column(String(500), comment='原始元素名称')
  173. element_type = Column(String(50), comment='实质/形式/意图')
  174. element_sub_type = Column(String(100), comment='具体子类型')
  175. global_element_id = Column(BigInteger, comment='→ GlobalElement.id')
  176. global_category_stable_id = Column(BigInteger, comment='→ GlobalCategory.stable_id')
  177. classification_path = Column(String(1000), comment='分类路径,如 /主体/角色类型/人物角色')
  178. classify_execution_id = Column(BigInteger, nullable=False, comment='哪次执行分类的')
  179. created_at = Column(DateTime, nullable=False, default=datetime.now, comment='创建时间')
  180. __table_args__ = (
  181. Index('idx_ecm_element_id', 'post_decode_topic_point_element_id'),
  182. Index('idx_ecm_post_id', 'post_id'),
  183. Index('idx_ecm_global_element_id', 'global_element_id'),
  184. Index('idx_ecm_category_stable_id', 'global_category_stable_id'),
  185. Index('idx_ecm_execution_id', 'classify_execution_id'),
  186. Index('idx_ecm_element_type', 'element_type'),
  187. )
  188. def __repr__(self):
  189. return f"<ElementClassificationMapping(id={self.id}, element={self.element_name})>"
  190. class ClassifyExecutionLog(Base):
  191. """分类执行日志表 - 存储完整的执行日志内容"""
  192. __tablename__ = 'classify_execution_log'
  193. id = Column(BigInteger, primary_key=True, autoincrement=True)
  194. classify_execution_id = Column(BigInteger, nullable=False, unique=True, comment='关联的分类执行ID')
  195. log_content = Column(LONGTEXT, nullable=False, comment='完整的执行日志内容')
  196. log_type = Column(String(50), nullable=False, default='classify', comment='日志类型:classify/optimize')
  197. created_at = Column(DateTime, nullable=False, default=datetime.now, comment='日志保存时间')
  198. def __repr__(self):
  199. return f"<ClassifyExecutionLog(id={self.id}, classify_execution_id={self.classify_execution_id}, log_type={self.log_type})>"
  200. class ClassifyBatch(Base):
  201. """批次追踪"""
  202. __tablename__ = 'classify_batch'
  203. id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
  204. batch_name = Column(String(200), comment='批次名称,如 实质_batch_001')
  205. source_type = Column(String(50), nullable=False, comment='实质/形式/意图')
  206. total_element_count = Column(Integer, default=0, comment='总元素数')
  207. unique_element_count = Column(Integer, default=0, comment='去重后元素数')
  208. status = Column(String(50), nullable=False, default='pending',
  209. comment='状态: pending/running/success/failed')
  210. classify_execution_id = Column(BigInteger, comment='关联执行记录ID')
  211. created_at = Column(DateTime, nullable=False, default=datetime.now, comment='创建时间')
  212. completed_at = Column(DateTime, comment='完成时间')
  213. __table_args__ = (
  214. Index('idx_cb_source_type', 'source_type'),
  215. Index('idx_cb_status', 'status'),
  216. Index('idx_cb_execution_id', 'classify_execution_id'),
  217. )
  218. def __repr__(self):
  219. return f"<ClassifyBatch(id={self.id}, name={self.batch_name}, status={self.status})>"
  220. class PostClassificationStatus(Base):
  221. """帖子分类完成状态追踪表"""
  222. __tablename__ = 'post_classification_status'
  223. id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
  224. post_id = Column(String(100), nullable=False, comment='帖子ID')
  225. source_type = Column(String(50), nullable=False, comment='元素类型: 实质/形式/意图')
  226. total_elements = Column(Integer, nullable=False, default=0, comment='该帖子该类型的元素总数')
  227. classified_elements = Column(Integer, nullable=False, default=0, comment='已分类的元素数')
  228. is_completed = Column(Boolean, nullable=False, default=False, comment='是否全部分类完成')
  229. last_updated_execution_id = Column(BigInteger, default=0, comment='最近更新的执行ID')
  230. updated_at = Column(DateTime, nullable=False, default=datetime.now, onupdate=datetime.now, comment='最后更新时间')
  231. __table_args__ = (
  232. Index('uq_pcs_post_source', 'post_id', 'source_type', unique=True),
  233. Index('idx_pcs_source_completed', 'source_type', 'is_completed'),
  234. Index('idx_pcs_completed', 'is_completed'),
  235. )
  236. def __repr__(self):
  237. return f"<PostClassificationStatus(id={self.id}, post_id={self.post_id}, source_type={self.source_type}, completed={self.is_completed})>"