source.schema.json 3.1 KB

Lines 1-106
  {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "title": "Source (Normalized)",
    "description": "标准化后的 source 数据格式,与 sota_research 的 result.json 对齐",
    "type": "object",
    "required": ["total-ref", "sources-boundary"],
    "properties": {
      "total-ref": {
        "type": "integer",
        "description": "总记录数",
        "minimum": 0
      },
      "cache_dir": {
        "type": "string",
        "description": "Cache 目录路径"
      },
      "sources-boundary": {
        "type": "array",
        "description": "标准化后的 source 记录列表",
        "items": {
          "type": "object",
          "required": ["index-ref", "title-ref", "body", "images-boundary", "url-ref", "_raw-boundary"],
          "properties": {
            "index-ref": {
              "type": "integer",
              "description": "记录序号",
              "minimum": 1
            },
            "category": {
              "type": "string",
              "description": "分类标签"
            },
            "user_kept-ref": {
              "type": "boolean",
              "description": "用户是否保留"
            },
            "user_comment": {
              "type": "string",
              "description": "用户备注"
            },
            "description": {
              "type": "string",
              "description": "描述"
            },
            "method": {
              "type": "string",
              "description": "方法"
            },
            "cover": {
              "type": "string",
              "description": "封面图 CDN URL"
            },
            "title-ref": {
              "type": "string",
              "description": "标题"
            },
            "author": {
              "type": "string",
              "description": "作者"
            },
            "body": {
              "type": "string",
              "description": "正文内容"
            },
            "images-boundary": {
              "type": "array",
              "description": "图片 CDN URL 列表",
              "items": {
                "type": "string"
              }
            },
            "url-ref": {
              "type": "string",
              "description": "原始帖子链接"
            },
            "note": {
              "type": "string",
              "description": "备注信息(platform, likes, comments)"
            },
            "_raw-boundary": {
              "type": "object",
              "description": "原始标识信息(稳定引用区)",
              "required": ["case_id-ref", "platform-ref", "channel_content_id-ref"],
              "properties": {
                "case_id-ref": {
                  "type": "string",
                  "description": "统一格式的 case ID: {platform}_{content_id}",
                  "pattern": "^[a-z]+_[A-Za-z0-9_-]+$"
                },
                "platform-ref": {
                  "type": "string",
                  "description": "平台标识",
                  "minLength": 1
                },
                "channel_content_id-ref": {
                  "type": "string",
                  "description": "平台内容 ID",
                  "minLength": 1
                }
              }
            }
          }
        }
      }
    }
  }