__init__.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
"""
@author: luojunhui
Task constants configuration file.
"""
  5. class ColdStartTaskConst:
  6. """
  7. 冷启动任务常量配置
  8. """
  9. PUBLISHED_STATUS = 2 # 文章已发布状态
  10. INIT_STATUS = 1 # 文章初始状态
  11. BAD_STATUS = 0 # 低质量文章状态
  12. # 常量
  13. ACCOUNT_GOOD_STATUS = 1
  14. # 账号是否每日抓取
  15. ACCOUNT_DAILY_SCRAPE = 1
  16. ACCOUNT_NOT_DAILY_SCRAPE = 0
  17. # 默认值
  18. DEFAULT_VIEW_COUNT = 0
  19. DEFAULT_LIKE_COUNT = 0
  20. DEFAULT_ARTICLE_STATUS = 1
  21. DEFAULT_TIMESTAMP = 1717171200
  22. # 标题sensitivity
  23. TITLE_SENSITIVE = 1
  24. TITLE_NOT_SENSITIVE = 0
  25. # 文章联想深度
  26. ARTICLE_ASSOCIATION_MAX_DEPTH = 4
  27. # 相关分百分位阈值
  28. PERCENT_THRESHOLD = 95
  29. # 相关性分阈值
  30. CORRELATION_THRESHOLD = 0.5
  31. class updatePublishedMsgTaskConst:
  32. """
  33. 更新已发布文章消息常量配置
  34. """
  35. # 爬虫详情接口返回code
  36. ARTICLE_ILLEGAL_CODE = 25012
  37. ARTICLE_DELETE_CODE = 25005
  38. ARTICLE_SUCCESS_CODE = 0
  39. ARTICLE_UNKNOWN_CODE = 10000
  40. # 请求爬虫详情接口状态码
  41. # 记录默认状态
  42. DEFAULT_STATUS = 0
  43. # 请求接口失败状态
  44. REQUEST_FAIL_STATUS = -1
  45. # 文章被删除状态
  46. DELETE_STATUS = -2
  47. # 未知原因无信息返回状态
  48. UNKNOWN_STATUS = -3
  49. # 文章违规状态
  50. ILLEGAL_STATUS = -4
  51. # 公众号类型(订阅号 or 服务号)
  52. # 订阅号
  53. SUBSCRIBE_TYPE_SET = {0, 1}
  54. # 服务号
  55. SERVICE_TYPE = 2
  56. # 监测周期(秒)
  57. MONITOR_PERIOD = 60 * 60 * 24 * 3
  58. # 新号抓文章周期
  59. NEW_ACCOUNT_CRAWL_PERIOD = 60 * 60 * 24 * 30
  60. # 订阅号,抓取失败失败率报警阈值
  61. SUBSCRIBE_FAIL_RATE_THRESHOLD = 0.3
  62. class updateAccountReadRateTaskConst:
  63. """
  64. 更新账号阅读率常量配置
  65. """
  66. # 阅读率统计周期(秒)
  67. STATISTICS_PERIOD = 31 * 24 * 60 * 60
  68. # 一天的秒数
  69. ONE_DAY_IN_SECONDS = 60 * 60 * 24
  70. # 相对变化率阈值
  71. RELATIVE_VALUE_THRESHOLD = 0.1
  72. # 发文类型
  73. UNLIMITED_PUBLISH_TYPE = 10002
  74. BULK_PUBLISH_TYPE = 9
  75. # 文章位置
  76. ARTICLE_INDEX_LIST = [1, 2, 3, 4, 5, 6, 7, 8]
  77. class updateAccountReadAvgTaskConst:
  78. """
  79. 更新账号阅读均值常量配置
  80. """
  81. # 投流账号
  82. TOULIU_ACCOUNTS = {
  83. 'gh_93e00e187787',
  84. 'gh_ac43e43b253b',
  85. 'gh_68e7fdc09fe4',
  86. 'gh_77f36c109fb1',
  87. 'gh_b181786a6c8c',
  88. 'gh_1ee2e1b39ccf',
  89. 'gh_d3f039c9db2b'
  90. }
  91. # 发文模式
  92. ARTICLES_DAILY = 1
  93. TOULIU = 2
  94. class WeixinVideoCrawlerConst:
  95. """
  96. 微信视频抓取常量配置
  97. """
  98. # 账号抓取状态
  99. ACCOUNT_CRAWL_STATUS = 1
  100. ACCOUNT_DO_NOT_CRAWL_STATUS = 0
  101. # 默认最早抓取时间戳(2024-01-01)
  102. DEFAULT_TIMESTAMP = 1704038400
  103. # 搜索爬虫最大页数
  104. MAX_SEARCH_PAGE_NUM = 10
  105. # 抓取每一页的等待时间
  106. SLEEP_SECONDS = 5
  107. # 种子标题最低阅读均值倍数
  108. READ_AVG_MULTIPLE = 1.3
  109. # 种子标题最低阅读量
  110. MIN_READ_COUNT = 2000
  111. # 获取种子标题的统计周期
  112. STAT_PERIOD = 7 * 24 * 60 * 60
  113. # 接口请求成功code
  114. REQUEST_SUCCESS = 0
  115. PUBLISHED_ILLEGAL_TITLE_CODE = 1015
  116. # 是否需要扫描查询源账号
  117. NEED_SCAN_SOURCE_ACCOUNT = 1
  118. DO_NOT_NEED_SOURCE_ACCOUNT = 0
  119. # 视频审核状态长文库
  120. VIDEO_AUDIT_INIT_STATUS = 0
  121. VIDEO_AUDIT_SUCCESS_STATUS = 1
  122. VIDEO_AUDIT_FAIL_STATUS = 2
  123. VIDEO_TITLE_GENERATE_FAIL_STATUS = 4
  124. VIDEO_AUDIT_PROCESSING_STATUS = -1
  125. # 票圈视频审核状态, 1 审核中,2 不通过 3 待修改,4 自己可见 5 通过
  126. PQ_AUDIT_PROCESSING_STATUS = 1
  127. PQ_AUDIT_FAIL_STATUS = 2
  128. PQ_AUDIT_WAIT_STATUS = 3
  129. PQ_AUDIT_SELF_VISIBLE_STATUS = 4
  130. PQ_AUDIT_SUCCESS_STATUS = 5
  131. # 默认账号
  132. DEFAULT_ACCOUNT_UID = 76862180
  133. # 每天发送的审核视频数量
  134. MAX_VIDEO_NUM = 500
  135. # 标题状态
  136. TITLE_DEFAULT_STATUS = 0
  137. TITLE_EXIT_STATUS = 1
  138. TITLE_FESTIVAL_STATUS = 2
  139. TITLE_SHORT_STATUS = 3
  140. # 标题最短长度
  141. TITLE_MIN_LENGTH = 15
  142. # safe score
  143. TITLE_SAFE_SCORE_THRESHOLD = 7
  144. class UpdateMiniProgramDetailConst(updatePublishedMsgTaskConst):
  145. """
  146. 更新小程序详情常量配置
  147. """
  148. # 账号联想
  149. class AccountAssociationTaskConst:
  150. """
  151. 账号联想任务常量配置
  152. """
  153. # 获取种子标题的统计周期
  154. STAT_PERIOD = 7 * 24 * 60 * 60
  155. # 阅读均值阈值
  156. READ_AVG_MULTIPLE = 1.3
  157. # 最小阅读量
  158. MIN_READ_COUNT = 2000
  159. # 种子数量限制
  160. SEED_TITLE_LIMIT = 100
  161. # 从aigc获取文章
  162. class ArticleCollectorConst:
  163. """
  164. 文章采集任务常量配置
  165. """
  166. # 发送方式
  167. # 手动推送
  168. MANUAL_PUSH = 1
  169. # 自动群发
  170. BULK_AUTO_PUSH = 2
  171. # 无限流推送
  172. UNLIMITED_PUSH = 3
  173. # 文章状态
  174. # 初始状态
  175. INIT_STATUS = 0
  176. # 成功状态
  177. SUCCESS_STATUS = 1
  178. # 失败状态
  179. FAIL_STATUS = -1
  180. # 发布状态
  181. PUBLISHED_STATUS = 2
  182. # 爬虫接口
  183. ARTICLE_ILLEGAL_CODE = 25012
  184. ARTICLE_DELETE_CODE = 25005
  185. ARTICLE_SUCCESS_CODE = 0
  186. ARTICLE_UNKNOWN_CODE = 10000
  187. class BaiduVideoCrawlerConst:
  188. """
  189. const for baidu video crawler
  190. """
  191. # account status
  192. BAIDU_ACCOUNT_GOOD_STATUS = 1
  193. BAIDU_ACCOUNT_BAD_STATUS = 0
  194. # earliest cursor, 2024-01-01 00:00:00
  195. DEFAULT_CURSOR = 17040384000000
  196. # no source account
  197. NO_SOURCE_ACCOUNT_STATUS = 0
  198. # timestamp To Cursor
  199. TIMESTAMP_TO_CURSOR = 10000
  200. # local path dir
  201. LOCAL_PATH_DIR = "static"