123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364 |
- """
- @author: luojunhui
- 任务常量配置文件
- """
class ColdStartTaskConst:
    """
    Constants for the cold-start task.
    """

    # Article status values
    PUBLISHED_STATUS = 2  # article has been published
    INIT_STATUS = 1  # article is in its initial state
    BAD_STATUS = 0  # article is low quality

    # Account status
    ACCOUNT_GOOD_STATUS = 1

    # Whether an account is crawled daily
    ACCOUNT_DAILY_SCRAPE = 1
    ACCOUNT_NOT_DAILY_SCRAPE = 0

    # Default values
    DEFAULT_VIEW_COUNT = 0
    DEFAULT_LIKE_COUNT = 0
    DEFAULT_ARTICLE_STATUS = 1
    # Unix timestamp in seconds; equals 2024-06-01 00:00:00 at UTC+8
    DEFAULT_TIMESTAMP = 1717171200

    # Title sensitivity flags
    TITLE_SENSITIVE = 1
    TITLE_NOT_SENSITIVE = 0

    # Maximum depth of article association
    ARTICLE_ASSOCIATION_MAX_DEPTH = 4

    # Percentile threshold for the relevance score
    PERCENT_THRESHOLD = 95

    # Correlation score threshold
    CORRELATION_THRESHOLD = 0.5

    # Read-count threshold
    READ_COUNT_THRESHOLD = 1000

    # Threshold expressed as a multiple of the average read count
    READ_AVG_THRESHOLD = 1.3

    # Publish type: bulk (mass) send
    BULK_PUBLISH_TYPE = 9

    # Number of seed articles
    SEED_ARTICLE_LIMIT_NUM = 60
class updatePublishedMsgTaskConst:
    """
    Constants for the task that updates published-article messages.
    """

    # Codes returned by the crawler detail API
    ARTICLE_ILLEGAL_CODE = 25012
    ARTICLE_DELETE_CODE = 25005
    ARTICLE_SUCCESS_CODE = 0
    ARTICLE_UNKNOWN_CODE = 10000

    # Status values recorded when requesting the crawler detail API
    # Default status of a record
    DEFAULT_STATUS = 0
    # The API request failed
    REQUEST_FAIL_STATUS = -1
    # The article was deleted
    DELETE_STATUS = -2
    # No information returned, reason unknown
    UNKNOWN_STATUS = -3
    # The article violates the rules
    ILLEGAL_STATUS = -4

    # Official-account type (subscription account or service account)
    # Subscription account
    SUBSCRIBE_TYPE_SET = {0, 1}
    # Service account
    SERVICE_TYPE = 2

    # Monitoring period in seconds (3 days)
    MONITOR_PERIOD = 60 * 60 * 24 * 3

    # Article crawl period for new accounts, in seconds (30 days)
    NEW_ACCOUNT_CRAWL_PERIOD = 60 * 60 * 24 * 30

    # Subscription accounts: alert threshold for the crawl failure rate
    SUBSCRIBE_FAIL_RATE_THRESHOLD = 0.3
class updateAccountReadRateTaskConst:
    """
    Constants for the task that updates account read rates.
    """

    # Read-rate statistics period in seconds (31 days)
    STATISTICS_PERIOD = 31 * 24 * 60 * 60

    # Number of seconds in one day
    ONE_DAY_IN_SECONDS = 60 * 60 * 24

    # Threshold for the relative change rate
    RELATIVE_VALUE_THRESHOLD = 0.1

    # Publish types
    UNLIMITED_PUBLISH_TYPE = 10002
    BULK_PUBLISH_TYPE = 9

    # Article positions
    ARTICLE_INDEX_LIST = [1, 2, 3, 4, 5, 6, 7, 8]
class UpdateAccountReadAvgTaskConst:
    """
    Constants for the task that updates account average read counts.
    """

    # "Touliu" (paid-traffic) accounts
    TOULIU_ACCOUNTS = {
        'gh_93e00e187787',
        'gh_ac43e43b253b',
        'gh_68e7fdc09fe4',
        'gh_77f36c109fb1',
        'gh_b181786a6c8c',
        'gh_1ee2e1b39ccf',
        'gh_d3f039c9db2b'
    }

    # Publish modes
    ARTICLES_DAILY = 1
    TOULIU = 2

    # Default fan count
    DEFAULT_FANS = 0

    # Article index list
    ARTICLE_INDEX_LIST = [1, 2, 3, 4, 5, 6, 7, 8]

    # Default like count
    DEFAULT_LIKE = 0

    # Status flags
    USING_STATUS = 1
    NOT_USING_STATUS = 0
class WeixinVideoCrawlerConst:
    """
    Constants for the Weixin (WeChat) video crawler.
    """

    # Account crawl status
    ACCOUNT_CRAWL_STATUS = 1
    ACCOUNT_DO_NOT_CRAWL_STATUS = 0

    # Default earliest crawl timestamp in seconds
    # (2024-01-01 00:00:00 at UTC+8)
    DEFAULT_TIMESTAMP = 1704038400

    # Maximum number of pages for the search crawler
    MAX_SEARCH_PAGE_NUM = 10

    # Wait time, in seconds, between crawled pages
    SLEEP_SECONDS = 5

    # Seed titles: minimum multiple of the average read count
    READ_AVG_MULTIPLE = 1.3

    # Seed titles: minimum read count
    MIN_READ_COUNT = 2000

    # Statistics period for selecting seed titles, in seconds (7 days)
    STAT_PERIOD = 7 * 24 * 60 * 60

    # API response codes
    REQUEST_SUCCESS = 0
    PUBLISHED_ILLEGAL_TITLE_CODE = 1015

    # Whether the source account needs to be scanned / looked up
    NEED_SCAN_SOURCE_ACCOUNT = 1
    DO_NOT_NEED_SOURCE_ACCOUNT = 0

    # Video audit status (long-articles database)
    VIDEO_AUDIT_INIT_STATUS = 0
    VIDEO_AUDIT_SUCCESS_STATUS = 1
    VIDEO_AUDIT_FAIL_STATUS = 2
    VIDEO_TITLE_GENERATE_FAIL_STATUS = 4
    VIDEO_AUDIT_PROCESSING_STATUS = -1

    # Piaoquan (PQ) video audit status:
    # 1 in review, 2 rejected, 3 awaiting modification,
    # 4 visible to self only, 5 approved
    PQ_AUDIT_PROCESSING_STATUS = 1
    PQ_AUDIT_FAIL_STATUS = 2
    PQ_AUDIT_WAIT_STATUS = 3
    PQ_AUDIT_SELF_VISIBLE_STATUS = 4
    PQ_AUDIT_SUCCESS_STATUS = 5

    # Default account UID
    DEFAULT_ACCOUNT_UID = 76862180

    # Number of videos sent for audit per day
    MAX_VIDEO_NUM = 1000

    # Title status values
    TITLE_DEFAULT_STATUS = 0
    TITLE_EXIT_STATUS = 1
    TITLE_FESTIVAL_STATUS = 2
    TITLE_SHORT_STATUS = 3

    # Minimum title length
    TITLE_MIN_LENGTH = 15

    # Safe-score threshold for titles
    TITLE_SAFE_SCORE_THRESHOLD = 7
class UpdateMiniProgramDetailConst(updatePublishedMsgTaskConst):
    """
    Constants for the task that updates mini-program details.

    Declares no constants of its own; every value is inherited from
    updatePublishedMsgTaskConst.
    """
# Account association
class AccountAssociationTaskConst:
    """
    Constants for the account-association task.
    """

    # Statistics period for selecting seed titles, in seconds (7 days)
    STAT_PERIOD = 7 * 24 * 60 * 60

    # Threshold expressed as a multiple of the average read count
    READ_AVG_MULTIPLE = 1.3

    # Minimum read count
    MIN_READ_COUNT = 2000

    # Maximum number of seed titles
    SEED_TITLE_LIMIT = 100
# Fetch articles from AIGC
class ArticleCollectorConst:
    """
    Constants for the article-collection task.
    """

    # Push (send) methods
    # Manual push
    MANUAL_PUSH = 1
    # Automatic bulk send
    BULK_AUTO_PUSH = 2
    # Unlimited-flow push
    UNLIMITED_PUSH = 3

    # Article status values
    # Initial state
    INIT_STATUS = 0
    # Success
    SUCCESS_STATUS = 1
    # Failure
    FAIL_STATUS = -1
    # Published
    PUBLISHED_STATUS = 2

    # Codes returned by the crawler API
    ARTICLE_ILLEGAL_CODE = 25012
    ARTICLE_DELETE_CODE = 25005
    ARTICLE_SUCCESS_CODE = 0
    ARTICLE_UNKNOWN_CODE = 10000
class BaiduVideoCrawlerConst:
    """
    Constants for the Baidu video crawler.
    """

    # Account status
    BAIDU_ACCOUNT_GOOD_STATUS = 1
    BAIDU_ACCOUNT_BAD_STATUS = 0

    # Earliest cursor, 2024-01-01 00:00:00.
    # Equals the Unix timestamp 1704038400 multiplied by TIMESTAMP_TO_CURSOR.
    DEFAULT_CURSOR = 17040384000000

    # Status meaning "no source account"
    NO_SOURCE_ACCOUNT_STATUS = 0

    # Factor used to convert a timestamp into a cursor
    TIMESTAMP_TO_CURSOR = 10000

    # Local directory path
    LOCAL_PATH_DIR = "static"
class TitleRewriteTaskConst:
    """
    Constants for the title-rewrite task.
    """

    # Title rewrite status
    TITLE_REWRITE_INIT_STATUS = 0
    TITLE_REWRITE_SUCCESS_STATUS = 1
    TITLE_REWRITE_FAIL_STATUS = 99
    TITLE_REWRITE_LOCK_STATUS = 101

    # Article status
    ARTICLE_AUDIT_PASSED_STATUS = 1
    ARTICLE_POSITIVE_STATUS = 0

    # Title useful status
    TITLE_USEFUL_STATUS = 1

    # Prompt version
    PROMPT_VERSION = "xx_250228"  # provided by Xinxin on 2025-02-28

    # Lock expiry time in seconds (1 hour)
    TITLE_REWRITE_LOCK_TIME = 60 * 60
class ChannelVideoCrawlerConst:
    """
    Constants for the channel video crawler.
    """

    # Account status
    CHANNEL_ACCOUNT_GOOD_STATUS = 1
    CHANNEL_ACCOUNT_BAD_STATUS = 0

    # Earliest cursor, 2024-01-01 00:00:00 (Unix seconds, UTC+8)
    DEFAULT_CURSOR = 1704038400

    # Status meaning "no source account"
    NO_SOURCE_ACCOUNT_STATUS = 0

    # Local directory path
    LOCAL_PATH_DIR = "static"

    # Minimum title length
    MIN_TITLE_LENGTH = 10

    # Maximum video length in seconds
    MAX_VIDEO_LENGTH = 600

    # Sleep time in seconds
    SLEEP_SECOND = 2
class ToutiaoVideoCrawlerConst:
    """
    Constants for the Toutiao video crawler.
    """

    # Platform identifier
    PLATFORM = "toutiao"

    # Account status
    TOUTIAO_ACCOUNT_GOOD_STATUS = 1
    TOUTIAO_ACCOUNT_BAD_STATUS = 0

    # Earliest cursor, 2021-01-01 00:00:00 (Unix seconds, UTC+8)
    DEFAULT_CURSOR = 1609430400

    # Status meaning "no source account"
    NO_SOURCE_ACCOUNT_STATUS = 0

    # Minimum title length
    MIN_TITLE_LENGTH = 10

    # Maximum video length in seconds
    MAX_VIDEO_LENGTH = 600

    # Sleep time in seconds
    SLEEP_SECOND = 3
|