""" @author: luojunhui 任务常量配置文件 """ class ColdStartTaskConst: """ 冷启动任务常量配置 """ PUBLISHED_STATUS = 2 # 文章已发布状态 INIT_STATUS = 1 # 文章初始状态 BAD_STATUS = 0 # 低质量文章状态 # 常量 ACCOUNT_GOOD_STATUS = 1 # 账号是否每日抓取 ACCOUNT_DAILY_SCRAPE = 1 ACCOUNT_NOT_DAILY_SCRAPE = 0 # 默认值 DEFAULT_VIEW_COUNT = 0 DEFAULT_LIKE_COUNT = 0 DEFAULT_ARTICLE_STATUS = 1 DEFAULT_TIMESTAMP = 1717171200 # 标题sensitivity TITLE_SENSITIVE = 1 TITLE_NOT_SENSITIVE = 0 # 文章联想深度 ARTICLE_ASSOCIATION_MAX_DEPTH = 4 # 相关分百分位阈值 PERCENT_THRESHOLD = 95 # 相关性分阈值 CORRELATION_THRESHOLD = 0.5 # 阅读量阈值 READ_COUNT_THRESHOLD = 1000 # 阅读均值倍数阈值 READ_AVG_THRESHOLD = 1.3 # 群发类型 BULK_PUBLISH_TYPE = 9 # 种子文章数量 SEED_ARTICLE_LIMIT_NUM = 60 class updatePublishedMsgTaskConst: """ 更新已发布文章消息常量配置 """ # 爬虫详情接口返回code ARTICLE_ILLEGAL_CODE = 25012 ARTICLE_DELETE_CODE = 25005 ARTICLE_SUCCESS_CODE = 0 ARTICLE_UNKNOWN_CODE = 10000 # 请求爬虫详情接口状态码 # 记录默认状态 DEFAULT_STATUS = 0 # 请求接口失败状态 REQUEST_FAIL_STATUS = -1 # 文章被删除状态 DELETE_STATUS = -2 # 未知原因无信息返回状态 UNKNOWN_STATUS = -3 # 文章违规状态 ILLEGAL_STATUS = -4 # 公众号类型(订阅号 or 服务号) # 订阅号 SUBSCRIBE_TYPE_SET = {0, 1} # 服务号 SERVICE_TYPE = 2 # 监测周期(秒) MONITOR_PERIOD = 60 * 60 * 24 * 3 # 新号抓文章周期 NEW_ACCOUNT_CRAWL_PERIOD = 60 * 60 * 24 * 30 # 订阅号,抓取失败失败率报警阈值 SUBSCRIBE_FAIL_RATE_THRESHOLD = 0.3 class UpdateAccountReadRateTaskConst: """ 更新账号阅读率常量配置 """ # 阅读率统计周期(秒) STATISTICS_PERIOD = 31 * 24 * 60 * 60 # 一天的秒数 ONE_DAY_IN_SECONDS = 60 * 60 * 24 # 相对变化率阈值 RELATIVE_VALUE_THRESHOLD = 0.1 # 发文类型 UNLIMITED_PUBLISH_TYPE = 10002 BULK_PUBLISH_TYPE = 9 # 文章位置 ARTICLE_INDEX_LIST = [1, 2, 3, 4, 5, 6, 7, 8] # 默认粉丝 DEFAULT_FANS = 0 # 最低粉丝量 MIN_FANS = 1000 class UpdateAccountReadAvgTaskConst: """ 更新账号阅读均值常量配置 """ # 投流账号 TOULIU_ACCOUNTS = { "gh_93e00e187787", "gh_ac43e43b253b", "gh_68e7fdc09fe4", "gh_77f36c109fb1", "gh_b181786a6c8c", "gh_1ee2e1b39ccf", "gh_d3f039c9db2b", } # 发文模式 ARTICLES_DAILY = 1 TOULIU = 2 # 默认粉丝 DEFAULT_FANS = 0 # index list ARTICLE_INDEX_LIST = [1, 2, 3, 4, 5, 6, 7, 8] # 默认点赞 DEFAULT_LIKE = 0 # 状态 USING_STATUS = 1 NOT_USING_STATUS = 0 # 统计周期 STAT_PERIOD = 30 class WeixinVideoCrawlerConst: """ 微信视频抓取常量配置 """ # 账号抓取状态 ACCOUNT_CRAWL_STATUS = 1 ACCOUNT_DO_NOT_CRAWL_STATUS = 0 # 默认最早抓取时间戳(2024-01-01) DEFAULT_TIMESTAMP = 1704038400 # 搜索爬虫最大页数 MAX_SEARCH_PAGE_NUM = 10 # 抓取每一页的等待时间 SLEEP_SECONDS = 5 # 种子标题最低阅读均值倍数 READ_AVG_MULTIPLE = 1.3 # 种子标题最低阅读量 MIN_READ_COUNT = 2000 # 获取种子标题的统计周期 STAT_PERIOD = 7 * 24 * 60 * 60 # 接口请求成功code REQUEST_SUCCESS = 0 PUBLISHED_ILLEGAL_TITLE_CODE = 1015 # 是否需要扫描查询源账号 NEED_SCAN_SOURCE_ACCOUNT = 1 DO_NOT_NEED_SOURCE_ACCOUNT = 0 # 视频审核状态长文库 VIDEO_AUDIT_INIT_STATUS = 0 VIDEO_AUDIT_SUCCESS_STATUS = 1 VIDEO_AUDIT_FAIL_STATUS = 2 VIDEO_TITLE_GENERATE_FAIL_STATUS = 4 VIDEO_AUDIT_PROCESSING_STATUS = -1 # 票圈视频审核状态, 1 审核中,2 不通过 3 待修改,4 自己可见 5 通过 PQ_AUDIT_PROCESSING_STATUS = 1 PQ_AUDIT_FAIL_STATUS = 2 PQ_AUDIT_WAIT_STATUS = 3 PQ_AUDIT_SELF_VISIBLE_STATUS = 4 PQ_AUDIT_SUCCESS_STATUS = 5 # 默认账号 DEFAULT_ACCOUNT_UID = 76862180 # 每天发送的审核视频数量 MAX_VIDEO_NUM = 1000 # 单次发布视频审核量 MAX_VIDEO_NUM_PER_PUBLISH = 350 # 标题状态 TITLE_DEFAULT_STATUS = 0 TITLE_EXIT_STATUS = 1 TITLE_FESTIVAL_STATUS = 2 TITLE_SHORT_STATUS = 3 # 标题最短长度 TITLE_MIN_LENGTH = 15 # safe score TITLE_SAFE_SCORE_THRESHOLD = 7 class UpdateMiniProgramDetailConst(updatePublishedMsgTaskConst): """ 更新小程序详情常量配置 """ # 账号联想 class AccountAssociationTaskConst: """ 账号联想任务常量配置 """ # 获取种子标题的统计周期 STAT_PERIOD = 7 * 24 * 60 * 60 # 阅读均值阈值 READ_AVG_MULTIPLE = 1.3 # 最小阅读量 MIN_READ_COUNT = 2000 # 种子数量限制 SEED_TITLE_LIMIT = 100 # 从aigc获取文章 class ArticleCollectorConst: """ 文章采集任务常量配置 """ # 发送方式 # 手动推送 MANUAL_PUSH = 1 # 自动群发 BULK_AUTO_PUSH = 2 # 无限流推送 UNLIMITED_PUSH = 3 # 文章状态 # 初始状态 INIT_STATUS = 0 # 成功状态 SUCCESS_STATUS = 1 # 失败状态 FAIL_STATUS = -1 # 发布状态 PUBLISHED_STATUS = 2 # 爬虫接口 ARTICLE_ILLEGAL_CODE = 25012 ARTICLE_DELETE_CODE = 25005 ARTICLE_SUCCESS_CODE = 0 ARTICLE_UNKNOWN_CODE = 10000 class BaiduVideoCrawlerConst: """ const for baidu video crawler """ # account status BAIDU_ACCOUNT_GOOD_STATUS = 1 BAIDU_ACCOUNT_BAD_STATUS = 0 # earliest cursor, 2024-01-01 00:00:00 DEFAULT_CURSOR = 17040384000000 # no source account NO_SOURCE_ACCOUNT_STATUS = 0 # timestamp To Cursor TIMESTAMP_TO_CURSOR = 10000 # local path dir LOCAL_PATH_DIR = "static" class TitleRewriteTaskConst: """ title rewrite task const """ # title rewrite status TITLE_REWRITE_INIT_STATUS = 0 TITLE_REWRITE_SUCCESS_STATUS = 1 TITLE_REWRITE_FAIL_STATUS = 99 TITLE_REWRITE_LOCK_STATUS = 101 # article status ARTICLE_AUDIT_PASSED_STATUS = 1 ARTICLE_POSITIVE_STATUS = 0 # title useful status TITLE_USEFUL_STATUS = 1 # prompt version PROMPT_VERSION = "xx_250228" # 信欣2025-02-28提供 # block expire time 1h TITLE_REWRITE_LOCK_TIME = 60 * 60 class ChannelVideoCrawlerConst: """ const for baidu video crawler """ # account status CHANNEL_ACCOUNT_GOOD_STATUS = 1 CHANNEL_ACCOUNT_BAD_STATUS = 0 # earliest cursor, 2024-01-01 00:00:00 DEFAULT_CURSOR = 1704038400 # no source account NO_SOURCE_ACCOUNT_STATUS = 0 # local path dir LOCAL_PATH_DIR = "static" # title length min MIN_TITLE_LENGTH = 10 # max video length(second) MAX_VIDEO_LENGTH = 600 # sleep second SLEEP_SECOND = 2 class ToutiaoVideoCrawlerConst: """ const for toutiao video crawler """ # platform PLATFORM = "toutiao" # account status TOUTIAO_ACCOUNT_GOOD_STATUS = 1 TOUTIAO_ACCOUNT_BAD_STATUS = 0 # earliest cursor, 2021-01-01 00:00:00 DEFAULT_CURSOR = 1609430400 # no source account NO_SOURCE_ACCOUNT_STATUS = 0 # title length min MIN_TITLE_LENGTH = 10 # max video length(second) MAX_VIDEO_LENGTH = 600 # sleep second SLEEP_SECOND = 3 class SohuVideoCrawlerConst: """ const for sohu video crawler """ # platform PLATFORM = "sohu" # account status GET_RECOMMEND_INIT_STATUS = 0 GET_RECOMMEND_SUCCESS_STATUS = 1 GET_RECOMMEND_FAIL_STATUS = 99 # title length min MIN_TITLE_LENGTH = 10 # max video length(second) MAX_VIDEO_LENGTH = 600 # sleep second SLEEP_SECOND = 3 # 获取推荐的最低相关性分 GET_RECOMMEND_THRESHOLD_SCORE = 0.6 # 审核状态 AUDIT_SUCCESS_STATUS = 1 # 视频状态 VIDEO_NOT_BAD_STATUS = 0 # PAGE_LIST PAGE_LIST = [i for i in range(1, 8)] class SingleVideoPoolPublishTaskConst: """ const for single video pool publish task """ TRANSFORM_INIT_STATUS = 0 TRANSFORM_SUCCESS_STATUS = 1 TRANSFORM_FAIL_STATUS = 99 class GoogleVideoUnderstandTaskConst: # task batch size BATCH_SIZE = 100 # task status INIT_STATUS = 0 PROCESSING_STATUS = 1 SUCCESS_STATUS = 2 FAIL_STATUS = 99 # sleep seconds SLEEP_SECONDS = 60 # max processing time MAX_PROCESSING_TIME = 3600 # task info TABLE_NAME = "long_articles_new_video_cover" TASK_NAME = "extract_video_best_frame_as_cover" DIR_NAME = "static" POOL_SIZE = 15