Просмотр исходного кода

Merge branch 'feature/luojunhui/20260414-filter-article-improve' of Server/LongArticleTaskServer into master

luojunhui 3 недель назад
Родитель
Commit
0707f979be

+ 295 - 105
README.md

@@ -29,12 +29,28 @@ docker compose up -d
 │   │   │   └── rate_limiter.py
 │   │   │   └── rate_limiter.py
 │   │   ├── service
 │   │   ├── service
 │   │   │   ├── __init__.py
 │   │   │   ├── __init__.py
+│   │   │   ├── daily_rank_manager.py
 │   │   │   ├── gzh_cookie_manager.py
 │   │   │   ├── gzh_cookie_manager.py
 │   │   │   ├── task_manager_service.py
 │   │   │   ├── task_manager_service.py
 │   │   │   └── task_scheduler.py
 │   │   │   └── task_scheduler.py
 │   │   └── v1
 │   │   └── v1
-│   │       ├── __init__.py
-│   │       └── routes.py
+│   │       ├── endpoints
+│   │       │   ├── __init__.py
+│   │       │   ├── abtest.py
+│   │       │   ├── health.py
+│   │       │   ├── mcp.py
+│   │       │   ├── monitor.py
+│   │       │   ├── rank_log.py
+│   │       │   ├── tasks.py
+│   │       │   └── tokens.py
+│   │       ├── routes
+│   │       │   ├── __init__.py
+│   │       │   └── routes.py
+│   │       └── utils
+│   │           ├── __init__.py
+│   │           ├── _utils.py
+│   │           ├── deps.py
+│   │           └── schemas.py
 │   ├── core
 │   ├── core
 │   │   ├── bootstrap
 │   │   ├── bootstrap
 │   │   │   ├── __init__.py
 │   │   │   ├── __init__.py
@@ -53,6 +69,7 @@ docker compose up -d
 │   │   │       ├── deepseek.py
 │   │   │       ├── deepseek.py
 │   │   │       ├── elasticsearch.py
 │   │   │       ├── elasticsearch.py
 │   │   │       ├── mysql.py
 │   │   │       ├── mysql.py
+│   │   │       ├── read_rate_limited.py
 │   │   │       └── task_chinese_name.py
 │   │   │       └── task_chinese_name.py
 │   │   ├── database
 │   │   ├── database
 │   │   │   ├── __init__.py
 │   │   │   ├── __init__.py
@@ -79,9 +96,20 @@ docker compose up -d
 │   │   ├── analysis_task
 │   │   ├── analysis_task
 │   │   │   ├── __init__.py
 │   │   │   ├── __init__.py
 │   │   │   ├── account_position_info.py
 │   │   │   ├── account_position_info.py
-│   │   │   └── crawler_detail.py
+│   │   │   ├── crawler_detail.py
+│   │   │   └── rate_limited_article_filter
+│   │   │       ├── __init__.py
+│   │   │       ├── _mapper.py
+│   │   │       ├── _utils.py
+│   │   │       └── entrance.py
 │   │   ├── cold_start_tasks
 │   │   ├── cold_start_tasks
 │   │   │   ├── __init__.py
 │   │   │   ├── __init__.py
+│   │   │   ├── ad_platform_articles
+│   │   │   │   ├── __init__.py
+│   │   │   │   ├── _const.py
+│   │   │   │   ├── _mapper.py
+│   │   │   │   ├── _utils.py
+│   │   │   │   └── entrance.py
 │   │   │   ├── article_pool
 │   │   │   ├── article_pool
 │   │   │   │   ├── __init__.py
 │   │   │   │   ├── __init__.py
 │   │   │   │   ├── article_pool_cold_start_const.py
 │   │   │   │   ├── article_pool_cold_start_const.py
@@ -104,21 +132,78 @@ docker compose up -d
 │   │   │   ├── article_detail_stat.py
 │   │   │   ├── article_detail_stat.py
 │   │   │   ├── recycle_daily_publish_articles.py
 │   │   │   ├── recycle_daily_publish_articles.py
 │   │   │   ├── recycle_mini_program_detail.py
 │   │   │   ├── recycle_mini_program_detail.py
+│   │   │   ├── recycle_mini_program_info
+│   │   │   │   ├── __init__.py
+│   │   │   │   ├── _const.py
+│   │   │   │   ├── _mapper.py
+│   │   │   │   ├── _util.py
+│   │   │   │   └── entrance.py
 │   │   │   └── recycle_outside_account_articles.py
 │   │   │   └── recycle_outside_account_articles.py
 │   │   ├── llm_tasks
 │   │   ├── llm_tasks
 │   │   │   ├── __init__.py
 │   │   │   ├── __init__.py
+│   │   │   ├── aigc_decode_task
+│   │   │   │   ├── __init__.py
+│   │   │   │   ├── _const.py
+│   │   │   │   ├── _mapper.py
+│   │   │   │   ├── _utils.py
+│   │   │   │   ├── create_decode_tasks.py
+│   │   │   │   ├── extract_decode_task_detail.py
+│   │   │   │   └── fetch_decode_results.py
 │   │   │   ├── candidate_account_process.py
 │   │   │   ├── candidate_account_process.py
 │   │   │   ├── process_title.py
 │   │   │   ├── process_title.py
 │   │   │   └── prompts.py
 │   │   │   └── prompts.py
-│   │   └── monitor_tasks
+│   │   ├── mcp
+│   │   │   ├── __init__.py
+│   │   │   ├── _const.py
+│   │   │   ├── _handler_map.py
+│   │   │   └── _mapper.py
+│   │   ├── monitor_tasks
+│   │   │   ├── __init__.py
+│   │   │   ├── ad_platform_accounts_monitor
+│   │   │   │   ├── __init__.py
+│   │   │   │   ├── _const.py
+│   │   │   │   ├── _mapper.py
+│   │   │   │   ├── _utils.py
+│   │   │   │   └── entrance.py
+│   │   │   ├── auto_reply_cards_monitor
+│   │   │   │   ├── __init__.py
+│   │   │   │   ├── _const.py
+│   │   │   │   ├── _mapper.py
+│   │   │   │   ├── _utils.py
+│   │   │   │   └── entrance.py
+│   │   │   ├── cooperate_accounts_monitor.py
+│   │   │   ├── fwh_group_publish_monitor.py
+│   │   │   ├── get_off_videos.py
+│   │   │   ├── gzh_article_monitor.py
+│   │   │   ├── kimi_balance.py
+│   │   │   ├── limited_account_analysis.py
+│   │   │   ├── rank_log_monitor
+│   │   │   │   ├── __init__.py
+│   │   │   │   ├── _const.py
+│   │   │   │   ├── _mapper.py
+│   │   │   │   ├── _utils.py
+│   │   │   │   └── entrance.py
+│   │   │   └── task_processing_monitor.py
+│   │   └── recommend
 │   │       ├── __init__.py
 │   │       ├── __init__.py
-│   │       ├── auto_reply_cards_monitor.py
-│   │       ├── cooperate_accounts_monitor.py
-│   │       ├── get_off_videos.py
-│   │       ├── gzh_article_monitor.py
-│   │       ├── kimi_balance.py
-│   │       ├── limited_account_analysis.py
-│   │       └── task_processing_monitor.py
+│   │       ├── i2i_recommend
+│   │       │   ├── __init__.py
+│   │       │   ├── _const.py
+│   │       │   ├── _mapper.py
+│   │       │   ├── _utils.py
+│   │       │   └── entrance.py
+│   │       └── offline_recommend
+│   │           ├── __init__.py
+│   │           ├── core.py
+│   │           ├── strategy
+│   │           │   ├── __init__.py
+│   │           │   ├── base.py
+│   │           │   ├── get_top_article.py
+│   │           │   └── i2i.py
+│   │           └── utils
+│   │               ├── __init__.py
+│   │               ├── produce_data.py
+│   │               └── recommend_apollo.py
 │   ├── infra
 │   ├── infra
 │   │   ├── crawler
 │   │   ├── crawler
 │   │   │   ├── __init__.py
 │   │   │   ├── __init__.py
@@ -142,31 +227,58 @@ docker compose up -d
 │   │   │   ├── apollo.py
 │   │   │   ├── apollo.py
 │   │   │   ├── deepseek_official.py
 │   │   │   ├── deepseek_official.py
 │   │   │   ├── elastic_search.py
 │   │   │   ├── elastic_search.py
-│   │   │   └── feishu.py
+│   │   │   ├── feishu.py
+│   │   │   └── odps_service.py
 │   │   ├── internal
 │   │   ├── internal
 │   │   │   ├── __init__.py
 │   │   │   ├── __init__.py
 │   │   │   ├── aigc_system.py
 │   │   │   ├── aigc_system.py
 │   │   │   ├── long_articles.py
 │   │   │   ├── long_articles.py
-│   │   │   └── piaoquan.py
-│   │   ├── shared
+│   │   │   ├── piaoquan.py
+│   │   │   └── piaoquan_decode_server.py
+│   │   ├── mapper
 │   │   │   ├── __init__.py
 │   │   │   ├── __init__.py
-│   │   │   ├── async_tasks.py
-│   │   │   ├── http_client.py
-│   │   │   └── tools.py
-│   │   └── utils
+│   │   │   ├── aigc_mapper.py
+│   │   │   ├── long_article_mapper.py
+│   │   │   ├── long_video_mapper.py
+│   │   │   └── piaoquan_crawler_mapper.py
+│   │   └── shared
 │   │       ├── __init__.py
 │   │       ├── __init__.py
-│   │       ├── get_cover.py
-│   │       └── response.py
-│   └── jobs
-│       ├── task_config.py
-│       ├── task_handler.py
-│       ├── task_mapper.py
-│       └── task_utils.py
+│   │       ├── async_tasks.py
+│   │       ├── http_client.py
+│   │       ├── image.py
+│   │       ├── oss.py
+│   │       ├── response.py
+│   │       └── tools.py
+│   ├── jobs
+│   │   ├── domains
+│   │   │   ├── __init__.py
+│   │   │   ├── algorithm.py
+│   │   │   ├── anaylsis.py
+│   │   │   ├── cold_start.py
+│   │   │   ├── crawler_tasks.py
+│   │   │   ├── data_recycle.py
+│   │   │   ├── llm_task.py
+│   │   │   ├── monitor_task.py
+│   │   │   └── recommend.py
+│   │   ├── task_config.py
+│   │   ├── task_handler.py
+│   │   ├── task_mapper.py
+│   │   └── task_utils.py
+│   └── schemas
+│       ├── __init__.py
+│       └── image.py
 ├── app_config.toml
 ├── app_config.toml
+├── dev
+│   ├── crontab_back.txt
 ├── docker-compose.yaml
 ├── docker-compose.yaml
+├── docs
+│   ├── i2i_recommend_data_sync.md
+│   ├── rank_log_monitor_tech_plan.md
+│   └── rate_limited_article_filter_tech_plan.md
 ├── jenkins_bash.sh
 ├── jenkins_bash.sh
 ├── requirements.txt
 ├── requirements.txt
 └── task_app.py
 └── task_app.py
+
 ```
 ```
 
 
 ### get code strategy
 ### get code strategy
@@ -174,90 +286,168 @@ docker compose up -d
 tree -I "__pycache__|*.pyc"
 tree -I "__pycache__|*.pyc"
 ```
 ```
 
 
-## 1. 数据任务
-#### daily发文数据回收
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "daily_publish_articles_recycle"}'
-```
-#### daily发文更新root_source_id
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_root_source_id"}'
-```
-#### 账号质量处理
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "candidate_account_quality_analysis"}'
-```
-## 2. 抓取任务
+## Crontab(生产调度)
 
 
-#### 今日头条账号内文章抓取
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_toutiao"}'
-```
-#### 今日头条推荐抓取文章
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_toutiao", "method": "recommend"}'
-```
-#### 今日头条搜索抓取账号
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_toutiao", "method": "search"}'
-```
-#### 抓取账号管理(微信)
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_account_manager", "platform": "weixin"}'
-```
-#### 抓取微信文章(抓账号模式)
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_gzh_articles", "account_method": "account_association", "crawl_mode": "account"}'
-```
-#### 抓取微信文章(搜索模式)
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_gzh_articles", "account_method": "search", "crawl_mode": "search"}'
-```
+以下为服务器上配置的定时任务(路径与主机以实际环境为准)。
 
 
-## 3. 冷启动发布任务
-#### 发布头条文章
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_pool_cold_start", "platform": "toutiao", "crawler_methods": ["toutiao_account_association"]}'
-```
-#### 发布公众号文章
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_pool_cold_start"}'
-```
+```cron
+0 9,15,21 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "auto_follow_account"}'
 
 
-## 4. 其他
-#### 校验kimi余额
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "check_kimi_balance"}'
-```
-#### 自动下架视频
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "get_off_videos"}'
-```
-#### 校验视频可见状态
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "check_publish_video_audit_status"}'
-```
-#### 外部服务号监测
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "outside_article_monitor"}'
-```
-#### 站内服务号发文监测
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "inner_article_monitor"}'
-```
-#### 标题重写
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "title_rewrite"}'
-```
-#### 为标题增加品类(文章池)
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_pool_category_generation", "limit": "1000"}'
-```
-#### 候选账号质量分析
-```aiignore
-curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "candidate_account_quality_analysis"}'
-```
+26 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "rate_limited_article_filter"}'
+
+30 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "get_follow_result"}'
+
+50 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "extract_reply_result"}'
+
+
+0 3 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_gzh_articles", "account_method": "1030-手动挑号", "crawl_mode": "account", "strategy": "V1"}'
+
+0 4 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_gzh_articles", "account_method": "cooperate_account", "crawl_mode": "account", "strategy": "V1"}'
+
+
+# 定时清理文件
+0 1 * * * find /root/luojunhui/LongArticlesJob/static -type f -name "*.mp4" -mtime +5 -delete
+# 每天 9 点, 18 点执行 gzh 视频抓取
+0 9,18 * * * bash /root/luojunhui/LongArticlesJob/sh/run_gzh_video_crawler.sh
+
+* * * * * bash /root/luojunhui/LongArticlesJob/sh/run_long_articles_job.sh
+
+# 外部服务号监测
+0 13 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "cooperate_accounts_monitor"}'
+
+30 * * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "cooperate_accounts_detail"}'
+
+# 每天凌晨 4点,下午 4 点各执行一次头条视频抓取
+0 4,16 * * * bash /root/luojunhui/LongArticlesJob/sh/run_toutiao_account_video_crawler.sh
+
+# 更新服务号数据
+0 10,17 * * * bash /root/luojunhui/LongArticlesJob/sh/run_fwh_data_manager.sh
+
+# 每15分钟执行一次今日头条推荐流抓取
+*/15 * * * * bash /root/luojunhui/LongArticlesJob/sh/run_toutiao_recommend.sh
+
+# 每10分钟执行一次从aigc系统获取发布文章
+*/10 * * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_article_info_from_aigc.sh
+
+# 每10分钟执行一次标题相似度计算任务
+*/10 * * * * bash /root/luojunhui/LongArticlesJob/sh/run_title_similarity_task.sh
+
+# 每小时执行一次标题改写
+0 * * * * bash /root/luojunhui/LongArticlesJob/sh/run_title_process_task.sh
+
+# 凌晨2点30执行更新小程序信息任务
+30 2 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_minigram_info_daily.sh
+
+# 凌晨3:00,下午3:00执行视频号抓取任务
+0 3,15 * * * bash /root/luojunhui/LongArticlesJob/sh/run_sph_video_crawler.sh
+
+# 每天上午10点30执行文章退场 && 晋升任务
+30 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_article_title_exit_v1.sh
+
+# 晚上6点执行头条文章冷启动
+0 18 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_pool_cold_start", "platform": "toutiao", "crawler_methods": ["toutiao_account_association"]}'
+
+# 17:30执行公众号文章冷启动
+30 17 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_pool_cold_start", "strategy": "strategy_v3"}'
+
+0 4 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_pool_cold_start", "strategy": "strategy_v1"}'
+
+# 早上执行sohu 抓取
+0 6 * * * bash /root/luojunhui/LongArticlesJob/sh/run_schedule_app.sh
+0 2 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_toutiao"}'
+
+# 每天 14 点执行冷启动发布
+0 14 * * * bash /root/luojunhui/LongArticlesJob/sh/run_cold_start_publish.sh
+
+# 每日上午9点执行账号联想任务
+0 9 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_association.sh
+
+# 执行阅读率均值
+0 10 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_account_read_rate_avg"}'
+
+# 执行阅读均值
+40 10 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_account_read_avg"}'
+
+# 执行打开率均值
+50 10 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_account_open_rate_avg"}'
+
+
+# 每天11点执行文章联想任务
+0 11 * * * bash /root/luojunhui/LongArticlesJob/sh/run_article_association.sh
+
+# 每4小时执行一次校验视频状态
+24 */4 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "check_publish_video_audit_status"}'
+
+
+# 每天 1 点、8 点、19 点执行视频发布和审核流程
+0 1,8,19 * * * bash /root/luojunhui/LongArticlesJob/sh/run_video_publish_and_audit.sh
+
+
+30 8,15 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "daily_publish_articles_recycle"}'
+
+0 21 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "daily_publish_articles_recycle"}'
+
+10 22 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "daily_publish_articles_recycle"}'
+
+30 9,16,21 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_root_source_id"}'
+
+30 22 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_root_source_id"}'
+
+# 每天 9 点、15 点、19 点执行下架视频任务
+0 9,15,19 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "get_off_videos"}'
+
+# 执行内部文章违规检测
+0 9,16,23 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "inner_article_monitor"}'
+
+# 每天 0 点、12 点执行百度视频抓取
+20 0,12 * * * bash /root/luojunhui/LongArticlesJob/sh/run_baidu_video_crawler.sh
+
+# check kimi balance
+# 每4h校验一次kimi余额
+# 25 */4 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "check_kimi_balance"}'
+
+# 更新小程序信息
+0 3,4,5,6,7,8 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "mini_program_detail_process"}'
+
+
+# */8 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "extract_title_features", "batch_size": 50, "version": 2}'
+
+# 0 9,16 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "recycle_outside_account_articles"}'
+
+# 35 16 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_outside_account_article_root_source_id"}'
+
+# 执行限流文章分析
+40 10 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_limited_account_info"}'
+
+# 早上 11 点获取前日文章详情
+30 11 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_detail_stat"}'
+
+# 执行广告平台账号内容抓取
+0 3 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "ad_platform_accounts_crawler"}'
+
+# 执行广告平台账号详情抓取
+32 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "ad_platform_article_detail"}'
+
+# 执行创建解构任务
+# 15 20 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "create_ad_platform_accounts_decode_task"}'
+
+# 定时获取解构结果
+30 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "fetch_decode_result"}'
+
+
+# 定时提取解构结果
+50 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "extract_decode_result"}'
+
+# 腾讯广告互选平台账号--发文至头条
+0 6 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "ad_platform_article_publish"}'
 
 
+# 排序日志更新
+0 14 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "rank_log_monitor"}'
 
 
+# I2I 推荐数据同步
+0 5 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "i2i_recommend_data_sync"}'
 
 
 
 
+# 候选账号质量分析
+# 0 5,10,15,20 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "candidate_account_quality_analysis"}'
+```

+ 9 - 3
app/core/config/settings/read_rate_limited.py

@@ -12,10 +12,16 @@ class ReadRateLimited(BaseSettings):
     )
     )
 
 
     # 认为满足限流阅读均值倍数阈值
     # 认为满足限流阅读均值倍数阈值
-    read_on_avg_threshold: float = 0.2
+    read_on_avg_threshold: float = 0.25
 
 
     # 探索次数
     # 探索次数
     base_discover_time: int = 1
     base_discover_time: int = 1
 
 
-    # 认为文章限流占比阈值
-    low_read_rate_threshold: float = 0.5
+    # 文章限流占比阈值
+    low_read_rate_threshold: float = 0.3
+
+    # 周期内最多限流篇数
+    MAX_RATE_LIMITED_ARTICLES: int = 2
+
+    # 持续限流天数
+    CONSIST_DAYS: int = 2

+ 0 - 114
app/domains/analysis_task/rate_limited_article_filter.py

@@ -1,114 +0,0 @@
-import asyncio
-import datetime
-import hashlib, json
-from typing import List, Dict
-from tqdm.asyncio import tqdm
-
-from app.core.config import GlobalConfigSettings
-from app.core.database import DatabaseManager
-from app.core.observability import LogService
-
-from app.infra.internal import delete_illegal_gzh_articles
-
-
-class RateLimitedArticleMapper:
-    def __init__(self, pool: DatabaseManager):
-        self.pool = pool
-
-    async def find_rate_limited_articles(
-        self,
-        days_duration: int,
-        read_on_avg_threshold: float = 0.2,
-        base_discover_time: int = 1,
-        low_read_rate_threshold: float = 0.5,
-    ) -> List[Dict]:
-        query = f"""
-            SELECT
-                title,
-                gh_id,
-                COUNT(*) AS publish_count,
-                CAST(
-                    SUM(CASE WHEN read_rate < {read_on_avg_threshold} THEN 1 ELSE 0 END) AS UNSIGNED
-                )
-                AS low_read_count
-            FROM datastat_sort_strategy
-            WHERE position = 1
-              AND account_type = '订阅号'
-              AND date_str < DATE_FORMAT(CURDATE(), '%Y%m%d')
-              AND date_str >= DATE_FORMAT(DATE_SUB(CURDATE(), INTERVAL {days_duration} DAY), '%Y%m%d')
-              AND date_str >= '20250501'
-            GROUP BY title
-            HAVING SUM(CASE WHEN read_rate < {read_on_avg_threshold} THEN 1 ELSE 0 END) / (COUNT(*) + {base_discover_time}) >= {low_read_rate_threshold}
-            ;
-        """
-        return await self.pool.async_fetch(query=query)
-
-    async def save_record(self, article_tuple):
-        query = """
-            INSERT IGNORE INTO rate_limited_title
-            (title_md5, title, remark)
-            VALUES
-            (%s, %s, %s)
-        """
-        return await self.pool.async_save(query=query, params=article_tuple)
-
-
-class RateLimitedArticleFilter(RateLimitedArticleMapper):
-    def __init__(
-        self,
-        pool: DatabaseManager,
-        config: GlobalConfigSettings,
-        # log_service: LogService,
-    ):
-        super().__init__(pool=pool)
-        self.config = config.read_rate_limit
-
-    async def _process_single_article(
-        self, data: Dict, days: int, semaphore: asyncio.Semaphore
-    ):
-        """处理单个文章的异步任务"""
-        async with semaphore:
-            gh_id = data["gh_id"]
-            title = data["title"]
-            title_md5 = hashlib.md5(title.encode("utf-8")).hexdigest()
-            remark = json.dumps(
-                {
-                    "发文数量": data["publish_count"],
-                    "限流数量": data["low_read_count"],
-                    "周期": days,
-                    "执行日期": datetime.datetime.today().strftime("%Y-%m-%d"),
-                },
-                ensure_ascii=False,
-            )
-            insert_rows = await self.save_record(
-                article_tuple=(title_md5, title, remark)
-            )
-            if insert_rows:
-                await delete_illegal_gzh_articles(gh_id=gh_id, title=title, delete_flag=2)
-            else:
-                print("该文章已经删过")
-
-    async def process_single_task(self, days: int, max_concurrent: int = 10):
-        """并发处理所有文章任务"""
-        data_list = await self.find_rate_limited_articles(
-            days_duration=days,
-            read_on_avg_threshold=self.config.read_on_avg_threshold,
-            base_discover_time=self.config.base_discover_time,
-            low_read_rate_threshold=self.config.low_read_rate_threshold,
-        )
-
-        # 创建信号量限制并发数
-        semaphore = asyncio.Semaphore(max_concurrent)
-
-        # 创建所有任务
-        tasks = [
-            self._process_single_article(data, days, semaphore) for data in data_list
-        ]
-
-        # 使用 tqdm 显示进度并发执行所有任务
-        for coro in tqdm.as_completed(tasks, total=len(tasks)):
-            await coro
-
-    async def deal(self):
-        for _day in self.config.stat_durations:
-            await self.process_single_task(_day)

+ 6 - 0
app/domains/analysis_task/rate_limited_article_filter/__init__.py

@@ -0,0 +1,6 @@
+from .entrance import RateLimitedArticleFilter
+
+
+__all__ = [
+    "RateLimitedArticleFilter",
+]

+ 37 - 0
app/domains/analysis_task/rate_limited_article_filter/_mapper.py

@@ -0,0 +1,37 @@
+from typing import List, Dict
+
+from app.core.database import DatabaseManager
+
+
class RateLimitedArticleMapper:
    """Data-access layer for the rate-limited article filter task.

    Reads per-article daily read-rate rows from ``datastat_sort_strategy``
    and persists titles judged to be rate-limited into ``rate_limited_title``.
    """

    def __init__(self, pool: DatabaseManager):
        # Shared async DB manager; all queries go through its fetch/save API.
        self.pool = pool

    async def find_rate_limited_articles(
        self,
        days_duration: int,
    ) -> List[Dict]:
        """Fetch headline (position = 1) rows for subscription accounts.

        Covers the last ``days_duration`` full days (yesterday backwards),
        never earlier than 2025-05-01 (hard data floor), ordered by account
        then date so downstream streak logic can scan chronologically.
        Each row carries title, gh_id, date_str and read_rate (NULL → 0).

        NOTE(review): ``days_duration`` is interpolated into the SQL via an
        f-string — assumed to be a trusted int from config; confirm it is
        never user-supplied.
        """
        query = f"""
            SELECT
                title,
                gh_id,
                date_str,
                IFNULL(read_rate, 0) AS read_rate
            FROM datastat_sort_strategy
            WHERE position = 1
              AND account_type = '订阅号'
              AND date_str < DATE_FORMAT(CURDATE(), '%Y%m%d')
              AND date_str >= DATE_FORMAT(DATE_SUB(CURDATE(), INTERVAL {days_duration} DAY), '%Y%m%d')
              AND date_str >= '20250501'
            ORDER BY gh_id, date_str;
        """
        return await self.pool.async_fetch(query=query)

    async def save_record(self, article_tuple):
        """Insert one ``(title_md5, title, remark)`` row.

        Uses INSERT IGNORE so re-processing the same title is a no-op
        (presumably title_md5 is a unique key — verify against the schema).
        Returns whatever ``async_save`` reports; callers appear to treat a
        falsy result as "already recorded".
        """
        query = """
            INSERT IGNORE INTO rate_limited_title
            (title_md5, title, remark)
            VALUES
            (%s, %s, %s)
        """
        return await self.pool.async_save(query=query, params=article_tuple)

+ 125 - 0
app/domains/analysis_task/rate_limited_article_filter/_utils.py

@@ -0,0 +1,125 @@
+from collections import defaultdict
+from typing import Dict, List
+
+from app.core.observability import LogService
+
+
class RateLimitedArticleUtils:
    """Helper toolbox for the rate-limited article filter task.

    Bundles structured trace logging with the pure record-filtering and
    title-aggregation logic used by the task entrance.
    """

    _LOG_TASK = "rate_limited_article_filter"

    def __init__(self, log_service: LogService):
        # Optional structured logger; trace_log degrades to a no-op without it.
        self._log_service = log_service

    async def trace_log(self, contents: dict) -> None:
        """Emit a structured trace event tagged with this task's name."""
        if not self._log_service:
            return
        await self._log_service.log(contents={"task": self._LOG_TASK, **contents})

    @staticmethod
    def _sort_records_by_date(records: List[Dict]) -> List[Dict]:
        # Chronological order is required by the streak logic below.
        return sorted(records, key=lambda record: record["date_str"])

    def filter_account_disabled_records(
        self, records: List[Dict], read_on_avg_threshold: float, consist_days: int
    ) -> List[Dict]:
        """Drop rows published while an account was in its "disabled" window.

        An account becomes disabled once its headline read_rate stays below
        ``read_on_avg_threshold`` for ``consist_days`` consecutive days (the
        day that completes the streak is dropped too).  The first day at or
        above the threshold re-enables the account and is kept again.
        """
        by_account = defaultdict(list)
        for record in records:
            by_account[record["gh_id"]].append(record)

        kept: List[Dict] = []
        for account_rows in by_account.values():
            streak = 0
            disabled = False
            for record in self._sort_records_by_date(account_rows):
                below = float(record.get("read_rate") or 0) < read_on_avg_threshold
                if disabled:
                    if below:
                        # Still throttled: row does not take part in judging.
                        continue
                    # Recovered: reset state and keep today's row.
                    disabled = False
                    streak = 0
                elif below:
                    streak += 1
                    if streak >= consist_days:
                        # Streak complete: enter disabled state, drop this row.
                        disabled = True
                        continue
                else:
                    streak = 0
                kept.append(record)

        return kept

    @staticmethod
    def aggregate_rate_limited_titles(
        records: List[Dict],
        read_on_avg_threshold: float,
        base_discover_time: int,
        low_read_rate_threshold: float,
        max_rate_limited_articles: int,
    ) -> List[Dict]:
        """Group surviving rows by title and keep the rate-limited-looking ones.

        A title is flagged when either its low-read ratio (smoothed by
        ``base_discover_time`` extra virtual exposures) reaches
        ``low_read_rate_threshold``, or its absolute low-read count reaches
        ``max_rate_limited_articles``.  Both triggers are reported.
        """
        stats: Dict[str, Dict] = {}
        for record in records:
            entry = stats.setdefault(
                record["title"],
                {
                    "title": record["title"],
                    "publish_count": 0,
                    "low_read_count": 0,
                    "gh_ids": set(),
                },
            )
            entry["publish_count"] += 1
            entry["gh_ids"].add(record["gh_id"])
            if float(record.get("read_rate") or 0) < read_on_avg_threshold:
                entry["low_read_count"] += 1

        flagged: List[Dict] = []
        for entry in stats.values():
            denominator = entry["publish_count"] + base_discover_time
            ratio = entry["low_read_count"] / denominator if denominator > 0 else 0
            by_ratio = ratio >= low_read_rate_threshold
            by_count = entry["low_read_count"] >= max_rate_limited_articles
            if not (by_ratio or by_count):
                continue

            rules = [
                rule_name
                for rule_name, hit in (
                    ("low_read_ratio", by_ratio),
                    ("low_read_count", by_count),
                )
                if hit
            ]
            flagged.append(
                {
                    "title": entry["title"],
                    "publish_count": entry["publish_count"],
                    "low_read_count": entry["low_read_count"],
                    "low_read_ratio": ratio,
                    "gh_ids": sorted(entry["gh_ids"]),
                    "trigger_rules": rules,
                }
            )

        return flagged

+ 143 - 0
app/domains/analysis_task/rate_limited_article_filter/entrance.py

@@ -0,0 +1,143 @@
+import datetime
+import hashlib
+import json
+from typing import Dict
+
+from app.core.config import GlobalConfigSettings
+from app.core.database import DatabaseManager
+from app.core.observability import LogService
+from app.infra.internal import delete_illegal_gzh_articles
+from app.infra.shared import run_tasks_with_asyncio_task_group
+
+from ._mapper import RateLimitedArticleMapper
+from ._utils import RateLimitedArticleUtils
+
+
+class RateLimitedArticleFilter(RateLimitedArticleMapper):
+    RATE_LIMITED = 2
+
+    def __init__(
+        self,
+        pool: DatabaseManager,
+        config: GlobalConfigSettings,
+        log_service: LogService,
+    ):
+        super().__init__(pool=pool)
+        self.config = config.read_rate_limit
+        self.tool = RateLimitedArticleUtils(log_service=log_service)
+
+    async def _process_single_article(self, data: Dict):
+        """处理单个文章的异步任务"""
+        title = data["title"]
+        title_md5 = hashlib.md5(title.encode("utf-8")).hexdigest()
+        remark = json.dumps(
+            {
+                "发文数量": data["publish_count"],
+                "限流数量": data["low_read_count"],
+                "限流比例": data["low_read_ratio"],
+                "周期": data["days"],
+                "触发规则": data["trigger_rules"],
+                "执行日期": datetime.datetime.today().strftime("%Y-%m-%d"),
+            },
+            ensure_ascii=False,
+        )
+        try:
+            insert_rows = await self.save_record(
+                article_tuple=(title_md5, title, remark)
+            )
+            if insert_rows:
+                gh_id = data["gh_ids"][0]
+                await delete_illegal_gzh_articles(
+                    gh_id=gh_id, title=title, delete_flag=self.RATE_LIMITED
+                )
+                await self.tool.trace_log(
+                    {
+                        "event": "title_shielded",
+                        "title": title,
+                        "days": data["days"],
+                        "trigger_rules": data["trigger_rules"],
+                        "gh_id": gh_id,
+                        "low_read_count": data["low_read_count"],
+                        "publish_count": data["publish_count"],
+                    }
+                )
+        except Exception as e:
+            await self.tool.trace_log(
+                {
+                    "event": "process_single_article_failed",
+                    "title": title,
+                    "days": data.get("days"),
+                    "status": "error",
+                    "message": str(e),
+                }
+            )
+            raise
+
+    async def process_single_task(self, days: int, max_concurrent: int = 5):
+        """并发处理所有文章任务"""
+        await self.tool.trace_log(
+            {
+                "event": "period_start",
+                "days": days,
+            }
+        )
+        raw_records = await self.find_rate_limited_articles(
+            days_duration=days,
+        )
+        effective_records = self.tool.filter_account_disabled_records(
+            records=raw_records,
+            read_on_avg_threshold=self.config.read_on_avg_threshold,
+            consist_days=self.config.CONSIST_DAYS,
+        )
+
+        aggregated = self.tool.aggregate_rate_limited_titles(
+            records=effective_records,
+            read_on_avg_threshold=self.config.read_on_avg_threshold,
+            base_discover_time=self.config.base_discover_time,
+            low_read_rate_threshold=self.config.low_read_rate_threshold,
+            max_rate_limited_articles=self.config.MAX_RATE_LIMITED_ARTICLES,
+        )
+        data_list = [{**item, "days": days} for item in aggregated]
+
+        result = await run_tasks_with_asyncio_task_group(
+            task_list=data_list,
+            handler=self._process_single_article,
+            description="执行限流删文处理",
+            max_concurrency=max_concurrent,
+            unit="per_title",
+        )
+
+        await self.tool.trace_log(
+            {
+                "event": "period_complete",
+                "days": days,
+                "raw_row_count": len(raw_records),
+                "effective_row_count": len(effective_records),
+                "hit_title_count": len(data_list),
+                "total_task": result["total_task"],
+                "processed_task": result["processed_task"],
+                "error_count": len(result["errors"]),
+            }
+        )
+        for _idx, task_obj, err in result["errors"]:
+            await self.tool.trace_log(
+                {
+                    "event": "period_item_error",
+                    "days": days,
+                    "title": task_obj.get("title"),
+                    "status": "error",
+                    "message": str(err),
+                }
+            )
+
+    async def deal(self):
+        await self.tool.trace_log(
+            {
+                "event": "deal_start",
+                "stat_durations": list(self.config.stat_durations),
+            }
+        )
+        for _day in self.config.stat_durations:
+            await self.process_single_task(_day)
+
+        await self.tool.trace_log({"event": "deal_complete"})

+ 5 - 1
app/jobs/task_handler.py

@@ -429,7 +429,11 @@ class TaskHandler:
     @register("rate_limited_article_filter")
     @register("rate_limited_article_filter")
     async def _rate_limited_article_filter(self) -> int:
     async def _rate_limited_article_filter(self) -> int:
         """限流文章删除"""
         """限流文章删除"""
-        task = RateLimitedArticleFilter(pool=self.db_client, config=self.config)
+        task = RateLimitedArticleFilter(
+            pool=self.db_client,
+            config=self.config,
+            log_service=self.log_client,
+        )
         await task.deal()
         await task.deal()
         return TaskStatus.SUCCESS
         return TaskStatus.SUCCESS
 
 

+ 51 - 43
dev/crontab_back.txt

@@ -1,7 +1,16 @@
+0 9,15,21 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "auto_follow_account"}'
 
 
-0 3 * * * curl -X POST http://192.168.100.31:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_gzh_articles", "account_method": "1030-手动挑号", "crawl_mode": "account", "strategy": "V1"}'
+26 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "rate_limited_article_filter"}'
+
+30 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "get_follow_result"}'
+
+50 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "extract_reply_result"}'
+
+
+0 3 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_gzh_articles", "account_method": "1030-手动挑号", "crawl_mode": "account", "strategy": "V1"}'
+
+0 4 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_gzh_articles", "account_method": "cooperate_account", "crawl_mode": "account", "strategy": "V1"}'
 
 
-0 4 * * * curl -X POST http://192.168.100.31:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_gzh_articles", "account_method": "cooperate_account", "crawl_mode": "account", "strategy": "V1"}'
 
 
 # 定时清理文件
 # 定时清理文件
 0 1 * * * find /root/luojunhui/LongArticlesJob/static -type f -name "*.mp4" -mtime +5 -delete
 0 1 * * * find /root/luojunhui/LongArticlesJob/static -type f -name "*.mp4" -mtime +5 -delete
@@ -11,13 +20,15 @@
 * * * * * bash /root/luojunhui/LongArticlesJob/sh/run_long_articles_job.sh
 * * * * * bash /root/luojunhui/LongArticlesJob/sh/run_long_articles_job.sh
 
 
 # 外部服务号监测
 # 外部服务号监测
-# 0 13 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "outside_article_monitor"}'
+0 13 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "cooperate_accounts_monitor"}'
+
+30 * * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "cooperate_accounts_detail"}'
 
 
 # 每天凌晨 4点,下午 4 点各执行一次头条视频抓取
 # 每天凌晨 4点,下午 4 点各执行一次头条视频抓取
 0 4,16 * * * bash /root/luojunhui/LongArticlesJob/sh/run_toutiao_account_video_crawler.sh
 0 4,16 * * * bash /root/luojunhui/LongArticlesJob/sh/run_toutiao_account_video_crawler.sh
 
 
 # 更新服务号数据
 # 更新服务号数据
-0 11,17 * * * bash /root/luojunhui/LongArticlesJob/sh/run_fwh_data_manager.sh
+0 10,17 * * * bash /root/luojunhui/LongArticlesJob/sh/run_fwh_data_manager.sh
 
 
 # 每15分钟执行一次今日头条推荐流抓取
 # 每15分钟执行一次今日头条推荐流抓取
 */15 * * * * bash /root/luojunhui/LongArticlesJob/sh/run_toutiao_recommend.sh
 */15 * * * * bash /root/luojunhui/LongArticlesJob/sh/run_toutiao_recommend.sh
@@ -40,9 +51,6 @@
 # 每天上午10点30执行文章退场 && 晋升任务
 # 每天上午10点30执行文章退场 && 晋升任务
 30 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_article_title_exit_v1.sh
 30 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_article_title_exit_v1.sh
 
 
-# 每天上午4点执行账号冷启动任务
-# 0 1 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_cold_start_daily.sh
-
 # 晚上6点执行头条文章冷启动
 # 晚上6点执行头条文章冷启动
 0 18 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_pool_cold_start", "platform": "toutiao", "crawler_methods": ["toutiao_account_association"]}'
 0 18 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_pool_cold_start", "platform": "toutiao", "crawler_methods": ["toutiao_account_association"]}'
 
 
@@ -61,12 +69,6 @@
 # 每日上午9点执行账号联想任务
 # 每日上午9点执行账号联想任务
 0 9 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_association.sh
 0 9 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_association.sh
 
 
-# 每天 10 点执行前一天的阅读率均值代码
-# 0 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_account_read_rate_avg.sh
-
-# 每天10点40执行阅读均值任务
-# 40 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_account_avg_v3.sh
-
 # 执行阅读率均值
 # 执行阅读率均值
 0 10 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_account_read_rate_avg"}'
 0 10 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_account_read_rate_avg"}'
 
 
@@ -81,18 +83,12 @@
 0 11 * * * bash /root/luojunhui/LongArticlesJob/sh/run_article_association.sh
 0 11 * * * bash /root/luojunhui/LongArticlesJob/sh/run_article_association.sh
 
 
 # 每小时执行一次校验视频状态
 # 每小时执行一次校验视频状态
-# 20 * * * * bash /root/luojunhui/LongArticlesJob/sh/run_check_video_status_hourly.sh
 24 */4 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "check_publish_video_audit_status"}'
 24 */4 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "check_publish_video_audit_status"}'
 
 
 
 
 # 每天 1 点、8 点、19 点执行视频发布和审核流程
 # 每天 1 点、8 点、19 点执行视频发布和审核流程
 0 1,8,19 * * * bash /root/luojunhui/LongArticlesJob/sh/run_video_publish_and_audit.sh
 0 1,8,19 * * * bash /root/luojunhui/LongArticlesJob/sh/run_video_publish_and_audit.sh
 
 
-# 每天 上午8:30, 下午1:00, 晚上8:50执行
-
-# 30 8 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_published_articles_daily.sh
-# 20 14 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_published_articles_daily.sh
-# 50 20 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_published_articles_daily.sh
 
 
 30 8,15 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "daily_publish_articles_recycle"}'
 30 8,15 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "daily_publish_articles_recycle"}'
 
 
@@ -103,37 +99,19 @@
 30 9,16,21 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_root_source_id"}'
 30 9,16,21 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_root_source_id"}'
 
 
 30 22 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_root_source_id"}'
 30 22 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_root_source_id"}'
-# 每天上午9点,下午2点,晚上9点执行v2代码
-# 0 9,14,21 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_published_articles_daily_v2.sh
-
 
 
 # 每天上午 9 点、下午 3 点、晚上 7 点执行下架视频任务
 # 每天上午 9 点、下午 3 点、晚上 7 点执行下架视频任务
-
-# 30 9 * * * bash /root/luojunhui/LongArticlesJob/sh/run_get_off_videos_three_times_per_day.sh
-
-# 0 15 * * * bash /root/luojunhui/LongArticlesJob/sh/run_get_off_videos_three_times_per_day.sh
-
-# 0 19 * * * bash /root/luojunhui/LongArticlesJob/sh/run_get_off_videos_three_times_per_day.sh
-
 0 9,15,19 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "get_off_videos"}'
 0 9,15,19 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "get_off_videos"}'
 
 
-# 每天早上9点,下午2:30, 晚上7:30
-
-# 0 10,16,20 * * * bash /root/luojunhui/LongArticlesJob/sh/published_articles_monitor.sh
-0 10,16,20 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "inner_article_monitor"}'
+# 执行内部文章违规检测
+0 9,16,23 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "inner_article_monitor"}'
 
 
 # 每天 0 点 20 分、12 点 20 分执行百度视频抓取
 # 每开始执行百度视频
 20 0,12 * * * bash /root/luojunhui/LongArticlesJob/sh/run_baidu_video_crawler.sh
 20 0,12 * * * bash /root/luojunhui/LongArticlesJob/sh/run_baidu_video_crawler.sh
 
 
 # check kimi balance hourly
 # check kimi balance hourly
-
-# 30 * * * * bash /root/luojunhui/LongArticlesJob/sh/run_kimi_balance_monitor.sh
 # 每4h校验一次kimi余额
 # 每4h校验一次kimi余额
-25 */4 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "check_kimi_balance"}'
-
-# 0 11,23 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_explore.sh
-
-# 0 5,10,15,20 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_quality_analysis.sh
+# 25 */4 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "check_kimi_balance"}'
 
 
 # 更新小程序信息
 # 更新小程序信息
 0 3,4,5,6,7,8 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "mini_program_detail_process"}'
 0 3,4,5,6,7,8 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "mini_program_detail_process"}'
@@ -141,11 +119,41 @@
 
 
 # */8 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "extract_title_features", "batch_size": 50, "version": 2}'
 # */8 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "extract_title_features", "batch_size": 50, "version": 2}'
 
 
-0 9,16 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "recycle_outside_account_articles"}'
+# 0 9,16 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "recycle_outside_account_articles"}'
 
 
-35 16 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_outside_account_article_root_source_id"}'
+# 35 16 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_outside_account_article_root_source_id"}'
 
 
 # 执行限流文章分析
 # 执行限流文章分析
 40 10 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_limited_account_info"}'
 40 10 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_limited_account_info"}'
+
+# 每天上午 11 点 30 分获取前日文章详情
+30 11 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_detail_stat"}'
+
+# 执行广告平台账号内容抓取
+0 3 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "ad_platform_accounts_crawler"}'
+
+# 执行广告平台账号详情抓取
+32 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "ad_platform_article_detail"}'
+
+# 执行创建解构任务
+# 15 20 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "create_ad_platform_accounts_decode_task"}'
+
+# 定时获取解构结果
+30 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "fetch_decode_result"}'
+
+
+# 定时提取解构结果
+50 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "extract_decode_result"}'
+
+# 腾讯广告互选平台账号--发文至头条
+0 6 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "ad_platform_article_publish"}'
+
+# 排序日志更新
+0 14 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "rank_log_monitor"}'
+
+# I2I 排序日志更新
+0 5 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "i2i_recommend_data_sync"}'
+
+
 # 候选账号质量分析
 # 候选账号质量分析
-0 5,10,15,20 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "candidate_account_quality_analysis"}'
+# 0 5,10,15,20 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "candidate_account_quality_analysis"}'