luojunhui 3 недель назад
Родитель
Сommit
4fe032f8d8

+ 2 - 0
applications/api/__init__.py

@@ -23,6 +23,7 @@ from .aliyun_log_api import log
 from .async_aigc_system_api import delete_illegal_gzh_articles
 from .async_aigc_system_api import auto_create_crawler_task
 from .async_aigc_system_api import auto_bind_crawler_task_to_generate_task
+from .async_aigc_system_api import insert_crawler_relation_to_aigc_system
 
 feishu_robot = FeishuBotApi()
 feishu_sheet = FeishuSheetApi()
@@ -42,4 +43,5 @@ __all__ = [
     "auto_create_crawler_task",
     "auto_bind_crawler_task_to_generate_task",
     "AsyncElasticSearchClient",
+    "insert_crawler_relation_to_aigc_system"
 ]

+ 15 - 0
applications/api/async_aigc_system_api.py

@@ -1,4 +1,5 @@
 import json
+from typing import Optional, Dict, List, TypedDict
 from applications.utils import AsyncHttpClient
 
 HEADERS = {
@@ -23,6 +24,11 @@ PERSON_COOKIE = {
     "uid": 1,
 }
 
+class RelationDict(TypedDict):
+    """One entry of the ``relations`` list posted to the AIGC system."""
+
+    # The audit strategy fills this from the row's ``content_trace_id``.
+    videoPoolTraceId: str
+    # The audit strategy passes ``str(audit_video_id)`` here.
+    channelContentId: str
+    # The audit strategy copies the row's ``platform`` value.
+    platform: str
+
 
 async def delete_illegal_gzh_articles(gh_id: str, title: str):
     """
@@ -166,3 +172,12 @@ async def get_generate_task_detail(generate_task_id):
         return res["data"]
     else:
         return {}
+
+
+async def insert_crawler_relation_to_aigc_system(relation_list: List[RelationDict]) -> Optional[Dict]:
+    """POST video-pool crawler relations to the AIGC system.
+
+    Args:
+        relation_list: relation entries; sent as ``params.relations`` in the
+            JSON request body.
+
+    Returns:
+        Whatever ``AsyncHttpClient.post`` returns for the request.
+    """
+    endpoint = "http://aigc-api.cybertogether.net/aigc/crawler/content/videoPoolCrawlerRelation"
+    request_body = json.dumps({"params": {"relations": relation_list}})
+    async with AsyncHttpClient(timeout=60) as http_client:
+        response = await http_client.post(
+            url=endpoint, headers=HEADERS, data=request_body
+        )
+
+    return response

+ 0 - 0
applications/tasks/cold_start_tasks/video_pool/__init__.py


+ 94 - 0
applications/tasks/cold_start_tasks/video_pool/video_pool_audit_strategy.py

@@ -0,0 +1,94 @@
+from applications.api import fetch_piaoquan_video_list_detail
+from applications.api import insert_crawler_relation_to_aigc_system
+
+from .video_pool_const import VideoPoolConst
+
+
+class VideoPoolAuditStrategy(VideoPoolConst):
+    """Sync Piaoquan audit results into ``publish_single_video_source``.
+
+    Rows whose ``audit_status`` equals ``VIDEO_AUDIT_PROCESSING_STATUS`` are
+    looked up on Piaoquan and moved to a success or failure status.
+    """
+
+    def __init__(self, pool, log_client, trace_id):
+        """Store the collaborators used by the audit methods.
+
+        Args:
+            pool: database client exposing ``async_save`` and ``async_fetch``.
+            log_client: stored on the instance; not used by the methods here.
+            trace_id: stored on the instance; not used by the methods here.
+        """
+        super().__init__()
+        self.pool = pool
+        self.log_client = log_client
+        self.trace_id = trace_id
+
+    async def update_video_audit_status(self, video_id, ori_status, new_status):
+        """Set a video's audit status, guarded by its current status.
+
+        The row is only updated when its ``audit_status`` still equals
+        ``ori_status``.
+
+        Returns:
+            The result of ``pool.async_save`` (callers treat it as the
+            affected-row count -- TODO confirm against the pool client).
+        """
+        query = """
+            UPDATE publish_single_video_source
+            SET audit_status = %s 
+            WHERE audit_video_id = %s AND audit_status = %s;
+        """
+        return await self.pool.async_save(query=query, params=(new_status, video_id, ori_status))
+
+    async def get_auditing_video_list(self):
+        """Fetch every video whose audit is still in the processing state."""
+        query = """
+            select content_trace_id, audit_video_id, score, platform 
+            from publish_single_video_source
+            where audit_status = %s
+        """
+        # Use the named constant (-1) instead of a magic number.
+        return await self.pool.async_fetch(
+            query=query, params=(self.VIDEO_AUDIT_PROCESSING_STATUS,)
+        )
+
+    async def get_video_audit_info(self, video_obj):
+        """Fetch one video's Piaoquan audit status and persist the outcome.
+
+        Args:
+            video_obj: row mapping with ``audit_video_id``,
+                ``content_trace_id`` and ``platform`` keys.
+
+        Returns:
+            dict with ``affected_rows``, ``video_id`` and ``audit_status``.
+        """
+        video_id = video_obj['audit_video_id']
+        response = await fetch_piaoquan_video_list_detail([video_id])
+        response_data = response.get("data")
+        if not response_data:
+            # No detail came back: treat the video as having failed the audit.
+            audit_status = self.PQ_AUDIT_FAIL_STATUS
+        else:
+            audit_status = response_data[0].get("auditStatus")
+
+        match audit_status:
+            case self.PQ_AUDIT_SUCCESS_STATUS:
+                # Update the mini-program title field.
+                # NOTE(review): update_mini_program_title is not defined in
+                # this class and is called without await -- confirm it exists
+                # on a subclass/mixin and is synchronous.
+                mini_program_title_flag = self.update_mini_program_title(video_id)
+                if mini_program_title_flag:
+                    # Title updated: mark the audit as successful (status 1).
+                    affected_rows = await self.update_video_audit_status(
+                        video_id=video_id,
+                        ori_status=self.VIDEO_AUDIT_PROCESSING_STATUS,
+                        new_status=self.VIDEO_AUDIT_SUCCESS_STATUS
+                    )
+                    # Store the video in the task queue.
+                    # NOTE(review): insert_into_task_queue is not defined in
+                    # this class and is not awaited -- confirm.
+                    self.insert_into_task_queue(video_obj)
+
+                    # Register the video in the AIGC system.
+                    await insert_crawler_relation_to_aigc_system(
+                        relation_list=[
+                            {
+                                "videoPoolTraceId": video_obj['content_trace_id'],
+                                "channelContentId": str(video_id),
+                                "platform": video_obj['platform'],
+                            }
+                        ]
+                    )
+                else:
+                    # Title update failed: set the audit status to 4.
+                    affected_rows = await self.update_video_audit_status(
+                        video_id=video_id,
+                        ori_status=self.VIDEO_AUDIT_PROCESSING_STATUS,
+                        new_status=self.VIDEO_TITLE_GENERATE_FAIL_STATUS
+                    )
+
+            # OR pattern: "A, B" would be a sequence pattern, which an integer
+            # status can never match.
+            case self.PQ_AUDIT_SELF_VISIBLE_STATUS | self.PQ_AUDIT_FAIL_STATUS:
+                # Audit failed: set the audit status to 2.
+                affected_rows = await self.update_video_audit_status(
+                    video_id=video_id,
+                    ori_status=self.VIDEO_AUDIT_PROCESSING_STATUS,
+                    new_status=self.VIDEO_AUDIT_FAIL_STATUS
+                )
+
+            case self.PQ_AUDIT_PROCESSING_STATUS:
+                # Still under review: leave the row untouched.
+                affected_rows = 0
+
+            case _:
+                # Any other status (e.g. PQ_AUDIT_WAIT_STATUS): no update.
+                affected_rows = 0
+
+        return {
+            "affected_rows": affected_rows,
+            "video_id": video_id,
+            "audit_status": audit_status
+        }

+ 75 - 0
applications/tasks/cold_start_tasks/video_pool/video_pool_const.py

@@ -0,0 +1,75 @@
+class VideoPoolConst:
+    """
+    Constant configuration for WeChat video crawling
+    """
+
+    # Account crawl status
+    ACCOUNT_CRAWL_STATUS = 1
+    ACCOUNT_DO_NOT_CRAWL_STATUS = 0
+
+    # Default earliest crawl timestamp (2024-01-01 00:00 at UTC+8)
+    DEFAULT_TIMESTAMP = 1704038400
+
+    # Maximum number of pages for the search crawler
+    MAX_SEARCH_PAGE_NUM = 10
+
+    # Wait time when crawling each page
+    SLEEP_SECONDS = 5
+
+    # Minimum multiple of the average read count for a seed title
+    READ_AVG_MULTIPLE = 1.3
+
+    # Minimum read count for a seed title
+    MIN_READ_COUNT = 2000
+
+    # Statistics period used when fetching seed titles (7 days in seconds)
+    STAT_PERIOD = 7 * 24 * 60 * 60
+
+    # API request success code
+    REQUEST_SUCCESS = 0
+    PUBLISHED_ILLEGAL_TITLE_CODE = 1015
+
+    # Whether the source account needs to be scanned/queried
+    NEED_SCAN_SOURCE_ACCOUNT = 1
+    DO_NOT_NEED_SOURCE_ACCOUNT = 0
+
+    # Video audit status (long-articles database)
+    VIDEO_AUDIT_INIT_STATUS = 0
+    VIDEO_AUDIT_SUCCESS_STATUS = 1
+    VIDEO_AUDIT_FAIL_STATUS = 2
+    VIDEO_TITLE_GENERATE_FAIL_STATUS = 4
+    VIDEO_AUDIT_PROCESSING_STATUS = -1
+
+    # Piaoquan video audit status: 1 under review, 2 rejected, 3 pending modification, 4 visible to self only, 5 approved
+    PQ_AUDIT_PROCESSING_STATUS = 1
+    PQ_AUDIT_FAIL_STATUS = 2
+    PQ_AUDIT_WAIT_STATUS = 3
+    PQ_AUDIT_SELF_VISIBLE_STATUS = 4
+    PQ_AUDIT_SUCCESS_STATUS = 5
+
+    # Default account
+    DEFAULT_ACCOUNT_UID = 76862180
+
+    # Number of videos sent for audit per day
+    MAX_VIDEO_NUM = 1000
+
+    # Number of videos submitted for audit in a single publish
+    MAX_VIDEO_NUM_PER_PUBLISH = 350
+
+    # Title status (NOTE(review): "EXIT" may be a typo for "EXIST" -- confirm)
+    TITLE_DEFAULT_STATUS = 0
+    TITLE_EXIT_STATUS = 1
+    TITLE_FESTIVAL_STATUS = 2
+    TITLE_SHORT_STATUS = 3
+
+    # Minimum title length
+    TITLE_MIN_LENGTH = 15
+
+    # safe score
+    TITLE_SAFE_SCORE_THRESHOLD = 7
+
+    # Task Status
+    INIT_STATUS = 0
+    PROCESSING_STATUS = 1
+    SUCCESS_STATUS = 2
+    FAIL_STATUS = 99

+ 4 - 0
applications/tasks/cold_start_tasks/video_pool_cold_start.py

@@ -0,0 +1,4 @@
+
+
+class VideoPoolColdStart:
+    # Placeholder class: no behaviour is implemented yet.
+    pass

+ 1 - 1
applications/utils/common.py

@@ -239,7 +239,7 @@ def get_task_chinese_name(data):
     elif task_name == 'article_pool_cold_start':
         platform = data.get('platform')
         platform = platform.replace('toutiao', '今日头条').replace("weixin", "微信")
-        strategy = data.get('strategy')
+        strategy = data.get('strategy', '')
         strategy = strategy.replace("strategy", "策略")
         category_list = data.get('category_list', [])
         category_list = "、".join(category_list)