Bladeren bron

Merge branch 'feature/luojunhui/20260508-aigc-decode-article' of Server/LongArticleTaskServer into master

luojunhui 14 uur geleden
bovenliggende
commit
8f1c9eee74

+ 1 - 1
README.md

@@ -141,7 +141,7 @@ docker compose up -d
 │   │   │   └── recycle_outside_account_articles.py
 │   │   ├── llm_tasks
 │   │   │   ├── __init__.py
-│   │   │   ├── aigc_decode_task
+│   │   │   ├── decode_article
 │   │   │   │   ├── __init__.py
 │   │   │   │   ├── _const.py
 │   │   │   │   ├── _mapper.py

+ 0 - 47
app/domains/llm_tasks/aigc_decode_task/_const.py

@@ -1,47 +0,0 @@
-class DecodeTaskConst:
-    TASK_BATCH = 100
-
-    class TaskStatus:
-        # 任务状态
-        INIT = 0
-        PROCESSING = 1
-        SUCCESS = 2
-        FAILED = 99
-
-    class ExtractStatus(TaskStatus): ...
-
-    class DecodeStatus:
-        # 解构结果状态
-        PENDING = 0
-        RUNNING = 1
-        SUCCESS = 2
-        FAILED = 3
-
-    class BusinessScene:
-        # 业务场景
-        POINT_PICK = 0
-        CREATE = 1
-        MAKE = 2
-
-    class ContentType:
-        # 内容类型
-        LONG_ARTICLE = 1
-        PICTURE_TEXT = 2
-        VIDEO = 3
-
-    class SourceType:
-        AD_PLATFORM = 1
-        INNER = 2
-
-    class ProduceModuleType:
-        COVER = 1
-        IMAGE = 2
-        TITLE = 3
-        CONTENT = 4
-        SUMMARY = 18
-
-    class RequestDecode:
-        SUCCESS = 0
-
-
-__all__ = ["DecodeTaskConst"]

+ 0 - 179
app/domains/llm_tasks/aigc_decode_task/_mapper.py

@@ -1,179 +0,0 @@
-from typing import List, Dict
-
-from app.core.database import DatabaseManager
-
-from ._const import DecodeTaskConst
-
-
-class ArticlesDecodeTaskMapper(DecodeTaskConst):
-    def __init__(self, pool: DatabaseManager):
-        self.pool = pool
-
-    # 存储解构任务
-    async def record_decode_task(
-        self, task_id: str, wx_sn: str, remark: str = None
-    ) -> int:
-        query = """
-            INSERT INTO long_articles_decode_tasks (task_id, wx_sn, remark)
-            VALUES (%s, %s, %s)
-        """
-        return await self.pool.async_save(query=query, params=(task_id, wx_sn, remark))
-
-    # 更新解构任务状态
-    async def update_decode_task_status(
-        self, task_id: str, ori_status: int, new_status: int, remark: str = None
-    ) -> int:
-        query = """
-            UPDATE long_articles_decode_tasks
-            SET status = %s, remark = %s
-            WHERE task_id = %s AND status = %s;
-        """
-        return await self.pool.async_save(
-            query=query, params=(new_status, remark, task_id, ori_status)
-        )
-
-    # 设置解构结果
-    async def set_decode_result(
-        self, task_id: str, result: str, remark: str = None
-    ) -> int:
-        query = """
-            UPDATE long_articles_decode_tasks
-            SET status = %s, remark = %s, result = %s
-            WHERE task_id = %s AND status = %s;
-        """
-        return await self.pool.async_save(
-            query=query,
-            params=(
-                self.TaskStatus.SUCCESS,
-                remark,
-                result,
-                task_id,
-                self.TaskStatus.PROCESSING,
-            ),
-        )
-
-    # 获取待拉取结果的解构任务(status=INIT,尚未拿到解构结果)
-    async def fetch_decoding_tasks(self) -> List[Dict]:
-        query = """
-            SELECT task_id FROM long_articles_decode_tasks WHERE status = %s LIMIT %s;
-        """
-        return await self.pool.async_fetch(
-            query=query, params=(self.TaskStatus.INIT, self.TASK_BATCH)
-        )
-
-    # 获取待解析的任务(获取处理成功的任务)
-    async def fetch_extract_tasks(self):
-        query = """
-            SELECT id, result FROM long_articles_decode_tasks
-            WHERE extract_status = %s AND status = %s;
-        """
-        return await self.pool.async_fetch(
-            query=query, params=(self.ExtractStatus.INIT, self.TaskStatus.SUCCESS)
-        )
-
-    # 修改解析状态(用于加锁与状态流转)
-    async def update_extract_status(self, task_id, ori_status, new_status):
-        query = """
-            UPDATE long_articles_decode_tasks
-            SET extract_status = %s WHERE extract_status = %s AND id = %s;
-        """
-        return await self.pool.async_save(
-            query=query,
-            params=(
-                new_status,
-                ori_status,
-                task_id,
-            ),
-        )
-
-    # 记录解析结果明细到 long_articles_decode_task_detail
-    async def record_extract_detail(self, decode_task_id: int, detail: Dict) -> int:
-        query = """
-            INSERT INTO long_articles_decode_task_detail
-                (decode_task_id, inspiration, purpose, key_point, topic)
-            VALUES (%s, %s, %s, %s, %s);
-        """
-        return await self.pool.async_save(
-            query=query,
-            params=(
-                decode_task_id,
-                detail.get("inspiration", ""),
-                detail.get("purpose", ""),
-                detail.get("key_point", ""),
-                detail.get("topic", ""),
-            ),
-        )
-
-
-class AdPlatformArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
-    def __init__(self, pool: DatabaseManager):
-        super().__init__(pool)
-
-    # 修改文章解构状态
-    async def update_article_decode_status(
-        self, id_: int, ori_status: int, new_status: int
-    ) -> int:
-        query = """
-            UPDATE ad_platform_accounts_daily_detail
-            SET decode_status = %s
-            WHERE id = %s AND decode_status = %s;
-        """
-        return await self.pool.async_save(
-            query=query, params=(new_status, id_, ori_status)
-        )
-
-    # 获取待解构文章
-    async def fetch_decode_articles(self) -> List[Dict]:
-        query = """
-            SELECT id, account_name, gh_id, article_title, article_cover,
-                   article_text, article_images, wx_sn
-            FROM ad_platform_accounts_daily_detail WHERE fetch_status = %s AND decode_status = %s
-            LIMIT %s;
-        """
-        return await self.pool.async_fetch(
-            query=query,
-            params=(self.TaskStatus.SUCCESS, self.TaskStatus.INIT, self.TASK_BATCH),
-        )
-
-
-class InnerArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
-    def __init__(self, pool: DatabaseManager):
-        super().__init__(pool)
-
-    # 获取内部文章
-    async def fetch_inner_articles(self):
-        query = """
-            SELECT  title
-                  ,SUM(fans) AS total_fans
-                  ,SUM(view_count) AS total_view
-                  ,SUM(view_count) / SUM(fans) AS avg_read_rate
-                  ,SUM(first_level) AS total_first_level
-                  ,MAX(source_id) as source_id
-                  ,MAX(wx_sn) as wx_sn
-            FROM    datastat_sort_strategy
-            WHERE  date_str >= '20250101'
-            GROUP BY title
-            HAVING total_fans > 100000
-            AND     avg_read_rate > 0.008
-            AND     total_first_level > 0;
-
-        """
-        return await self.pool.async_fetch(query=query)
-
-    # 获取内部文章生成信息
-    async def fetch_inner_articles_produce_detail(self, source_id) -> List[Dict]:
-        query = """
-            SELECT produce_module_type, output
-            FROM produce_plan_module_output WHERE plan_exe_id = %s
-            AND produce_module_type in (1,2,3,4,18); 
-        """
-        return await self.pool.async_fetch(
-            query=query, db_name="aigc", params=(source_id,)
-        )
-
-
-__all__ = [
-    "ArticlesDecodeTaskMapper",
-    "AdPlatformArticlesDecodeTaskMapper",
-    "InnerArticlesDecodeTaskMapper",
-]

+ 0 - 119
app/domains/llm_tasks/aigc_decode_task/_utils.py

@@ -1,119 +0,0 @@
-import json
-from typing import Dict, List
-
-from app.infra.internal import DecodeServer
-
-from ._const import DecodeTaskConst
-
-
-class DecodeTaskUtil(DecodeTaskConst):
-    decode_server = DecodeServer()
-
-    def prepare_extract_body(self, article: Dict) -> Dict:
-        return {
-            "scene": self.BusinessScene.POINT_PICK,
-            "content_type": self.ContentType.LONG_ARTICLE,
-            "content": {
-                "channel_content_id": article.get("wx_sn", ""),
-                "video_url": "",
-                "images": article.get("article_images"),
-                "body_text": article.get("article_text", ""),
-                "title": article.get("article_title", ""),
-                "channel_account_id": article.get("gh_id", ""),
-                "channel_account_name": article.get("account_name", ""),
-            },
-        }
-
-    @staticmethod
-    def extract_decode_result(result: Dict) -> Dict:
-        """
-        从结构的结果中,解析出灵感点、目的点、关键点;
-        """
-        final_result = result.get("final_normalization_rebuild")
-        if not final_result:
-            return {"error": "解构结果中无 final_normalization_rebuild 信息"}
-        # 灵感点
-        inspiration_list = final_result.get("inspiration_final_result", {}).get(
-            "最终灵感点列表", []
-        )
-        # 目的
-        purpose_list = final_result.get("purpose_final_result", {}).get(
-            "最终目的点列表", []
-        )
-        # 关键点
-        keypoint_list = final_result.get("keypoint_final", {}).get("最终关键点列表", [])
-
-        topic_fusion = final_result.get("topic_fusion_result", {})
-        # 选题
-        topic_text = (
-            topic_fusion.get("最终选题", {}).get("选题", "")
-            if isinstance(topic_fusion.get("最终选题"), dict)
-            else ""
-        )
-
-        def _join_points(items: list, key: str) -> str:
-            parts = [str(p[key]) for p in items if isinstance(p, dict) and p.get(key)]
-            return ",".join(parts)
-
-        return {
-            "inspiration": _join_points(inspiration_list, "灵感点"),
-            "purpose": _join_points(purpose_list, "目的点"),
-            "key_point": _join_points(keypoint_list, "关键点"),
-            "topic": topic_text,
-        }
-
-    async def fetch_decode_result(self, task_id: str):
-        return await self.decode_server.fetch_result(task_id)
-
-
-class AdPlatformArticlesDecodeUtils(DecodeTaskUtil):
-    @staticmethod
-    def format_images(images: str) -> List[str]:
-        """
-        格式化图片字符串,空/非法 JSON 返回空列表。
-        """
-        if not images or not images.strip():
-            return []
-        try:
-            image_list = json.loads(images)
-        except (json.JSONDecodeError, TypeError):
-            return []
-        if not isinstance(image_list, list):
-            return []
-        return [
-            i.get("image_url")
-            for i in image_list
-            if isinstance(i, dict) and i.get("image_url")
-        ]
-
-    async def create_decode_task(self, article: Dict):
-        images = self.format_images(article.get("article_images") or "")
-        article["article_images"] = images
-        request_body = self.prepare_extract_body(article)
-        return await self.decode_server.create_decode_task(request_body)
-
-
-class InnerArticlesDecodeUtils(DecodeTaskUtil):
-    async def create_decode_task(self, article: Dict, article_produce_info: List[Dict]):
-        images = [
-            i["output"]
-            for i in article_produce_info
-            if i["produce_module_type"]
-            in (self.ProduceModuleType.COVER, self.ProduceModuleType.IMAGE)
-        ]
-        article["article_images"] = images
-        text = [
-            i["output"]
-            for i in article_produce_info
-            if i["produce_module_type"] == self.ProduceModuleType.CONTENT
-        ]
-        article["article_text"] = "\n".join(text)
-        request_body = self.prepare_extract_body(article)
-        return await self.decode_server.create_decode_task(request_body)
-
-
-__all__ = [
-    "AdPlatformArticlesDecodeUtils",
-    "InnerArticlesDecodeUtils",
-    "DecodeTaskUtil",
-]

+ 0 - 174
app/domains/llm_tasks/aigc_decode_task/create_decode_tasks.py

@@ -1,174 +0,0 @@
-from typing import Dict
-from tqdm import tqdm
-
-from app.core.database import DatabaseManager
-from app.core.observability import LogService
-
-from ._const import DecodeTaskConst
-from ._mapper import AdPlatformArticlesDecodeTaskMapper, InnerArticlesDecodeTaskMapper
-from ._utils import AdPlatformArticlesDecodeUtils, InnerArticlesDecodeUtils
-
-
-class CreateAdPlatformArticlesDecodeTask(DecodeTaskConst):
-    def __init__(self, pool: DatabaseManager, log_service: LogService):
-        self.pool = pool
-        self.log_service = log_service
-        self.mapper = AdPlatformArticlesDecodeTaskMapper(self.pool)
-        self.tool = AdPlatformArticlesDecodeUtils()
-
-    async def create_single_decode_task(self, article: Dict):
-        # Acquire Lock
-        article_id = article["id"]
-        acquire_lock = await self.mapper.update_article_decode_status(
-            article_id, self.TaskStatus.INIT, self.TaskStatus.PROCESSING
-        )
-        if not acquire_lock:
-            await self.log_service.log(
-                contents={
-                    "article_id": article_id,
-                    "task": "create_decode_task",
-                    "status": "skip",
-                    "message": "acquire lock failed",
-                }
-            )
-            return
-
-        # 与解构系统交互,创建解构任务
-        response = await self.tool.create_decode_task(article)
-        response_code = response.get("code")
-        if response_code != self.RequestDecode.SUCCESS:
-            # 解构任务创建失败
-            await self.mapper.update_article_decode_status(
-                article_id, self.TaskStatus.PROCESSING, self.TaskStatus.FAILED
-            )
-            await self.log_service.log(
-                contents={
-                    "article_id": article_id,
-                    "task": "create_decode_task",
-                    "status": "fail",
-                    "data": response,
-                }
-            )
-            return
-
-        task_id = response.get("data", {}).get("task_id") or response.get(
-            "data", {}
-        ).get("taskId")
-        if not task_id:
-            # 解构任务创建失败
-            await self.mapper.update_article_decode_status(
-                article_id, self.TaskStatus.PROCESSING, self.TaskStatus.FAILED
-            )
-            await self.log_service.log(
-                contents={
-                    "article_id": article_id,
-                    "task": "create_decode_task",
-                    "status": "fail",
-                    "data": response,
-                }
-            )
-            return
-
-        # 创建 decode 任务成功
-        await self.log_service.log(
-            contents={
-                "article_id": article_id,
-                "task": "create_decode_task",
-                "status": "success",
-                "data": response,
-            }
-        )
-
-        wx_sn = article["wx_sn"]
-        remark = f"task_id: {task_id}-创建解构任务"
-        record_row = await self.mapper.record_decode_task(task_id, wx_sn, remark)
-        if not record_row:
-            # 记录解构任务失败
-            await self.mapper.update_article_decode_status(
-                article_id, self.TaskStatus.PROCESSING, self.TaskStatus.FAILED
-            )
-            await self.log_service.log(
-                contents={
-                    "article_id": article_id,
-                    "task": "record_decode_task",
-                    "status": "fail",
-                    "message": "创建 decode 记录失败",
-                    "data": response,
-                }
-            )
-            return
-
-        # 记录创建成功
-        await self.mapper.update_article_decode_status(
-            article_id, self.TaskStatus.PROCESSING, self.TaskStatus.SUCCESS
-        )
-
-    async def create_tasks(self):
-        article_list = await self.mapper.fetch_decode_articles()
-        if not article_list:
-            await self.log_service.log(
-                contents={
-                    "task": "create_tasks",
-                    "message": "No more articles to decode",
-                }
-            )
-            return
-
-        for article in tqdm(article_list, desc="Creating decode tasks"):
-            await self.create_single_decode_task(article)
-
-    async def deal(self):
-        await self.create_tasks()
-
-
-class CreateInnerArticlesDecodeTask(DecodeTaskConst):
-    def __init__(self, pool: DatabaseManager, log_service: LogService):
-        self.pool = pool
-        self.log_service = log_service
-        self.mapper = InnerArticlesDecodeTaskMapper(self.pool)
-        self.tool = InnerArticlesDecodeUtils()
-
-    async def create_single_decode_task(self, article: Dict):
-        # Acquire Lock
-        source_id = article["source_id"]
-        article_produce_info = await self.mapper.fetch_inner_articles_produce_detail(
-            source_id
-        )
-
-        # 与解构系统交互,创建解构任务
-        response = await self.tool.create_decode_task(article, article_produce_info)
-        response_code = response.get("code")
-        if response_code != self.RequestDecode.SUCCESS:
-            return
-
-        task_id = response.get("data", {}).get("task_id") or response.get(
-            "data", {}
-        ).get("taskId")
-        if not task_id:
-            return
-
-        wx_sn = article["wx_sn"]
-        remark = f"task_id: {task_id}-创建解构任务"
-        record_row = await self.mapper.record_decode_task(task_id, wx_sn, remark)
-        if not record_row:
-            return
-
-    async def create_tasks(self):
-        article_list = await self.mapper.fetch_inner_articles()
-        if not article_list:
-            await self.log_service.log(
-                contents={
-                    "task": "create_tasks",
-                    "message": "No more articles to decode",
-                }
-            )
-            return
-
-        for article in tqdm(article_list, desc="Creating decode tasks"):
-            await self.create_single_decode_task(article)
-
-    async def deal(self):
-        await self.create_tasks()
-
-
-__all__ = ["CreateAdPlatformArticlesDecodeTask", "CreateInnerArticlesDecodeTask"]

+ 0 - 127
app/domains/llm_tasks/aigc_decode_task/fetch_decode_results.py

@@ -1,127 +0,0 @@
-import json
-from typing import Dict
-
-from app.core.database import DatabaseManager
-from app.core.observability import LogService
-
-from ._const import DecodeTaskConst
-from ._mapper import ArticlesDecodeTaskMapper
-from ._utils import DecodeTaskUtil
-
-
-class FetchDecodeResults(DecodeTaskConst):
-    def __init__(self, pool: DatabaseManager, log_service: LogService):
-        self.pool = pool
-        self.log_service = log_service
-        self.mapper = ArticlesDecodeTaskMapper(self.pool)
-        self.tool = DecodeTaskUtil()
-
-    async def fetch_single_task(self, task: Dict):
-        task_id = task["task_id"]
-
-        # acquire lock
-        acquire_lock = await self.mapper.update_decode_task_status(
-            task_id, self.TaskStatus.INIT, self.TaskStatus.PROCESSING
-        )
-        if not acquire_lock:
-            return
-
-        response = await self.tool.fetch_decode_result(task_id)
-        if not response:
-            await self.mapper.update_decode_task_status(
-                task_id=task_id,
-                ori_status=self.TaskStatus.PROCESSING,
-                new_status=self.TaskStatus.INIT,
-                remark="获取解构结果失败,服务异常,已回滚状态",
-            )
-            return
-
-        # 请求成功
-        response_code = response.get("code")
-        if response_code != self.RequestDecode.SUCCESS:
-            # 解构任务获取失败
-            await self.mapper.update_decode_task_status(
-                task_id=task_id,
-                ori_status=self.TaskStatus.PROCESSING,
-                new_status=self.TaskStatus.FAILED,
-                remark=f"请求解构接口返回异常,标记为失败:{json.dumps(response, ensure_ascii=False)}",
-            )
-            return
-
-        response_data = response.get("data", {})
-        response_task_id = response_data.get("taskId") or response_data.get("task_id")
-        if task_id != response_task_id:
-            # 解构任务获取失败
-            await self.mapper.update_decode_task_status(
-                task_id=task_id,
-                ori_status=self.TaskStatus.PROCESSING,
-                new_status=self.TaskStatus.FAILED,
-                remark=f"请求解构接口TaskId异常:{json.dumps(response, ensure_ascii=False)}",
-            )
-            return
-
-        status = response_data.get("status")
-        match status:
-            case self.DecodeStatus.PENDING:
-                await self.mapper.update_decode_task_status(
-                    task_id=task_id,
-                    ori_status=self.TaskStatus.PROCESSING,
-                    new_status=self.TaskStatus.INIT,
-                    remark=f"解构任务状态为PENDING,继续轮询",
-                )
-
-            case self.DecodeStatus.RUNNING:
-                await self.mapper.update_decode_task_status(
-                    task_id=task_id,
-                    ori_status=self.TaskStatus.PROCESSING,
-                    new_status=self.TaskStatus.INIT,
-                    remark=f"解构任务状态为RUNNING,继续轮询",
-                )
-
-            case self.DecodeStatus.SUCCESS:
-                await self.mapper.set_decode_result(
-                    task_id=task_id,
-                    result=json.dumps(response_data, ensure_ascii=False),
-                )
-
-            case self.DecodeStatus.FAILED:
-                await self.mapper.update_decode_task_status(
-                    task_id=task_id,
-                    ori_status=self.TaskStatus.PROCESSING,
-                    new_status=self.TaskStatus.FAILED,
-                    remark=f"解构任务状态为FAILED,标记为失败",
-                )
-
-            case _:
-                await self.mapper.update_decode_task_status(
-                    task_id=task_id,
-                    ori_status=self.TaskStatus.PROCESSING,
-                    new_status=self.TaskStatus.INIT,
-                    remark=f"解构任务状态未知(status={status}),回滚待重试:{json.dumps(response_data, ensure_ascii=False)}",
-                )
-                await self.log_service.log(
-                    contents={
-                        "task": "fetch_single_task",
-                        "task_id": task_id,
-                        "status": "unknown",
-                        "message": f"unexpected decode status: {status}",
-                        "data": response_data,
-                    }
-                )
-
-    async def fetch_results(self):
-        decoding_tasks = await self.mapper.fetch_decoding_tasks()
-        if not decoding_tasks:
-            await self.log_service.log(
-                contents={"task": "fetch_results", "message": "No more tasks to fetch"}
-            )
-            return
-
-        for task in decoding_tasks:
-            await self.fetch_single_task(task)
-
-    async def deal(self):
-        await self.fetch_results()
-
-
-__all__ = ["FetchDecodeResults"]

+ 0 - 0
app/domains/llm_tasks/aigc_decode_task/__init__.py → app/domains/llm_tasks/decode_article/__init__.py


+ 53 - 0
app/domains/llm_tasks/decode_article/_const.py

@@ -0,0 +1,53 @@
class DecodeArticleConst:
    """Constants shared by the article-decode task pipeline."""

    # Decode-service configuration id ("long article / article decode / production").
    CONFIG_ID = 66
    # Max rows pulled from the DB per scheduling round.
    TASK_BATCH = 500
    # Max posts per call when submitting to the decode API.
    SUBMIT_BATCH = 50

    class TaskStatus:
        """Lifecycle of a decode-task row in our DB."""

        INIT = 0
        PROCESSING = 1
        SUCCESS = 2
        FAILED = 99

    class ExtractStatus(TaskStatus):
        """Result-extraction lifecycle; reuses the TaskStatus values."""

    class SubmitStatus:
        """Status strings returned by the submit API."""

        SUCCESS = "SUCCESS"
        PENDING = "PENDING"
        FAILED = "FAILED"

    class QueryStatus:
        """Status strings returned by the result-query API."""

        SUCCESS = "SUCCESS"
        PENDING = "PENDING"
        RUNNING = "RUNNING"
        FAILED = "FAILED"

    class SourceType:
        """Where an article came from."""

        AD_PLATFORM = 1
        INNER = 2

    class ContentModal:
        """Content modality codes used by the decode API."""

        PICTURE_TEXT = 2  # picture + text
        LONG_ARTICLE = 3  # long-form article
        VIDEO = 4  # video
        AUDIO = 5  # audio

    class Channel:
        """Publishing-channel codes used by the decode API."""

        XIAOHONGSHU = 1  # Xiaohongshu
        DOUYIN = 2  # Douyin
        PINTEREST = 3  # Pinterest
        REDDIT = 4  # Reddit
        WECHAT = 5  # WeChat official account
        TOUTIAO = 6  # Toutiao
        PIAOQUAN = 10  # Piaoquan

    class ProduceModuleType:
        """Module types stored in produce_plan_module_output."""

        COVER = 1  # cover
        IMAGE = 2  # image
        TITLE = 3  # title
        CONTENT = 4  # body text
        SUMMARY = 18  # summary


__all__ = ["DecodeArticleConst"]

+ 260 - 0
app/domains/llm_tasks/decode_article/_mapper.py

@@ -0,0 +1,260 @@
+from typing import Dict, List
+
+from app.core.database import DatabaseManager
+
+from ._const import DecodeArticleConst
+
TABLE = "long_articles_decode_tasks"


class ArticlesDecodeTaskMapper(DecodeArticleConst):
    """Data-access layer for decode-task rows in ``long_articles_decode_tasks``.

    All writes are compare-and-swap style (``WHERE ... AND status = %s``) so
    concurrent workers can use status transitions as lightweight row locks.
    """

    def __init__(self, pool: DatabaseManager):
        self.pool = pool

    async def insert_decode_task(
        self,
        source_id: str,
        source: int,
        payload: str,
        remark: str = None,
        status: int = None,
    ) -> int:
        """Insert one decode task; ``INSERT IGNORE`` keeps re-runs idempotent.

        The ``status`` column is only written when explicitly provided,
        otherwise the table default applies.  Returns affected-row count.
        """
        # Build the column list once instead of duplicating the whole
        # statement across two branches (original code repeated the INSERT).
        columns = ["source_id", "config_id", "source", "payload", "remark"]
        params = [source_id, self.CONFIG_ID, source, payload, remark]
        if status is not None:
            columns.append("status")
            params.append(status)
        query = f"""
            INSERT IGNORE INTO {TABLE}
                ({", ".join(columns)})
            VALUES ({", ".join(["%s"] * len(columns))})
        """
        return await self.pool.async_save(query=query, params=tuple(params))

    async def update_task_status_by_source_id(
        self,
        source_id: str,
        ori_status: int,
        new_status: int,
        remark: str = None,
    ) -> int:
        """CAS status transition ``ori_status -> new_status`` for one source_id."""
        query = f"""
            UPDATE {TABLE}
            SET status = %s, remark = %s
            WHERE source_id = %s AND status = %s AND config_id = %s
        """
        return await self.pool.async_save(
            query=query,
            params=(new_status, remark, source_id, ori_status, self.CONFIG_ID),
        )

    async def set_decode_result(
        self,
        source_id: str,
        result: str,
        remark: str = None,
    ) -> int:
        """Store the decode result and mark the task SUCCESS.

        Accepts tasks currently in INIT or PROCESSING so a result can land
        regardless of which worker currently holds the polling lock.
        """
        query = f"""
            UPDATE {TABLE}
            SET status = %s, result = %s, remark = %s
            WHERE source_id = %s AND status IN (%s, %s) AND config_id = %s
        """
        return await self.pool.async_save(
            query=query,
            params=(
                self.TaskStatus.SUCCESS,
                result,
                remark,
                source_id,
                self.TaskStatus.INIT,
                self.TaskStatus.PROCESSING,
                self.CONFIG_ID,
            ),
        )

    async def fetch_pending_tasks(self, source: int = None) -> List[Dict]:
        """Fetch up to TASK_BATCH tasks still awaiting a decode result.

        When ``source`` is given, restrict to that source type.
        """
        # Assemble the WHERE clause once instead of duplicating the query.
        conditions = ["status IN (%s, %s)"]
        params = [self.TaskStatus.INIT, self.TaskStatus.PROCESSING]
        if source is not None:
            conditions.append("source = %s")
            params.append(source)
        conditions.append("config_id = %s")
        params.extend([self.CONFIG_ID, self.TASK_BATCH])
        query = f"""
            SELECT source_id
            FROM {TABLE}
            WHERE {" AND ".join(conditions)}
            LIMIT %s
        """
        return await self.pool.async_fetch(query=query, params=tuple(params))

    async def fetch_existing_source_ids(self, source_ids: List[str]) -> set:
        """Return the subset of ``source_ids`` that already have an active
        (INIT/PROCESSING) or finished (SUCCESS) task — used to skip duplicates.
        """
        if not source_ids:
            return set()
        placeholders = ",".join(["%s"] * len(source_ids))
        query = f"""
            SELECT source_id FROM {TABLE}
            WHERE source_id IN ({placeholders})
              AND config_id = %s
              AND status IN (%s, %s, %s)
        """
        rows = await self.pool.async_fetch(
            query=query,
            params=(
                *source_ids,
                self.CONFIG_ID,
                self.TaskStatus.INIT,
                self.TaskStatus.PROCESSING,
                self.TaskStatus.SUCCESS,
            ),
        )
        return {r["source_id"] for r in rows}

    async def fetch_extract_tasks(self) -> List[Dict]:
        """Fetch successful tasks whose result has not been parsed yet."""
        query = f"""
            SELECT id, result FROM {TABLE}
            WHERE extract_status = %s AND status = %s AND config_id = %s
        """
        return await self.pool.async_fetch(
            query=query,
            params=(self.ExtractStatus.INIT, self.TaskStatus.SUCCESS, self.CONFIG_ID),
        )

    async def update_extract_status(
        self, task_id: int, ori_status: int, new_status: int
    ) -> int:
        """CAS extract_status transition; doubles as a row lock for parsing."""
        query = f"""
            UPDATE {TABLE}
            SET extract_status = %s
            WHERE extract_status = %s AND id = %s
        """
        return await self.pool.async_save(
            query=query, params=(new_status, ori_status, task_id)
        )

    async def record_extract_detail(self, decode_task_id: int, detail: Dict) -> int:
        """Persist one parsed-result row to long_articles_decode_task_detail."""
        query = """
            INSERT INTO long_articles_decode_task_detail
                (decode_task_id, inspiration, purpose, key_point, topic)
            VALUES (%s, %s, %s, %s, %s)
        """
        return await self.pool.async_save(
            query=query,
            params=(
                decode_task_id,
                detail.get("inspiration", ""),
                detail.get("purpose", ""),
                detail.get("key_point", ""),
                detail.get("topic", ""),
            ),
        )
+
+
class AdPlatformArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
    """Mapper for articles harvested from the ad-platform account crawl."""

    # No __init__ override: the base-class constructor already does exactly
    # what this subclass needs (the original pass-through was redundant).

    async def update_article_decode_status(
        self, id_: int, ori_status: int, new_status: int
    ) -> int:
        """CAS ``decode_status`` transition on one crawled-article row."""
        query = """
            UPDATE ad_platform_accounts_daily_detail
            SET decode_status = %s
            WHERE id = %s AND decode_status = %s
        """
        return await self.pool.async_save(
            query=query, params=(new_status, id_, ori_status)
        )

    async def fetch_decode_articles(self) -> List[Dict]:
        """Fetch up to TASK_BATCH articles fetched but not yet decoded."""
        query = """
            SELECT id, account_name, gh_id, article_title, article_cover,
                   article_text, article_images, wx_sn
            FROM ad_platform_accounts_daily_detail
            WHERE fetch_status = %s AND decode_status = %s
            LIMIT %s
        """
        return await self.pool.async_fetch(
            query=query,
            params=(self.TaskStatus.SUCCESS, self.TaskStatus.INIT, self.TASK_BATCH),
        )
+
+
class InnerArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
    """Mapper for internally produced articles queued for decoding."""

    # Source table holding internal articles awaiting decode.
    TABLE_INNER = "long_articles_decode_articles"

    # No __init__ override: the base-class constructor already does exactly
    # what this subclass needs (the original pass-through was redundant).

    async def fetch_inner_articles(self) -> List[Dict]:
        """Fetch up to TASK_BATCH internal articles still in INIT state."""
        query = f"""
            SELECT id, title, source_id, coverimgurl, article_text, summary, card_title
            FROM {self.TABLE_INNER}
            WHERE status = %s
            LIMIT %s
        """
        return await self.pool.async_fetch(
            query=query, params=(self.TaskStatus.INIT, self.TASK_BATCH)
        )

    async def update_inner_article_status(
        self, id_: int, ori_status: int, new_status: int
    ) -> int:
        """CAS status transition on one internal-article row."""
        query = f"""
            UPDATE {self.TABLE_INNER}
            SET status = %s
            WHERE id = %s AND status = %s
        """
        return await self.pool.async_save(
            query=query, params=(new_status, id_, ori_status)
        )

    async def fetch_inner_articles_produce_detail(self, source_id) -> List[Dict]:
        """Fetch cover/image/content module outputs for one produce plan
        (produce_module_type 1=cover, 2=image, 4=content) from the aigc DB."""
        query = """
            SELECT produce_module_type, output
            FROM produce_plan_module_output
            WHERE plan_exe_id = %s
            AND produce_module_type IN (1, 2, 4)
        """
        return await self.pool.async_fetch(
            query=query, db_name="aigc", params=(source_id,)
        )


__all__ = [
    "ArticlesDecodeTaskMapper",
    "AdPlatformArticlesDecodeTaskMapper",
    "InnerArticlesDecodeTaskMapper",
]

+ 169 - 0
app/domains/llm_tasks/decode_article/_utils.py

@@ -0,0 +1,169 @@
+import json
+from typing import Dict, List
+
+from app.infra.internal.aigc_decode_server import AigcDecodeServer
+
+from ._const import DecodeArticleConst
+
+
class AigcDecodeUtils(DecodeArticleConst):
    """Client-side helpers around the AIGC decode HTTP service."""

    # One shared client object; the server wrapper holds no per-call state.
    decode_server = AigcDecodeServer()

    async def submit_decode_batch(
        self, posts: List[Dict], *, config_id: int = None, skip_completed: bool = False
    ) -> Dict[str, Dict]:
        """Submit posts in SUBMIT_BATCH-sized chunks.

        Returns ``{content_id: {status, errorMessage}}``; when an entire
        chunk's API call fails, every post in that chunk is reported FAILED.
        """
        effective_config = config_id or self.CONFIG_ID
        outcome: Dict[str, Dict] = {}
        for offset in range(0, len(posts), self.SUBMIT_BATCH):
            chunk = posts[offset : offset + self.SUBMIT_BATCH]
            response = await self.decode_server.submit_decode(
                config_id=effective_config, posts=chunk, skip_completed=skip_completed
            )
            if response.get("code") == 0:
                outcome.update(
                    {
                        entry["channelContentId"]: entry
                        for entry in response.get("data", [])
                    }
                )
                continue
            # Whole chunk failed — mark each post FAILED so callers can roll back.
            for post in chunk:
                cid = post["channelContentId"]
                outcome[cid] = {
                    "channelContentId": cid,
                    "status": "FAILED",
                    "errorMessage": f"batch submit failed: {response}",
                }
        return outcome

    async def query_decode_results_batch(
        self, content_ids: List[str], *, config_id: int = None
    ) -> Dict[str, Dict]:
        """Query decode results in SUBMIT_BATCH-sized chunks.

        Returns ``{content_id: {status, dataContent, html, errorMessage}}``.
        When the API call itself fails the entry's status is API_ERROR and the
        caller should keep the task in INIT so it is retried later.
        """
        effective_config = config_id or self.CONFIG_ID
        outcome: Dict[str, Dict] = {}
        for offset in range(0, len(content_ids), self.SUBMIT_BATCH):
            chunk = content_ids[offset : offset + self.SUBMIT_BATCH]
            response = await self.decode_server.query_decode_results(
                config_id=effective_config, channel_content_ids=chunk
            )
            if response.get("code") == 0:
                outcome.update(
                    {
                        entry["channelContentId"]: entry
                        for entry in response.get("data", [])
                    }
                )
                continue
            for cid in chunk:
                outcome[cid] = {
                    "channelContentId": cid,
                    "status": "API_ERROR",
                    "errorMessage": f"query API failed: {response}",
                }
        return outcome

    @staticmethod
    def extract_decode_result(result: Dict) -> Dict:
        """Pull inspiration / purpose / key-point / topic fields from a decode result.

        Supports both formats: v1 wraps everything inside
        ``final_normalization_rebuild``; v2 keeps the fields at the top level.
        """
        payload = result.get("final_normalization_rebuild") or result

        def _concat(entries: list, field: str) -> str:
            # Comma-join the named field from each well-formed dict entry.
            values = [
                str(entry[field])
                for entry in entries
                if isinstance(entry, dict) and entry.get(field)
            ]
            return ",".join(values)

        topic_block = payload.get("topic_fusion_result", {}).get("最终选题")
        topic_text = topic_block.get("选题", "") if isinstance(topic_block, dict) else ""

        return {
            "inspiration": _concat(
                payload.get("inspiration_final_result", {}).get("最终灵感点列表", []),
                "灵感点",
            ),
            "purpose": _concat(
                payload.get("purpose_final_result", {}).get("最终目的点列表", []),
                "目的点",
            ),
            "key_point": _concat(
                payload.get("keypoint_final", {}).get("最终关键点列表", []),
                "关键点",
            ),
            "topic": topic_text,
        }
+
+
class AdPlatformArticlesDecodeUtils(AigcDecodeUtils):
    """Decode helpers for ad-platform (WeChat) articles keyed by wx_sn."""

    @staticmethod
    def format_images(images: str) -> List[str]:
        """Parse the JSON-encoded image list column.

        Returns a list of ``image_url`` strings; any blank, malformed or
        non-list input yields an empty list.
        """
        if not images or not images.strip():
            return []
        try:
            parsed = json.loads(images)
        except (json.JSONDecodeError, TypeError):
            return []
        if not isinstance(parsed, list):
            return []
        urls = []
        for entry in parsed:
            if isinstance(entry, dict) and entry.get("image_url"):
                urls.append(entry.get("image_url"))
        return urls

    def prepare_posts(self, articles: List[Dict]) -> List[Dict]:
        """Map DB article rows to decode-API post payloads (channelContentId = wx_sn)."""
        return [
            {
                "channelContentId": article["wx_sn"],
                "title": article.get("article_title", ""),
                "bodyText": article.get("article_text", ""),
                "images": self.format_images(article.get("article_images") or ""),
                "video": None,
                "contentModal": self.ContentModal.LONG_ARTICLE,
                "channel": self.Channel.WECHAT,
            }
            for article in articles
        ]
+
+
class InnerArticlesDecodeUtils(AigcDecodeUtils):
    """Decode helpers for internally produced articles keyed by source_id."""

    def prepare_posts(
        self, articles: List[Dict], produce_info_map: Dict[str, List[Dict]]
    ) -> List[Dict]:
        """Build decode-API payloads.

        Images are collected in a fixed order: the article's own cover url,
        then produce COVER outputs, then produce IMAGE outputs.
        """
        posts: List[Dict] = []
        for article in articles:
            sid = str(article["source_id"])
            details = produce_info_map.get(sid, [])

            image_urls: List[str] = []
            cover = article.get("coverimgurl")
            if cover:
                image_urls.append(cover)
            image_urls.extend(
                d["output"]
                for d in details
                if d["produce_module_type"] == self.ProduceModuleType.COVER
            )
            image_urls.extend(
                d["output"]
                for d in details
                if d["produce_module_type"] == self.ProduceModuleType.IMAGE
            )

            posts.append(
                {
                    "title": article.get("title", ""),
                    "bodyText": article.get("article_text", ""),
                    "images": image_urls,
                    "video": None,
                    "contentModal": self.ContentModal.LONG_ARTICLE,
                    "channel": self.Channel.WECHAT,
                    "channelContentId": sid,
                }
            )
        return posts
+
+
+__all__ = [
+    "AigcDecodeUtils",
+    "AdPlatformArticlesDecodeUtils",
+    "InnerArticlesDecodeUtils",
+]

+ 409 - 0
app/domains/llm_tasks/decode_article/create_decode_tasks.py

@@ -0,0 +1,409 @@
+import json
+
+from tqdm import tqdm
+from typing import Dict, List
+
+from app.core.database import DatabaseManager
+from app.core.observability import LogService
+
+from ._const import DecodeArticleConst
+from ._mapper import (
+    AdPlatformArticlesDecodeTaskMapper,
+    InnerArticlesDecodeTaskMapper,
+)
+from ._utils import AdPlatformArticlesDecodeUtils, InnerArticlesDecodeUtils
+
+
class CreateAdPlatformArticlesDecodeTask(DecodeArticleConst):
    """Submit ad-platform (WeChat) articles to the AIGC decode service.

    Flow: lock pending rows (decode_status INIT -> PROCESSING), submit them
    in batches, then record one decode-task row per article depending on the
    submit status. On failure or missing response the row is rolled back to
    INIT so a later run retries it.
    """

    def __init__(self, pool: DatabaseManager, log_service: LogService):
        # DB pool and structured logger are injected by the task scheduler.
        self.pool = pool
        self.log_service = log_service
        self.mapper = AdPlatformArticlesDecodeTaskMapper(self.pool)
        self.tool = AdPlatformArticlesDecodeUtils()

    async def _acquire_articles(self) -> List[Dict]:
        """Fetch pending articles and lock them (decode_status INIT -> PROCESSING).

        Only rows whose compare-and-swap update succeeded are returned; rows
        lost to a concurrent worker are logged and skipped.
        """
        article_list = await self.mapper.fetch_decode_articles()
        locked = []
        for article in article_list:
            article_id = article["id"]
            acquired = await self.mapper.update_article_decode_status(
                article_id, self.TaskStatus.INIT, self.TaskStatus.PROCESSING
            )
            if acquired:
                locked.append(article)
            else:
                await self.log_service.log(
                    contents={
                        "article_id": article_id,
                        "task": "create_decode_task_v2",
                        "status": "skip",
                        "message": "acquire lock failed",
                    }
                )
        return locked

    async def _submit_and_record(self, articles: List[Dict]):
        """Submit locked articles and persist per-article task state.

        Per submit status:
        - missing response / FAILED / unknown: roll article back to INIT;
        - SUCCESS: the result may already exist upstream — query it and store
          immediately, otherwise insert a PROCESSING task row for polling;
        - PENDING: insert a PROCESSING task row for the polling job.
        """
        if not articles:
            return

        posts = self.tool.prepare_posts(articles)
        submit_results = await self.tool.submit_decode_batch(posts)
        # Index the submitted payloads by wx_sn so they can be persisted verbatim.
        posts_by_wx = {p["channelContentId"]: p for p in posts}

        for article in articles:
            wx_sn = article["wx_sn"]
            article_id = article["id"]
            result = submit_results.get(wx_sn)

            if not result:
                # No entry in the submit response: unlock for a later retry.
                await self.mapper.update_article_decode_status(
                    article_id, self.TaskStatus.PROCESSING, self.TaskStatus.INIT
                )
                await self.log_service.log(
                    contents={
                        "article_id": article_id,
                        "wx_sn": wx_sn,
                        "task": "create_decode_task_v2",
                        "status": "fail",
                        "message": "no response for content_id, rolled back to INIT",
                    }
                )
                continue

            status = result.get("status")
            if status == self.SubmitStatus.FAILED:
                # Submit rejected: unlock for retry and log the server payload.
                await self.mapper.update_article_decode_status(
                    article_id, self.TaskStatus.PROCESSING, self.TaskStatus.INIT
                )
                await self.log_service.log(
                    contents={
                        "article_id": article_id,
                        "wx_sn": wx_sn,
                        "task": "create_decode_task_v2",
                        "status": "fail",
                        "data": result,
                    }
                )
                continue

            if status == self.SubmitStatus.SUCCESS:
                # Decode result already exists upstream: query it and store directly.
                query_results = await self.tool.query_decode_results_batch([wx_sn])
                result_data = query_results.get(wx_sn)
                if (
                    result_data
                    and result_data.get("status") == self.QueryStatus.SUCCESS
                ):
                    data_content = result_data.get("dataContent") or "{}"
                    html = result_data.get("html")
                    await self.mapper.insert_decode_task(
                        source_id=wx_sn,
                        source=self.SourceType.AD_PLATFORM,
                        payload=json.dumps(
                            posts_by_wx.get(wx_sn, {}), ensure_ascii=False
                        ),
                        remark="提交时已有解构结果,直接落库",
                    )
                    await self.mapper.set_decode_result(
                        source_id=wx_sn,
                        result=json.dumps(
                            {"dataContent": data_content, "html": html},
                            ensure_ascii=False,
                        ),
                        remark="提交时已返回 SUCCESS,结果已落库",
                    )
                    await self.mapper.update_article_decode_status(
                        article_id, self.TaskStatus.PROCESSING, self.TaskStatus.SUCCESS
                    )
                    await self.log_service.log(
                        contents={
                            "article_id": article_id,
                            "wx_sn": wx_sn,
                            "task": "create_decode_task_v2",
                            "status": "success",
                            "message": "decode result already available on submit",
                        }
                    )
                else:
                    # Submit said SUCCESS but the result is not queryable yet:
                    # insert a PROCESSING task row and let the polling job pick it up.
                    await self.mapper.insert_decode_task(
                        source_id=wx_sn,
                        source=self.SourceType.AD_PLATFORM,
                        payload=json.dumps(
                            posts_by_wx.get(wx_sn, {}), ensure_ascii=False
                        ),
                        remark="提交返回SUCCESS,查询未果,等待轮询",
                        status=self.TaskStatus.PROCESSING,
                    )
                    await self.mapper.update_article_decode_status(
                        article_id,
                        self.TaskStatus.PROCESSING,
                        self.TaskStatus.SUCCESS,
                    )
                    await self.log_service.log(
                        contents={
                            "article_id": article_id,
                            "wx_sn": wx_sn,
                            "task": "create_decode_task_v2",
                            "status": "pending",
                            "message": "submit SUCCESS but query not ready, inserted for polling",
                        }
                    )
            elif status == self.SubmitStatus.PENDING:
                # Task accepted upstream; record it for the polling job.
                await self.mapper.insert_decode_task(
                    source_id=wx_sn,
                    source=self.SourceType.AD_PLATFORM,
                    payload=json.dumps(posts_by_wx.get(wx_sn, {}), ensure_ascii=False),
                    remark="任务已提交,等待轮询",
                    status=self.TaskStatus.PROCESSING,
                )
                await self.mapper.update_article_decode_status(
                    article_id,
                    self.TaskStatus.PROCESSING,
                    self.TaskStatus.SUCCESS,
                )
                await self.log_service.log(
                    contents={
                        "article_id": article_id,
                        "wx_sn": wx_sn,
                        "task": "create_decode_task_v2",
                        "status": "pending",
                        "message": "task submitted, waiting for polling",
                    }
                )
            else:
                # Unknown status value: treat like a failure and unlock for retry.
                await self.mapper.update_article_decode_status(
                    article_id, self.TaskStatus.PROCESSING, self.TaskStatus.INIT
                )
                await self.log_service.log(
                    contents={
                        "article_id": article_id,
                        "wx_sn": wx_sn,
                        "task": "create_decode_task_v2",
                        "status": "fail",
                        "message": f"unexpected submit status: {status}, rolled back to INIT",
                        "data": result,
                    }
                )

    async def deal(self):
        """Entry point: acquire locked articles and submit them."""
        article_list = await self._acquire_articles()
        if not article_list:
            await self.log_service.log(
                contents={
                    "task": "create_decode_task_v2",
                    "message": "No more articles to decode",
                }
            )
            return

        await self._submit_and_record(article_list)
        await self.log_service.log(
            contents={
                "task": "create_decode_task_v2",
                "message": f"Processed {len(article_list)} articles",
            }
        )
+
+
class CreateInnerArticlesDecodeTask(DecodeArticleConst):
    """Submit internally produced articles to the AIGC decode service.

    Mirrors CreateAdPlatformArticlesDecodeTask, but articles are keyed by
    ``source_id`` and their image lists are enriched from produce-plan
    outputs.
    """

    # When True, locking and status updates are skipped so the task can be dry-run.
    _TEST_MODE = False

    def __init__(self, pool: DatabaseManager, log_service: LogService):
        self.pool = pool
        self.log_service = log_service
        self.mapper = InnerArticlesDecodeTaskMapper(self.pool)
        self.tool = InnerArticlesDecodeUtils()

    async def _acquire_articles(self) -> List[Dict]:
        """Fetch pending articles and lock them (status INIT -> PROCESSING).

        In test mode the rows are returned unlocked.
        """
        article_list = await self.mapper.fetch_inner_articles()
        if self._TEST_MODE:
            return article_list

        locked = []
        for article in article_list:
            article_id = article["id"]
            acquired = await self.mapper.update_inner_article_status(
                article_id, self.TaskStatus.INIT, self.TaskStatus.PROCESSING
            )
            if acquired:
                locked.append(article)
            else:
                await self.log_service.log(
                    contents={
                        "article_id": article_id,
                        "task": "create_inner_decode_task",
                        "status": "skip",
                        "message": "acquire lock failed",
                    }
                )
        return locked

    async def _handle_result(
        self,
        article: Dict,
        source_id: str,
        result: Dict,
        posts_by_cid: Dict,
        config_id: int,
    ):
        """Persist the outcome of one submit result.

        ``article`` is currently unused; it is kept for interface stability.
        Status handling: FAILED is only logged (the caller rolls the article
        back), PENDING inserts a PROCESSING task row, SUCCESS queries the
        result once and stores it if already available.
        """
        if not result:
            await self.log_service.log(
                contents={
                    "source_id": source_id,
                    "task": "create_inner_decode_task",
                    "status": "fail",
                    "message": "no response for source_id",
                }
            )
            return

        status = result.get("status")
        if status == self.SubmitStatus.FAILED:
            await self.log_service.log(
                contents={
                    "source_id": source_id,
                    "task": "create_inner_decode_task",
                    "status": "fail",
                    "data": result,
                }
            )
        elif status == self.SubmitStatus.PENDING:
            await self.mapper.insert_decode_task(
                source_id=source_id,
                source=self.SourceType.INNER,
                payload=json.dumps(posts_by_cid.get(source_id, {}), ensure_ascii=False),
                remark="内部文章解构任务已提交",
                status=self.TaskStatus.PROCESSING,
            )
        elif status == self.SubmitStatus.SUCCESS:
            # Result may already exist upstream; query once immediately.
            query_results = await self.tool.query_decode_results_batch(
                [source_id], config_id=config_id
            )
            result_data = query_results.get(source_id)
            data_content = result_data.get("dataContent") if result_data else None
            if data_content:
                await self.mapper.insert_decode_task(
                    source_id=source_id,
                    source=self.SourceType.INNER,
                    payload=json.dumps(
                        posts_by_cid.get(source_id, {}), ensure_ascii=False
                    ),
                    remark="内部文章解构结果已获取",
                )
                await self.mapper.set_decode_result(
                    source_id=source_id,
                    result=json.dumps(
                        {"dataContent": data_content}, ensure_ascii=False
                    ),
                )
            else:
                # SUCCESS but result not queryable yet: leave it to the polling job.
                await self.mapper.insert_decode_task(
                    source_id=source_id,
                    source=self.SourceType.INNER,
                    payload=json.dumps(result, ensure_ascii=False),
                    remark="提交返回SUCCESS,查询未果,等待轮询",
                    status=self.TaskStatus.PROCESSING,
                )
        else:
            await self.log_service.log(
                contents={
                    "source_id": source_id,
                    "task": "create_inner_decode_task",
                    "status": "fail",
                    "message": f"unexpected submit status: {status}",
                    "data": result,
                }
            )

    async def _submit_and_record(self, articles: List[Dict]):
        """Filter already-submitted articles, submit the rest, record outcomes."""
        if not articles:
            return

        # Drop articles that already have a task record (skipped in test mode).
        if not self._TEST_MODE:
            all_source_ids = [str(a["source_id"]) for a in articles]
            existing = await self.mapper.fetch_existing_source_ids(all_source_ids)
            new_articles = [a for a in articles if str(a["source_id"]) not in existing]
            skipped = len(articles) - len(new_articles)
            if skipped > 0:
                await self.log_service.log(
                    contents={
                        "task": "create_inner_decode_task",
                        "message": f"Skipped {skipped} already-submitted articles",
                    }
                )

            # Mark the already-submitted articles as done.
            # FIX: membership by source_id (same test used to build
            # new_articles) instead of the previous O(n^2) dict-equality
            # `article not in new_articles` scan.
            for article in articles:
                if str(article["source_id"]) in existing:
                    await self.mapper.update_inner_article_status(
                        article["id"],
                        self.TaskStatus.PROCESSING,
                        self.TaskStatus.SUCCESS,
                    )
        else:
            new_articles = articles

        if not new_articles:
            return

        # Collect produce-plan outputs per article (one query per source_id).
        produce_info_map: Dict[str, list] = {}
        for article in new_articles:
            produce_info = await self.mapper.fetch_inner_articles_produce_detail(
                article["source_id"]
            )
            produce_info_map[str(article["source_id"])] = produce_info

        posts = self.tool.prepare_posts(new_articles, produce_info_map)

        submit_results = await self.tool.submit_decode_batch(
            posts, config_id=self.CONFIG_ID, skip_completed=True
        )
        posts_by_cid = {p["channelContentId"]: p for p in posts}

        for article in tqdm(new_articles):
            source_id = str(article["source_id"])
            article_id = article["id"]

            result = submit_results.get(source_id)
            await self._handle_result(
                article, source_id, result, posts_by_cid, self.CONFIG_ID
            )

            if not self._TEST_MODE:
                ok = result and result.get("status") != self.SubmitStatus.FAILED
                if ok:
                    await self.mapper.update_inner_article_status(
                        article_id, self.TaskStatus.PROCESSING, self.TaskStatus.SUCCESS
                    )
                else:
                    # Submit failed or produced no response: unlock back to INIT
                    # so the next run retries this article.
                    await self.mapper.update_inner_article_status(
                        article_id, self.TaskStatus.PROCESSING, self.TaskStatus.INIT
                    )

    async def deal(self):
        """Entry point: acquire locked articles and process them."""
        article_list = await self._acquire_articles()
        if not article_list:
            await self.log_service.log(
                contents={
                    "task": "create_inner_decode_task",
                    "message": "No more articles to decode",
                }
            )
            return

        await self._submit_and_record(article_list)
        await self.log_service.log(
            contents={
                "task": "create_inner_decode_task",
                "message": f"Processed {len(article_list)} articles",
            }
        )
+
+
+__all__ = ["CreateAdPlatformArticlesDecodeTask", "CreateInnerArticlesDecodeTask"]

+ 28 - 19
app/domains/llm_tasks/aigc_decode_task/extract_decode_task_detail.py → app/domains/llm_tasks/decode_article/extract_decode_task_detail.py

@@ -1,26 +1,26 @@
 import json
+from typing import Dict
 
 from app.core.database import DatabaseManager
 from app.core.observability import LogService
 
 from app.infra.shared import run_tasks_with_asyncio_task_group
 
-from ._const import DecodeTaskConst
+from ._const import DecodeArticleConst
 from ._mapper import ArticlesDecodeTaskMapper
-from ._utils import DecodeTaskUtil
+from ._utils import AigcDecodeUtils
 
 
-class ExtractDecodeTaskDetail(DecodeTaskConst):
+class ExtractDecodeTaskDetail(DecodeArticleConst):
     def __init__(self, pool: DatabaseManager, log_service: LogService):
         self.pool = pool
         self.log_service = log_service
         self.mapper = ArticlesDecodeTaskMapper(self.pool)
-        self.tool = DecodeTaskUtil()
+        self.tool = AigcDecodeUtils()
 
-    async def extract_single_result(self, task):
+    async def extract_single_result(self, task: Dict):
         task_id = task["id"]
 
-        # acquire lock by extract_status
         acquire_lock = await self.mapper.update_extract_status(
             task_id, self.ExtractStatus.INIT, self.ExtractStatus.PROCESSING
         )
@@ -28,7 +28,13 @@ class ExtractDecodeTaskDetail(DecodeTaskConst):
             return
 
         try:
-            result = json.loads(task["result"])["result"]
+            raw_result = json.loads(task["result"])
+            # 新 API 结果格式: {"dataContent": "{...}", "html": "..."}
+            data_content = raw_result.get("dataContent")
+            if isinstance(data_content, str):
+                inner_result = json.loads(data_content)
+            else:
+                inner_result = data_content or {}
         except (TypeError, KeyError, json.JSONDecodeError) as e:
             await self.mapper.update_extract_status(
                 task_id,
@@ -37,7 +43,7 @@ class ExtractDecodeTaskDetail(DecodeTaskConst):
             )
             await self.log_service.log(
                 contents={
-                    "task": "extract_single_result",
+                    "task": "extract_decode_result_v2",
                     "task_id": task_id,
                     "status": "fail",
                     "message": f"parse decode result error: {e}",
@@ -46,8 +52,7 @@ class ExtractDecodeTaskDetail(DecodeTaskConst):
             )
             return
 
-        detail = self.tool.extract_decode_result(result)
-        # 如果工具返回错误信息,直接标记为失败
+        detail = self.tool.extract_decode_result(inner_result)
         if detail.get("error"):
             await self.mapper.update_extract_status(
                 task_id,
@@ -56,7 +61,7 @@ class ExtractDecodeTaskDetail(DecodeTaskConst):
             )
             await self.log_service.log(
                 contents={
-                    "task": "extract_single_result",
+                    "task": "extract_decode_result_v2",
                     "task_id": task_id,
                     "status": "fail",
                     "message": detail["error"],
@@ -64,7 +69,6 @@ class ExtractDecodeTaskDetail(DecodeTaskConst):
             )
             return
 
-        # 写入明细表
         saved = await self.mapper.record_extract_detail(task_id, detail)
         if not saved:
             await self.mapper.update_extract_status(
@@ -74,7 +78,7 @@ class ExtractDecodeTaskDetail(DecodeTaskConst):
             )
             await self.log_service.log(
                 contents={
-                    "task": "extract_single_result",
+                    "task": "extract_decode_result_v2",
                     "task_id": task_id,
                     "status": "fail",
                     "message": "insert long_articles_decode_task_detail failed",
@@ -83,24 +87,29 @@ class ExtractDecodeTaskDetail(DecodeTaskConst):
             )
             return
 
-        # 写入成功,更新状态为成功
         await self.mapper.update_extract_status(
             task_id,
             self.ExtractStatus.PROCESSING,
             self.ExtractStatus.SUCCESS,
         )
 
-    async def extract_task(self):
+    async def deal(self):
         tasks = await self.mapper.fetch_extract_tasks()
+        if not tasks:
+            await self.log_service.log(
+                contents={
+                    "task": "extract_decode_result_v2",
+                    "message": "No more tasks to extract",
+                }
+            )
+            return
+
         await run_tasks_with_asyncio_task_group(
             task_list=tasks,
             handler=self.extract_single_result,
-            description="批量解析结构结果",
+            description="批量解析构结果",
             unit="task",
         )
 
-    async def deal(self):
-        await self.extract_task()
-
 
 __all__ = ["ExtractDecodeTaskDetail"]

+ 112 - 0
app/domains/llm_tasks/decode_article/fetch_decode_results.py

@@ -0,0 +1,112 @@
+import json
+from typing import List, Dict
+
+from app.core.database import DatabaseManager
+from app.core.observability import LogService
+
+from app.infra.shared import run_tasks_with_asyncio_task_group
+
+from ._const import DecodeArticleConst
+from ._mapper import ArticlesDecodeTaskMapper
+from ._utils import AigcDecodeUtils
+
+
class FetchDecodeResults(DecodeArticleConst):
    """Poll the decode service for results of previously submitted tasks."""

    def __init__(self, pool: DatabaseManager, log_service: LogService):
        self.pool = pool
        self.log_service = log_service
        self.mapper = ArticlesDecodeTaskMapper(self.pool)
        self.tool = AigcDecodeUtils()

    async def _process_batch(self, tasks: List[Dict]):
        """Query one batch of pending tasks and persist each outcome."""
        id_list = [item["source_id"] for item in tasks]
        query_map = await self.tool.query_decode_results_batch(id_list)

        for item in tasks:
            sid = item["source_id"]
            outcome = query_map.get(sid)

            if not outcome:
                # Task unknown to the decode service: mark it failed.
                await self.mapper.update_task_status_by_source_id(
                    source_id=sid,
                    ori_status=self.TaskStatus.INIT,
                    new_status=self.TaskStatus.FAILED,
                    remark="解构任务在结果查询中未返回,可能不存在",
                )
                await self.log_service.log(
                    contents={
                        "task": "fetch_decode_results_v2",
                        "source_id": sid,
                        "status": "fail",
                        "message": "source_id not in query response",
                    }
                )
                continue

            state = outcome.get("status")
            if state == "API_ERROR":
                # The query API call itself failed; keep the task in INIT for retry.
                continue
            if state == self.QueryStatus.SUCCESS:
                payload = {
                    "dataContent": outcome.get("dataContent") or "{}",
                    "html": outcome.get("html"),
                }
                await self.mapper.set_decode_result(
                    source_id=sid,
                    result=json.dumps(payload, ensure_ascii=False),
                    remark="解构结果获取成功",
                )
            elif state in (self.QueryStatus.PENDING, self.QueryStatus.RUNNING):
                # Still running upstream; nothing to do this round.
                pass
            elif state == self.QueryStatus.FAILED:
                await self.mapper.update_task_status_by_source_id(
                    source_id=sid,
                    ori_status=self.TaskStatus.INIT,
                    new_status=self.TaskStatus.FAILED,
                    remark=f"解构任务失败: {outcome.get('errorMessage', '')}",
                )
            else:
                await self.log_service.log(
                    contents={
                        "task": "fetch_decode_results_v2",
                        "source_id": sid,
                        "status": "unknown",
                        "message": f"unexpected query status: {state}",
                        "data": outcome,
                    }
                )

    async def deal(self):
        """Entry point: fetch pending tasks and query their results batch by batch."""
        pending_tasks = await self.mapper.fetch_pending_tasks()
        if not pending_tasks:
            await self.log_service.log(
                contents={
                    "task": "fetch_decode_results_v2",
                    "message": "No more tasks to fetch",
                }
            )
            return

        # Split into SUBMIT_BATCH-sized chunks and query them concurrently.
        batch_size = self.SUBMIT_BATCH
        batches = [
            pending_tasks[start : start + batch_size]
            for start in range(0, len(pending_tasks), batch_size)
        ]
        await run_tasks_with_asyncio_task_group(
            task_list=batches,
            handler=self._process_batch,
            description="批量查询解构结果",
            unit="batch",
        )

        await self.log_service.log(
            contents={
                "task": "fetch_decode_results_v2",
                "message": f"Processed {len(pending_tasks)} pending tasks in {len(batches)} batches",
            }
        )
+
+
+__all__ = ["FetchDecodeResults"]

+ 2 - 0
app/infra/internal/__init__.py

@@ -3,6 +3,7 @@ from .piaoquan import change_video_audit_status
 from .piaoquan import publish_video_to_piaoquan
 from .piaoquan import fetch_piaoquan_video_list_detail
 from .piaoquan_decode_server import DecodeServer
+from .aigc_decode_server import AigcDecodeServer
 
 # aigc system api
 from .aigc_system import delete_illegal_gzh_articles
@@ -28,5 +29,6 @@ __all__ = [
     "get_top_article_title_list",
     "get_hot_titles",
     "DecodeServer",
+    "AigcDecodeServer",
     "insert_crawler_plan",
 ]

+ 56 - 0
app/infra/internal/aigc_decode_server.py

@@ -0,0 +1,56 @@
+from typing import Dict, List
+
+from app.infra.shared import AsyncHttpClient
+
+
+class AigcDecodeServer:
+    base_url: str = "https://aigc-api.aiddit.com"
+
+    async def submit_decode(
+        self, config_id: int, posts: List[Dict], skip_completed: bool = False
+    ) -> Dict:
+        """批量提交帖子解构
+        POST /aigc/api/task/decode
+        """
+        url = f"{self.base_url}/aigc/api/task/decode"
+        headers = {"Content-Type": "application/json"}
+        payload = {
+            "params": {
+                "configId": config_id,
+                "skipCompleted": skip_completed,
+                "posts": posts,
+            }
+        }
+        async with AsyncHttpClient() as client:
+            return await client.post(url, json=payload, headers=headers)
+
+    async def query_decode_results(
+        self, config_id: int, channel_content_ids: List[str]
+    ) -> Dict:
+        """批量查询解构结果
+        POST /aigc/api/task/decode/result
+        """
+        url = f"{self.base_url}/aigc/api/task/decode/result"
+        headers = {"Content-Type": "application/json"}
+        payload = {
+            "params": {"configId": config_id, "channelContentIds": channel_content_ids}
+        }
+        async with AsyncHttpClient() as client:
+            return await client.post(url, json=payload, headers=headers)
+
+    async def cancel_decode_tasks(
+        self, config_id: int, channel_content_ids: List[str]
+    ) -> Dict:
+        """取消待执行解构任务
+        POST /aigc/api/task/decode/cancel
+        """
+        url = f"{self.base_url}/aigc/api/task/decode/cancel"
+        headers = {"Content-Type": "application/json"}
+        payload = {
+            "params": {"configId": config_id, "channelContentIds": channel_content_ids}
+        }
+        async with AsyncHttpClient() as client:
+            return await client.post(url, json=payload, headers=headers)
+
+
+__all__ = ["AigcDecodeServer"]

+ 4 - 4
app/jobs/domains/llm_task.py

@@ -1,7 +1,7 @@
-from app.domains.llm_tasks.aigc_decode_task import CreateAdPlatformArticlesDecodeTask
-from app.domains.llm_tasks.aigc_decode_task import CreateInnerArticlesDecodeTask
-from app.domains.llm_tasks.aigc_decode_task import FetchDecodeResults
-from app.domains.llm_tasks.aigc_decode_task import ExtractDecodeTaskDetail
+from app.domains.llm_tasks.decode_article import CreateAdPlatformArticlesDecodeTask
+from app.domains.llm_tasks.decode_article import CreateInnerArticlesDecodeTask
+from app.domains.llm_tasks.decode_article import FetchDecodeResults
+from app.domains.llm_tasks.decode_article import ExtractDecodeTaskDetail
 from app.domains.llm_tasks import TitleRewrite
 from app.domains.llm_tasks import ArticlePoolCategoryGeneration
 from app.domains.llm_tasks import CandidateAccountQualityScoreRecognizer

+ 11 - 0
app/jobs/task_handler.py

@@ -467,6 +467,17 @@ class TaskHandler:
         await task.deal()
         return TaskStatus.SUCCESS
 
+    # ====================== V2 解构任务(新 AIGC API)======================
+
+    @register("create_inner_articles_decode_task")
+    async def _create_inner_decode_task(self) -> int:
+        """创建内部文章解构任务"""
+        task = CreateInnerArticlesDecodeTask(
+            pool=self.db_client, log_service=self.log_client
+        )
+        await task.deal()
+        return TaskStatus.SUCCESS
+
     # ====================== Recommend Tasks=====================
     @register("i2i_recommend_data_sync")
     async def _i2i_recommend_data_sync_handler(self) -> int: