jihuaqiang 1 week ago
parent
commit
efee026cad
5 changed files with 116 additions and 55 deletions
  1. 10 0
      README.md
  2. 6 1
      agent.py
  3. 46 54
      agent_tools.py
  4. 6 0
      start_service.sh
  5. 48 0
      utils/mysql_db.py

+ 10 - 0
README.md

@@ -98,6 +98,16 @@ uvicorn agent:app --host 0.0.0.0 --port 8080 --reload
 
 - `GEMINI_API_KEY`: Gemini API 密钥(必需)
 
+### LangSmith 配置
+
+为了避免网络连接错误,系统会自动禁用 LangSmith 追踪。如果需要启用,可以设置以下环境变量(注意:`agent.py` 和 `start_service.sh` 在启动时会无条件把这些变量覆盖为禁用值,需要同时修改或移除其中的对应设置,否则下列变量不会生效):
+
+```bash
+export LANGCHAIN_TRACING_V2=true
+export LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
+export LANGCHAIN_API_KEY="your_langsmith_api_key"
+```
+
 ### 数据库配置
 
 在 `utils/mysql_db.py` 中配置数据库连接信息。

+ 6 - 1
agent.py

@@ -15,6 +15,11 @@ from contextlib import asynccontextmanager
 # 保证可以导入本项目模块
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 
+# Disable LangSmith tracing to avoid network connection errors (unconditionally overwrites any values already set in the environment)
+os.environ["LANGCHAIN_TRACING_V2"] = "false"
+os.environ["LANGCHAIN_ENDPOINT"] = ""
+os.environ["LANGCHAIN_API_KEY"] = ""
+
 from fastapi import FastAPI, HTTPException, BackgroundTasks
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel, Field
@@ -207,7 +212,7 @@ def create_langgraph_workflow():
         }
     )
     
-    # 编译工作流
+    # Compile the workflow (LangSmith tracing is disabled by the environment variables set at module level, not by this call)
     return workflow.compile()
 
 # 全局工作流实例

+ 46 - 54
agent_tools.py

@@ -24,49 +24,6 @@ class QueryDataTool:
             # 返回默认数据
             default_data = {
                 "crawl_data": {
-                    "channel": 1,
-                    "channel_content_id": "684a789b000000002202a61b",
-                    "content_link": "https://www.xiaohongshu.com/explore/684a789b000000002202a61b",
-                    "wx_sn": None,
-                    "title": "一个视频学会,5个剪辑工具,超详细教程",
-                    "content_type": "video",
-                    "body_text": "#剪辑教程[话题]# #剪辑[话题]# #手机剪辑[话题]# #视频制作[话题]# #视频剪辑[话题]# #自学剪辑[话题]# #原创视频[话题]# #新手小白学剪辑[话题]#",
-                    "location": "未知",
-                    "source_url": None,
-                    "mini_program": None,
-                    "topic_list": [],
-                    "image_url_list": [
-                        {
-                            "image_type": 2,
-                            "image_url": "http://rescdn.yishihui.com/pipeline/image/5be8f08a-4691-41b6-8dda-0b63cc2c1056.jpg"
-                        }
-                    ],
-                    "video_url_list": [
-                        {
-                            "video_url": "http://rescdn.yishihui.com/pipeline/video/6c2330e3-0674-4f01-b5b2-fc8c240158f8.mp4",
-                            "video_duration": 615
-                        }
-                    ],
-                    "bgm_data": None,
-                    "ad_info": None,
-                    "is_original": False,
-                    "voice_data": None,
-                    "channel_account_id": "670a10ac000000001d0216ec",
-                    "channel_account_name": "小伍剪辑视频",
-                    "channel_account_avatar": "https://sns-avatar-qc.xhscdn.com/avatar/1040g2jo31e469dkq0e005poa22m7c5ncbtuk1g0?imageView2/2/w/80/format/jpg",
-                    "item_index": None,
-                    "view_count": None,
-                    "play_count": None,
-                    "like_count": 692,
-                    "collect_count": 996,
-                    "comment_count": 37,
-                    "share_count": None,
-                    "looking_count": None,
-                    "publish_timestamp": 1749711589000,
-                    "modify_timestamp": 1749711589000,
-                    "update_timestamp": 1755239186502
-                },
-                "raw": {
                     "channel": 1,
                     "channel_content_id": "684a789b000000002202a61b",
                     "content_link": "https://www.xiaohongshu.com/explore/684a789b000000002202a61b",
@@ -205,14 +162,49 @@ class StructureTool:
 
     @staticmethod
     def store_parsing_result(request_id: str, crawl_raw: Dict[str, Any], identify_result: Dict[str, Any]) -> Optional[int]:
-        payload = {
-            'request_id': request_id,
-            'crawl_raw': crawl_raw,
-            'identify_result': identify_result,
-        }
-        sql = (
-            "INSERT INTO knowledge_parsing_content (request_id, parsing_result, created_at) "
-            "VALUES (%s, %s, NOW())"
-        )
-        params = (request_id, json.dumps(payload, ensure_ascii=False))
-        return MysqlHelper.update_values(sql, params)
+        """
+        Store a parsing result in the knowledge_parsing_content table.
+        
+        Args:
+            request_id: Request ID.
+            crawl_raw: Raw crawled data; its 'channel_content_id' is stored as content_id.
+            identify_result: Identification result.
+            
+        Returns:
+            The ID of the inserted row, or None on failure.
+        """
+        try:
+            # Extract the required fields from the raw data
+            content_id = crawl_raw.get('channel_content_id') or ''
+            task_id = 0  # Default task ID; adjust as needed
+            
+            # Build the data to store (serialized to JSON below)
+            parsing_data = {
+                'crawl_raw': crawl_raw,
+                'identify_result': identify_result,
+            }
+            
+            sql = (
+                "INSERT INTO knowledge_parsing_content "
+                "(content_id, request_id, task_id, parsing_data, create_time, status) "
+                "VALUES (%s, %s, %s, %s, NOW(), %s)"
+            )
+            
+            # Status: 2 means processing completed
+            status = 2
+            params = (
+                content_id, 
+                request_id, 
+                task_id, 
+                json.dumps(parsing_data, ensure_ascii=False),
+                status
+            )
+            
+            result = MysqlHelper.insert_and_get_id(sql, params)
+            if result:
+                logger.info(f"存储解析结果成功: request_id={request_id}, content_id={content_id}, insert_id={result}")
+            return result
+            
+        except Exception as e:
+            logger.error(f"存储解析结果失败: request_id={request_id}, error={e}")
+            return None

+ 6 - 0
start_service.sh

@@ -34,9 +34,15 @@ echo "📍 服务地址: http://localhost:8080"
 echo "📚 API文档: http://localhost:8080/docs"
 echo "🔍 健康检查: http://localhost:8080/health"
 echo "🔄 LangGraph 状态: 将在健康检查中显示"
+echo "🚫 LangSmith 追踪已禁用"
 echo ""
 echo "按 Ctrl+C 停止服务"
 echo ""
 
+# Set environment variables to disable LangSmith (overrides any values exported by the caller)
+export LANGCHAIN_TRACING_V2=false
+export LANGCHAIN_ENDPOINT=""
+export LANGCHAIN_API_KEY=""
+
 # 启动服务
 python3 agent.py 

+ 48 - 0
utils/mysql_db.py

@@ -94,6 +94,54 @@ class MysqlHelper:
             if connect:
                 connect.close()
 
+    @classmethod
+    def insert_and_get_id(cls, sql, params=None):
+        """
+        Execute an INSERT statement and return the ID of the inserted row.
+        
+        Args:
+            sql: The INSERT SQL statement to execute.
+            params: SQL parameters (optional; tuple or dict).
+        
+        Returns:
+            The inserted row ID (cursor.lastrowid) on success, None on failure.
+        """
+        connect = None
+        cursor = None
+        
+        try:
+            connect = cls.connect_mysql()
+            cursor = connect.cursor()
+            
+            # Execute the SQL statement (params are passed only when truthy)
+            if params:
+                cursor.execute(sql, params)
+            else:
+                cursor.execute(sql)
+            
+            # Read the ID of the last inserted row before committing
+            insert_id = cursor.lastrowid
+            
+            connect.commit()
+            return insert_id
+            
+        except Exception as e:
+            logger.error(f"INSERT执行失败: {e}")
+            logger.error(f"SQL语句: {sql}")
+            if params:
+                logger.error(f"参数: {params}")
+            
+            if connect:
+                connect.rollback()
+            return None
+            
+        finally:
+            # Make sure the cursor and connection are closed
+            if cursor:
+                cursor.close()
+            if connect:
+                connect.close()
+