jihuaqiang 1 month ago
parent
commit
6f3d6d8290
3 changed files with 37 additions and 20 deletions
  1. BIN
      文件/~$视频分析报告_new.xlsx
  2. BIN
      文件/视频分析报告_new.xlsx
  3. 37 20
      获取trigger任务.py

BIN
文件/~$视频分析报告_new.xlsx


BIN
文件/视频分析报告_new.xlsx


+ 37 - 20
获取trigger任务.py

@@ -99,7 +99,7 @@ def call_coze_api(summary, timeline):
         "Authorization": "Bearer pat_ddPm5K5tCKXU2zH1NChGHoOKGOSECyzxmXPEcrtJg52UcCIndRAfiWRRxCH95pdB"
     }
     payload = {
-        "workflow_id": "7506810742659579904",
+        "workflow_id": "7511970477477904393",
         "parameters": {
             "summary": summary,
             "timeline": timeline
@@ -115,7 +115,7 @@ def extract_fields_from_response(resp):
     Args:
         resp: API响应字典,可能包含多层嵌套的JSON字符串
     Returns:
-        tuple: (time_str, theme_str, trigger_str) 三个字符串,分别包含时间、主题和触发语
+        tuple: (time_str, query_str, hook_str) 三个字符串,分别包含时间、问题和钩子话术
     """
     import json
     import re
@@ -146,8 +146,24 @@ def extract_fields_from_response(resp):
                 return json_str
     
     try:
+        # 处理新的API响应格式
+        if isinstance(resp, dict) and 'data' in resp:
+            # 如果data是字符串,尝试解析它
+            if isinstance(resp['data'], str):
+                try:
+                    data = json.loads(resp['data'])
+                    if isinstance(data, dict) and 'output' in data:
+                        current_data = data['output']
+                    else:
+                        current_data = data
+                except json.JSONDecodeError:
+                    current_data = resp['data']
+            else:
+                current_data = resp['data']
+        else:
+            current_data = resp
+
         # 处理多层嵌套的JSON
-        current_data = resp
         while isinstance(current_data, (str, dict)):
             if isinstance(current_data, dict):
                 # 如果是字典,尝试获取data或output字段
@@ -182,8 +198,8 @@ def extract_fields_from_response(resp):
         
         # 提取并验证每个对象
         time_list = []
-        theme_list = []
-        trigger_list = []
+        query_list = []
+        hook_list = []
         
         for item in current_data:
             if not isinstance(item, dict):
@@ -191,26 +207,26 @@ def extract_fields_from_response(resp):
                 continue
             
             try:
-                # 使用get方法安全地获取值,并提供默认
-                time = str(item.get('time', '')).strip()
-                theme = str(item.get('theme', '')).strip()
-                trigger = str(item.get('trigger', '')).strip()
+                # 使用新的中文字段名获取
+                time = str(item.get('钩子出现时间', '')).strip()
+                query = str(item.get('钩子到AI大模型的问题', '')).strip()
+                hook = str(item.get('钩子话术', '')).strip()
                 
-                if time or theme or trigger:  # 只添加非空值
+                if time or query or hook:  # 只添加非空值
                     time_list.append(time)
-                    theme_list.append(theme)
-                    trigger_list.append(trigger)
+                    query_list.append(query)
+                    hook_list.append(hook)
             except Exception as e:
                 logger.warning(f"Error extracting fields from item: {e}")
                 continue
         
         # 将列表转换为字符串,用换行符连接
         time_str = '\n'.join(time_list) if time_list else ''
-        theme_str = '\n'.join(theme_list) if theme_list else ''
-        trigger_str = '\n'.join(trigger_list) if trigger_list else ''
+        query_str = '\n'.join(query_list) if query_list else ''
+        hook_str = '\n'.join(hook_list) if hook_list else ''
         
-        logger.info(f"Extracted - Time: {time_str[:50]}..., Theme: {theme_str[:50]}..., Trigger: {trigger_str[:50]}...")
-        return time_str, theme_str, trigger_str
+        logger.info(f"Extracted - Time: {time_str[:50]}..., Query: {query_str[:50]}..., Hook: {hook_str[:50]}...")
+        return time_str, query_str, hook_str
         
     except json.JSONDecodeError as je:
         logger.error(f"JSON decode error: {str(je)}")
@@ -245,23 +261,24 @@ def process_excel(input_excel, output_excel=None):
                 print(f"Timeline: {timeline[:100]}...")
                 
                 resp = call_coze_api(summary, timeline)
-                print(f"API Response: {json.dumps(resp, ensure_ascii=False)[:200]}...")  # 打印API响应
+                print(f"API Response: {json.dumps(resp, ensure_ascii=False)}")  # 打印API响应
                 
-                time, theme, hook = extract_fields_from_response(resp)
+                time, query, hook = extract_fields_from_response(resp)
                 
                 # 更新数据 - 使用单个值赋值而不是批量赋值
                 df.at[idx, df.columns[7]] = time
                 df.at[idx, df.columns[8]] = hook
+                df.at[idx, df.columns[9]] = query
                 
                 print(f"第{idx+1}行处理完成")
                 print(f"Time: {time[:100]}...")
                 print(f"Hook: {hook[:100]}...")
-                
+                print(f"Query: {query[:100]}...")
             except Exception as e:
                 print(f"第{idx+1}行处理失败: {str(e)}")
                 df.at[idx, df.columns[7]] = "error"
                 df.at[idx, df.columns[8]] = "error"
-            
+                df.at[idx, df.columns[9]] = "error"
             # 每处理一行就保存一次,防止中断丢失
             df.to_excel(input_excel, index=False)
             print(f"已保存到第{idx+1}行")