# parse_gen_data.py (1.2 KB)
  import ast

  import pandas as pd
  # Read the source Excel workbook into a DataFrame.
  input_file = '视频分析报告1.xlsx'
  df = pd.read_excel(input_file)
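  # Column 8 (index 7) is "钩子提取" (hook extraction).
  # NOTE(review): the loop further down reads df.iloc[:, 8], i.e. index 8 (the
  # 9th column), which does not match the index stated here — confirm which is right.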
  def extract_fields(hook_col):
      """Split one hook-extraction cell into three newline-joined strings.

      The cell is expected to hold a list of dicts, either as a real
      list/tuple or as the Python-literal string form of one. For every dict
      in that list the values stored under '需求钩子出现时间', '需求详细query' and
      '需求钩子话术' are collected; a missing key or a ``None`` value contributes
      an empty string, and any other non-string value is converted with
      ``str()``.

      Args:
          hook_col: The cell value. Anything that is not a list/tuple (or a
              string that evaluates to one) yields three empty strings, for
              example NaN from an empty cell or text that cannot be parsed.

      Returns:
          A ``(times, queries, hooks)`` tuple of strings, each containing one
          line per dict found in the cell.
      """
      if isinstance(hook_col, str):
          try:
              items = ast.literal_eval(hook_col)
          except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
              # Best effort: a cell that cannot be parsed produces empty output.
              items = []
      else:
          # Already-parsed lists are accepted as well as their string form.
          items = hook_col

      if not isinstance(items, (list, tuple)):
          items = []

      records = [item for item in items if isinstance(item, dict)]

      def joined(key):
          # str.join only accepts strings, so numbers and None are normalised.
          values = (record.get(key, '') for record in records)
          return '\n'.join('' if value is None else str(value) for value in values)

      return joined('需求钩子出现时间'), joined('需求详细query'), joined('需求钩子话术')
  # Build three new columns from the parsed hook cells and add them to df.
  # NOTE(review): a comment earlier in this file describes the hook column as
  # column 8 (index 7), yet index 8 is what is read here — confirm which is right.
  HOOK_COLUMN_INDEX = 8
  FIRST_NEW_COLUMN = 11
  parsed_rows = [extract_fields(val) for val in df.iloc[:, HOOK_COLUMN_INDEX]]
  new_times = [row[0] for row in parsed_rows]
  new_queries = [row[1] for row in parsed_rows]
  new_hooks = [row[2] for row in parsed_rows]
  new_columns = (('time', new_times), ('query', new_queries), ('hook', new_hooks))
  for offset, (column_name, values) in enumerate(new_columns):
      # Clamp the position so a sheet with fewer columns than expected gets the
      # new columns appended instead of DataFrame.insert raising on the index.
      position = min(FIRST_NEW_COLUMN + offset, len(df.columns))
      df.insert(position, column_name, values)

  # Save the result as a new workbook.
  output_file = '视频分析报告1_拆分钩子.xlsx'
  df.to_excel(output_file, index=False)
  print(f'已保存到 {output_file}')
  33. print(f'已保存到 {output_file}')