yangxiaohui 1 week geleden
bovenliggende
commit
7be8d2474c

File diff suppressed because it is too large
+ 253 - 283
data/data_1118/分类层级映射.json


File diff suppressed because it is too large
+ 181 - 128
data/data_1118/当前帖子_how解构结果/690d977d0000000007036331_how.json


File diff suppressed because it is too large
+ 186 - 134
data/data_1118/当前帖子_how解构结果/69114f150000000007001f30_how.json


File diff suppressed because it is too large
+ 184 - 133
data/data_1118/当前帖子_how解构结果/6915dfc400000000070224d9_how.json


File diff suppressed because it is too large
+ 220 - 220
data/data_1118/当前帖子_how解构结果_可视化.html


File diff suppressed because it is too large
+ 577 - 554
data/data_1118/特征名称_分类映射.json


File diff suppressed because it is too large
+ 427 - 421
data/data_1118/过去帖子_pattern聚合结果.json


+ 96 - 0
script/data_processing/run_all.sh

@@ -0,0 +1,96 @@
+#!/bin/bash
+
+# 数据处理脚本执行脚本
+# 按顺序执行所有数据处理步骤
+
+set -e  # 遇到错误立即退出
+
+# 获取脚本所在目录的绝对路径
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# 项目根目录
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+echo "=========================================="
+echo "数据处理流程开始"
+echo "项目根目录: $PROJECT_ROOT"
+echo "=========================================="
+echo ""
+
+# 切换到项目根目录
+cd "$PROJECT_ROOT"
+
+# 步骤1: 提取特征分类映射
+echo "[1/5] 正在提取特征分类映射..."
+echo "  输入: data/data_1118/过去帖子_pattern聚合结果.json"
+echo "  输入: data/data_1118/当前帖子_what解构结果/*.json (时间阈值)"
+echo "  输出: data/data_1118/特征名称_分类映射.json"
+echo "  输出: data/data_1118/分类层级映射.json"
+python script/data_processing/extract_feature_categories.py
+if [ $? -eq 0 ]; then
+    echo "✓ 特征分类映射提取完成"
+else
+    echo "✗ 特征分类映射提取失败"
+    exit 1
+fi
+echo ""
+
+# 步骤2: 提取特征帖子来源
+echo "[2/5] 正在提取特征帖子来源..."
+echo "  输入: data/data_1118/过去帖子_what解构结果/*.json"
+echo "  输入: data/data_1118/当前帖子_what解构结果/*.json (时间阈值)"
+echo "  输出: data/data_1118/特征名称_帖子来源.json"
+python script/data_processing/extract_features_from_posts.py
+if [ $? -eq 0 ]; then
+    echo "✓ 特征帖子来源提取完成"
+else
+    echo "✗ 特征帖子来源提取失败"
+    exit 1
+fi
+echo ""
+
+# 步骤3: 提取当前帖子解构任务列表
+echo "[3/5] 正在提取当前帖子解构任务列表..."
+echo "  输入: data/data_1118/当前帖子_what解构结果/*.json"
+echo "  输出: data/data_1118/当前帖子_解构任务列表.json"
+python script/data_processing/extract_current_posts.py
+if [ $? -eq 0 ]; then
+    echo "✓ 当前帖子解构任务列表提取完成"
+else
+    echo "✗ 当前帖子解构任务列表提取失败"
+    exit 1
+fi
+echo ""
+
+# 步骤4: 匹配灵感特征
+echo "[4/5] 正在匹配灵感特征..."
+echo "  输入: data/data_1118/当前帖子_解构任务列表.json"
+echo "  输入: data/data_1118/特征名称_帖子来源.json"
+echo "  输入: data/data_1118/特征名称_分类映射.json"
+echo "  输出: data/data_1118/当前帖子_how解构结果/*_how.json"
+python script/data_processing/match_inspiration_features.py
+if [ $? -eq 0 ]; then
+    echo "✓ 灵感特征匹配完成"
+else
+    echo "✗ 灵感特征匹配失败"
+    exit 1
+fi
+echo ""
+
+# 步骤5: 可视化结果
+echo "[5/5] 正在生成可视化结果..."
+echo "  输入: data/data_1118/当前帖子_how解构结果/*.json"
+echo "  输入: data/data_1118/特征名称_分类映射.json"
+echo "  输入: data/data_1118/分类层级映射.json"
+echo "  输出: data/data_1118/当前帖子_how解构结果_可视化.html"
+python script/data_processing/visualize_how_results.py
+if [ $? -eq 0 ]; then
+    echo "✓ 可视化结果生成完成"
+else
+    echo "✗ 可视化结果生成失败"
+    exit 1
+fi
+echo ""
+
+echo "=========================================="
+echo "✓ 所有数据处理步骤已完成!"
+echo "=========================================="

Some files were not shown because too many files changed in this diff