| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
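# Read the results spreadsheet and, starting from the 4th column, count how often each value occurs in every column; save the counts to a new spreadsheet.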
- import pandas as pd
- import logging
# Logging setup: every record is written both to a UTF-8 encoded log file
# and to the console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler('analysis_statistics.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def analyze_result_statistics(
    input_file: str = "历史名人结果.xlsx",
    output_file: str = "analysis_statistics.xlsx",
    start_column_index: int = 3,
) -> pd.DataFrame:
    """Count value frequencies per column of a result workbook and save them.

    Reads ``input_file`` with ``pd.read_excel``, and for every column from
    ``start_column_index`` onward counts how often each value occurs. One row
    per (column, value) pair is written to ``output_file`` with the columns
    '列名', '值', '出现次数' and '占比'.

    Args:
        input_file: Path of the Excel workbook to analyse.
        output_file: Path of the Excel workbook the statistics are written to.
        start_column_index: Zero-based index of the first column to analyse.
            The default 3 is the 4th column.

    Returns:
        The statistics table that was written to ``output_file``.

    Raises:
        Exception: Any error raised while reading, counting or writing is
            logged and then re-raised unchanged.
    """
    try:
        logger.info(f"正在读取文件: {input_file}")

        df = pd.read_excel(input_file)
        logger.info(f"成功读取文件,数据形状: {df.shape}")
        logger.info(f"列名: {list(df.columns)}")

        # Zero-based slice: index 3 is the 4th column. The original code used
        # 4 here, which skipped the 4th column that the comments and the log
        # message say is included.
        columns_to_analyze = df.columns[start_column_index:]

        logger.info(
            f"从第{start_column_index + 1}列开始分析,共 {len(columns_to_analyze)} 列"
        )
        logger.info(f"分析的列: {list(columns_to_analyze)}")

        # The share is relative to the total number of rows; value_counts()
        # skips missing values, so the shares of a column with empty cells
        # add up to less than 100%.
        total_rows = len(df)
        all_statistics = []

        for column in columns_to_analyze:
            logger.info(f"正在统计列: {column}")

            value_counts = df[column].value_counts()

            logger.info(f"列 {column} 的统计结果:")
            for value, count in value_counts.items():
                logger.info(f" {value}: {count} 次")
                all_statistics.append({
                    '列名': column,
                    '值': value,
                    '出现次数': count,
                    '占比': f"{count / total_rows * 100:.2f}%",
                })

        # Pass the column names explicitly so the output keeps its header row
        # even when there is nothing to count (no columns or no rows).
        stats_df = pd.DataFrame(
            all_statistics,
            columns=['列名', '值', '出现次数', '占比'],
        )

        stats_df.to_excel(output_file, index=False, engine='openpyxl')

        logger.info(f"✅ 统计结果已保存到: {output_file}")
        logger.info(f"✅ 统计表格包含 {len(stats_df)} 行数据")

        # Summary: number of distinct (non-missing) values per analysed column.
        logger.info("=== 统计摘要 ===")
        for column in columns_to_analyze:
            unique_values = df[column].nunique()
            logger.info(f"{column}: {unique_values} 个不同值")

        return stats_df

    except Exception as e:
        logger.error(f"分析统计时出错: {e}")
        raise
def main():
    """Run the statistics analysis as a script entry point.

    Any exception raised by the analysis is logged here and not propagated,
    so the script finishes normally even when the analysis fails.
    """
    try:
        logger.info("开始分析罕见画面结果表格的统计信息...")
        # The returned table is not needed here; the analysis saves it itself.
        analyze_result_statistics()
        logger.info("分析完成!")

    except Exception as e:
        logger.error(f"程序执行出错: {e}")


if __name__ == "__main__":
    main()
|