Prechádzať zdrojové kódy

feat:添加评估结果分析脚本

zhaohaipeng 1 rok pred
rodič
commit
4928f6637d
1 zmenil súbory, kde vykonal 6 pridanie a 9 odobranie
  1. 6 9
      ad/model_predict_analyse.py

+ 6 - 9
ad/model_predict_analyse.py

@@ -2,21 +2,16 @@ import argparse
 import sys
 import gzip
 
-from hdfs import InsecureClient
-
-client = InsecureClient("http://master-1-1.c-7f31a3eea195cb73.cn-hangzhou.emr.aliyuncs.com:9870", user="spark")
+from pyspark.sql import SparkSession
 
 
 def read_predict(hdfs_path: str):
-    for file in client.list(hdfs_path):
-        with client.read(hdfs_path + file) as reader:
-            with gzip.GzipFile(fileobj=reader, mode="rb") as gz_file:
-                content = gz_file.read()
-                print(f"Content of {hdfs_path + file}:\n {content}")
+    df = spark.read.text(hdfs_path)
+    df.show(truncate=False)
 
 
 def _main():
-    read_predict("/dw/recommend/model/34_ad_predict_data/20241004_351_0927_1003_1000/")
+    read_predict("/dw/recommend/model/34_ad_predict_data/20241004_351_0927_1003_1000/*")
 
 
 if __name__ == '__main__':
@@ -28,4 +23,6 @@ if __name__ == '__main__':
     # # 判断参数是否正常
     # if len(predict_path_list) != 2:
     #     sys.exit(1)
+    spark = SparkSession.builder.appName("model_predict_analyse").getOrCreate()
     _main()
+    spark.stop()