|  | @@ -2,21 +2,16 @@ import argparse
 | 
	
		
			
				|  |  |  import sys
 | 
	
		
			
				|  |  |  import gzip
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -from hdfs import InsecureClient
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -client = InsecureClient("http://master-1-1.c-7f31a3eea195cb73.cn-hangzhou.emr.aliyuncs.com:9870", user="spark")
 | 
	
		
			
				|  |  | +from pyspark.sql import SparkSession
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def read_predict(hdfs_path: str):
 | 
	
		
			
				|  |  | -    for file in client.list(hdfs_path):
 | 
	
		
			
				|  |  | -        with client.read(hdfs_path + file) as reader:
 | 
	
		
			
				|  |  | -            with gzip.GzipFile(fileobj=reader, mode="rb") as gz_file:
 | 
	
		
			
				|  |  | -                content = gz_file.read()
 | 
	
		
			
				|  |  | -                print(f"Content of {hdfs_path + file}:\n {content}")
 | 
	
		
			
				|  |  | +    df = spark.read.text(hdfs_path)
 | 
	
		
			
				|  |  | +    df.show(truncate=False)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def _main():
 | 
	
		
			
				|  |  | -    read_predict("/dw/recommend/model/34_ad_predict_data/20241004_351_0927_1003_1000/")
 | 
	
		
			
				|  |  | +    read_predict("/dw/recommend/model/34_ad_predict_data/20241004_351_0927_1003_1000/*")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  if __name__ == '__main__':
 | 
	
	
		
			
				|  | @@ -28,4 +23,6 @@ if __name__ == '__main__':
 | 
	
		
			
				|  |  |      # # 判断参数是否正常
 | 
	
		
			
				|  |  |      # if len(predict_path_list) != 2:
 | 
	
		
			
				|  |  |      #     sys.exit(1)
 | 
	
		
			
				|  |  | +    spark = SparkSession.builder.appName("model_predict_analyse").getOrCreate()
 | 
	
		
			
				|  |  |      _main()
 | 
	
		
			
				|  |  | +    spark.stop()
 |