|
@@ -2,21 +2,16 @@ import argparse
|
|
import sys
|
|
import sys
|
|
import gzip
|
|
import gzip
|
|
|
|
|
|
-from hdfs import InsecureClient
|
|
|
|
-
|
|
|
|
-client = InsecureClient("http://master-1-1.c-7f31a3eea195cb73.cn-hangzhou.emr.aliyuncs.com:9870", user="spark")
|
|
|
|
|
|
+from pyspark.sql import SparkSession
|
|
|
|
|
|
|
|
|
|
def read_predict(hdfs_path: str):
|
|
def read_predict(hdfs_path: str):
|
|
- for file in client.list(hdfs_path):
|
|
|
|
- with client.read(hdfs_path + file) as reader:
|
|
|
|
- with gzip.GzipFile(fileobj=reader, mode="rb") as gz_file:
|
|
|
|
- content = gz_file.read()
|
|
|
|
- print(f"Content of {hdfs_path + file}:\n {content}")
|
|
|
|
|
|
+ df = spark.read.text(hdfs_path)
|
|
|
|
+ df.show(truncate=False)
|
|
|
|
|
|
|
|
|
|
def _main():
|
|
def _main():
|
|
- read_predict("/dw/recommend/model/34_ad_predict_data/20241004_351_0927_1003_1000/")
|
|
|
|
|
|
+ read_predict("/dw/recommend/model/34_ad_predict_data/20241004_351_0927_1003_1000/*")
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
if __name__ == '__main__':
|
|
@@ -28,4 +23,6 @@ if __name__ == '__main__':
|
|
# # 判断参数是否正常
|
|
# # 判断参数是否正常
|
|
# if len(predict_path_list) != 2:
|
|
# if len(predict_path_list) != 2:
|
|
# sys.exit(1)
|
|
# sys.exit(1)
|
|
|
|
+ spark = SparkSession.builder.appName("model_predict_analyse").getOrCreate()
|
|
_main()
|
|
_main()
|
|
|
|
+ spark.stop()
|