|
@@ -4,12 +4,13 @@ import gzip
|
|
|
|
|
|
from hdfs import InsecureClient
|
|
from hdfs import InsecureClient
|
|
|
|
|
|
-client = InsecureClient("http://master-1-1.c-7f31a3eea195cb73.cn-hangzhou.emr.aliyuncs.com:9870", user="root")
|
|
|
|
|
|
+client = InsecureClient("http://master-1-1.c-7f31a3eea195cb73.cn-hangzhou.emr.aliyuncs.com:9870", user="spark")
|
|
|
|
|
|
|
|
|
|
def read_predict(hdfs_path):
|
|
def read_predict(hdfs_path):
|
|
- for file in client.list("/dw/recommend/model/34_ad_predict_data/20241004_351_0927_1003_1000/"):
|
|
|
|
- with client.read(file, encoding="utf-8") as reador:
|
|
|
|
|
|
+ dir = "/dw/recommend/model/34_ad_predict_data/20241004_351_0927_1003_1000/"
|
|
|
|
+ for file in client.list(dir):
|
|
|
|
+ with client.read(dir + file, encoding="utf-8") as reador:
|
|
with gzip.GzipFile(fileobj=reador, mode="rb") as gz_file:
|
|
with gzip.GzipFile(fileobj=reador, mode="rb") as gz_file:
|
|
content = gz_file.read().decode("utf-8")
|
|
content = gz_file.read().decode("utf-8")
|
|
print(f"Content of {file}:\n {content}")
|
|
print(f"Content of {file}:\n {content}")
|