丁云鹏 4 månader sedan
förälder
incheckning
f4a28450c7

+ 5 - 14
recommend-model-produce/src/main/python/models/dssm/milvus_data_process.py

@@ -5,14 +5,10 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
 #sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
 
-
-import numpy as np
 import json
 from concurrent.futures import ThreadPoolExecutor
-from utils.oss_client import HangZhouOSSClient
 import utils.compress as compress
 from utils.my_hdfs_client import MyHDFSClient
-import paddle.inference as paddle_infer
 
 # Hadoop 安装目录和配置信息
 hadoop_home = "/app/env/hadoop-3.2.4"
@@ -52,23 +48,18 @@ def thread_task(name, file_list, model_file, params_file):
        result = process_file(file_path, model_file, params_file)
         file_name, file_suffix = os.path.splitext(os.path.basename(file_path))
         output_file = f"/app/milvus-{file_name}.json"
-        write_results({"rows":result}, output_file)
-        hdfs_client.upload(output_file, f"/dyp/vec/{output_file}", multi_processes=1, overwrite=True):
-        results=[]
+        write_results(json.dumps({"rows":result}), output_file)
+        compress.compress_file_tar(output_file, f"{output_file}.tar.gz")
+        hdfs_client.upload(f"{output_file}.tar.gz", f"/dyp/milvus/{file_name}.gz", multi_processes=1, overwrite=True)
         print(f"Thread {name}: ending file:{file_path} {i}/{count}")
-    
     print(f"Thread {name}: finishing")
 
 def main():
-    init_model_path = "/app/output_model_dssm"
-    client = HangZhouOSSClient("art-recommend")
-    
 
     max_workers = 2
-
     split_file_list = [
-        ['/dyp/vec/data_part-00017.gz.json'],
-        ['/dyp/vec/data_part-00018.gz.json']
+        ['/dyp/vec/vec-part-00017.gz'],
+        ['/dyp/vec/vec-part-00018.gz']
     ]
     future_list = []
     with ThreadPoolExecutor(max_workers=max_workers) as executor:

+ 1 - 1
recommend-model-produce/src/main/python/tools/inferv2.py

@@ -71,7 +71,7 @@ def thread_task(name, file_list, model_file, params_file):
         output_file = f"/app/vec-{file_name}.json"
         write_results(results, output_file)
         compress.compress_file_tar(output_file, f"{output_file}.tar.gz")
-        hdfs_client.upload(f"{output_file}.tar.gz", f"/dyp/vec/{output_file}.gz", multi_processes=1, overwrite=True):
+        hdfs_client.upload(f"{output_file}.tar.gz", f"/dyp/vec/{file_name}.gz", multi_processes=1, overwrite=True)
         results=[]
         print(f"Thread {name}: ending file:{file_path} {i}/{count}")