丁云鹏 пре 4 месеци
родитељ
комит
733195283b

+ 4 - 4
recommend-model-produce/src/main/python/models/dssm/milvus_data_process.py

@@ -18,7 +18,7 @@ configs = {
 }
 hdfs_client = MyHDFSClient(hadoop_home, configs)
 
-def process_file(file_path, model_file, params_file):
+def process_file(file_path):
     """处理单个文件"""
     ret, out = hdfs_client._run_cmd(f"text {file_path}")
     result=[]
@@ -37,7 +37,7 @@ def write_results(results, output_file):
         for s in results:
             json_file.write(s + "\n")
 
-def thread_task(name, file_list, model_file, params_file):
+def thread_task(name, file_list):
     """线程任务"""
     print(f"Thread {name}: starting file_list:{file_list}")
     i=0
@@ -45,7 +45,7 @@ def thread_task(name, file_list, model_file, params_file):
         i=i+1
         count=len(file_list)
         print(f"Thread {name}: starting file:{file_path} {i}/{count}")
-        result=process_file(file_path, model_file, params_file))
+        result=process_file(file_path)
         file_name, file_suffix = os.path.splitext(os.path.basename(file_path))
         output_file = f"/app/milvus-{file_name}.json"
         write_results(json.dumps({"rows":result}), output_file)
@@ -64,7 +64,7 @@ def main():
     future_list = []
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
         for i, file_list in enumerate(split_file_list):
-            future_list.append(executor.submit(thread_task, f"thread{i}", file_list, model_file, params_file))
+            future_list.append(executor.submit(thread_task, f"thread{i}", file_list))
 
     for future in future_list:
         future.result()