丁云鹏 4 ماه پیش
والد
کامیت
3e06cd9aa4

+ 8 - 5
recommend-model-produce/src/main/python/models/dssm/milvus_data_process.py

@@ -24,11 +24,14 @@ def process_file(file_path):
     result=[]
     for line in out:
         sample_values = line.rstrip('\n').split('\t')
-        vid, vec = sample_values
-        result.append({
-            "vid":vid,
-            "vec":vec
-        })
+        if(len(sample_values) == 2):
+            vid, vec = sample_values
+            result.append({
+                "vid":vid,
+                "vec":vec
+            })
+        else :
+            print(f"{sample_values}")
     return result
 
 def write_results(results, output_file):

+ 2 - 1
recommend-model-produce/src/main/python/tools/inferv2.py

@@ -71,7 +71,8 @@ def thread_task(name, file_list, model_file, params_file):
         output_file = f"/app/vec-{file_name}.json"
         write_results(results, output_file)
         compress.compress_file_tar(output_file, f"{output_file}.tar.gz")
-        hdfs_client.upload(f"{output_file}.tar.gz", f"/dyp/vec/{file_name}.gz", multi_processes=1, overwrite=True)
+        hdfs_client.delete(f"/dyp/vec/{file_name}.gz")
+        hdfs_client.upload(f"{output_file}.tar.gz", f"/dyp/vec/{file_name}.gz", multi_processes=1, overwrite=False)
         results=[]
         print(f"Thread {name}: ending file:{file_path} {i}/{count}")
     

+ 1 - 1
recommend-model-produce/src/main/python/tools/utils/compress.py

@@ -22,7 +22,7 @@ def compress_file_tar(file_path, output_filename):
     # 创建一个 tarfile 对象,使用 'w:gz' 模式表示写入 gzip 压缩的 tar 包
     with tarfile.open(output_filename, "w:gz") as tar:
         # 将文件添加到 tar 包中,arcname 指定在 tar 包中的相对路径
-        tar.add(file_path, arcname=os.path.relpath(file_path))
+        tar.add(file_path, arcname=file_path)
 
 def compress_tar(folder_path, output_filename):
     # 确保输出文件名以 .tar.gz 结尾