inferv2.py

import os
import sys
import numpy as np

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)

from utils.oss_client import HangZhouOSSClient
import utils.compress as compress
from utils.my_hdfs_client import MyHDFSClient
# Paddle Inference library
import paddle.inference as paddle_infer

hadoop_home = "/app/env/hadoop-3.2.4"  # Hadoop installation directory
configs = {
    "fs.default.name": "hdfs://192.168.141.208:9000",  # HDFS namenode and port
    "hadoop.job.ugi": ""  # HDFS user and password
}
hdfs_client = MyHDFSClient(hadoop_home, configs)
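

# Pipeline: download the DSSM model archive from OSS, load it with Paddle
# Inference, read item features from HDFS, run inference for every vid, and
# write the results to a local file.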
def main():
    # Download the packaged DSSM model from OSS and unpack it locally
    init_model_path = "/app/output_model_dssm"
    client = HangZhouOSSClient("art-recommend")
    oss_object_name = "dyp/dssm.tar.gz"
    client.get_object_to_file(oss_object_name, "model.tar.gz")
    compress.uncompress_tar("model.tar.gz", init_model_path)
    assert os.path.exists(init_model_path)

    # Paddle inference model graph (.pdmodel) and weights (.pdiparams)
    model_file = os.path.join(init_model_path, "dssm.pdmodel")
    params_file = os.path.join(init_model_path, "dssm.pdiparams")

    # Create the inference config
    config = paddle_infer.Config(model_file, params_file)
    # Create the predictor from the config
    predictor = paddle_infer.create_predictor(config)
    # Get handles for the first input and output tensors
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])
    output_names = predictor.get_output_names()
    output_handle = predictor.get_output_handle(output_names[0])

    # Read item features from HDFS; each line is "vid<TAB>comma-separated floats"
    ret, out = hdfs_client._run_cmd("text /dw/recommend/model/56_dssm_i2i_itempredData/20241206/part-00016.gz")
    input_data = {}
    for line in out:
        sample_values = line.rstrip('\n').split('\t')
        vid, left_features_str = sample_values
        left_features = [float(x) for x in left_features_str.split(',')]
        input_data[vid] = left_features

    # Set the inputs and run inference, one vid at a time
    result = []
    i = 0
    count = len(input_data)
    for k, v in input_data.items():
        # Batch of one: shape (1, feature_dim), float32
        v2 = np.array([v], dtype=np.float32)
        input_handle.copy_from_cpu(v2)
        # Run the predictor
        predictor.run()
        # Fetch the output as a numpy.ndarray
        output_data = output_handle.copy_to_cpu()
        result.append(k + "\t" + str(output_data.tolist()[0]))
        i += 1
        if i % 1000 == 0:
            print("write batch {}/{}".format(i, count))

    # Write the results to a local file: one "vid<TAB>model output" line per sample
    with open('/app/data.json', 'w') as json_file:
        for s in result:
            json_file.write(s + "\n")


if __name__ == "__main__":
    main()
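
# Usage (assumes the OSS bucket, the HDFS path referenced above, and the local
# /app paths are reachable from this machine):
#   python inferv2.py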