embedding_manager_user.py 1006 B

1234567891011121314151617181920212223242526272829303132333435
  1. # encoding:utf-8
  2. import pandas as pd
  3. import json
  4. import numpy as np
  5. # import faiss
  6. import time
  7. class EmbeddingManagerUser(object):
  8. def __init__(self, fpath, key_name, value_name):
  9. begin_time = time.time()
  10. # pandas.dataframe
  11. self.df = pd.read_csv(fpath)
  12. read_time = time.time()
  13. print("read csv embedding file cost time is: " + str(read_time - begin_time))
  14. # 将文件中的embedding加载到内存
  15. self.dict_embedding = self.load_embedding_to_dict(key_name, value_name)
  16. emb_time = time.time()
  17. print("load embedding to dict cost time is: " + str(emb_time - read_time))
  18. def get_embedding(self, key):
  19. if str(key) in self.dict_embedding.keys():
  20. return self.dict_embedding[str(key)]
  21. else:
  22. return ""
  23. def load_embedding_to_dict(self, key_name, value_name):
  24. return {
  25. str(row[key_name]): row[value_name]
  26. for index, row in self.df.iterrows()
  27. }