1234567891011121314151617181920212223242526272829303132333435 |
- # encoding:utf-8
- import pandas as pd
- import json
- import numpy as np
- # import faiss
- import time
class EmbeddingManagerUser(object):
    """In-memory lookup table built from two columns of a CSV file.

    The CSV is read once with ``pandas.read_csv``; one column supplies the
    lookup keys (stringified) and another supplies the values returned by
    :meth:`get_embedding`.
    """

    def __init__(self, fpath, key_name, value_name):
        """Read the CSV at ``fpath`` and build the key -> value dictionary.

        Args:
            fpath: Path (or anything ``pandas.read_csv`` accepts) of the CSV.
            key_name: Name of the column whose values become dictionary keys.
            value_name: Name of the column whose values become dictionary
                values.

        Prints the elapsed time of the read step and of the dict-building
        step to stdout.
        """
        begin_time = time.time()
        # Parsed CSV contents as a pandas DataFrame.
        self.df = pd.read_csv(fpath)
        read_time = time.time()
        print("read csv embedding file cost time is: " + str(read_time - begin_time))

        # Load the embeddings from the file into memory.
        self.dict_embedding = self.load_embedding_to_dict(key_name, value_name)
        emb_time = time.time()
        print("load embedding to dict cost time is: " + str(emb_time - read_time))

    def get_embedding(self, key):
        """Return the value stored for ``key``, or ``""`` if it is unknown.

        Args:
            key: Lookup key; it is converted with ``str()`` before the lookup,
                so ``1`` and ``"1"`` address the same entry.

        Returns:
            The stored value, or an empty string when the key is absent.
        """
        return self.dict_embedding.get(str(key), "")

    def load_embedding_to_dict(self, key_name, value_name):
        """Build a ``{str(key): value}`` dict from two columns of ``self.df``.

        The two columns are iterated directly instead of using
        ``DataFrame.iterrows()``: ``iterrows`` materialises a Series per row
        and upcasts mixed numeric rows to float, which would turn an integer
        key such as ``1`` into the string ``"1.0"``.

        Args:
            key_name: Column providing the keys (stringified with ``str()``).
            value_name: Column providing the values (stored unchanged).

        Returns:
            A dict mapping stringified keys to values. If a key occurs more
            than once, the last row wins.
        """
        key_column = self.df[key_name]
        value_column = self.df[value_name]
        return {
            str(raw_key): value
            for raw_key, value in zip(key_column, value_column)
        }
|