1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- from BertDemo import collection, text_to_vector, List, Doc
- import pandas as pd
# Module-level lookup table consulted by queryCollection; keys are the
# tuple form of a query vector.
query_cache = dict()
def vector_to_tuple(vector):
    """Return the items of *vector* as a tuple so it can serve as a dict key."""
    return (*vector,)
def queryCollection(vector) -> List[Doc]:
    """Query ``collection`` for *vector* (``topk=10``), memoising successes.

    Args:
        vector: Query vector. It is converted with ``vector_to_tuple`` to
            build the cache key, so its items must be hashable.

    Returns:
        ``ret.output`` of the query, or ``None`` when the query returns
        nothing or reports a non-zero ``code``.
    """
    vector_tuple = vector_to_tuple(vector)
    if vector_tuple in query_cache:
        return query_cache[vector_tuple]

    ret = collection.query(
        vector=vector,
        topk=10,
        include_vector=True,
    )
    if ret is None or ret.code != 0:
        print('查询失败')
        # Failures are deliberately not cached so a later call can retry.
        return None

    # Record the successful result; without this write the cache lookup
    # above could never hit and every call re-ran the query.
    query_cache[vector_tuple] = ret.output
    return ret.output
def calculate_rov(row):
    """Predict the ROV for *row* from the documents matching its title.

    The title is converted with ``text_to_vector``, documents are fetched
    with ``queryCollection``, and the prediction is the mean of
    ``rntHeadCount / exposureCount`` over the returned documents.

    Args:
        row: Record supporting ``row['title']`` (e.g. a DataFrame row).

    Returns:
        The mean ratio, or ``0`` when the computation fails for any
        ordinary reason (failed query returning ``None``, no documents,
        a zero ``exposureCount``, a missing field, ...).
    """
    print(f"title={row['title']} ")
    try:
        vector = text_to_vector(row['title'])
        docs = queryCollection(vector)
        ratio_total = sum(
            doc.fields['rntHeadCount'] / doc.fields['exposureCount']
            for doc in docs
        )
        rov = ratio_total / len(docs)
    except Exception as exc:
        # Best-effort fallback kept from the original, but no longer a bare
        # ``except``: KeyboardInterrupt/SystemExit now propagate, and the
        # reason for the fallback is reported instead of being hidden.
        print(f"ROV计算失败: {exc!r}")
        rov = 0
    print(f"预测ROV={rov}")
    print("=====================================")
    return rov
def calculate_and_export(filename, output_filename='videos-result.xlsx'):
    """Read an Excel sheet, add actual and predicted ROV columns, and export.

    Args:
        filename: Path of the Excel file to read. The sheet must contain
            the columns ``回流人数`` and ``曝光次数``; ``calculate_rov``
            additionally reads ``title`` from each row.
        output_filename: Path of the Excel file to write. Defaults to
            ``'videos-result.xlsx'``, the path that was previously
            hard-coded, so existing callers are unaffected.
    """
    df = pd.read_excel(filename)

    # Actual ROV per row; rows with zero exposures get 0 rather than a
    # division-by-zero result.
    df['实际ROV(回流人数/曝光次数)'] = df.apply(
        lambda row: row['回流人数'] / row['曝光次数']
        if row['曝光次数'] != 0 else 0,
        axis=1,
    )
    df['预测ROV'] = df.apply(calculate_rov, axis=1)

    df.to_excel(output_filename, index=False)
# Script entry point: process the flow-pool workbook only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    calculate_and_export(
        '20231115_flow_pool.xlsx',
    )
|