1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980 |
from typing import Optional

import pandas as pd

from BertDemo import collection, text_to_vector, List, Doc
# Memo of successful collection queries: maps the tuple form of a query vector
# to the `ret.output` that was returned for it. No eviction is visible in this
# file, so the cache grows with the number of distinct vectors queried.
query_cache: dict = {}
def vector_to_tuple(vector):
    """Convert *vector* into a hashable tuple suitable for use as a dict key.

    A plain ``tuple(vector)`` leaves nested rows (for example a 2-D embedding
    or a list of lists) as unhashable elements, which makes the cache lookup
    raise ``TypeError``. Nested lists/tuples are therefore converted
    recursively, and array-like objects exposing ``tolist()`` are first
    turned into plain Python lists.

    Args:
        vector: An iterable of numbers, possibly nested, or an array-like
            object with a ``tolist()`` method.

    Returns:
        A tuple (of tuples, when the input is nested) with the same values.
    """
    if hasattr(vector, "tolist"):
        # Array-likes: go through plain lists so nested rows are handled
        # by the recursion below.
        vector = vector.tolist()
    return tuple(
        vector_to_tuple(item) if isinstance(item, (list, tuple)) else item
        for item in vector
    )
def queryCollection(vector) -> Optional[List[Doc]]:
    """Query ``collection`` for *vector* (``topk=10``), memoising the result.

    Args:
        vector: Query vector. It is passed to ``collection.query`` unchanged
            and converted with ``vector_to_tuple`` to build the cache key.

    Returns:
        The ``output`` of the query response, or ``None`` when the query
        returns ``None`` or reports a non-zero ``code``. Failed queries are
        not cached, so a later call with the same vector queries again.
    """
    cache_key = vector_to_tuple(vector)
    try:
        return query_cache[cache_key]
    except KeyError:
        pass  # not cached yet: fall through to the real query

    ret = collection.query(
        vector=vector,
        topk=10,
        include_vector=True,
    )
    if ret is None or ret.code != 0:
        print('查询失败')
        return None

    query_cache[cache_key] = ret.output
    return ret.output
def calculate_ros(row):
    """Predict the ROS of one row from the documents matching its title.

    The row's ``title`` is embedded with ``text_to_vector`` and looked up via
    ``queryCollection``. The prediction is the mean of
    ``rntHeadCount / shareCount`` over the returned documents.

    Args:
        row: Mapping-like row (as passed by ``DataFrame.apply(axis=1)``)
            providing a ``'title'`` entry.

    Returns:
        The mean ratio, or ``0`` when the query failed, returned no documents,
        or no document has a usable ``shareCount``.
    """
    print(f"title={row['title']} ")
    vector = text_to_vector(row['title'])
    docs = queryCollection(vector)
    if docs is None:
        # queryCollection returns None on a failed query; treat that as
        # "no matches" instead of crashing on iteration.
        docs = []

    # Documents with a zero/missing shareCount have no defined ratio; skip
    # them rather than raising ZeroDivisionError for the whole row.
    ratios = [
        doc.fields['rntHeadCount'] / doc.fields['shareCount']
        for doc in docs
        if doc.fields['shareCount']
    ]
    ros = sum(ratios) / len(ratios) if ratios else 0

    print(f"预测ROS={ros}")
    print("=====================================")
    return ros
def calculate_return(row):
    """Predict the ``rntHeadCount`` of one row from documents matching its title.

    The row's ``title`` is embedded with ``text_to_vector`` and looked up via
    ``queryCollection``. The prediction is the mean ``rntHeadCount`` of the
    returned documents.

    Args:
        row: Mapping-like row (as passed by ``DataFrame.apply(axis=1)``)
            providing a ``'title'`` entry.

    Returns:
        The mean ``rntHeadCount``, or ``0`` when the query failed or returned
        no documents.
    """
    print(f"title={row['title']} ")
    vector = text_to_vector(row['title'])
    docs = queryCollection(vector)
    if docs is None:
        # queryCollection returns None on a failed query; treat that as
        # "no matches" instead of crashing on iteration.
        docs = []

    head_counts = [doc.fields['rntHeadCount'] for doc in docs]
    headCount = sum(head_counts) / len(head_counts) if head_counts else 0

    print(f"预测回流人数={headCount}")
    print("=====================================")
    return headCount
# Read the input sheet, add actual and predicted columns, and write the result.
df = pd.read_excel('videos-202309.xlsx')

# Actual ROS is a plain column-wise division; doing it vectorised avoids a
# Python call per row and yields inf/NaN for zero shares instead of raising.
df['实际ROS(回流人数/分享次数)'] = df['回流人数'] / df['分享次数']
df['预测ROS'] = df.apply(calculate_ros, axis=1)

# Copy of the source column, placed next to its prediction in the output.
df['实际回流人数'] = df['回流人数']
df['预测回流人数'] = df.apply(calculate_return, axis=1)

df.to_excel('videos-result.xlsx', index=False)
|