# sunxiaoyi/aigc-content-vector
							from BertDemo import collection, text_to_vector, List, Doc
import pandas as pd

# Cache dictionary used to store query results.
query_cache = dict()


def vector_to_tuple(vector):
    """Return the items of ``vector`` as a tuple so it can be used as a dict key."""
    return (*vector,)


def queryCollection(vector) -> List[Doc]:
    """Fetch the documents most similar to ``vector`` from ``collection``.

    If ``query_cache`` already holds an entry keyed by the tuple form of the
    vector, that entry is returned and no query is issued. Otherwise the
    collection is queried for the top 10 matches with vectors included and
    without any field filter.

    Returns:
        The ``output`` of the query response, or ``None`` (after printing a
        failure message) when the response is ``None`` or its ``code`` is
        non-zero.
    """
    cache_key = vector_to_tuple(vector)
    try:
        # Cache hit: reuse the stored result instead of querying again.
        return query_cache[cache_key]
    except KeyError:
        pass

    # Similarity search by vector; only the 10 closest documents are requested.
    response = collection.query(vector=vector, topk=10, include_vector=True)

    succeeded = response is not None and response.code == 0
    if not succeeded:
        print('查询失败')
        return None

    # NOTE: the result is not written back into query_cache here.
    return response.output


def calculate_rov(row):
    """Predict the ROV of one row from the documents most similar to its title.

    The title is converted with ``text_to_vector``, similar documents are
    fetched with ``queryCollection``, and the prediction is the mean of
    ``rntHeadCount / exposureCount`` over the returned documents.

    Args:
        row: Record supporting ``row['title']`` (e.g. a DataFrame row).

    Returns:
        The mean ratio, or 0 when no documents are returned or the
        computation fails.
    """
    print(f"title={row['title']} ")
    try:
        vector = text_to_vector(row['title'])
        docs = queryCollection(vector)
        if not docs:
            # Failed query (None) or empty result: there is nothing to average.
            rov = 0
        else:
            rov = sum(
                doc.fields['rntHeadCount'] / doc.fields['exposureCount']
                for doc in docs
            ) / len(docs)
    except Exception as exc:
        # Best-effort prediction: any failure (missing field, zero exposure,
        # lookup error) falls back to 0. Catching Exception rather than using
        # a bare except lets KeyboardInterrupt/SystemExit stop a long batch.
        print(f"预测ROV计算失败: {exc!r}")
        rov = 0

    print(f"预测ROV={rov}")
    print("=====================================")
    return rov


# def calculate_return(row):
#     print(f"title={row['title']} ")
#     vector = text_to_vector(row['title'])
#     docs = queryCollection(vector)
#     sumHeadCount = 0
#     for doc in docs:
#         sumHeadCount += doc.fields['rntHeadCount']

#     try:
#         headCount = sumHeadCount / len(docs)
#     except:
#         headCount = 0

#     print(f"预测回流人数={headCount}")
#     print("=====================================")
#     return headCount


def calculate_and_export(filename, output_filename='videos-result.xlsx'):
    """Read a workbook, add actual and predicted ROV columns, and save it.

    Args:
        filename: Path of the Excel workbook to read. Its rows are accessed
            through the '回流人数' and '曝光次数' columns here, and through
            'title' inside ``calculate_rov``.
        output_filename: Path the resulting workbook is written to. Defaults
            to 'videos-result.xlsx', the location used before this parameter
            existed.
    """
    def actual_rov(row):
        # Rows with zero exposure would divide by zero; report 0 for them.
        exposure = row['曝光次数']
        return row['回流人数'] / exposure if exposure != 0 else 0

    # Load the table data.
    df = pd.read_excel(filename)

    # Compute both ROV columns row by row.
    df['实际ROV(回流人数/曝光次数)'] = df.apply(actual_rov, axis=1)
    df['预测ROV'] = df.apply(calculate_rov, axis=1)

    # Write the augmented table to the output workbook without the index.
    df.to_excel(output_filename, index=False)


if __name__ == '__main__':
    # Script entry point: process the flow-pool workbook named below.
    input_workbook = '20231115_flow_pool.xlsx'
    calculate_and_export(input_workbook)