sunxy 1 рік тому
батько
коміт
3a8c113bed
2 змінених файлів з 47 додано та 13 видалено
  1. 15 13
      BertQuery.py
  2. 32 0
      ClientQuery.py

+ 15 - 13
BertQuery.py

@@ -2,7 +2,7 @@ from BertDemo import collection, text_to_vector, List, Doc
 import pandas as pd
 import pandas as pd
 
 
 # 创建一个缓存字典,用于存储查询结果
 # 创建一个缓存字典,用于存储查询结果
-query_cache = {}
+# query_cache = {}
 
 
 
 
 def vector_to_tuple(vector):
 def vector_to_tuple(vector):
@@ -14,8 +14,8 @@ def queryCollection(vector) -> List[Doc]:
     # 如果向量已经查询过,则直接返回结果
     # 如果向量已经查询过,则直接返回结果
     vector_tuple = vector_to_tuple(vector)
     vector_tuple = vector_to_tuple(vector)
 
 
-    if vector_tuple in query_cache:
-        return query_cache[vector_tuple]
+    # if vector_tuple in query_cache:
+    #     return query_cache[vector_tuple]
 
 
     # 根据向量进行相似性检索 + 条件过滤
     # 根据向量进行相似性检索 + 条件过滤
     ret = collection.query(
     ret = collection.query(
@@ -27,7 +27,7 @@ def queryCollection(vector) -> List[Doc]:
     if ret is None or ret.code != 0:
     if ret is None or ret.code != 0:
         print('查询失败')
         print('查询失败')
         return None
         return None
-    query_cache[vector_tuple] = ret.output
+    # query_cache[vector_tuple] = ret.output
     return ret.output
     return ret.output
 
 
 
 
@@ -67,14 +67,16 @@ def calculate_return(row):
     return headCount
     return headCount
 
 
 
 
-# 读取表格数据
-df = pd.read_excel('videos-202309.xlsx')
+def calculate_and_export(filename, output='videos-result.xlsx'):
+    """Read `filename`, add actual/predicted ROS and returning-user columns, save to `output`."""
+    df = pd.read_excel(filename)
 
 
-# 应用计算函数并创建新的列
-df['实际ROS(回流人数/分享次数)'] = df.apply(lambda row: row['回流人数'] / row['分享次数'], axis=1)
-df['预测ROS'] = df.apply(calculate_ros, axis=1)
-df['实际回流人数'] = df['回流人数']
-df['预测回流人数'] = df.apply(calculate_return, axis=1)
+    # Actual ROS = '回流人数' / '分享次数', divided column-wise instead of per row.
+    df['实际ROS(回流人数/分享次数)'] = df['回流人数'] / df['分享次数']
+    # The predicted columns come from the row-wise helpers defined in this module.
+    df['预测ROS'] = df.apply(calculate_ros, axis=1)
+    df['实际回流人数'] = df['回流人数']
+    df['预测回流人数'] = df.apply(calculate_return, axis=1)
 
 
-# 将结果保存回表格
-df.to_excel('videos-result.xlsx', index=False)
+    # Write the augmented table; the DataFrame index is not exported.
+    df.to_excel(output, index=False)

+ 32 - 0
ClientQuery.py

@@ -0,0 +1,32 @@
+import gradio as gr
+import BertQuery
+
+
+def queryColelctionByText(text):
+    """Return the videos most similar to `text` as a tab-separated text table.
+
+    Returns '查询失败' when the query returns None or no documents.
+    """
+    docs = BertQuery.queryCollection(BertQuery.text_to_vector(text))
+    if not docs:
+        return '查询失败'
+    # Keys read from doc.fields, in the order of the header columns that sit
+    # between videoId (doc.id) and the similarity score (doc.score).
+    keys = ('title', 'rntHeadCount', 'shareCount', 'shareHeadCount',
+            'exposureCount', 'exposureHeadCount', 'playCount', 'playHeadCount')
+    header = "videoId 标题 回流人数 分享次数 分享人数 曝光次数 曝光人数 播放次数 播放人数 相似度"
+    # Join the header with tabs so it uses the same delimiter as the rows.
+    result = ['\t'.join(header.split(' '))]
+    for doc in docs:
+        cells = [doc.id, *(doc.fields[key] for key in keys), doc.score]
+        result.append('\t'.join(f'{cell}' for cell in cells))
+    # Rows are separated by a blank line, as before.
+    return '\n\n'.join(result)
+
+
+# Gradio UI: a 7-line text box as input, plain text as output.
+text_input = gr.components.Textbox(lines=7, label="请输入文本内容")
+iface = gr.Interface(fn=queryColelctionByText, inputs=text_input,
+                     outputs="text", title="视频内容向量检索,相似度匹配")
+# Start the app at import time; share=False requests no public share link.
+iface.launch(share=False)