embedding_utils.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. import json
  2. import requests
  3. from core.config import logger
  4. from core.database import DBHelper
  5. from data_models.content_chunks import ContentChunks
  6. from data_models.dataset import Dataset
  7. def get_embedding_data(query, dataset_ids, limit=10):
  8. try:
  9. response = requests.post(
  10. url='http://61.48.133.26:8001/api/search',
  11. json={
  12. "query_text": query,
  13. "search_type": "hybrid",
  14. "filters": {
  15. "dataset_id": dataset_ids
  16. },
  17. "limit": limit},
  18. headers={"Content-Type": "application/json"},
  19. )
  20. return response.json()['results']
  21. except Exception as e:
  22. logger.error(e)
  23. def get_embedding_content_data(query, dataset_ids):
  24. res = []
  25. db_helper = DBHelper()
  26. results = get_embedding_data(query, dataset_ids)
  27. if results:
  28. for result in results:
  29. content_chunk = db_helper.get(ContentChunks, doc_id=result['doc_id'], chunk_id=result['chunk_id'])
  30. dataset = db_helper.get(Dataset, id=content_chunk.dataset_id)
  31. dataset_name = None
  32. if dataset:
  33. dataset_name = dataset.name
  34. res.append(
  35. {'docId': content_chunk.doc_id, 'content': content_chunk.text,
  36. 'contentSummary': content_chunk.summary, 'score': result['score'], 'datasetName': dataset_name})
  37. return res