data_utils.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. import json
  2. import requests
  3. from core.config import logger
  4. from core.database_data import DatabaseHelper
  5. def add_data(text):
  6. try:
  7. response = requests.post(
  8. url='http://61.48.133.26:8001/api/chunk',
  9. json={
  10. "text": text,
  11. "text_type": 1},
  12. headers={"Content-Type": "application/json"},
  13. )
  14. return response.json()['doc_id']
  15. except Exception as e:
  16. logger.error(e)
  17. return e
  18. def select_data():
  19. db_helper = DatabaseHelper()
  20. # 执行查询
  21. query = """
  22. SELECT c.crawl_data as json_text
  23. FROM knowledge_extraction_content a
  24. LEFT JOIN knowledge_parsing_content b ON a.parsing_id = b.id AND b.request_id = a.request_id
  25. LEFT JOIN knowledge_crawl_content c ON c.content_id = b.content_id AND c.request_id = a.request_id
  26. LEFT JOIN knowledge_request d ON d.request_id = a.request_id
  27. LEFT JOIN knowledge_query e ON e.id = d.query_id
  28. WHERE a.request_id > '20250905022700393495252' AND e.knowledge_type = '整体' AND a.score >= 0 AND e.category_id = 0
  29. ORDER BY a.id DESC
  30. """
  31. result = db_helper.execute_query(query)
  32. for row in result:
  33. add_data(json.loads(row['json_text'])['body_text'])