# dy_search.py
"""
Douyin search helpers backed by the crawler service.

@author: luojunhui
"""
  4. import json
  5. import requests
  6. from applications.functions.common import sensitive_flag
  7. def douyin_search(keyword, sensitive_words):
  8. """
  9. Search with dou cha cha
  10. rank the relevance and recall the best three videos
  11. :param sensitive_words: sensitive words in pq
  12. :param keyword: the words needs to be searched
  13. :return:
  14. """
  15. url = "http://8.217.190.241:8888/crawler/dou_yin/top_hub_content"
  16. payload = json.dumps({
  17. "keyword": keyword,
  18. "category": "全部",
  19. "period": "近7天",
  20. "content_modal": "视频",
  21. "cursor": ""
  22. })
  23. headers = {
  24. 'Content-Type': 'application/json'
  25. }
  26. response = requests.request("POST", url, headers=headers, data=payload)
  27. dt_list = response.json()['data']['data']
  28. L = []
  29. for obj in dt_list:
  30. try:
  31. title = obj['video_desc']
  32. video_id = obj['video_id']
  33. if sensitive_flag(sensitive_words, title):
  34. res = douyin_detail(video_id)
  35. L.append(res)
  36. return L
  37. else:
  38. continue
  39. except Exception as e:
  40. continue
  41. return []
  42. def douyin_detail(video_id):
  43. """
  44. get video url address
  45. :param video_id:
  46. :return:
  47. """
  48. url = "http://8.217.190.241:8888/crawler/dou_yin/detail"
  49. payload = json.dumps({
  50. "content_id": video_id
  51. })
  52. headers = {
  53. 'Content-Type': 'application/json'
  54. }
  55. response = requests.request("POST", url, headers=headers, data=payload).json()
  56. video_info = response['data']['data']
  57. return video_info