# dy_search.py
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import requests
  6. from applications.functions.common import sensitive_flag
  7. def douyin_search(keyword, sensitive_words):
  8. """
  9. Search with dou cha cha
  10. rank the relevance and recall the best three videos
  11. :param sensitive_words: sensitive words in pq
  12. :param keyword: the words needs to be searched
  13. :return:
  14. """
  15. url = "http://8.217.190.241:8888/crawler/dou_yin/top_hub_content"
  16. payload = json.dumps({
  17. "keyword": keyword,
  18. "category": "全部",
  19. "period": "近90天",
  20. "content_modal": "视频",
  21. "cursor": ""
  22. })
  23. headers = {
  24. 'Content-Type': 'application/json'
  25. }
  26. response = requests.request("POST", url, headers=headers, data=payload)
  27. try:
  28. dt_list = response.json()['data']['data']
  29. L = []
  30. for obj in dt_list:
  31. try:
  32. title = obj['video_desc']
  33. video_id = obj['video_id']
  34. if sensitive_flag(sensitive_words, title):
  35. res = douyin_detail(video_id)
  36. L.append(res)
  37. else:
  38. continue
  39. except Exception as e:
  40. continue
  41. return L
  42. except Exception as e:
  43. print("search_fail---{}, error---{}".format(keyword, e))
  44. return []
  45. def douyin_detail(video_id):
  46. """
  47. get video url address
  48. :param video_id:
  49. :return:
  50. """
  51. url = "http://8.217.190.241:8888/crawler/dou_yin/detail"
  52. payload = json.dumps({
  53. "content_id": video_id
  54. })
  55. headers = {
  56. 'Content-Type': 'application/json'
  57. }
  58. response = requests.request("POST", url, headers=headers, data=payload).json()
  59. video_info = response['data']['data']
  60. return video_info