dy_search.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import requests
  6. from applications.functions.common import sensitive_flag
  7. def douyin_search(keyword, sensitive_words):
  8. """
  9. Search with dou cha cha
  10. rank the relevance and recall the best three videos
  11. :param sensitive_words: sensitive words in pq
  12. :param keyword: the words needs to be searched
  13. :return:
  14. """
  15. url = "http://8.217.190.241:8888/crawler/dou_yin/top_hub_content"
  16. payload = json.dumps({
  17. "keyword": keyword,
  18. "category": "全部",
  19. "period": "近7天",
  20. "content_modal": "视频",
  21. "cursor": ""
  22. })
  23. headers = {
  24. 'Content-Type': 'application/json'
  25. }
  26. response = requests.request("POST", url, headers=headers, data=payload)
  27. try:
  28. dt_list = response.json()['data']['data']
  29. L = []
  30. for obj in dt_list:
  31. try:
  32. title = obj['video_desc']
  33. video_id = obj['video_id']
  34. if sensitive_flag(sensitive_words, title):
  35. res = douyin_detail(video_id)
  36. L.append(res)
  37. return L
  38. else:
  39. continue
  40. except Exception as e:
  41. continue
  42. return []
  43. except:
  44. print("search_fail---{}".format(keyword))
  45. return []
  46. def douyin_detail(video_id):
  47. """
  48. get video url address
  49. :param video_id:
  50. :return:
  51. """
  52. url = "http://8.217.190.241:8888/crawler/dou_yin/detail"
  53. payload = json.dumps({
  54. "content_id": video_id
  55. })
  56. headers = {
  57. 'Content-Type': 'application/json'
  58. }
  59. response = requests.request("POST", url, headers=headers, data=payload).json()
  60. video_info = response['data']['data']
  61. return video_info