# dy_search.py
"""
@author: luojunhui
"""
  4. import json
  5. import requests
  6. from applications.functions.common import sensitive_flag
  7. from applications.functions.log import logging
  8. def douyin_search(keyword, sensitive_words, trace_id):
  9. """
  10. Search with dou cha cha
  11. rank the relevance and recall the best three videos
  12. :param trace_id:
  13. :param sensitive_words: sensitive words in pq
  14. :param keyword: the words needs to be searched
  15. :return:
  16. """
  17. url = "http://8.217.190.241:8888/crawler/dou_yin/top_hub_content"
  18. payload = json.dumps({
  19. "keyword": keyword,
  20. "category": "全部",
  21. "period": "近90天",
  22. "content_modal": "视频",
  23. "cursor": ""
  24. })
  25. headers = {
  26. 'Content-Type': 'application/json'
  27. }
  28. response = requests.request("POST", url, headers=headers, data=payload)
  29. try:
  30. dt_list = response.json()['data']['data']
  31. L = []
  32. for obj in dt_list:
  33. try:
  34. title = obj['video_desc']
  35. video_id = obj['video_id']
  36. duration = int(obj['duration'])
  37. if sensitive_flag(sensitive_words, title) and duration < 30000:
  38. res = douyin_detail(video_id)
  39. if res:
  40. L.append(res)
  41. else:
  42. continue
  43. except Exception as e:
  44. continue
  45. logging(
  46. code="8001",
  47. info="抖音搜索",
  48. data={
  49. "keys": keyword,
  50. "search_count": len(dt_list),
  51. "useful_count": len(L)
  52. },
  53. trace_id=trace_id
  54. )
  55. return L
  56. except Exception as e:
  57. logging(
  58. code="4003",
  59. info="抖音搜索失败-搜索词:{} 原因:-{}".format(keyword, e),
  60. trace_id=trace_id
  61. )
  62. return []
  63. def douyin_detail(video_id):
  64. """
  65. get video url address
  66. :param video_id:
  67. :return:
  68. """
  69. url = "http://8.217.190.241:8888/crawler/dou_yin/detail"
  70. payload = json.dumps({
  71. "content_id": video_id
  72. })
  73. headers = {
  74. 'Content-Type': 'application/json'
  75. }
  76. response = requests.request("POST", url, headers=headers, data=payload).json()
  77. video_info = response['data']['data']
  78. if video_info['content_type'] == "note":
  79. return None
  80. else:
  81. return video_info