dy_search.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import requests
  6. from applications.functions.common import sensitive_flag
  7. from applications.functions.log import logging
  8. def douyin_search(keyword, sensitive_words, trace_id):
  9. """
  10. Search with dou cha cha
  11. rank the relevance and recall the best three videos
  12. :param trace_id:
  13. :param sensitive_words: sensitive words in pq
  14. :param keyword: the words needs to be searched
  15. :return:
  16. """
  17. # url = "http://8.217.190.241:8888/crawler/dou_yin/top_hub_content"
  18. # payload = json.dumps({
  19. # "keyword": keyword,
  20. # "category": "全部",
  21. # "period": "近90天",
  22. # "content_modal": "视频",
  23. # "cursor": ""
  24. # })
  25. # headers = {
  26. # 'Content-Type': 'application/json'
  27. # }
  28. #
  29. # response = requests.request("POST", url, headers=headers, data=payload)
  30. # try:
  31. # dt_list = response.json()['data']['data']
  32. # L = []
  33. # for obj in dt_list:
  34. # try:
  35. # title = obj['video_desc']
  36. # video_id = obj['video_id']
  37. # duration = int(obj['duration'])
  38. # if sensitive_flag(sensitive_words, title) and duration < 30000:
  39. # res = douyin_detail(video_id)
  40. # if res:
  41. # L.append(res)
  42. # else:
  43. # continue
  44. # except Exception as e:
  45. # continue
  46. # logging(
  47. # code="8001",
  48. # info="抖音搜索",
  49. # data={
  50. # "keys": keyword,
  51. # "search_count": len(dt_list),
  52. # "useful_count": len(L)
  53. # },
  54. # trace_id=trace_id
  55. # )
  56. # return L
  57. # except Exception as e:
  58. # logging(
  59. # code="4003",
  60. # info="抖音搜索失败-搜索词:{} 原因:-{}".format(keyword, e),
  61. # trace_id=trace_id
  62. # )
  63. # return []
  64. logging(
  65. code="4003",
  66. info="抖音搜索失败-搜索词:{} 原因:-{}".format(keyword, "抖查查暂停服务"),
  67. trace_id=trace_id
  68. )
  69. return []
  70. def douyin_detail(video_id):
  71. """
  72. get video url address
  73. :param video_id:
  74. :return:
  75. """
  76. url = "http://8.217.190.241:8888/crawler/dou_yin/detail"
  77. payload = json.dumps({
  78. "content_id": video_id
  79. })
  80. headers = {
  81. 'Content-Type': 'application/json'
  82. }
  83. response = requests.request("POST", url, headers=headers, data=payload).json()
  84. video_info = response['data']['data']
  85. if video_info['content_type'] == "note":
  86. return None
  87. else:
  88. return video_info