# dy_search.py
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import traceback
  6. import requests
  7. from applications.functions.common import sensitive_flag
  8. from applications.log import logging
  9. from applications.feishu import bot
  10. def douyin_search(keyword, sensitive_words, trace_id):
  11. """
  12. Search with dou cha cha
  13. rank the relevance and recall the best three videos
  14. :param trace_id:
  15. :param sensitive_words: sensitive words in pq
  16. :param keyword: the words needs to be searched
  17. :return:
  18. """
  19. url = "http://crawler-cn.aiddit.com/crawler/dou_yin/top_hub_content"
  20. payload = json.dumps({
  21. "keyword": keyword,
  22. "category": "全部",
  23. "period": "近90天",
  24. "content_modal": "视频",
  25. "cursor": ""
  26. })
  27. headers = {
  28. 'Content-Type': 'application/json'
  29. }
  30. response = requests.request("POST", url, headers=headers, data=payload)
  31. try:
  32. dt_list = response.json()['data']['data']
  33. L = []
  34. for obj in dt_list:
  35. try:
  36. title = obj['video_desc']
  37. video_id = obj['video_id']
  38. duration = int(obj['duration'])
  39. if sensitive_flag(sensitive_words, title) and duration < 30000:
  40. res = douyin_detail(video_id)
  41. if res:
  42. L.append(res)
  43. else:
  44. continue
  45. except Exception as e:
  46. print(traceback.format_exc())
  47. continue
  48. logging(
  49. code="8001",
  50. info="抖音搜索",
  51. data={
  52. "keys": keyword,
  53. "search_count": len(dt_list),
  54. "useful_count": len(L)
  55. },
  56. trace_id=trace_id
  57. )
  58. if not L and len(dt_list) > 0:
  59. bot(
  60. title="抖音搜索失败",
  61. detail={
  62. "keys": keyword,
  63. "搜索的视频数量": len(dt_list),
  64. "详情请求的视频数量": len(L)
  65. },
  66. mention=False
  67. )
  68. return L
  69. except Exception as e:
  70. logging(
  71. code="4003",
  72. info="抖音搜索失败-搜索词:{} 原因:-{}".format(keyword, e),
  73. trace_id=trace_id
  74. )
  75. return []
  76. # logging(
  77. # code="4003",
  78. # info="抖音搜索失败-搜索词:{} 原因:-{}".format(keyword, "抖查查暂停服务"),
  79. # trace_id=trace_id
  80. # )
  81. # return []
  82. def douyin_detail(video_id):
  83. """
  84. get video url address
  85. :param video_id:
  86. :return:
  87. """
  88. url = "http://crawler-cn.aiddit.com/crawler/dou_yin/detail"
  89. payload = json.dumps({
  90. "content_id": video_id
  91. })
  92. headers = {
  93. 'Content-Type': 'application/json'
  94. }
  95. response = requests.request("POST", url, headers=headers, data=payload).json()
  96. video_info = response['data']['data']
  97. if video_info['content_type'] == "note":
  98. return None
  99. else:
  100. return video_info