search_video.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. """
  2. @author: luojunhui
  3. """
  4. import requests
  5. import json
  6. import time
  7. from tqdm import tqdm
  8. def search_spider(params):
  9. """
  10. 通过搜索爬虫 + search_keys 来获取视频信息,并且以 MQ 的方式发送给 ETL, 正常上传发布
  11. :param params: []
  12. :return:
  13. """
  14. search_keys = params['search_keys']
  15. url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
  16. payload = json.dumps({
  17. "keyword": ",".join(search_keys),
  18. "cursor": "0",
  19. "content_type": "video"
  20. })
  21. headers = {
  22. 'Content-Type': 'application/json'
  23. }
  24. response = requests.request("POST", url, headers=headers, data=payload)
  25. data_list = response.json()['data']['data']
  26. for item in tqdm(data_list[:1]):
  27. video_obj = item['items'][0]
  28. print(json.dumps(video_obj, ensure_ascii=False, indent=4))
  29. video_id = video_obj['hashDocID']
  30. video_url = video_obj['videoUrl']
  31. video_title = video_obj['title']
  32. etl(vid=video_id, video_url=video_url, title=video_title)
  33. def etl(vid, title, video_url):
  34. print(vid)
  35. url = "http://lightgbm-internal-test.piaoquantv.com/etl"
  36. payload = {
  37. "video_title": title,
  38. "video_url": video_url,
  39. "video_id": vid,
  40. }
  41. t = time.time()
  42. res = requests.post(url, json=payload)
  43. e = time.time()
  44. print(e - t)
  45. print(res.json())
  46. # p = {
  47. # "search_keys": ["王者荣耀", "李白", "五杀"]
  48. # }
  49. # search_spider(p)