# search_video.py (1.7 KB)
  1. """
  2. @author: luojunhui
  3. """
  4. import requests
  5. import json
  6. import asyncio
  7. from tqdm import tqdm
  8. from applications.functions.upload import upload_to_pq, upload_to_oss
  9. async def process_video_obj(video_obj):
  10. """
  11. Process video object
  12. :param video_obj:
  13. :return:
  14. """
  15. video_dict = {
  16. "video_url": video_obj['videoUrl'],
  17. "video_title": video_obj['title'],
  18. "publish_time": video_obj['pubTime'],
  19. "like_num": video_obj['likeNum'],
  20. "video_id": video_obj['hashDocID'],
  21. "video_duration": video_obj['duration'],
  22. "video_cover": video_obj['image']
  23. }
  24. res = await upload_to_oss(
  25. video_id=video_obj['hashDocID'],
  26. video_url=video_obj['videoUrl'])
  27. print(res)
  28. res2 = upload_to_pq(
  29. oss_object_key=res['oss_object_key'],
  30. title=video_obj['title']
  31. )
  32. async def search_spider(params):
  33. """
  34. 通过搜索爬虫 + search_keys 来获取视频信息,并且以 MQ 的方式发送给 ETL, 正常上传发布
  35. :param params: []
  36. :return:
  37. """
  38. search_keys = params['search_keys']
  39. url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
  40. payload = json.dumps({
  41. "keyword": ",".join(search_keys),
  42. "cursor": "0",
  43. "content_type": "video"
  44. })
  45. headers = {
  46. 'Content-Type': 'application/json'
  47. }
  48. response = requests.request("POST", url, headers=headers, data=payload)
  49. data_list = response.json()['data']['data']
  50. for item in tqdm(data_list[:2]):
  51. video_obj = item['items'][0]
  52. await process_video_obj(video_obj)
  53. async def main():
  54. p = {
  55. "search_keys": ["同学聚会", "演讲", "点赞"]
  56. }
  57. await search_spider(p)
# Run the async entry point.
# NOTE(review): this call sits at module level, so it also runs whenever the
# file is imported; consider an ``if __name__ == "__main__":`` guard.
asyncio.run(main())