123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869 |
- """
- @author: luojunhui
- """
- import requests
- import json
- import asyncio
- from tqdm import tqdm
- from applications.functions.upload import upload_to_pq, upload_to_oss
async def process_video_obj(video_obj):
    """
    Upload one crawled video to OSS, then hand the stored object to PQ.

    :param video_obj: mapping describing a crawled video; only the
        'hashDocID', 'videoUrl' and 'title' keys are read here.
    :return: whatever ``upload_to_pq`` returns for the uploaded object.
    :raises KeyError: if one of the keys above is missing from ``video_obj``,
        or the OSS upload result has no 'oss_object_key' entry.
    """
    video_id = video_obj['hashDocID']
    video_url = video_obj['videoUrl']
    title = video_obj['title']

    # The OSS upload must finish first: its result carries the object key
    # that the PQ upload needs.
    oss_result = await upload_to_oss(video_id=video_id, video_url=video_url)
    print(oss_result)

    # NOTE(review): upload_to_pq is called without ``await``, exactly as
    # before -- confirm it is a plain synchronous function.
    return upload_to_pq(
        oss_object_key=oss_result['oss_object_key'],
        title=title,
    )
async def search_spider(params):
    """
    Query the keyword search crawler and process the first results.

    Posts the comma-joined ``search_keys`` to the crawler endpoint, then runs
    ``process_video_obj`` on the first entry of each of the first two result
    groups. (The original docstring described the goal as fetching video info
    via the search crawler and passing it to ETL through MQ for the normal
    upload/publish flow; the MQ hand-off is not visible in this function.)

    :param params: mapping with a 'search_keys' entry holding the keyword
        strings to search for.
    :return: None
    :raises KeyError: if 'search_keys' is missing or the response JSON lacks
        the nested 'data' -> 'data' entries.
    :raises requests.RequestException: on timeout, connection failure or a
        non-2xx HTTP status.
    """
    search_keys = params['search_keys']
    url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
    payload = json.dumps({
        "keyword": ",".join(search_keys),
        "cursor": "0",
        "content_type": "video"
    })
    headers = {
        'Content-Type': 'application/json'
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    request_timeout = 30

    def _post_search():
        return requests.request(
            "POST", url, headers=headers, data=payload, timeout=request_timeout
        )

    # requests is blocking; run it in the default executor so the event loop
    # stays responsive while the crawler answers.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, _post_search)
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
    response.raise_for_status()

    data_list = response.json()['data']['data']
    # Only the first two result groups are processed.
    for item in tqdm(data_list[:2]):
        candidates = item.get('items')
        if not candidates:
            # Nothing to upload for this group; skip instead of crashing.
            continue
        await process_video_obj(candidates[0])
async def main():
    """
    Run the search spider once with a fixed list of sample keywords.

    :return: None
    """
    await search_spider({
        "search_keys": ["同学聚会", "演讲", "点赞"]
    })
# Run the async entry point to completion.
asyncio.run(main())
|