"""
@author: luojunhui
"""
import json
import time

import requests
from tqdm import tqdm

# Endpoint of the keyword search crawler.
SEARCH_URL = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
# Endpoint that receives one video for ETL processing.
ETL_URL = "http://lightgbm-internal-test.piaoquantv.com/etl"

# (connect, read) timeouts in seconds. Without a timeout ``requests`` waits
# forever on a stalled server.
SEARCH_TIMEOUT = (10, 60)
# NOTE(review): the ETL call is timed by this script, which suggests it can be
# slow, so the read timeout is deliberately generous -- confirm the real bound.
ETL_TIMEOUT = (10, 600)


def search_spider(params, limit=1):
    """
    Search the crawler with ``search_keys`` and hand the resulting videos to ETL.

    The keywords are joined with commas and POSTed to the search crawler; for
    each selected result the first entry of its ``items`` list is printed and
    forwarded to :func:`etl`.

    NOTE(review): the original docstring said the videos are sent to ETL "via
    MQ" for normal upload and publishing; the code visible here posts them over
    HTTP -- confirm what happens downstream.

    :param params: dict with a ``search_keys`` entry holding a list of keyword
        strings (a single string is treated as one keyword)
    :param limit: maximum number of search results to process; defaults to 1,
        ``None`` processes every result
    :return: None
    :raises KeyError: if ``params`` or the crawler response lacks an expected key
    :raises requests.RequestException: on network failure, timeout or a
        non-2xx HTTP status
    """
    search_keys = params['search_keys']
    if isinstance(search_keys, str):
        # Joining a bare string would insert a comma between every character.
        search_keys = [search_keys]

    payload = {
        "keyword": ",".join(search_keys),
        "cursor": "0",
        "content_type": "video",
    }
    response = requests.post(SEARCH_URL, json=payload, timeout=SEARCH_TIMEOUT)
    # Fail with a clear HTTPError instead of a confusing JSON/Key error later.
    response.raise_for_status()

    # A null ``data`` list is treated as "no results".
    data_list = response.json()['data']['data'] or []
    selected = data_list if limit is None else data_list[:limit]

    for item in tqdm(selected):
        items = item.get('items')
        if not items:
            # Search hit without any video entries: nothing to forward.
            continue
        video_obj = items[0]
        print(json.dumps(video_obj, ensure_ascii=False, indent=4))
        etl(
            vid=video_obj['hashDocID'],
            video_url=video_obj['videoUrl'],
            title=video_obj['title'],
        )


def etl(vid, title, video_url):
    """
    POST one video to the ETL endpoint and print the id, latency and response.

    :param vid: video id, sent as ``video_id``
    :param title: video title, sent as ``video_title``
    :param video_url: video URL, sent as ``video_url``
    :return: the decoded JSON body of the ETL response
    :raises requests.RequestException: on network failure, timeout or a
        non-2xx HTTP status
    """
    print(vid)
    payload = {
        "video_title": title,
        "video_url": video_url,
        "video_id": vid,
    }
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    started = time.perf_counter()
    res = requests.post(ETL_URL, json=payload, timeout=ETL_TIMEOUT)
    print(time.perf_counter() - started)
    res.raise_for_status()
    result = res.json()
    print(result)
    return result


# Example usage:
# p = {
#     "search_keys": ["王者荣耀", "李白", "五杀"]
# }
# search_spider(p)