12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- """
- @author: luojunhui
- """
- import requests
- import json
- import time
- from tqdm import tqdm
def search_spider(params):
    """
    Query the keyword search crawler and hand the first video hit to ``etl``.

    The entries of ``params['search_keys']`` are joined with commas and sent
    to the crawler as one keyword query restricted to video content. For each
    processed result group, the first entry of its ``items`` list is printed
    and its ``hashDocID``, ``videoUrl`` and ``title`` fields are forwarded to
    ``etl``.

    NOTE(review): the original docstring described delivery to ETL via MQ;
    the code here calls ``etl`` directly -- confirm which is intended.

    :param params: mapping with a ``search_keys`` entry holding the keyword
        strings to search for
    :return: None
    :raises requests.HTTPError: if the crawler answers with an error status
    :raises requests.Timeout: if the crawler does not answer in time
    """
    search_keys = params['search_keys']
    url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
    body = {
        "keyword": ",".join(search_keys),
        "cursor": "0",
        "content_type": "video"
    }
    # `json=` serializes the body and sets the JSON Content-Type header,
    # matching how `etl` posts its payload. The (connect, read) timeout keeps
    # an unresponsive crawler from blocking the caller forever.
    response = requests.post(url, json=body, timeout=(10, 120))
    # Fail on HTTP errors here instead of with an unrelated KeyError or
    # JSON decoding error further down.
    response.raise_for_status()
    data_list = response.json()['data']['data']
    # Only the first result group is processed, as in the original code.
    for item in tqdm(data_list[:1]):
        candidates = item.get('items')
        if not candidates:
            # A result group without any video entries has nothing to forward.
            continue
        video_obj = candidates[0]
        print(json.dumps(video_obj, ensure_ascii=False, indent=4))
        etl(
            vid=video_obj['hashDocID'],
            video_url=video_obj['videoUrl'],
            title=video_obj['title'],
        )
def etl(vid, title, video_url):
    """
    Post one video to the ETL HTTP endpoint and print timing and the reply.

    Prints ``vid``, then the elapsed seconds of the request, then the decoded
    JSON body of the response.

    :param vid: video identifier, sent as ``video_id``
    :param title: video title, sent as ``video_title``
    :param video_url: video URL, sent as ``video_url``
    :return: None
    """
    print(vid)
    url = "http://lightgbm-internal-test.piaoquantv.com/etl"
    payload = {
        "video_title": title,
        "video_url": video_url,
        "video_id": vid,
    }
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    # NOTE(review): no timeout is set, so this call can block indefinitely;
    # choose one once the expected ETL latency is known.
    res = requests.post(url, json=payload)
    elapsed = time.perf_counter() - started
    print(elapsed)
    print(res.json())
- # p = {
- # "search_keys": ["王者荣耀", "李白", "五杀"]
- # }
- # search_spider(p)
|