|
@@ -1,58 +0,0 @@
|
|
-"""
|
|
|
|
-@author: luojunhui
|
|
|
|
-"""
|
|
|
|
-import requests
|
|
|
|
-import json
|
|
|
|
-import time
|
|
|
|
-
|
|
|
|
-from tqdm import tqdm
|
|
|
|
-
|
|
|
|
-
|
|
|
|
def search_spider(params, timeout=60):
    """
    Query the keyword-search crawler and hand the first hit to ``etl``.

    The search keys are joined with commas and POSTed to the crawler's
    ``wei_xin/keyword`` endpoint with ``content_type`` set to ``"video"``.
    For the first result group only, the first entry of its ``items`` list
    is printed and forwarded to ``etl`` (id, url and title).

    :param params: dict with a ``search_keys`` entry holding the keyword strings
    :param timeout: seconds to wait for the crawler before ``requests`` gives up
    :return: None
    :raises requests.HTTPError: when the crawler answers with a 4xx/5xx status
    """
    search_keys = params['search_keys']
    url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"

    payload = {
        "keyword": ",".join(search_keys),
        "cursor": "0",
        "content_type": "video",
    }
    # json= serialises the body and sets the JSON Content-Type header, the
    # same way etl() issues its request; the timeout keeps a stalled crawler
    # from blocking the caller forever.
    response = requests.post(url, json=payload, timeout=timeout)
    # Fail on an HTTP error status instead of tripping over an error body below.
    response.raise_for_status()
    data_list = response.json()['data']['data']

    # Only the first result group is processed.
    # NOTE(review): the [:1] cap looks like a debugging limit — confirm it is intended.
    for item in tqdm(data_list[:1]):
        candidates = item.get('items') or []
        if not candidates:
            # A result group without entries has nothing to forward.
            continue
        video_obj = candidates[0]
        print(json.dumps(video_obj, ensure_ascii=False, indent=4))
        video_id = video_obj['hashDocID']
        video_url = video_obj['videoUrl']
        video_title = video_obj['title']
        etl(vid=video_id, video_url=video_url, title=video_title)
|
|
|
|
-
|
|
|
|
-
|
|
|
|
def etl(vid, title, video_url, timeout=300):
    """
    POST one video's metadata to the ETL endpoint and print the outcome.

    Prints the video id, then the request's round-trip time in seconds,
    then the decoded JSON reply (or the status code and raw text when the
    reply is not JSON).

    :param vid: video identifier, sent as ``video_id``
    :param title: video title, sent as ``video_title``
    :param video_url: video location, sent as ``video_url``
    :param timeout: seconds to wait for the endpoint before ``requests`` gives up
    :return: None
    """
    print(vid)
    url = "http://lightgbm-internal-test.piaoquantv.com/etl"
    payload = {
        "video_title": title,
        "video_url": video_url,
        "video_id": vid,
    }
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    started = time.perf_counter()
    res = requests.post(url, json=payload, timeout=timeout)
    elapsed = time.perf_counter() - started
    print(elapsed)
    try:
        print(res.json())
    except ValueError:
        # The body was not JSON (for example an HTML error page); show what
        # came back instead of crashing on the decode.
        print(res.status_code, res.text)
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-# p = {
|
|
|
|
-# "search_keys": ["王者荣耀", "李白", "五杀"]
|
|
|
|
-# }
|
|
|
|
-# search_spider(p)
|
|
|