| 
					
				 | 
			
			
				@@ -1,58 +0,0 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-@author: luojunhui 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import requests 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import json 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from tqdm import tqdm 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				def search_spider(params, timeout=(10, 60)):
				    """
				    Query the keyword crawler and hand the first video hit to ``etl``.

				    The entries of ``params['search_keys']`` are joined with commas into a
				    single keyword and POSTed as JSON to the crawler endpoint. Only the first
				    entry of the returned result list is processed: its first item is printed
				    as indented JSON and its ``hashDocID``, ``videoUrl`` and ``title`` fields
				    are forwarded to ``etl``.

				    NOTE(review): the original note said results are delivered to ETL via MQ;
				    the code visible here calls ``etl`` directly -- confirm which is intended.

				    :param params: mapping with a ``search_keys`` list of keyword strings
				    :param timeout: timeout passed to ``requests`` (seconds, or a
				        ``(connect, read)`` tuple); pass ``None`` to wait indefinitely
				    :return: None
				    """
				    search_keys = params['search_keys']
				    url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
				    payload = json.dumps({
				        "keyword": ",".join(search_keys),
				        "cursor": "0",
				        "content_type": "video",
				    })
				    headers = {
				        'Content-Type': 'application/json',
				    }
				    # Without a timeout, requests blocks forever if the crawler stalls.
				    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
				    data_list = response.json()['data']['data']
				    # NOTE(review): only the first search result is processed -- confirm
				    # that this cap is intentional and not leftover debugging.
				    for item in tqdm(data_list[:1]):
				        candidates = item['items']
				        if not candidates:
				            # Nothing to forward for this result; skip it rather than
				            # failing with IndexError on the [0] lookup.
				            continue
				        video_obj = candidates[0]
				        print(json.dumps(video_obj, ensure_ascii=False, indent=4))
				        etl(
				            vid=video_obj['hashDocID'],
				            video_url=video_obj['videoUrl'],
				            title=video_obj['title'],
				        )
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				def etl(vid, title, video_url):
				    """
				    POST one video's metadata to the ETL endpoint and print the outcome.

				    Prints ``vid``, then the number of seconds the request took, then the
				    decoded JSON body of the response.

				    :param vid: value sent as ``video_id``
				    :param title: value sent as ``video_title``
				    :param video_url: value sent as ``video_url``
				    :return: None
				    """
				    print(vid)
				    url = "http://lightgbm-internal-test.piaoquantv.com/etl"
				    payload = {
				        "video_title": title,
				        "video_url": video_url,
				        "video_id": vid,
				    }
				    # perf_counter is monotonic, so the measured interval cannot be skewed
				    # by wall-clock adjustments the way time.time() differences can.
				    started = time.perf_counter()
				    # NOTE(review): no timeout is set here; the expected ETL latency is not
				    # visible from this file, so the original blocking behavior is kept.
				    res = requests.post(url, json=payload)
				    elapsed = time.perf_counter() - started
				    print(elapsed)
				    print(res.json())
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-# p = { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-#     "search_keys": ["王者荣耀", "李白", "五杀"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-# } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-# search_spider(p) 
			 |