Server
/
title_with_video


			
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
							"""
@author: luojunhui
"""
import requests
import json
import time

from tqdm import tqdm


def search_spider(params, timeout=60):
    """
    Search the keyword crawler for videos and hand the first hit to ``etl``.

    The entries of ``params['search_keys']`` are joined with commas and POSTed
    to the crawler's keyword endpoint as a video search. Only the first result
    group of the response is processed: its first video is printed and then
    passed to ``etl`` for upload.

    :param params: dict with a ``search_keys`` entry holding the keyword strings
    :param timeout: seconds to wait for the crawler before ``requests`` raises
        a timeout error; ``None`` waits indefinitely
    :return: None
    :raises requests.HTTPError: if the crawler answers with a 4xx/5xx status
    """
    search_keys = params['search_keys']
    url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
    body = {
        "keyword": ",".join(search_keys),
        "cursor": "0",
        "content_type": "video",
    }
    # json= serialises the body and sets the Content-Type header for us;
    # the timeout keeps a stalled crawler from blocking the caller forever.
    response = requests.post(url, json=body, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing KeyError further down.
    response.raise_for_status()
    # Treat a null result list like an empty one.
    data_list = response.json()['data']['data'] or []
    # Only the first result group is forwarded.
    for item in tqdm(data_list[:1]):
        candidates = item.get('items') or []
        if not candidates:
            # Nothing to forward for a result group without videos.
            continue
        video_obj = candidates[0]
        print(json.dumps(video_obj, ensure_ascii=False, indent=4))
        etl(
            vid=video_obj['hashDocID'],
            video_url=video_obj['videoUrl'],
            title=video_obj['title'],
        )


def etl(vid, title, video_url, timeout=None):
    """
    POST one video's metadata to the ETL endpoint and print the outcome.

    Prints the video id, then the request's elapsed time in seconds, then the
    decoded JSON body of the response.

    :param vid: video identifier, sent as ``video_id``
    :param title: video title, sent as ``video_title``
    :param video_url: video URL, sent as ``video_url``
    :param timeout: optional seconds to wait before ``requests`` raises a
        timeout error; the default ``None`` waits indefinitely
    :return: None
    """
    print(vid)
    url = "http://lightgbm-internal-test.piaoquantv.com/etl"
    payload = {
        "video_title": title,
        "video_url": video_url,
        "video_id": vid,
    }
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    res = requests.post(url, json=payload, timeout=timeout)
    elapsed = time.perf_counter() - started
    print(elapsed)
    print(res.json())


# p = {
#     "search_keys": ["王者荣耀", "李白", "五杀"]
# }
# search_spider(p)