12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667 |
- """
- @author: luojunhui
- """
- import json
- import requests
- from applications.functions.common import sensitive_flag
def douyin_search(keyword, sensitive_words, timeout=30):
    """
    Search Douyin videos for a keyword through the crawler service.

    Candidates are scanned in the order the service returns them. The first
    candidate whose title is accepted by ``sensitive_flag`` and whose detail
    lookup succeeds is returned; the remaining candidates are not inspected.
    Although a list is returned, it never holds more than one video.

    :param keyword: the words to search for
    :param sensitive_words: sensitive words, passed to ``sensitive_flag``
        together with each candidate title
    :param timeout: seconds to wait for the search request; it does not
        apply to the follow-up detail lookup
    :return: a one-element list with the detail of the first accepted video,
        or an empty list when nothing is accepted or the search itself fails
    """
    url = "http://8.217.190.241:8888/crawler/dou_yin/top_hub_content"
    payload = json.dumps({
        "keyword": keyword,
        "category": "全部",
        "period": "近7天",
        "content_modal": "视频",
        "cursor": ""
    })
    headers = {
        'Content-Type': 'application/json'
    }
    try:
        # The request lives inside the try so that a connection error or a
        # timeout is reported the same way as a malformed response.
        response = requests.request(
            "POST", url, headers=headers, data=payload, timeout=timeout
        )
        # list() forces iteration here, so a missing / null payload is
        # reported as a search failure instead of escaping further down.
        candidates = list(response.json()['data']['data'])
    except Exception:
        # Best-effort boundary: any failure of the search call yields "no
        # result". Unlike a bare ``except:``, this lets KeyboardInterrupt
        # and SystemExit propagate.
        print("search_fail---{}".format(keyword))
        return []

    for candidate in candidates:
        try:
            title = candidate['video_desc']
            video_id = candidate['video_id']
            # NOTE(review): a truthy flag is treated as "title is acceptable";
            # confirm against the sensitive_flag implementation.
            if sensitive_flag(sensitive_words, title):
                return [douyin_detail(video_id)]
        except Exception:
            # A malformed candidate or a failed detail lookup only skips
            # this candidate; the next one is tried.
            continue
    return []
def douyin_detail(video_id, timeout=30):
    """
    Fetch the detail record of a single Douyin video from the crawler service.

    :param video_id: id of the video, sent to the service as ``content_id``
    :param timeout: seconds to wait for the service before giving up, so a
        stalled connection cannot block the caller forever
    :return: the ``data.data`` part of the JSON response, unmodified
        (presumably contains the video url address -- confirm against the
        service response)
    :raises requests.exceptions.RequestException: on network failure or timeout
    :raises ValueError: when the response body is not valid JSON
    :raises KeyError: when the response lacks the ``data`` envelope
    """
    url = "http://8.217.190.241:8888/crawler/dou_yin/detail"
    payload = json.dumps({
        "content_id": video_id
    })
    headers = {
        'Content-Type': 'application/json'
    }
    response = requests.request(
        "POST", url, headers=headers, data=payload, timeout=timeout
    )
    return response.json()['data']['data']
|