"""
@author: luojunhui
"""
import json

import requests

from applications.functions.common import sensitive_flag
from applications.functions.log import logging


def douyin_search(keyword, sensitive_words, trace_id):
    """
    Search Douyin videos for a keyword (currently disabled).

    The upstream search service is suspended, so this function performs no
    network request: it records a failure log entry (code "4003") and returns
    an empty list.

    :param keyword: the words to be searched; only used in the log message
    :param sensitive_words: sensitive words in pq; unused while the search
        is disabled (the disabled implementation below passes it to
        ``sensitive_flag``)
    :param trace_id: identifier forwarded to ``logging``
    :return: always an empty list
    """
    # NOTE: the block below is the previous implementation, kept so it can be
    # restored if the search service resumes. It queried the search endpoint,
    # kept entries that passed ``sensitive_flag`` with duration < 30000, and
    # fetched each one through ``douyin_detail``.
    #
    # url = "http://8.217.190.241:8888/crawler/dou_yin/top_hub_content"
    # payload = json.dumps({
    #     "keyword": keyword,
    #     "category": "全部",
    #     "period": "近90天",
    #     "content_modal": "视频",
    #     "cursor": ""
    # })
    # headers = {
    #     'Content-Type': 'application/json'
    # }
    #
    # response = requests.request("POST", url, headers=headers, data=payload)
    # try:
    #     dt_list = response.json()['data']['data']
    #     L = []
    #     for obj in dt_list:
    #         try:
    #             title = obj['video_desc']
    #             video_id = obj['video_id']
    #             duration = int(obj['duration'])
    #             if sensitive_flag(sensitive_words, title) and duration < 30000:
    #                 res = douyin_detail(video_id)
    #                 if res:
    #                     L.append(res)
    #             else:
    #                 continue
    #         except Exception as e:
    #             continue
    #     logging(
    #         code="8001",
    #         info="抖音搜索",
    #         data={
    #             "keys": keyword,
    #             "search_count": len(dt_list),
    #             "useful_count": len(L)
    #         },
    #         trace_id=trace_id
    #     )
    #     return L
    # except Exception as e:
    #     logging(
    #         code="4003",
    #         info="抖音搜索失败-搜索词:{} 原因:-{}".format(keyword, e),
    #         trace_id=trace_id
    #     )
    #     return []
    logging(
        code="4003",
        info="抖音搜索失败-搜索词:{} 原因:-{}".format(keyword, "抖查查暂停服务"),
        trace_id=trace_id
    )
    return []


def douyin_detail(video_id, timeout=30):
    """
    Fetch the detail record of a Douyin content item from the crawler service.

    :param video_id: content identifier, sent as ``content_id`` in the request
    :param timeout: seconds to wait for the crawler service before giving up;
        without it a stalled endpoint would block the caller indefinitely
    :return: the ``data.data`` object of the JSON response, or ``None`` when
        its ``content_type`` is ``"note"``
    :raises requests.exceptions.RequestException: on connection failure or
        timeout
    :raises ValueError: when the response body is not valid JSON
    :raises KeyError: when the response lacks the expected fields
    """
    url = "http://8.217.190.241:8888/crawler/dou_yin/detail"
    payload = json.dumps({
        "content_id": video_id
    })
    headers = {
        'Content-Type': 'application/json'
    }
    response = requests.request(
        "POST",
        url,
        headers=headers,
        data=payload,
        timeout=timeout
    ).json()
    video_info = response['data']['data']
    # Items whose content_type is "note" are reported as "no result".
    if video_info['content_type'] == "note":
        return None
    return video_info