"""
@author: luojunhui

Helpers for querying the Douyin endpoints of the crawler service.
"""
import json

import requests

from applications.functions.common import sensitive_flag
from applications.log import logging

# Endpoints of the crawler service used by this module.
_SEARCH_URL = "http://8.217.190.241:8888/crawler/dou_yin/top_hub_content"
_DETAIL_URL = "http://8.217.190.241:8888/crawler/dou_yin/detail"

_JSON_HEADERS = {'Content-Type': 'application/json'}

# Seconds to wait for the crawler service before giving up. Without a
# timeout, ``requests`` waits indefinitely on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 30

# Search hits whose ``duration`` is at or above this value are skipped.
# NOTE(review): the unit is presumably milliseconds (i.e. 30 s) - confirm
# against the crawler API.
_MAX_DURATION = 30000


def _post_json(url, body, timeout):
    """POST *body* as a JSON document to *url* and return the decoded JSON reply."""
    response = requests.request(
        "POST",
        url,
        headers=_JSON_HEADERS,
        data=json.dumps(body),
        timeout=timeout,
    )
    return response.json()


def douyin_search(keyword, sensitive_words, trace_id, timeout=_REQUEST_TIMEOUT_SECONDS):
    """
    Search Douyin videos through the crawler service and collect their details.

    Every search hit whose title is accepted by ``sensitive_flag`` and whose
    duration is below ``_MAX_DURATION`` is resolved with ``douyin_detail``;
    all non-empty details are returned in the order the search produced them.
    No ranking is applied and the number of results is not capped.

    :param keyword: the words to be searched
    :param sensitive_words: sensitive words, handed to ``sensitive_flag``
        together with each video title
    :param trace_id: identifier forwarded to every log record
    :param timeout: seconds to wait for each HTTP request
    :return: list of video detail dicts; ``[]`` when the search itself fails
        (the failure is logged with code ``4003``)
    """
    body = {
        "keyword": keyword,
        "category": "全部",
        "period": "近90天",
        "content_modal": "视频",
        "cursor": ""
    }
    try:
        # The request lives inside the ``try`` so that connection errors and
        # timeouts are logged as a search failure instead of escaping.
        candidates = _post_json(_SEARCH_URL, body, timeout)['data']['data']
        videos = []
        for item in candidates:
            try:
                title = item['video_desc']
                video_id = item['video_id']
                duration = int(item['duration'])
                # Only hits for which ``sensitive_flag`` is truthy are kept.
                # NOTE(review): presumably truthy means the title passed the
                # sensitive-word check - confirm in applications.functions.common.
                if sensitive_flag(sensitive_words, title) and duration < _MAX_DURATION:
                    detail = douyin_detail(video_id, timeout=timeout)
                    if detail:
                        videos.append(detail)
            except Exception:
                # Best effort: one malformed hit or one failed detail lookup
                # must not discard the remaining results.
                continue
        logging(
            code="8001",
            info="抖音搜索",
            data={
                "keys": keyword,
                "search_count": len(candidates),
                "useful_count": len(videos)
            },
            trace_id=trace_id
        )
        return videos
    except Exception as e:
        logging(
            code="4003",
            info="抖音搜索失败-搜索词:{} 原因:-{}".format(keyword, e),
            trace_id=trace_id
        )
        return []
    # Disabled alternative kept for reference: report the upstream service
    # as paused and return no results.
    # logging(
    #         code="4003",
    #         info="抖音搜索失败-搜索词:{} 原因:-{}".format(keyword, "抖查查暂停服务"),
    #         trace_id=trace_id
    #     )
    # return []


def douyin_detail(video_id, timeout=_REQUEST_TIMEOUT_SECONDS):
    """
    Fetch the detail record of one Douyin item from the crawler service.

    :param video_id: id of the item, sent as ``content_id``
    :param timeout: seconds to wait for the HTTP request
    :return: the ``data.data`` dict of the response, or ``None`` when its
        ``content_type`` is ``"note"``
    :raises requests.exceptions.RequestException: on connection failure or timeout
    :raises ValueError: when the response body is not valid JSON
    :raises KeyError: when the response lacks the expected keys
    """
    video_info = _post_json(_DETAIL_URL, {"content_id": video_id}, timeout)['data']['data']
    if video_info['content_type'] == "note":
        return None
    return video_info