123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116 |
- """
- @author: luojunhui
- """
- import json
- import requests
- from applications.functions.common import sensitive_flag
- from applications.log import logging
- from applications.const import server_const
def douyin_search(keyword, sensitive_words, trace_id):
    """
    Query the crawler's Douyin "top hub content" endpoint for a keyword and
    collect the detail records of the hits that pass the local filters.

    A hit is kept when ``sensitive_flag(sensitive_words, title)`` is truthy,
    its duration is below ``server_const.MAX_VIDEO_DURATION * 1000`` and
    ``douyin_detail`` returns a truthy record for it. Any hit that raises
    while being processed is skipped.

    :param keyword: the words to search for
    :param sensitive_words: sensitive words, forwarded to ``sensitive_flag``
    :param trace_id: id attached to the log records emitted by this search
    :return: list of detail records; an empty list when the search fails
    """
    search_url = "http://crawler-cn.aiddit.com/crawler/dou_yin/top_hub_content"
    request_headers = {'Content-Type': 'application/json'}
    request_body = json.dumps({
        "keyword": keyword,
        "category": "全部",
        "period": "近90天",
        "content_modal": "视频",
        "cursor": ""
    })
    try:
        search_response = requests.request(
            "POST",
            search_url,
            headers=request_headers,
            data=request_body,
            timeout=60
        )
        candidates = search_response.json()['data']['data']
        logging(code="4002", info="抖音搜索成功", trace_id=trace_id)

        detail_records = []
        for candidate in candidates:
            try:
                title = candidate['video_desc']
                video_id = candidate['video_id']
                duration = int(candidate['duration'])
                # NOTE(review): the * 1000 suggests duration is in
                # milliseconds and the constant in seconds - confirm.
                passes_filters = (
                    sensitive_flag(sensitive_words, title)
                    and duration < server_const.MAX_VIDEO_DURATION * 1000
                )
                if not passes_filters:
                    continue
                detail = douyin_detail(video_id)
                if detail:
                    detail_records.append(detail)
            except Exception:
                # Best effort: one malformed or failing hit must not abort
                # the whole search.
                continue

        summary = {
            "keys": keyword,
            "search_count": len(candidates),
            "useful_count": len(detail_records)
        }
        logging(code="8001", info="抖音搜索", data=summary, trace_id=trace_id)
        return detail_records
    except Exception as search_error:
        logging(
            code="4003",
            info="抖音搜索失败",
            trace_id=trace_id,
            data={"error": str(search_error)}
        )
        return []
def douyin_detail(video_id):
    """
    Fetch the detail record of a single Douyin item from the crawler service.

    Every failure mode (request error, non-JSON body, unexpected payload
    shape, non-zero ``code`` in the response) is logged with code "4006" and
    reported to the caller as None rather than as an exception.

    :param video_id: id of the item, sent as ``content_id`` in the request
    :return: the ``data.data`` object of the response, or None when the
        request fails, the service reports an error, or the item's
        ``content_type`` is "note"
    """
    url = "http://crawler-cn.aiddit.com/crawler/dou_yin/detail"
    payload = json.dumps({
        "content_id": video_id
    })
    headers = {
        'Content-Type': 'application/json'
    }
    try:
        response = requests.request("POST", url, headers=headers, data=payload, timeout=60).json()
    except (requests.RequestException, ValueError) as e:
        # Timeouts, connection errors and undecodable bodies used to escape
        # without any log record; treat them like any other detail failure.
        logging(
            code="4006",
            info="抖音请求详情失败",
            data={"error": str(e)}
        )
        return None

    logging(
        code="4005",
        info="抖音请求详情",
        data=response
    )

    if not isinstance(response, dict):
        logging(
            code="4006",
            info="抖音请求详情失败",
            data={"error": "unexpected response type: " + type(response).__name__}
        )
        return None

    # .get() so that a payload missing 'code' or 'msg' is reported as a
    # failure instead of raising KeyError.
    if response.get('code') != 0:
        logging(
            code="4006",
            info="抖音请求详情失败",
            data={"error": response.get('msg')}
        )
        return None

    try:
        video_info = response['data']['data']
        if video_info['content_type'] == "note":
            # Items whose content_type is "note" are not returned.
            return None
        return video_info
    except (LookupError, TypeError) as e:
        logging(
            code="4006",
            info="抖音请求详情失败",
            data={"error": str(e)}
        )
        return None
|