|
@@ -187,46 +187,6 @@ class SearchMethod(object):
|
|
|
"""
|
|
|
s_words = select_sensitive_words()
|
|
|
|
|
|
- @classmethod
|
|
|
- async def search_v0(cls, text, trace_id):
|
|
|
- """
|
|
|
- 搜索顺序-wx --> baidu --> xigua
|
|
|
- 一共需要返回三条视频
|
|
|
- :return:
|
|
|
- """
|
|
|
- wx_result = []
|
|
|
- if wx_result:
|
|
|
- return {"platform": "wx_search", "result": wx_result[0]}
|
|
|
- else:
|
|
|
- logging(
|
|
|
- code="7001",
|
|
|
- info="通过微信搜索失败---{}".format(text),
|
|
|
- trace_id=trace_id,
|
|
|
- )
|
|
|
- # 微信搜不到的话,采用好看视频搜索
|
|
|
- time.sleep(1)
|
|
|
- baidu_result = hksp_search(key=text, sensitive_words=cls.s_words)
|
|
|
- if baidu_result:
|
|
|
- return {"platform": "baidu_search", "result": baidu_result[0]}
|
|
|
- else:
|
|
|
- # 若好看视频未搜到,则采用西瓜搜索
|
|
|
- logging(
|
|
|
- code="7001",
|
|
|
- info="通过baidu搜索失败---{}".format(text),
|
|
|
- trace_id=trace_id,
|
|
|
- )
|
|
|
- # return None
|
|
|
- xigua_result = xigua_search_v2(keyword=text, sensitive_words=cls.s_words)
|
|
|
- if xigua_result:
|
|
|
- return {"platform": "xg_search", "result": xigua_result[0]}
|
|
|
- else:
|
|
|
- logging(
|
|
|
- code="7001",
|
|
|
- info="通过西瓜搜索失败---{}, 启用兜底方式".format(text),
|
|
|
- trace_id=trace_id,
|
|
|
- )
|
|
|
- return None
|
|
|
-
|
|
|
@classmethod
|
|
|
async def search_v1(cls, text, trace_id):
|
|
|
"""
|
|
@@ -235,36 +195,19 @@ class SearchMethod(object):
|
|
|
:param trace_id:
|
|
|
:return:
|
|
|
"""
|
|
|
- douyin_result = douyin_search(keyword=text, sensitive_words=cls.s_words)
|
|
|
+ douyin_result = douyin_search(keyword=text, sensitive_words=cls.s_words, trace_id=trace_id)
|
|
|
if douyin_result:
|
|
|
return {"platform": "dy_search", "result": douyin_result[0]}
|
|
|
else:
|
|
|
- logging(
|
|
|
- code="7001",
|
|
|
- info="抖音搜索失败--{}".format(text),
|
|
|
- trace_id=trace_id
|
|
|
- )
|
|
|
time.sleep(1)
|
|
|
- baidu_result = hksp_search(key=text, sensitive_words=cls.s_words)
|
|
|
+ baidu_result = hksp_search(key=text, sensitive_words=cls.s_words, trace_id=trace_id)
|
|
|
if baidu_result:
|
|
|
return {"platform": "baidu_search", "result": baidu_result[0]}
|
|
|
else:
|
|
|
- # 若好看视频未搜到,则采用西瓜搜索
|
|
|
- logging(
|
|
|
- code="7001",
|
|
|
- info="通过baidu搜索失败---{}".format(text),
|
|
|
- trace_id=trace_id,
|
|
|
- )
|
|
|
- # return None
|
|
|
xigua_result = xigua_search_v2(keyword=text, sensitive_words=cls.s_words)
|
|
|
if xigua_result:
|
|
|
return {"platform": "xg_search", "result": xigua_result[0]}
|
|
|
else:
|
|
|
- logging(
|
|
|
- code="7001",
|
|
|
- info="通过西瓜搜索失败---{}, 启用兜底方式".format(text),
|
|
|
- trace_id=trace_id,
|
|
|
- )
|
|
|
return None
|
|
|
|
|
|
@classmethod
|
|
@@ -276,14 +219,13 @@ class SearchMethod(object):
|
|
|
:return:
|
|
|
"""
|
|
|
L = []
|
|
|
- print(trace_id)
|
|
|
- douyin_result = douyin_search(keyword=text, sensitive_words=cls.s_words)
|
|
|
+ douyin_result = douyin_search(keyword=text, sensitive_words=cls.s_words, trace_id=trace_id)
|
|
|
for vid_obj in douyin_result:
|
|
|
L.append({"platform": "dy_search", "result": vid_obj})
|
|
|
if len(L) >= 3:
|
|
|
return L
|
|
|
else:
|
|
|
- baidu_result = hksp_search(key=text, sensitive_words=cls.s_words)
|
|
|
+ baidu_result = hksp_search(key=text, sensitive_words=cls.s_words, trace_id=trace_id)
|
|
|
if baidu_result:
|
|
|
L.append({"platform": "baidu_search", "result": baidu_result[0]})
|
|
|
xigua_result = xigua_search_v2(keyword=text, sensitive_words=cls.s_words)
|
|
@@ -336,7 +278,7 @@ async def video_sender(video_obj, user, trace_id, platform, index):
|
|
|
"Content-Type": "application/json",
|
|
|
}
|
|
|
await request_etl(
|
|
|
- url="http://192.168.203.137:4612/etl",
|
|
|
+ url="http://localhost:4612/etl",
|
|
|
headers=header,
|
|
|
json_data=mq_obj
|
|
|
)
|
|
@@ -353,7 +295,6 @@ async def search_videos(params, trace_id, gh_id, mysql_client):
|
|
|
"""
|
|
|
K = KimiServer()
|
|
|
kimi_info = await K.search_kimi_schedule(params=params)
|
|
|
- print("{}---kimi 挖掘正常".format(trace_id))
|
|
|
kimi_title = kimi_info['k_title']
|
|
|
content_title = kimi_info['content_title'].replace("'", "").replace('"', "")
|
|
|
content_keys = json.dumps(kimi_info['content_keys'], ensure_ascii=False)
|
|
@@ -369,7 +310,12 @@ async def search_videos(params, trace_id, gh_id, mysql_client):
|
|
|
SearchAB = SearchABTest(info=kimi_info, gh_id=gh_id)
|
|
|
# 启三个搜索,每个搜索都保证要搜索到, 分别用key1, key2, key3去搜索
|
|
|
recall_list = await SearchAB.ab_5()
|
|
|
- print("一共搜索到{}条视频".format(len(recall_list)))
|
|
|
+ logging(
|
|
|
+ code="1006",
|
|
|
+ info="搜索到{}条视频".format(len(recall_list)),
|
|
|
+ data=recall_list,
|
|
|
+ trace_id=trace_id
|
|
|
+ )
|
|
|
index = 0
|
|
|
for recall_obj in recall_list:
|
|
|
if recall_obj:
|
|
@@ -385,60 +331,19 @@ async def search_videos(params, trace_id, gh_id, mysql_client):
|
|
|
index=index
|
|
|
)
|
|
|
logging(
|
|
|
- code="7004",
|
|
|
+ code="1007",
|
|
|
info="成功请求etl",
|
|
|
+ data=recall_video,
|
|
|
trace_id=trace_id
|
|
|
)
|
|
|
if index >= 3:
|
|
|
print("already downloaded 3 videos")
|
|
|
+ logging(
|
|
|
+ code="1008",
|
|
|
+ info="成功下载三条视频",
|
|
|
+ trace_id=trace_id
|
|
|
+ )
|
|
|
break
|
|
|
- # SearchAB = SearchABTest(info=kimi_info, gh_id=gh_id)
|
|
|
- # recall_obj_1 = await SearchAB.ab_1()
|
|
|
- # # recall_obj_1 = await SearchAB.ab_0()
|
|
|
- # await asyncio.sleep(3)
|
|
|
- # recall_obj_2 = await SearchAB.ab_2()
|
|
|
- # await asyncio.sleep(3)
|
|
|
- # recall_obj_3 = await SearchAB.ab_3()
|
|
|
- # print("{}---视频搜索正常".format(trace_id))
|
|
|
- # recall_list = [recall_obj_1, recall_obj_2, recall_obj_3]
|
|
|
- # un_empty_list = [i for i in recall_list if i]
|
|
|
- # if len(un_empty_list) < 3:
|
|
|
- # await asyncio.sleep(3)
|
|
|
- # recall_obj_4 = await SearchAB.ab_4()
|
|
|
- # if recall_obj_4:
|
|
|
- # un_empty_list.append(recall_obj_4)
|
|
|
- #
|
|
|
- # # 逐条下载,逐条写表
|
|
|
- # if un_empty_list:
|
|
|
- # for index, recall_obj in enumerate(un_empty_list, 1):
|
|
|
- # platform = recall_obj["platform"]
|
|
|
- # recall_video = recall_obj["result"]
|
|
|
- # if recall_video:
|
|
|
- # logging(
|
|
|
- # code="7002",
|
|
|
- # info="视频搜索成功, 搜索平台为--{}".format(platform),
|
|
|
- # trace_id=trace_id,
|
|
|
- # data=recall_video,
|
|
|
- # )
|
|
|
- # response = await video_sender(
|
|
|
- # video_obj=recall_video,
|
|
|
- # user=gh_id_dict.get(gh_id),
|
|
|
- # trace_id=trace_id,
|
|
|
- # platform=platform,
|
|
|
- # index=index
|
|
|
- # )
|
|
|
- # logging(
|
|
|
- # code="7004",
|
|
|
- # info="成功请求etl",
|
|
|
- # trace_id=trace_id,
|
|
|
- # data=response
|
|
|
- # )
|
|
|
- # else:
|
|
|
- # logging(
|
|
|
- # code="7003",
|
|
|
- # info="视频搜索失败, 被敏感词过滤",
|
|
|
- # trace_id=trace_id
|
|
|
- # )
|
|
|
|
|
|
|
|
|
async def re_search_videos(params, trace_id, gh_id):
|