|
@@ -141,6 +141,46 @@ class SearchABTest(object):
|
|
|
else:
|
|
|
return await cls.base_line()
|
|
|
|
|
|
@classmethod
async def ab_5(cls):
    """
    Incremental search; returns result_list.

    Queries are issued one after another and their hits are accumulated:

    1. the first 15 characters of ``cls.article_summary``
    2. the first 15 characters of ``cls.ori_title``
    3. ``cls.article_keys[0]``
    4. ``cls.article_keys[1]``
    5. ``cls.article_keys[2]``

    The cascade stops after stages 1-3 as soon as more than three hits
    have been collected, and after stage 4 as soon as there is at least
    one hit. Stage 5 is the last resort and its outcome is returned as is.
    Keywords missing from ``cls.article_keys`` are skipped instead of
    raising ``IndexError``.

    :return: list of the entries produced by ``SearchMethod.search_v2``,
        possibly empty
    """

    async def _search(text):
        # Every stage uses the same backend call with the same trace id.
        return await SearchMethod().search_v2(text=text, trace_id=cls.trace_id)

    result_list = await _search(cls.article_summary[:15])
    if len(result_list) > 3:
        return result_list

    result_list += await _search(cls.ori_title[:15])
    if len(result_list) > 3:
        return result_list

    # Minimum number of accumulated hits that ends the cascade after each
    # keyword: "> 3" for the first, "any hit" for the second, and the third
    # keyword always ends it. zip() stops early when fewer than three
    # keywords are available, so no out-of-range index is ever touched.
    keywords = cls.article_keys or []
    for keyword, enough in zip(keywords[:3], (4, 1, 0)):
        result_list += await _search(keyword)
        if len(result_list) >= enough:
            return result_list
    return result_list
|
|
|
+
|
|
|
|
|
|
class SearchMethod(object):
|
|
|
"""
|
|
@@ -228,6 +268,30 @@ class SearchMethod(object):
|
|
|
)
|
|
|
return None
|
|
|
|
|
|
@classmethod
async def search_v2(cls, text, trace_id):
    """
    dy ---> baidu ---> xigua

    Collects every Douyin hit for ``text``. When that yields fewer than
    three entries, the first Baidu hit and the first Xigua hit are appended
    (each only if its search returned something).

    :param trace_id: trace id of the current request; only printed here
    :param text: keyword handed to each platform search
    :return: list of ``{"platform": <tag>, "result": <video object>}``
        dicts, possibly empty
    """
    # NOTE(review): the three search helpers are called without ``await``;
    # if they perform network I/O they will block the event loop - confirm.
    print(trace_id)

    # Tolerate a None/empty Douyin response the same way the Baidu and
    # Xigua responses are guarded below, so the fallbacks still run.
    douyin_result = douyin_search(keyword=text, sensitive_words=cls.s_words) or []
    found = [
        {"platform": "dy_search", "result": vid_obj}
        for vid_obj in douyin_result
    ]
    if len(found) >= 3:
        return found

    baidu_result = hksp_search(key=text, sensitive_words=cls.s_words)
    if baidu_result:
        found.append({"platform": "baidu_search", "result": baidu_result[0]})

    xigua_result = xigua_search_v2(keyword=text, sensitive_words=cls.s_words)
    if xigua_result:
        found.append({"platform": "xg_search", "result": xigua_result[0]})
    return found
|
|
|
+
|
|
|
|
|
|
async def video_sender(video_obj, user, trace_id, platform):
|
|
|
"""
|
|
@@ -271,9 +335,9 @@ async def video_sender(video_obj, user, trace_id, platform):
|
|
|
video_id = await AE.etl_deal()
|
|
|
logging(
|
|
|
code="6002",
|
|
|
- info="视频下载完成",
|
|
|
+ info="视频下载完成, 平台是---{}".format(platform),
|
|
|
data=mq_obj,
|
|
|
- trace_id=trace_id
|
|
|
+ trace_id=trace_id,
|
|
|
)
|
|
|
return video_id
|
|
|
|
|
@@ -352,6 +416,31 @@ async def search_videos(params, trace_id, gh_id, mysql_client):
|
|
|
)
|
|
|
|
|
|
|
|
|
async def insert_into_mysql(index, mysql_client, recall_video, gh_id, trace_id, platform):
    """
    Pass one recalled video to ``video_sender`` and record the returned id.

    The id is written to column ``recall_video_id{index}`` of the
    ``long_articles_video`` row whose ``trace_id`` matches.

    :param platform: platform tag forwarded to ``video_sender``
    :param trace_id: trace id forwarded to ``video_sender`` and used in the
        WHERE clause
    :param gh_id: key looked up in ``gh_id_dict`` to obtain the ``user``
        argument of ``video_sender``
    :param index: suffix of the ``recall_video_id`` column to update
    :param mysql_client: client whose ``async_insert`` executes the statement
    :param recall_video: video object forwarded to ``video_sender``
    """
    uploader = gh_id_dict.get(gh_id)
    video_id = await video_sender(
        video_obj=recall_video,
        user=uploader,
        trace_id=trace_id,
        platform=platform,
    )
    # NOTE(review): the statement is assembled by string interpolation. If
    # trace_id can come from an external request this is injectable, and a
    # None video_id would be rendered literally - consider a parameterized
    # query if mysql_client supports one.
    statement = f"""
        UPDATE long_articles_video
        SET
            recall_video_id{index} = {video_id}
        WHERE
            trace_id = '{trace_id}'
        """
    await mysql_client.async_insert(statement)
|
|
|
+
|
|
|
+
|
|
|
async def re_search_videos(params, trace_id, gh_id, mysql_client):
|
|
|
"""
|
|
|
重新搜索接口
|
|
@@ -360,63 +449,34 @@ async def re_search_videos(params, trace_id, gh_id, mysql_client):
|
|
|
:param gh_id:
|
|
|
:param mysql_client:
|
|
|
:return:
|
|
|
- cls.ori_title = info["ori_title"]
|
|
|
- cls.article_summary = info["content_title"]
|
|
|
- cls.article_keys = info["content_keys"]
|
|
|
- cls.trace_id = info["trace_id"]
|
|
|
"""
|
|
|
obj = {
|
|
|
- "ori_title": params['ori_title'],
|
|
|
+ "ori_title": params['title'],
|
|
|
"content_title": params['kimi_summary'],
|
|
|
"content_keys": params['kimi_keys'],
|
|
|
"trace_id": params['trace_id']
|
|
|
}
|
|
|
SearchAB = SearchABTest(info=obj, gh_id=gh_id)
|
|
|
- recall_obj_1 = await SearchAB.ab_1()
|
|
|
- # recall_obj_1 = await SearchAB.ab_0()
|
|
|
- await asyncio.sleep(3)
|
|
|
- recall_obj_2 = await SearchAB.ab_2()
|
|
|
- await asyncio.sleep(3)
|
|
|
- recall_obj_3 = await SearchAB.ab_3()
|
|
|
- print("{}---视频搜索正常".format(trace_id))
|
|
|
- recall_list = [recall_obj_1, recall_obj_2, recall_obj_3]
|
|
|
- un_empty_list = [i for i in recall_list if i]
|
|
|
- if len(un_empty_list) < 3:
|
|
|
- await asyncio.sleep(3)
|
|
|
- recall_obj_4 = await SearchAB.ab_4()
|
|
|
- if recall_obj_4:
|
|
|
- un_empty_list.append(recall_obj_4)
|
|
|
-
|
|
|
- # 逐条下载,逐条写表
|
|
|
- if un_empty_list:
|
|
|
- for index, recall_obj in enumerate(un_empty_list, 1):
|
|
|
- platform = recall_obj["platform"]
|
|
|
- recall_video = recall_obj["result"]
|
|
|
+ # 启三个搜索,每个搜索都保证要搜索到, 分别用key1, key2, key3去搜索
|
|
|
+ recall_list = await SearchAB.ab_5()
|
|
|
+ print("一共搜索到{}条视频".format(len(recall_list)))
|
|
|
+ index = 0
|
|
|
+ for recall_obj in recall_list:
|
|
|
+ if recall_obj:
|
|
|
+ platform = recall_obj['platform']
|
|
|
+ recall_video = recall_obj['result']
|
|
|
if recall_video:
|
|
|
- logging(
|
|
|
- code="7002",
|
|
|
- info="视频搜索成功, 搜索平台为--{}".format(platform),
|
|
|
- trace_id=trace_id,
|
|
|
- data=recall_video,
|
|
|
- )
|
|
|
- video_id = await video_sender(
|
|
|
- video_obj=recall_video,
|
|
|
- user=gh_id_dict.get(gh_id),
|
|
|
+ index += 1
|
|
|
+ await insert_into_mysql(
|
|
|
+ index=index,
|
|
|
+ mysql_client=mysql_client,
|
|
|
+ recall_video=recall_video,
|
|
|
+ gh_id=gh_id,
|
|
|
trace_id=trace_id,
|
|
|
- platform=platform,
|
|
|
+ platform=platform
|
|
|
)
|
|
|
- update_id_sql = f"""
|
|
|
- UPDATE long_articles_video
|
|
|
- SET
|
|
|
- recall_video_id{index} = {video_id}
|
|
|
- WHERE
|
|
|
- trace_id = '{trace_id}'
|
|
|
- """
|
|
|
- await mysql_client.async_insert(update_id_sql)
|
|
|
- else:
|
|
|
- logging(
|
|
|
- code="7003",
|
|
|
- info="视频搜索失败, 被敏感词过滤",
|
|
|
- trace_id=trace_id
|
|
|
- )
|
|
|
+ if index >= 3:
|
|
|
+ print("already downloaded 3 videos")
|
|
|
+ break
|
|
|
|
|
|
+ print("一个匹配到{}条文章".format(index))
|