""" @author: luojunhui """ from datetime import datetime from applications.functions.video_item import VideoProducer from applications.functions.log import logging from applications.match_algorithm.rank import title_similarity_rank from .spiderAB import SearchABTest from .spiderSchedule import SearchMethod async def videoSender(video_obj, user, trace_id, platform, content_id, table, dbClient): """ 异步处理微信 video_obj 公众号和站内账号一一对应 :param dbClient: :param table: :param content_id: :param platform: :param user: :param trace_id: :param video_obj: :return: """ Video = VideoProducer() if platform == "xg_search": mq_obj = Video.xg_video_producer( video_obj=video_obj, user=user, trace_id=trace_id, ) elif platform == "baidu_search": mq_obj = Video.baidu_video_producer( video_obj=video_obj, user=user, trace_id=trace_id, ) elif platform == "wx_search": mq_obj = Video.wx_video_producer( video_obj=video_obj, user=user, trace_id=trace_id, ) elif platform == "dy_search": mq_obj = Video.dy_video_producer( video_obj=video_obj, user=user, trace_id=trace_id, ) else: mq_obj = {} mq_obj['trace_id'] = trace_id mq_obj['content_id'] = content_id insert_sql = f""" INSERT INTO {table} (content_id, out_video_id, platform, video_title, play_count, like_count, publish_time, crawler_time, duration, video_url, cover_url, user_id, trace_id) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s); """ await dbClient.asyncInsert( sql=insert_sql, params=( content_id, mq_obj['video_id'], platform, mq_obj['video_title'], mq_obj['play_cnt'], mq_obj['like_cnt'], datetime.fromtimestamp(mq_obj['publish_time_stamp']).strftime('%Y-%m-%d %H:%M:%S'), datetime.now().__str__(), mq_obj['duration'], mq_obj['video_url'], mq_obj['cover_url'], mq_obj['user_id'], trace_id ) ) async def searchVideos(info, ghIdMap, dbClient): """ search and send msg to ETL :param dbClient: :param ghIdMap: :param info: :return: """ SearchAB = SearchABTest(info=info, searchMethod=SearchMethod()) # 启三个搜索,每个搜索都保证要搜索到, 分别用key1, key2, key3去搜索 trace_id = info['traceId'] gh_id = info['ghId'] content_id = info['contentId'] recall_list = await SearchAB.ab_5() logging( code="1006", info="搜索到{}条视频".format(len(recall_list)), data=recall_list, trace_id=info['traceId'] ) # 按照标题相似度排序 ranked_list = title_similarity_rank(content_title=info['oriTitle'].split("@@")[-1], recall_list=recall_list) for recall_obj in ranked_list: if recall_obj: platform = recall_obj['platform'] recall_video = recall_obj['result'] if recall_video: await videoSender( video_obj=recall_video, user=ghIdMap.get(gh_id, 69637498), trace_id=trace_id, platform=platform, content_id=content_id, table=info['spider'], dbClient=dbClient )