瀏覽代碼

分表,修改task1, task2, search_schedule.py

罗俊辉 10 月之前
父節點
當前提交
0de513f9ea
共有 4 個文件被更改,包括 40 次插入82 次删除
  1. 1 1
      applications/schedule/__init__.py
  2. 18 68
      applications/schedule/search_schedule.py
  3. 4 4
      static/config.py
  4. 17 9
      tasks/task1.py

+ 1 - 1
applications/schedule/__init__.py

@@ -2,5 +2,5 @@
 @author: luojunhui
 """
 from .process_schedule import recall_videos
-from .search_schedule import search_videos, re_search_videos
+from .search_schedule import search_videos
 # from .process_schedule import return_info_v2

+ 18 - 68
applications/schedule/search_schedule.py

@@ -234,7 +234,7 @@ class SearchMethod(object):
             return L
 
 
-async def video_sender(video_obj, user, trace_id, platform, content_id, index):
+async def video_sender(video_obj, user, trace_id, platform, content_id):
     """
     异步处理微信 video_obj
     公众号和站内账号一一对应
@@ -273,22 +273,23 @@ async def video_sender(video_obj, user, trace_id, platform, content_id, index):
         )
     else:
         mq_obj = {}
-    mq_obj['index'] = index
     mq_obj['trace_id'] = trace_id
     mq_obj['content_id'] = content_id
     header = {
         "Content-Type": "application/json",
     }
-    await request_etl(
+    response = await request_etl(
         url="http://192.168.203.137:4612/etl",
         headers=header,
         json_data=mq_obj
     )
-    # await request_etl(
+    return response
+    # response = await request_etl(
     #     url="http://localhost:4612/etl",
     #     headers=header,
     #     json_data=mq_obj
     # )
+    # return response
 
 
 async def search_videos(params, trace_id, gh_id, mysql_client):
@@ -328,29 +329,28 @@ async def search_videos(params, trace_id, gh_id, mysql_client):
     )
     # 按照标题相似度排序
     ranked_list = title_similarity_rank(content_title=params['title'].split("@@")[-1], recall_list=recall_list)
-    for i in ranked_list:
-        print(i['title'], i['score'])
     index = 0
     for recall_obj in ranked_list:
         if recall_obj:
             platform = recall_obj['platform']
             recall_video = recall_obj['result']
             if recall_video:
-                index += 1
-                await video_sender(
+                response = await video_sender(
                     video_obj=recall_video,
                     user=gh_id_dict.get(gh_id),
                     trace_id=trace_id,
                     platform=platform,
-                    content_id=params['content_id'],
-                    index=index
-                )
-                logging(
-                    code="1007",
-                    info="成功请求etl",
-                    data=recall_video,
-                    trace_id=trace_id
+                    content_id=params['content_id']
                 )
+                print(response)
+                if response['status'] == "success":
+                    index += 1
+                    logging(
+                        code="1007",
+                        info="成功请求etl",
+                        data=recall_video,
+                        trace_id=trace_id
+                    )
                 if index >= 3:
                     print("already downloaded 3 videos")
                     logging(
@@ -358,56 +358,6 @@ async def search_videos(params, trace_id, gh_id, mysql_client):
                         info="成功下载三条视频",
                         trace_id=trace_id
                     )
-                    break
-
-
-async def re_search_videos(params, trace_id, gh_id):
-    """
-    重新搜索接口
-    :param params:
-    :param trace_id:
-    :param gh_id:
-    :return:
-    """
-    try:
-        obj = {
-            "ori_title": params['title'],
-            "content_title": params['kimi_summary'],
-            "content_keys": json.loads(params['kimi_keys']),
-            "trace_id": params['trace_id']
-        }
-    except:
-        obj = {
-            "ori_title": params['title'],
-            "content_title": params['kimi_summary'],
-            "content_keys": params['kimi_keys'],
-            "trace_id": params['trace_id']
-        }
-    SearchAB = SearchABTest(info=obj, gh_id=gh_id)
-    # 启三个搜索,每个搜索都保证要搜索到, 分别用key1, key2, key3去搜索
-    recall_list = await SearchAB.ab_5()
-    print("一共搜索到{}条视频".format(len(recall_list)))
-    index = 0
-    for recall_obj in recall_list:
-        if recall_obj:
-            platform = recall_obj['platform']
-            recall_video = recall_obj['result']
-            if recall_video:
-                index += 1
-                await video_sender(
-                    video_obj=recall_video,
-                    user=gh_id_dict.get(gh_id),
-                    trace_id=trace_id,
-                    platform=platform,
-                    index=index
-                )
-                logging(
-                    code="7004",
-                    info="成功请求etl",
-                    trace_id=trace_id
-                )
-                if index >= 3:
-                    print("already downloaded 3 videos")
-                    break
+                    return index
+    return index
 
-    print("一个匹配到{}条".format(index))

+ 4 - 4
static/config.py

@@ -507,12 +507,12 @@ gh_id_dict = {
 
 
 # prod
-# db_article = "long_articles_video"
-# db_video = "article_match_videos"
+db_article = "long_articles_video"
+db_video = "article_match_videos"
 
 # dev
-db_article = "long_articles_video_dev"
-db_video = "article_match_videos_dev"
+# db_article = "long_articles_video_dev"
+# db_video = "article_match_videos_dev"
 
 # spider coroutines
 spider_coroutines = 6

+ 17 - 9
tasks/task1.py

@@ -173,7 +173,7 @@ class MatchTask1(object):
             )
         )
         try:
-            await search_videos(
+            video_count = await search_videos(
                 params={
                     "title": params['title'],
                     "content": params['text'],
@@ -186,7 +186,7 @@ class MatchTask1(object):
             )
             select_sql = f"""
                 SELECT video_id
-                FROM article_match_videos
+                FROM {db_video}
                 WHERE content_id = '{params['content_id']}'
             """
             result = await self.mysql_client.async_select(sql=select_sql)
@@ -194,15 +194,23 @@ class MatchTask1(object):
             if vid1 or vid2 or vid3:
                 update_sql2 = f"""
                     UPDATE {db_article}
-                    SET 
-                       content_status = %s,
-                       process_times = %s
-                       WHERE trace_id = %s;
+                    SET
+                        recall_video_id1 = %s,
+                        recall_video_id2 = %s,
+                        recall_video_id3 = %s,
+                        content_status = %s,
+                        process_times = %s
+                        WHERE trace_id = %s;
                 """
                 await self.mysql_client.async_insert(
                     sql=update_sql2,
                     params=(
-                        2, {int(params['process_times']) + 1}, params['trace_id']
+                        vid1 if vid1 else "NULL",
+                        vid2 if vid2 else "NULL",
+                        vid3 if vid3 else "NULL",
+                        2,
+                        {int(params['process_times']) + 1},
+                        params['trace_id']
                     )
                 )
                 logging(
@@ -254,7 +262,7 @@ class MatchTask1(object):
                 )
                 update_sql4 = f"""
                     UPDATE {db_article}
-                    SET 
+                    SET
                        content_status = %s,
                        process_times = %s
                     WHERE trace_id = %s;
@@ -271,7 +279,7 @@ class MatchTask1(object):
                 )
                 update_sql4 = f"""
                                     UPDATE {db_article}
-                                    SET 
+                                    SET
                                        content_status = %s,
                                        process_times = %s
                                     WHERE trace_id = %s;