Browse Source

匹配过程中，取消 illegal 字段

luojunhui 3 months ago
parent
commit
2300f16c69
3 changed files with 7 additions and 7 deletions
  1. tasks/history_task.py (+1 -1)
  2. tasks/new_contentId_task.py (+1 -1)
  3. tasks/utils/spider_task.py (+5 -5)

+ 1 - 1
tasks/history_task.py

@@ -52,7 +52,7 @@ class historyContentIdTask(object):
             JOIN (
                 select content_id, count(1) as cnt 
                 from {self.article_crawler_video_table}
-                where download_status = {self.const.VIDEO_DOWNLOAD_SUCCESS_STATUS} and is_illegal = {self.const.VIDEO_SAFE}
+                where download_status = {self.const.VIDEO_DOWNLOAD_SUCCESS_STATUS}
                 group by content_id
             ) VID on ART.content_id = VID.content_id and VID.cnt >= 3
             WHERE ART.content_status = {self.const.TASK_INIT_STATUS} and ART.process_times <= {self.const.TASK_MAX_PROCESS_TIMES}

+ 1 - 1
tasks/new_contentId_task.py

@@ -49,7 +49,7 @@ class NewContentIdTask(object):
                 SELECT content_id, count(1) as cnt
                 FROM {self.article_crawler_video_table}
                 WHERE download_status = {NewContentIdTaskConst.VIDEO_DOWNLOAD_SUCCESS_STATUS}
-                    AND is_illegal = {NewContentIdTaskConst.VIDEO_SAFE}
+                --  AND is_illegal = {NewContentIdTaskConst.VIDEO_SAFE}
                 GROUP BY content_id
             ) t2
             ON t1.content_id = t2.content_id

+ 5 - 5
tasks/utils/spider_task.py

@@ -18,8 +18,8 @@ async def whether_downloaded_videos_exists(content_id, article_crawler_video_tab
     SELECT id
     FROM {article_crawler_video_table}
     WHERE content_id = '{content_id}' 
-        AND download_status = {new_content_id_task_const.VIDEO_DOWNLOAD_SUCCESS_STATUS}
-        AND is_illegal = {new_content_id_task_const.VIDEO_SAFE};
+        AND download_status = {new_content_id_task_const.VIDEO_DOWNLOAD_SUCCESS_STATUS};
+    --   AND is_illegal = {new_content_id_task_const.VIDEO_SAFE};
     """
     res_tuple = await db_client.async_select(sql)
     if len(res_tuple) >= new_content_id_task_const.MIN_MATCH_VIDEO_NUM:
@@ -38,7 +38,7 @@ async def get_downloaded_videos(content_id, article_crawler_video_table, db_clie
         FROM {article_crawler_video_table}
         WHERE content_id = '{content_id}' 
             AND download_status = {new_content_id_task_const.VIDEO_DOWNLOAD_SUCCESS_STATUS}
-            AND is_illegal = {new_content_id_task_const.VIDEO_SAFE}
+            -- AND is_illegal = {new_content_id_task_const.VIDEO_SAFE}
         ORDER BY score DESC;
     """
 
@@ -65,8 +65,8 @@ async def update_crawler_table_with_exist_content_id(root_content_id, content_id
             duration, video_url, cover_url, download_status, video_oss_path, cover_oss_path, user_id, score, is_illegal
         FROM {article_crawler_video_table}
         WHERE content_id = '{root_content_id}' 
-            AND download_status = {new_content_id_task_const.VIDEO_DOWNLOAD_SUCCESS_STATUS}
-            AND is_illegal = {new_content_id_task_const.VIDEO_SAFE};
+            AND download_status = {new_content_id_task_const.VIDEO_DOWNLOAD_SUCCESS_STATUS};
+            -- AND is_illegal = {new_content_id_task_const.VIDEO_SAFE};
     """
     res_tuple = await db_client.async_select(select_sql)
     if res_tuple: