ehlxr před 1 rokem
rodič
revize
16bdba1744

+ 0 - 3
etl-core/src/main/java/com/tzld/crawler/etl/config/LoggerLevelRefresher.java

@@ -28,7 +28,6 @@ import com.ctrip.framework.apollo.model.ConfigChangeEvent;
 import com.ctrip.framework.apollo.spring.annotation.ApolloConfigChangeListener;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.logging.LogLevel;
 import org.springframework.boot.logging.LoggingSystem;
 import org.springframework.stereotype.Component;
@@ -45,12 +44,10 @@ public class LoggerLevelRefresher {
     private static final String LOGGER_TAG = "logging.level.";
     private final LoggingSystem loggingSystem;
 
-    @Autowired
     public LoggerLevelRefresher(LoggingSystem loggingSystem) {
         this.loggingSystem = loggingSystem;
     }
 
-    @SuppressWarnings("unused")
     @ApolloConfigChangeListener(interestedKeyPrefixes = LOGGER_TAG)
     private void onChange(ConfigChangeEvent changeEvent) {
         for (String key : changeEvent.changedKeys()) {

+ 36 - 33
etl-core/src/main/java/com/tzld/crawler/etl/service/impl/EtlServiceImpl.java

@@ -72,7 +72,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.Executor;
 import java.util.concurrent.Executors;
+import java.util.function.BooleanSupplier;
 import java.util.function.Function;
+import java.util.function.Predicate;
 import java.util.function.Supplier;
 
 /**
@@ -81,7 +83,7 @@ import java.util.function.Supplier;
  */
 @Service
 public class EtlServiceImpl implements EtlService {
-    private final static Logger log = LoggerFactory.getLogger(EtlServiceImpl.class);
+    private static final Logger log = LoggerFactory.getLogger(EtlServiceImpl.class);
     private final StrategyHandlerService strategyHandlerService;
     private final AliyunOssManager aliyunOssManager;
     private final LongVideoFeign longVideoFeign;
@@ -119,8 +121,8 @@ public class EtlServiceImpl implements EtlService {
     private Executor pool;
 
     public EtlServiceImpl(StrategyHandlerService strategyHandlerService, AliyunOssManager aliyunOssManager,
-                          LongVideoFeign longVideoFeign, CrawlerVideoMapper crawlerVideoMapper, SlsService slsService,
-                          CrawlerVideoExtMapper crawlerVideoExtMapper, CrawlerUserV3Mapper crawlerUserV3Mapper) {
+            LongVideoFeign longVideoFeign, CrawlerVideoMapper crawlerVideoMapper, SlsService slsService,
+            CrawlerVideoExtMapper crawlerVideoExtMapper, CrawlerUserV3Mapper crawlerUserV3Mapper) {
         this.strategyHandlerService = strategyHandlerService;
         this.aliyunOssManager = aliyunOssManager;
         this.longVideoFeign = longVideoFeign;
@@ -186,18 +188,20 @@ public class EtlServiceImpl implements EtlService {
     }
 
     private long save2db(CrawlerVideo crawlerVideo) {
-        return retrySupplierR(() -> {
+        Long id = retrySupplierR(() -> {
             try {
                 crawlerVideoExtMapper.insertSelectiveReturnId(crawlerVideo);
-                slsService.log("message", "视频信息写入数据库成功", "crawler", crawlerVideo.getPlatform(), "mode", crawlerVideo.getStrategy());
-                return Pair.of(true, crawlerVideo.getId());
+                slsService.log("message", "视频信息写入数据库成功", "crawler", crawlerVideo.getPlatform(), "mode",
+                        crawlerVideo.getStrategy());
+                return Pair.of(false, crawlerVideo.getId());
             } catch (DuplicateKeyException e) {
                 // 根据站外视频 ID 唯一约束 key 做去重校验
                 log.info("out video id {} of platform {} strategy {} has exist!", crawlerVideo.getOutVideoId(),
                         crawlerVideo.getPlatform(), crawlerVideo.getStrategy());
-                return Pair.of(true, -1L);
+                return Pair.of(false, 0L);
             }
         }, "video2db", String.format("save video info [%s] to db", crawlerVideo));
+        return id == null ? 0 : id;
     }
 
     private long videoSend(StrategyDataDto data) {
@@ -250,7 +254,8 @@ public class EtlServiceImpl implements EtlService {
                 }
                 String range = feishuRangeMap.get(platform + strategyType);
 
-                log.info("platform [{}] strategy [{}] sheetToken is [{}], sheetId is [{}], range is [{}]", platform, strategy, sheetToken, sheetId, range);
+                log.info("platform [{}] strategy [{}] sheetToken is [{}], sheetId is [{}], range is [{}]", platform,
+                        strategy, sheetToken, sheetId, range);
                 String fsResp = FeishuUtils.insertRows(feishuAppid, feishuAppsecret, sheetToken, sheetId, 1, 2);
                 log.debug("insert columns to feishu sheet response is [{}]", fsResp);
 
@@ -261,7 +266,7 @@ public class EtlServiceImpl implements EtlService {
                     CrawlerUserV3Example example = new CrawlerUserV3Example();
                     example.createCriteria().andUidEqualTo(data.getUserId());
                     List<CrawlerUserV3> crawlerUserV3s = crawlerUserV3Mapper.selectByExample(example);
-                    value.add(crawlerUserV3s.size() > 0 ? crawlerUserV3s.get(0).getLink() : "");
+                    value.add(!crawlerUserV3s.isEmpty() ? crawlerUserV3s.get(0).getLink() : "");
                 }
 
                 value.add(data.getTitleScore());
@@ -306,11 +311,11 @@ public class EtlServiceImpl implements EtlService {
         // 音、视频合成
         if (!Strings.isNullOrEmpty(audioUrl)) {
             String audioPath = urlDownload(data.getAudioUrl(), "longvideo/crawler_local/audio", title);
-            retryFunc((t) -> {
+            retryFunc(t -> {
                 try {
                     VideoUtils.videoSynthesis(ffmpegPath, downloadPath + File.separator + t,
                             downloadPath + File.separator + audioPath, downloadPath + File.separator + t + "_comp.mp4");
-                    return true;
+                    return false;
                 } catch (Exception e) {
                     throw new RuntimeException(e);
                 }
@@ -359,26 +364,23 @@ public class EtlServiceImpl implements EtlService {
         String localFilePath = downloadPath + File.separator + videoFilePath;
 
         File localFile = new File(localFilePath);
-        if (!localFile.exists()) {
-            if (!localFile.mkdirs()) {
-                log.warn("mkdir dir [{}] failed!", localFilePath);
-            }
+        if (!localFile.exists() && (!localFile.mkdirs())) {
+            log.warn("mkdir dir [{}] failed!", localFilePath);
         }
 
         retrySupplier(() -> {
             try {
                 // 下载文件
                 FileUtils.download(fileUrl, localFilePath + File.separator + titleMmd5);
-                return true;
-            } catch (Exception e) {
-                if (e instanceof CommonException) {
-                    CommonException ce = (CommonException) e;
-                    if (ce.getCode() == ExceptionEnum.URL_FORBIDDEN.getCode()) {
-                        log.error("access to the url [{}] of remote server is prohibited.", fileUrl);
-                        return true;
-                    }
+                return false;
+            } catch (CommonException e) {
+                if (e.getCode() == ExceptionEnum.URL_FORBIDDEN.getCode()) {
+                    log.error("access to the url [{}] of remote server is prohibited.", fileUrl);
+                    return false;
                 }
                 throw new RuntimeException(e);
+            } catch (Exception e) {
+                throw new RuntimeException(e);
             }
         }, "download", String.format("download file from [%s] to [%s]", fileUrl, filePath));
 
@@ -390,22 +392,22 @@ public class EtlServiceImpl implements EtlService {
         slsService.log("message", "开始上传视频... ", "crawler", crawler, "mode", mode);
         log.info("begin upload {} to oss key {}", localFile, ossBucketKey);
 
-        retryFunc((t) -> {
+        retrySupplier(() -> {
             try {
                 aliyunOssManager.putObject(ossBucket, ossBucketKey, Files.newInputStream(Paths.get(localFile)));
-                return true;
+                return false;
             } catch (IOException e) {
                 throw new RuntimeException(e);
             }
-        }, "", "2oss", String.format("upload file [%s] to oss [%s]", localFile, ossBucketKey));
+        }, "2oss", String.format("upload file [%s] to oss [%s]", localFile, ossBucketKey));
     }
 
-    private void retrySupplier(Supplier<Boolean> supplier, String type, String errorMsg) {
-        retryFunc((c) -> supplier.get(), null, type, errorMsg);
+    private void retrySupplier(BooleanSupplier supplier, String type, String errorMsg) {
+        retryFunc(c -> supplier.getAsBoolean(), null, type, errorMsg);
     }
 
     private <R> R retrySupplierR(Supplier<Pair<Boolean, R>> supplier, String type, String errorMsg) {
-        return retryFuncR((c) -> supplier.get(), null, type, errorMsg);
+        return retryFuncR(c -> supplier.get(), null, type, errorMsg);
     }
 
     private <T, R> R retryFuncR(Function<T, Pair<Boolean, R>> func, T t, String type, String errorMsg) {
@@ -417,21 +419,22 @@ public class EtlServiceImpl implements EtlService {
             try {
                 Pair<Boolean, R> apply = func.apply(t);
                 r = apply.getRight();
-                if (Boolean.TRUE.equals(apply.getLeft())) {
+                if (Boolean.FALSE.equals(apply.getLeft())) {
                     break;
                 }
             } catch (Exception e) {
                 log.error("the operation '{}' has failed on the {}th retry.", errorMsg, retry, e);
                 if (retry >= retryTimes) {
-                    throw new CommonException(ExceptionEnum.SYSTEM_ERROR, "the operation '" + errorMsg + "' has failed after " + retry + " times retry.");
+                    throw new CommonException(ExceptionEnum.SYSTEM_ERROR,
+                            "the operation '" + errorMsg + "' has failed after " + retry + " times retry.");
                 }
             }
         }
         return r;
     }
 
-    private <T> void retryFunc(Function<T, Boolean> func, T t, String type, String errorMsg) {
-        retryFuncR((t1) -> Pair.of(func.apply(t1), null), t, type, errorMsg);
+    private <T> void retryFunc(Predicate<T> func, T t, String type, String errorMsg) {
+        retryFuncR(c -> Pair.of(func.test(c), null), t, type, errorMsg);
     }
 
     @PostConstruct

+ 3 - 17
etl-core/src/main/java/com/tzld/crawler/etl/util/FileUtils.java

@@ -40,18 +40,9 @@ import java.net.URL;
  * @since 2023-06-09 15:54.
  */
 public class FileUtils {
-    private static final Logger log = LoggerFactory.getLogger(FileUtils.class);
-
-    public static void main(String[] args) {
-        String fileUrl = "https://v2.kwaicdn.com/upic/2023/06/05/13/BMjAyMzA2MDUxMzA1MDNfNDU4Mjk4ODc5XzEwNDgwODM0NzMyNF8xXzM=_b_B4c86f13e02feb5462f484f95626229f6.mp4?pkey=AAVsuVfgud8EyKY8wn5cED5iBHD_2CCfZLuih27xJbjMwhqxIU-IHLkRhoDV0RxWUBjRvYtWWswKPL_n6u3csHgYD-euPHV1phmxa0r3ndOom3mRtowdHKCs5C9mr-PlXT8&tag=1-1686310919-unknown-0-5o0hj64qdg-0ee9b724883fdad8&clientCacheKey=3xz7hyd9c8h4zwa_b.mp4&di=ab7f961c&bp=14944&tt=b&ss=vp";
+    private FileUtils() {}
 
-        String filePath = "/Users/ehlxr/Workspaces/tzld/crawler-etl/video1.mp4";
-        try {
-            download(fileUrl, filePath);
-        } catch (Exception e) {
-            throw new RuntimeException(e);
-        }
-    }
+    private static final Logger log = LoggerFactory.getLogger(FileUtils.class);
 
     public static String formatFileSize(long sizeInBytes) {
         if (sizeInBytes < 1024) {
@@ -66,7 +57,6 @@ public class FileUtils {
     }
 
     public static void download(String fileUrl, String filePath) throws Exception {
-        // try {
         log.info("begin download [{}] to [{}]", fileUrl, filePath);
         URL url = new URL(fileUrl);
         HttpURLConnection conn = (HttpURLConnection) url.openConnection();
@@ -87,10 +77,6 @@ public class FileUtils {
         inputStream.close();
         outputStream.close();
         log.info("downloaded successfully [{}] to [{}]", fileUrl, filePath);
-        // return true;
-        // } catch (Exception e) {
-        //     log.error("downloaded error file url: {} local path: {} ", fileUrl, filePath, e);
-        //     return false;
-        // }
+
     }
 }

+ 8 - 4
etl-core/src/main/java/com/tzld/crawler/etl/util/HttpUtil.java

@@ -26,7 +26,8 @@ public class HttpUtil {
                     .build();
             Response response = OK_HTTP_CLIENT.newCall(request).execute();
             if (response.isSuccessful()) {
-                resp = response.body() != null ? response.body().string() : "";
+                ResponseBody body = response.body();
+                resp = body != null ? body.string() : "";
             } else {
                 throw new RuntimeException("send get http request failed. Unexpected code: " + response);
             }
@@ -53,7 +54,8 @@ public class HttpUtil {
 
             Response response = OK_HTTP_CLIENT.newCall(request).execute();
             if (response.isSuccessful()) {
-                resp = response.body() != null ? response.body().string() : "";
+                ResponseBody respBody = response.body();
+                resp = respBody != null ? respBody.string() : "";
             } else {
                 throw new RuntimeException("send post http request failed. Unexpected code: " + response);
             }
@@ -76,7 +78,8 @@ public class HttpUtil {
 
             Response response = OK_HTTP_CLIENT.newCall(request).execute();
             if (response.isSuccessful()) {
-                resp = response.body() != null ? response.body().string() : "";
+                ResponseBody respBody = response.body();
+                resp = respBody != null ? respBody.string() : "";
             } else {
                 throw new RuntimeException("send post http request failed. Unexpected code: " + response);
             }
@@ -118,7 +121,8 @@ public class HttpUtil {
                     .build();
             Response response = OK_HTTP_CLIENT.newCall(request).execute();
             if (response.isSuccessful()) {
-                resp = response.body() != null ? response.body().string() : "";
+                ResponseBody respBody = response.body();
+                resp = respBody != null ? respBody.string() : "";
             } else {
                 throw new RuntimeException("send multipart post http request failed. Unexpected code: " + response);
             }

+ 3 - 2
etl-core/src/main/java/com/tzld/crawler/etl/util/VideoUtils.java

@@ -38,7 +38,7 @@ import java.util.List;
  * @since 2023-06-26 18:01.
  */
 public class VideoUtils {
-    private final static Logger log = LoggerFactory.getLogger(VideoUtils.class);
+    private static final Logger log = LoggerFactory.getLogger(VideoUtils.class);
 
     public static VideoInfoDto getVideoInfo(String videoPath, String ffprobePath) {
         try {
@@ -46,7 +46,8 @@ public class VideoUtils {
             FFmpegProbeResult probeResult = ffprobe.probe(videoPath);
 
             List<FFmpegStream> streams = probeResult.getStreams();
-            int height = 0, width = 0;
+            int height = 0;
+            int width = 0;
             for (FFmpegStream stream : streams) {
                 height = height <= 0 ? stream.height : height;
                 width = width <= 0 ? stream.width : width;