123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596 |
- package com.tzld.crawler.etl;
- //import com.aliyun.mq.http.MQClient;
- //import com.aliyun.mq.http.MQProducer;
- //import com.aliyun.mq.http.model.TopicMessage;
- import com.google.common.collect.Lists;
- import com.huaban.analysis.jieba.JiebaSegmenter;
- import com.huaban.analysis.jieba.SegToken;
- //import com.tzld.crawler.etl.mq.EtlMQConsumer;
- import net.bramp.ffmpeg.FFprobe;
- import net.bramp.ffmpeg.probe.FFmpegFormat;
- import net.bramp.ffmpeg.probe.FFmpegProbeResult;
- import net.bramp.ffmpeg.probe.FFmpegStream;
- import org.junit.jupiter.api.Test;
- import org.springframework.beans.factory.annotation.Value;
- import org.springframework.boot.test.context.SpringBootTest;
- import org.springframework.boot.test.mock.mockito.MockBean;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.List;
- import java.util.stream.IntStream;
- @SpringBootTest
- class EtlServerApplicationTests {
- @Value("${rocketmq.accessKey}")
- private String accessKey;
- @Value("${rocketmq.secretKey}")
- private String secretKey;
- @Value("${rocketmq.httpEndpoint}")
- private String httpEndpoint;
- @Value("${rocketmq.instanceId}")
- private String instanceId;
- /**
- * 排除 EtlMQConsumer 的加载
- */
- // @MockBean
- // private EtlMQConsumer etlMQConsumer;
- @Test
- void produceMsgTest() {
- // MQClient mqClient = new MQClient(httpEndpoint, accessKey, secretKey);
- // MQProducer producer = mqClient.getProducer(instanceId, "topic_crawler_etl_test");
- // ArrayList<String> platforms = Lists.newArrayList("benshanzhufu", "kuaishou", "douyin", "xigua", "gongzhonghao", "xiaoniangao");
- //
- // IntStream.range(0, 20).forEach(x -> {
- // try {
- // String s = "{\"user_id\": 6281907,\"out_user_id\": \"53322270\",\"platform\": \"" + platforms.get(x % 6) + "\",\"strategy\": \"author\"," +
- // "\"out_video_id\": \"" + System.currentTimeMillis() + "\"," +
- // "\"video_title\": \"" + x + "世界上最强百米对决\",\"cover_url\": \"https://cdn-xphoto2.xiaoniangao.cn/5200474225?Expires=1704038400&OSSAccessKeyId=LTAI4G2W1FsgwzAWYpPoB3v6&Signature=ncvtSP8FSrwuU8unZMtxdXIuWBE%3D&x-oss-process=image%2Fresize%2Cw_690%2Ch_385%2Climit_0%2Finterlace%2C1%2Fformat%2Cjpg%2Fauto-orient%2C0\"," +
- // "\"video_url\": \"https://cdn-xalbum2.xiaoniangao.cn/6506ec4500000104bd7c0623?Expires=1704038400&OSSAccessKeyId=LTAI5tB7cRkYiqHcTdkVprwb&Signature=CxJEEcwUR87is9X3li5xP5ZiDvQ%3D\"" +
- // ",\"duration\": 40,\"publish_time\": \"2023-06-08 23:01:47\",\"play_cnt\": 602,\"like_cnt\": 0,\"share_cnt\": 0,\"collection_cnt\": 0,\"comment_cnt\": 0,\"crawler_rule\": {\"period\": { \"max\": 3, \"min\": 3 },\"duration\": { \"max\": 999999999999999, \"min\": 40 },\"play_cnt\": { \"max\": 999999999999999, \"min\": 500 }},\"width\": 450,\"height\": 254}";
- //
- // TopicMessage pubMsg = new TopicMessage(s.getBytes());
- // producer.publishMessage(pubMsg);
- // } catch (Exception e) {
- // System.out.println(e);
- // }
- // });
- }
- @Test
- public void testDemo() {
- JiebaSegmenter segmenter = new JiebaSegmenter();
- List<SegToken> process = segmenter.process("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Java和Rust还有Golang。", JiebaSegmenter.SegMode.SEARCH);
- for (SegToken segToken : process) {
- System.out.println(segToken.word);
- }
- }
- public static void main(String[] args) throws IOException {
- // getVideoSize("/Users/ehlxr/logs/crawler/videos/longvideo/crawler_local/video/dev/20230614/9778d5d219c5080b9a6a17bef029331c.mp4");
- getVideoSize("http://cdn-xalbum-baishan.xiaoniangao.cn/4607211824?Expires=1704038400&OSSAccessKeyId=LTAI5tB7cRkYiqHcTdkVprwb&Signature=oFnKr5ObYT2xqNTadYEPHfzMdIM%3D");
- }
- public static void getVideoSize(String videoPath) throws IOException {
- FFprobe ffprobe = new FFprobe("ffprobe");
- FFmpegProbeResult probeResult = ffprobe.probe(videoPath);
- FFmpegFormat format = probeResult.getFormat();
- FFmpegStream stream = probeResult.getStreams().get(0);
- System.out.format("Duration: %f Width: %d ; Height: %d", format.duration,
- stream.width,
- stream.height
- );
- }
- }
|