Browse Source

Merge branch 'master' into wyp/1017-rank1314

wangyunpeng 9 months ago
parent
commit
ee0aa5accd
19 changed files with 843 additions and 2 deletions
  1. 3 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/crawler/CrawlerBaseMapper.java
  2. 54 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/crawler/GetOffVideoCrawler.java
  3. 76 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/crawler/LongArticlesRootSourceId.java
  4. 164 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/crawler/LongArticlesVideo.java
  5. 54 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/GetOffVideoArticle.java
  6. 104 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/LongArticlesMatchVideo.java
  7. 29 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/vo/FirstContentScoreExport.java
  8. 13 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/GetOffVideoCrawlerRepository.java
  9. 9 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/LongArticlesRootSourceIdRepository.java
  10. 13 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/LongArticlesVideoRepository.java
  11. 2 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/PublishSortLogRepository.java
  12. 14 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/longArticle/GetOffVideoArticleRepository.java
  13. 13 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/longArticle/LongArticlesMatchVideoRepository.java
  14. 130 2
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/DataDashboardService.java
  15. 128 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/XxlJobService.java
  16. 10 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/util/DateUtils.java
  17. 7 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/DataDashboardController.java
  18. 10 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/XxlJobController.java
  19. 10 0
      long-article-recommend-service/src/main/resources/mapper/crawler/CrawlerBaseMapper.xml

+ 3 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/crawler/CrawlerBaseMapper.java

@@ -1,6 +1,7 @@
 package com.tzld.longarticle.recommend.server.mapper.crawler;
 
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.LongArticlesRootSourceId;
 
 import java.util.List;
 
@@ -8,4 +9,6 @@ public interface CrawlerBaseMapper {
 
     void batchInsertAccountCorrelation(List<AccountCorrelation> list);
 
+    /**
+     * Mapper statement that receives a list of {@link LongArticlesRootSourceId} rows.
+     * NOTE(review): judging by the name this is a batch insert; the SQL itself lives in the
+     * mapper XML, which is not visible here - confirm the behaviour there.
+     *
+     * @param list rows handed to the mapper statement
+     */
+    void batchInsertLongArticlesRootSourceId(List<LongArticlesRootSourceId> list);
+
 }

+ 54 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/crawler/GetOffVideoCrawler.java

@@ -0,0 +1,54 @@
+package com.tzld.longarticle.recommend.server.model.entity.crawler;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.Id;
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.Table;
+
+/**
+ * JPA entity mapped to the {@code get_off_videos} table, keyed by {@code video_id}.
+ * Field order matters: Lombok's {@code @AllArgsConstructor} takes its parameters in
+ * declaration order.
+ */
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+@Entity
+@Table(name = "get_off_videos")
+public class GetOffVideoCrawler {
+    /**
+     * Video id (primary key).
+     */
+    @Id
+    @Column(name = "video_id", nullable = false, unique = true, columnDefinition = "bigint(16) COMMENT '视频id'")
+    private Long videoId;
+
+    /**
+     * Video publish time, stored as a bigint.
+     * NOTE(review): the unit (seconds vs. milliseconds) is not visible in this class - confirm.
+     */
+    @Column(name = "publish_time", columnDefinition = "bigint(16) COMMENT '视频发布时间'")
+    private Long publishTime;
+
+    /**
+     * Video status.
+     */
+    @Column(name = "video_status", columnDefinition = "int(1) COMMENT '视频状态'")
+    private Integer videoStatus;
+
+    /**
+     * Trace id (the column comment literally reads "final id").
+     */
+    @Column(name = "trace_id", nullable = false, length = 128, columnDefinition = "varchar(128) COMMENT '最终id'")
+    private String traceId;
+
+    /**
+     * Time at which the video was taken down.
+     */
+    @Column(name = "get_off_time", columnDefinition = "bigint(16) COMMENT '视频下架时间'")
+    private Long getOffTime;
+
+    /**
+     * Check (verification) status; defaults to 0.
+     */
+    @Column(name = "check_status", nullable = false, columnDefinition = "int(1) DEFAULT '0' COMMENT '校验状态'")
+    private Integer checkStatus = 0;
+}

+ 76 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/crawler/LongArticlesRootSourceId.java

@@ -0,0 +1,76 @@
+package com.tzld.longarticle.recommend.server.model.entity.crawler;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.Id;
+import javax.persistence.Table;
+
+/**
+ * JPA entity mapped to the {@code long_articles_root_source_id} table, keyed by
+ * {@code rootSourceId}. Field order matters: Lombok's {@code @AllArgsConstructor} takes its
+ * parameters in declaration order.
+ */
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+@Entity
+@Table(name = "long_articles_root_source_id")
+public class LongArticlesRootSourceId {
+
+    /**
+     * Root source id (primary key).
+     */
+    @Id
+    @Column(name = "rootSourceId", nullable = false, unique = true, length = 128,
+            columnDefinition = "varchar(128) NOT NULL COMMENT 'rootSourceId'")
+    private String rootSourceId;
+
+    /**
+     * Account name.
+     */
+    @Column(name = "accountName", length = 128,
+            columnDefinition = "varchar(128) DEFAULT NULL COMMENT '账号名字'")
+    private String accountName;
+
+    /**
+     * ghId of the account.
+     */
+    @Column(name = "ghId", length = 16,
+            columnDefinition = "varchar(16) DEFAULT NULL COMMENT 'ghid'")
+    private String ghId;
+
+    /**
+     * Article title.
+     */
+    @Column(name = "articleTitle", length = 255,
+            columnDefinition = "varchar(255) DEFAULT NULL COMMENT '文章标题'")
+    private String articleTitle;
+
+    /**
+     * Request time.
+     * NOTE(review): the column is declared int(16) while the field is a Long - confirm the
+     * stored range and unit.
+     */
+    @Column(name = "requestTime",
+            columnDefinition = "int(16) DEFAULT NULL COMMENT '请求时间'")
+    private Long requestTime;
+
+    /**
+     * trace_id of the request.
+     */
+    @Column(name = "trace_id", length = 255,
+            columnDefinition = "varchar(255) DEFAULT NULL COMMENT '请求trace_id'")
+    private String traceId;
+
+    /**
+     * Push type: 2 = regular daily publishing, 1 = auto-reply article.
+     */
+    @Column(name = "push_type",
+            columnDefinition = "int(11) DEFAULT NULL COMMENT '状态: 2: 日常发文  1:自动回复文章'")
+    private Integer pushType;
+
+    /**
+     * Video id.
+     */
+    @Column(name = "video_id",
+            columnDefinition = "bigint(16) DEFAULT NULL COMMENT '视频id'")
+    private Long videoId;
+
+}

+ 164 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/crawler/LongArticlesVideo.java

@@ -0,0 +1,164 @@
+package com.tzld.longarticle.recommend.server.model.entity.crawler;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.*;
+import java.io.Serializable;
+import java.util.Date;
+
+/**
+ * JPA entity mapped to the {@code long_articles_video} table. The primary key is the
+ * composite ({@code id}, {@code traceId}) described by the nested {@link PK} id class.
+ * Field order matters: Lombok's {@code @AllArgsConstructor} takes its parameters in
+ * declaration order.
+ */
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+@Entity
+@Table(name = "long_articles_video")
+@IdClass(com.tzld.longarticle.recommend.server.model.entity.crawler.LongArticlesVideo.PK.class)
+public class LongArticlesVideo implements Serializable {
+
+    /**
+     * Identity-generated numeric id; first part of the composite key.
+     */
+    @Id
+    @GeneratedValue(strategy = GenerationType.IDENTITY)
+    private Integer id;
+
+    /**
+     * Unique trace id of the request (the original comment literally says "final id");
+     * second part of the composite key.
+     * NOTE(review): no explicit @Column here, so the column name comes from the configured
+     * naming strategy - confirm it resolves to trace_id.
+     */
+    @Id
+    private String traceId;
+
+    /**
+     * Article (content) id.
+     */
+    @Column(name = "content_id", length = 64, columnDefinition = "varchar(64) DEFAULT NULL COMMENT '文章id'")
+    private String contentId;
+
+    /**
+     * Official account id (gh id).
+     */
+    @Column(name = "gh_id", length = 255, columnDefinition = "varchar(255) CHARACTER SET utf8 DEFAULT NULL COMMENT '公众号 id'")
+    private String ghId;
+
+    /**
+     * Official account name.
+     */
+    @Column(name = "account_name", length = 255, columnDefinition = "varchar(255) DEFAULT NULL COMMENT '公众号名称'")
+    private String accountName;
+
+    /**
+     * Article title.
+     */
+    @Column(name = "article_title", length = 255, columnDefinition = "varchar(255) DEFAULT NULL COMMENT '文章标题'")
+    private String articleTitle;
+
+    /**
+     * Article body text.
+     */
+    @Lob
+    @Column(name = "article_text", columnDefinition = "text COMMENT '文章正文'")
+    private String articleText;
+
+    /**
+     * Content status:
+     * 0: not processed
+     * 1: processing
+     * 2: processing finished
+     */
+    @Column(name = "content_status", nullable = false, columnDefinition = "int(1) DEFAULT '0' COMMENT '0: 未处理, 1: 处理中, 2: 处理完成'")
+    private Integer contentStatus = 0;
+
+    /**
+     * Title generated by kimi that is suitable for the mini program.
+     */
+    @Column(name = "kimi_title", length = 255, columnDefinition = "varchar(255) DEFAULT NULL COMMENT 'kimi生成的符合小程序的标题'")
+    private String kimiTitle;
+
+    /**
+     * Summary text generated by kimi from the article text.
+     */
+    @Column(name = "kimi_summary", length = 255, columnDefinition = "varchar(255) DEFAULT NULL COMMENT 'kimi 通过文本生成的总结性文本'")
+    private String kimiSummary;
+
+    /**
+     * Summary search keywords generated by kimi from the article text.
+     */
+    @Column(name = "kimi_keys", length = 255, columnDefinition = "varchar(255) DEFAULT NULL COMMENT 'kimi 通过文本生成的总结性搜索关键词'")
+    private String kimiKeys;
+
+    /**
+     * Recalled video id 1.
+     */
+    @Column(name = "recall_video_id1", columnDefinition = "int(11) DEFAULT NULL COMMENT '召回 id1'")
+    private Integer recallVideoId1;
+
+    /**
+     * Recalled video id 2.
+     */
+    @Column(name = "recall_video_id2", columnDefinition = "int(11) DEFAULT NULL COMMENT '召回 id2'")
+    private Integer recallVideoId2;
+
+    /**
+     * Recalled video id 3.
+     */
+    @Column(name = "recall_video_id3", columnDefinition = "int(11) DEFAULT NULL COMMENT '召回 id3'")
+    private Integer recallVideoId3;
+
+    /**
+     * Returned result 1.
+     */
+    @Lob
+    @Column(name = "result1", columnDefinition = "text COMMENT '返回结果 1'")
+    private String result1;
+
+    /**
+     * Returned result 2.
+     */
+    @Lob
+    @Column(name = "result2", columnDefinition = "text COMMENT '返回结果 2'")
+    private String result2;
+
+    /**
+     * Returned result 3.
+     */
+    @Lob
+    @Column(name = "result3", columnDefinition = "text COMMENT '返回结果 3'")
+    private String result3;
+
+    /**
+     * Whether the request succeeded; 1 means success.
+     */
+    @Column(name = "success", nullable = false, columnDefinition = "tinyint(1) DEFAULT '0' COMMENT '是否成功'")
+    private Boolean success = false;
+
+    /**
+     * Update time; the column definition lets the database maintain it
+     * (DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP).
+     */
+    @Column(name = "update_time", nullable = false, columnDefinition = "datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间'")
+    private Date updateTime;
+
+    /**
+     * Request timestamp.
+     * NOTE(review): the column is declared int(16) while the field is a Long - confirm the
+     * stored range and unit.
+     */
+    @Column(name = "request_time_stamp", columnDefinition = "int(16) DEFAULT NULL COMMENT '请求时间戳'")
+    private Long requestTimeStamp;
+
+    /**
+     * Number of times this record has been processed.
+     */
+    @Column(name = "process_times", nullable = false, columnDefinition = "int(1) DEFAULT '0' COMMENT '执行次数'")
+    private Integer processTimes = 0;
+
+
+    /**
+     * Id class for the composite key; its attribute names and types mirror the entity's
+     * {@code @Id} fields.
+     * NOTE(review): with {@code @IdClass}, JPA providers normally take column mappings from
+     * the entity's {@code @Id} attributes, so the {@code @Column} annotations below are
+     * presumably ignored - confirm.
+     */
+    @Data
+    public static class PK implements Serializable {
+
+        @Column(name = "id")
+        private Integer id;
+        @Column(name = "trace_id")
+        private String traceId;
+
+        // Explicit no-arg constructor.
+        public PK() {
+        }
+
+
+    }
+}

+ 54 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/GetOffVideoArticle.java

@@ -0,0 +1,54 @@
+package com.tzld.longarticle.recommend.server.model.entity.longArticle;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.Id;
+import javax.persistence.Table;
+
+/**
+ * JPA entity mapped to the {@code get_off_videos} table, keyed by {@code video_id}.
+ * It has the same shape as {@code GetOffVideoCrawler} but lives in the {@code longArticle}
+ * entity package.
+ * Field order matters: Lombok's {@code @AllArgsConstructor} takes its parameters in
+ * declaration order.
+ */
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+@Entity
+@Table(name = "get_off_videos")
+public class GetOffVideoArticle {
+    /**
+     * Video id (primary key).
+     */
+    @Id
+    @Column(name = "video_id", nullable = false, unique = true, columnDefinition = "bigint(16) COMMENT '视频id'")
+    private Long videoId;
+
+    /**
+     * Video publish time, stored as a bigint.
+     * NOTE(review): the unit (seconds vs. milliseconds) is not visible in this class - confirm.
+     */
+    @Column(name = "publish_time", columnDefinition = "bigint(16) COMMENT '视频发布时间'")
+    private Long publishTime;
+
+    /**
+     * Video status.
+     */
+    @Column(name = "video_status", columnDefinition = "int(1) COMMENT '视频状态'")
+    private Integer videoStatus;
+
+    /**
+     * Trace id (the column comment literally reads "final id").
+     */
+    @Column(name = "trace_id", nullable = false, length = 128, columnDefinition = "varchar(128) COMMENT '最终id'")
+    private String traceId;
+
+    /**
+     * Time at which the video was taken down.
+     */
+    @Column(name = "get_off_time", columnDefinition = "bigint(16) COMMENT '视频下架时间'")
+    private Long getOffTime;
+
+    /**
+     * Check (verification) status; defaults to 0.
+     */
+    @Column(name = "check_status", nullable = false, columnDefinition = "int(1) DEFAULT '0' COMMENT '校验状态'")
+    private Integer checkStatus = 0;
+}

+ 104 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/LongArticlesMatchVideo.java

@@ -0,0 +1,104 @@
+package com.tzld.longarticle.recommend.server.model.entity.longArticle;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.*;
+import java.util.Date;
+
+/**
+ * JPA entity mapped to the {@code long_articles_match_videos} table, keyed by an
+ * identity-generated {@code id}.
+ * Field order matters: Lombok's {@code @AllArgsConstructor} takes its parameters in
+ * declaration order.
+ */
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+@Entity
+@Table(name = "long_articles_match_videos")
+public class LongArticlesMatchVideo {
+
+    /**
+     * Auto-increment id (primary key).
+     */
+    @Id
+    @GeneratedValue(strategy = GenerationType.IDENTITY)
+    @Column(name = "id", nullable = false, unique = true, columnDefinition = "int(16) COMMENT '自增 id'")
+    private Integer id;
+
+    /**
+     * Unique trace id.
+     */
+    @Column(name = "trace_id", nullable = false, length = 64, columnDefinition = "varchar(64) COMMENT '唯一追踪 id'")
+    private String traceId;
+
+    /**
+     * Article (content) id.
+     */
+    @Column(name = "content_id", length = 64, columnDefinition = "varchar(64) COMMENT '文章 id'")
+    private String contentId;
+
+    /**
+     * Flow pool level.
+     */
+    @Column(name = "flow_pool_level", length = 32, columnDefinition = "varchar(32) COMMENT '流量池等级 '")
+    private String flowPoolLevel;
+
+    /**
+     * Official account id (gh id).
+     */
+    @Column(name = "gh_id", length = 32, columnDefinition = "varchar(32) COMMENT '公众号 Id'")
+    private String ghId;
+
+    /**
+     * Official account name.
+     */
+    @Column(name = "account_name", length = 64, columnDefinition = "varchar(64) COMMENT '公众号名称'")
+    private String accountName;
+
+    /**
+     * Content status:
+     * 0 - not matched yet
+     * 1 - kimi step finished
+     * 2 - crawler step finished
+     * 3 - etl step finished
+     * 4 - published and vid obtained
+     * 101 - a process is currently handling this record
+     * 99 - processing failed
+     */
+    @Column(name = "content_status", nullable = false, columnDefinition = "int(8) DEFAULT '0' COMMENT '内容状态'")
+    private Integer contentStatus = 0;
+
+    /**
+     * Time at which the content status was last modified.
+     * NOTE(review): the column is declared int(16) while the field is a Long - confirm the
+     * stored range and unit.
+     */
+    @Column(name = "content_status_update_time", columnDefinition = "int(16) COMMENT '文章状态 id 的修改时间'")
+    private Long contentStatusUpdateTime;
+
+    /**
+     * Whether the request succeeded; 1 means success.
+     */
+    @Column(name = "success_status", nullable = false, columnDefinition = "int(1) DEFAULT '0' COMMENT '是否请求成功, 1表示成功'")
+    private Integer successStatus = 0;
+
+    /**
+     * Request time.
+     */
+    @Column(name = "request_timestamp", nullable = false, columnDefinition = "int(16) COMMENT '请求时间'")
+    private Long requestTimestamp;
+
+    /**
+     * Update time; the column definition lets the database maintain it
+     * (DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP).
+     */
+    @Column(name = "update_time", nullable = false, columnDefinition = "datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间'")
+    private Date updateTime;
+
+    /**
+     * Result payload.
+     */
+    @Lob
+    @Column(name = "response", columnDefinition = "text COMMENT '结果'")
+    private String response;
+
+    /**
+     * Number of processing attempts.
+     */
+    @Column(name = "process_times", nullable = false, columnDefinition = "int(1) NOT NULL DEFAULT '0' COMMENT '处理次数'")
+    private Integer processTimes = 0;
+}

+ 29 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/vo/FirstContentScoreExport.java

@@ -0,0 +1,29 @@
+package com.tzld.longarticle.recommend.server.model.vo;
+
+import lombok.Data;
+
+/**
+ * One row of the "first content score" sheet export.
+ * <p>
+ * {@code DataDashboardService#exportFeishuFirstContentScore} reads this class reflectively via
+ * {@code getDeclaredFields()} and emits one cell per field, so the declaration order below is
+ * the column order of the sheet. Do not reorder, insert or remove fields without updating the
+ * column formats configured there.
+ */
+@Data
+public class FirstContentScoreExport {
+
+    // Identification columns, copied from the publish-sort log entry.
+    private String dateStr;
+    private String ghId;
+    private String accountName;
+    private String title;
+    private String strategy;
+
+    // Total score followed by the per-strategy scores; each *Strategy field is filled from
+    // the score-map entry with the same name (first letter upper-cased).
+    private Double score;
+    private Double hisFissionFansRateRateStrategy;
+    private Double hisFissionAvgReadRateRateStrategy;
+    private Double publishTimesStrategy;
+    private Double viewCountRateCorrelationStrategy;
+    private Double hisFissionAvgReadSumRateStrategy;
+    private Double hisFissionAvgReadRateCorrelationRateStrategy;
+    private Double hisFissionFansSumRateStrategy;
+    private Double similarityStrategy;
+    private Double viewCountStrategy;
+    private Double viewCountRateStrategy;
+    private Double hisFissionDeWeightAvgReadSumRateStrategy;
+    // View count of the matched article.
+    private Integer readCount;
+    // Account read average nearest to the publish date; null when no average was found.
+    private Double readAvg;
+    // readCount divided by readAvg; null when no average was found.
+    private Double readAvgRate;
+}

+ 13 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/GetOffVideoCrawlerRepository.java

@@ -0,0 +1,13 @@
+package com.tzld.longarticle.recommend.server.repository.crawler;
+
+import com.tzld.longarticle.recommend.server.model.entity.crawler.GetOffVideoCrawler;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+/**
+ * Spring Data JPA repository for {@link GetOffVideoCrawler} rows, keyed by video id.
+ */
+@Repository
+public interface GetOffVideoCrawlerRepository extends JpaRepository<GetOffVideoCrawler, Long> {
+
+    /**
+     * Derived query: rows whose {@code publishTime} lies between the two bounds
+     * (Spring Data's {@code Between} keyword, i.e. both bounds inclusive).
+     *
+     * @param startTime lower bound of the publish time
+     * @param endTime   upper bound of the publish time
+     * @return matching rows
+     */
+    List<GetOffVideoCrawler> getByPublishTimeBetween(Long startTime, Long endTime);
+}

+ 9 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/LongArticlesRootSourceIdRepository.java

@@ -0,0 +1,9 @@
+package com.tzld.longarticle.recommend.server.repository.crawler;
+
+import com.tzld.longarticle.recommend.server.model.entity.crawler.LongArticlesRootSourceId;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+/**
+ * Spring Data JPA repository for {@link LongArticlesRootSourceId} rows, keyed by the
+ * {@code rootSourceId} string. It declares no queries beyond those inherited from
+ * {@link JpaRepository}.
+ */
+@Repository
+public interface LongArticlesRootSourceIdRepository extends JpaRepository<LongArticlesRootSourceId, String> {
+}

+ 13 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/LongArticlesVideoRepository.java

@@ -0,0 +1,13 @@
+package com.tzld.longarticle.recommend.server.repository.crawler;
+
+import com.tzld.longarticle.recommend.server.model.entity.crawler.LongArticlesVideo;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+/**
+ * Spring Data JPA repository for {@link LongArticlesVideo} rows, keyed by the composite
+ * {@link LongArticlesVideo.PK}.
+ */
+@Repository
+public interface LongArticlesVideoRepository extends JpaRepository<LongArticlesVideo, LongArticlesVideo.PK> {
+
+    /**
+     * Derived query: rows whose {@code traceId} is one of the given values.
+     * NOTE(review): the behaviour of an IN query with an empty list is provider-dependent;
+     * callers should presumably avoid passing an empty list - confirm.
+     *
+     * @param traceIds trace ids to look up
+     * @return matching rows
+     */
+    List<LongArticlesVideo> getByTraceIdIn(List<String> traceIds);
+}

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/PublishSortLogRepository.java

@@ -21,4 +21,6 @@ public interface PublishSortLogRepository extends JpaRepository<PublishSortLog,
 
     List<PublishSortLog> findByStrategyInAndDateStrGreaterThanEqual(List<String> strategies, String dateStr);
 
+    /**
+     * Derived query: sort-log rows whose {@code dateStr} is one of the given values.
+     *
+     * @param dateStrList date strings to match
+     * @return matching rows
+     */
+    List<PublishSortLog> findByDateStrIn(List<String> dateStrList);
+
 }

+ 14 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/longArticle/GetOffVideoArticleRepository.java

@@ -0,0 +1,14 @@
+package com.tzld.longarticle.recommend.server.repository.longArticle;
+
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.GetOffVideoArticle;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+/**
+ * Spring Data JPA repository for {@link GetOffVideoArticle} rows, keyed by video id.
+ */
+@Repository
+public interface GetOffVideoArticleRepository extends JpaRepository<GetOffVideoArticle, Long> {
+
+    /**
+     * Derived query: rows whose {@code publishTime} lies between the two bounds
+     * (Spring Data's {@code Between} keyword, i.e. both bounds inclusive).
+     *
+     * @param startTime lower bound of the publish time
+     * @param endTime   upper bound of the publish time
+     * @return matching rows
+     */
+    List<GetOffVideoArticle> getByPublishTimeBetween(Long startTime, Long endTime);
+
+}

+ 13 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/longArticle/LongArticlesMatchVideoRepository.java

@@ -0,0 +1,13 @@
+package com.tzld.longarticle.recommend.server.repository.longArticle;
+
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesMatchVideo;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+/**
+ * Spring Data JPA repository for {@link LongArticlesMatchVideo} rows, keyed by the
+ * auto-increment id.
+ */
+@Repository
+public interface LongArticlesMatchVideoRepository extends JpaRepository<LongArticlesMatchVideo, Integer> {
+
+    /**
+     * Derived query: rows whose {@code traceId} is one of the given values.
+     * NOTE(review): the behaviour of an IN query with an empty list is provider-dependent;
+     * callers should presumably avoid passing an empty list - confirm.
+     *
+     * @param traceIds trace ids to look up
+     * @return matching rows
+     */
+    List<LongArticlesMatchVideo> getByTraceIdIn(List<String> traceIds);
+}

+ 130 - 2
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/DataDashboardService.java

@@ -18,6 +18,7 @@ import com.tzld.longarticle.recommend.server.model.entity.crawler.PublishSortLog
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatSortStrategy;
 import com.tzld.longarticle.recommend.server.model.param.MiniprogramTaskParam;
 import com.tzld.longarticle.recommend.server.model.param.PublishContentParam;
+import com.tzld.longarticle.recommend.server.model.vo.FirstContentScoreExport;
 import com.tzld.longarticle.recommend.server.model.vo.IntermediateIndicatorsExport;
 import com.tzld.longarticle.recommend.server.model.vo.NewSortStrategyExport;
 import com.tzld.longarticle.recommend.server.model.vo.ProduceAuditLayoutContentObjVO;
@@ -459,8 +460,8 @@ public class DataDashboardService {
             }
             List<Article> hisArticles = hisArticleMap.getOrDefault(article.getTitle(), new ArrayList<>());
             hisArticles = hisArticles.stream().filter(o -> o.getUpdateTime() < (article.getUpdateTime() - 3600 * 8)
-                            && (1 == o.getItemIndex() || 2 == o.getItemIndex())
-                            && !ScoreStrategy.hisContentLateFilter(o.getUpdateTime())).collect(Collectors.toList());
+                    && (1 == o.getItemIndex() || 2 == o.getItemIndex())
+                    && !ScoreStrategy.hisContentLateFilter(o.getUpdateTime())).collect(Collectors.toList());
             Integer readCount = 0;
             Double readAvgCount = 0.0;
             int firstCount = 0;
@@ -1317,4 +1318,131 @@ public class DataDashboardService {
         return result;
     }
 
+    /**
+     * XXL-JOB handler {@code scheduleExportFirstContentScore}: exports the first-content score
+     * sheet for the dates returned by {@code DateUtils.getBeforeDays(null, 1)}.
+     *
+     * @param param job parameter; not used
+     * @return {@link ReturnT#SUCCESS} once the export call has returned
+     */
+    @XxlJob("scheduleExportFirstContentScore")
+    public ReturnT<String> scheduleExportFirstContentScore(String param) {
+        exportFeishuFirstContentScore(DateUtils.getBeforeDays(null, 1), sheetToken, "XBFd16");
+        return ReturnT.SUCCESS;
+    }
+
+    /**
+     * Exports the first-content score sheet for the dates that
+     * {@code DateUtils.getBeforeDays(dateStr, 1)} yields for the given day.
+     *
+     * @param dateStr reference day handed to {@code DateUtils.getBeforeDays}
+     */
+    public void firstContentScoreExport(String dateStr) {
+        final String sheetId = "XBFd16";
+        exportFeishuFirstContentScore(DateUtils.getBeforeDays(dateStr, 1), sheetToken, sheetId);
+    }
+
+    /**
+     * Builds the first-content score rows for the given dates and sends them to a Feishu sheet.
+     * <p>
+     * Every {@link FirstContentScoreExport} becomes one row with one cell per declared field, in
+     * declaration order. Nothing is sent when there is no data.
+     *
+     * @param dateStrList dates to export
+     * @param sheetToken  token of the target spreadsheet
+     * @param sheetId     id of the target sheet inside that spreadsheet
+     */
+    private void exportFeishuFirstContentScore(List<String> dateStrList, String sheetToken, String sheetId) {
+        List<FirstContentScoreExport> scoreDataList = firstContentScoreData(dateStrList);
+        if (CollectionUtil.isEmpty(scoreDataList)) {
+            return;
+        }
+        int rowNum = scoreDataList.size();
+        Field[] fields = FirstContentScoreExport.class.getDeclaredFields();
+        // Accessibility belongs to the Field object, so enable it once instead of once per row.
+        for (Field field : fields) {
+            field.setAccessible(true);
+        }
+        List<List<Object>> rows = new ArrayList<>(rowNum);
+        for (FirstContentScoreExport datum : scoreDataList) {
+            List<Object> rowDatas = new ArrayList<>(fields.length);
+            rows.add(rowDatas);
+
+            for (Field field : fields) {
+                try {
+                    rowDatas.add(field.get(datum));
+                } catch (IllegalAccessException e) {
+                    log.error("获取值出错:{}", field.getName(), e);
+                    // Keep the row aligned with the column layout: an unreadable field becomes
+                    // an empty cell instead of shifting every later value one column left.
+                    rowDatas.add(null);
+                } catch (Exception e) {
+                    // Preserve the original exception as the cause so the stack trace survives.
+                    throw new RuntimeException(e.getMessage(), e);
+                }
+            }
+        }
+
+        // Number format per sheet column; the letters follow the field order of
+        // FirstContentScoreExport (F = score ... Q = last strategy score, T = readAvgRate).
+        List<Pair<String, String>> styles = Arrays
+                .asList(
+                        Pair.of("F", "#,##0.00"),
+                        Pair.of("G", "#,##0.00"),
+                        Pair.of("H", "#,##0.00"),
+                        Pair.of("I", "#,##0.00"),
+                        Pair.of("J", "#,##0.00"),
+                        Pair.of("K", "#,##0.00"),
+                        Pair.of("L", "#,##0.00"),
+                        Pair.of("M", "#,##0.00"),
+                        Pair.of("N", "#,##0.00"),
+                        Pair.of("O", "#,##0.00"),
+                        Pair.of("P", "#,##0.00"),
+                        Pair.of("Q", "#,##0.00"),
+                        Pair.of("T", "#,##0.00")
+                );
+
+        doSendFeishuSheet(dateStrList, sheetToken, sheetId, rowNum, rows, 2, styles);
+    }
+
+    /**
+     * Collects the score breakdown of the first-position articles published on the given dates.
+     * <p>
+     * A publish-sort log with index 1 is kept only when a type "9" article with item index 1 and
+     * the same title exists for the same account and day. Consecutive logs carrying the same
+     * title are emitted once. Each row also gets the article's view count and, when available,
+     * the account's position-1 read average nearest to the publish date.
+     *
+     * @param dateStrList days to export, formatted {@code yyyyMMdd}; the first entry is used as
+     *                    the lower bound for the article and account-average lookups
+     * @return rows sorted by date descending, then by ghId; empty when there is nothing to export
+     */
+    private List<FirstContentScoreExport> firstContentScoreData(List<String> dateStrList) {
+        List<FirstContentScoreExport> result = new ArrayList<>();
+        // dateStrList.get(0) below would fail on an empty list.
+        if (CollectionUtil.isEmpty(dateStrList)) {
+            return result;
+        }
+        List<PublishSortLog> sortLogList = publishSortLogRepository.findByDateStrIn(dateStrList);
+        sortLogList = sortLogList.stream().filter(o -> o.getIndex() == 1).collect(Collectors.toList());
+        // No first-position logs means no rows; also avoids an IN query with an empty ghId list.
+        if (sortLogList.isEmpty()) {
+            return result;
+        }
+        sortLogList.sort(Comparator.comparing(PublishSortLog::getGhId).thenComparing(PublishSortLog::getDateStr));
+        List<String> ghIds = sortLogList.stream().map(PublishSortLog::getGhId).distinct().collect(Collectors.toList());
+        long minTimestamp = DateUtils.dateStrToTimestamp(dateStrList.get(0), "yyyyMMdd");
+        List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, minTimestamp, "9");
+        articleList = articleList.stream().filter(o -> o.getItemIndex() == 1).collect(Collectors.toList());
+        // ghId -> (yyyyMMdd of update time -> article); a later article for the same day wins.
+        Map<String, Map<String, Article>> articleMap = articleList.stream().collect(
+                Collectors.groupingBy(Article::getGhId, Collectors.toMap(
+                        o -> DateUtils.timestampToYMDStr(o.getUpdateTime(), "yyyyMMdd"), o -> o,
+                        (existing, replacement) -> replacement)));
+        // Look back seven days so a nearest average can still be found for the first day.
+        String ymd = DateUtils.timestampToYMDStr(minTimestamp - 86400 * 7, "yyyy-MM-dd");
+        List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByUpdateTimeGreaterThanEqual(ymd);
+        // ghId -> (update time -> position-1 average). The merge function mirrors the article map
+        // above, so duplicate (ghId, updateTime) rows no longer abort the export.
+        Map<String, Map<String, AccountAvgInfo>> accountAvgInfoMap = accountAvgInfoList.stream()
+                .filter(o -> Objects.equals(o.getPosition(), "1")).collect(Collectors.groupingBy(AccountAvgInfo::getGhId,
+                        Collectors.toMap(AccountAvgInfo::getUpdateTime, o -> o,
+                                (existing, replacement) -> replacement)));
+
+        // Title of the most recently emitted row, used to drop consecutive duplicates.
+        String title = "";
+        for (PublishSortLog publishSortLog : sortLogList) {
+            Map<String, Article> dateArticleMap = articleMap.get(publishSortLog.getGhId());
+            if (Objects.isNull(dateArticleMap)) {
+                continue;
+            }
+            Article article = dateArticleMap.get(publishSortLog.getDateStr());
+            if (Objects.isNull(article) || !publishSortLog.getTitle().equals(article.getTitle())) {
+                continue;
+            }
+            if (publishSortLog.getTitle().equals(title)) {
+                continue;
+            }
+            FirstContentScoreExport item = new FirstContentScoreExport();
+            title = publishSortLog.getTitle();
+            item.setDateStr(publishSortLog.getDateStr());
+            item.setGhId(publishSortLog.getGhId());
+            item.setAccountName(publishSortLog.getAccountName());
+            item.setTitle(publishSortLog.getTitle());
+            item.setStrategy(publishSortLog.getStrategy());
+            item.setScore(Double.valueOf(publishSortLog.getScore()));
+            // A missing score map leaves the strategy cells empty instead of failing the export.
+            JSONObject scoreMap = JSONObject.parseObject(publishSortLog.getScoreMap());
+            if (Objects.nonNull(scoreMap)) {
+                fillFirstContentStrategyScores(item, scoreMap);
+            }
+            item.setReadCount(article.getShowViewCount());
+            Map<String, AccountAvgInfo> map = accountAvgInfoMap.get(article.getGhId());
+            if (Objects.nonNull(map)) {
+                List<String> avgMapDateList = new ArrayList<>(map.keySet());
+                String publishDate = DateUtils.findNearestDate(avgMapDateList,
+                        DateUtils.timestampToYMDStr(article.getUpdateTime(), "yyyy-MM-dd"), "yyyy-MM-dd");
+                AccountAvgInfo accountAvgInfo = map.get(publishDate);
+                if (Objects.nonNull(accountAvgInfo)) {
+                    Double readAvg = accountAvgInfo.getReadAvg();
+                    item.setReadAvg(readAvg);
+                    // Leave the ratio empty when the average is missing or zero; dividing would
+                    // throw or put Infinity/NaN into the sheet.
+                    if (Objects.nonNull(readAvg) && readAvg != 0) {
+                        item.setReadAvgRate(article.getShowViewCount() / readAvg);
+                    }
+                }
+            }
+            result.add(item);
+        }
+        result.sort(Comparator.comparing(FirstContentScoreExport::getDateStr).reversed()
+                .thenComparing(FirstContentScoreExport::getGhId));
+        return result;
+    }
+
+    /** Copies the per-strategy scores from a sort log's score map onto the export row. */
+    private void fillFirstContentStrategyScores(FirstContentScoreExport item, JSONObject scoreMap) {
+        item.setHisFissionFansRateRateStrategy(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy"));
+        item.setHisFissionAvgReadRateRateStrategy(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy"));
+        item.setPublishTimesStrategy(scoreMap.getDoubleValue("PublishTimesStrategy"));
+        item.setViewCountRateCorrelationStrategy(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy"));
+        item.setHisFissionAvgReadSumRateStrategy(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy"));
+        item.setHisFissionAvgReadRateCorrelationRateStrategy(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy"));
+        item.setHisFissionFansSumRateStrategy(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy"));
+        item.setSimilarityStrategy(scoreMap.getDoubleValue("SimilarityStrategy"));
+        item.setViewCountStrategy(scoreMap.getDoubleValue("ViewCountStrategy"));
+        item.setViewCountRateStrategy(scoreMap.getDoubleValue("ViewCountRateStrategy"));
+        item.setHisFissionDeWeightAvgReadSumRateStrategy(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy"));
+    }
+
 }

+ 128 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/XxlJobService.java

@@ -1,10 +1,22 @@
 package com.tzld.longarticle.recommend.server.service;
 
 import cn.hutool.core.collection.CollectionUtil;
+import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
 import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
+import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.NotPublishPlan;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.GetOffVideoCrawler;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.LongArticlesRootSourceId;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.LongArticlesVideo;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.GetOffVideoArticle;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesMatchVideo;
+import com.tzld.longarticle.recommend.server.repository.crawler.GetOffVideoCrawlerRepository;
+import com.tzld.longarticle.recommend.server.repository.crawler.LongArticlesRootSourceIdRepository;
+import com.tzld.longarticle.recommend.server.repository.crawler.LongArticlesVideoRepository;
+import com.tzld.longarticle.recommend.server.repository.longArticle.GetOffVideoArticleRepository;
+import com.tzld.longarticle.recommend.server.repository.longArticle.LongArticlesMatchVideoRepository;
 import com.tzld.longarticle.recommend.server.util.DateUtils;
 import com.tzld.longarticle.recommend.server.util.feishu.FeishuMessageSender;
 import com.xxl.job.core.biz.model.ReturnT;
@@ -25,6 +37,18 @@ public class XxlJobService {
 
     @Autowired
     private AigcBaseMapper aigcBaseMapper;
+    @Autowired
+    private CrawlerBaseMapper crawlerBaseMapper;
+    @Autowired
+    private GetOffVideoCrawlerRepository getOffVideoCrawlerRepository;
+    @Autowired
+    private LongArticlesVideoRepository longArticlesVideoRepository;
+    @Autowired
+    private GetOffVideoArticleRepository getOffVideoArticleRepository;
+    @Autowired
+    private LongArticlesMatchVideoRepository longArticlesMatchVideoRepository;
+    @Autowired
+    private LongArticlesRootSourceIdRepository longArticlesRootSourceIdRepository;
 
     @XxlJob("checkPublishPlan")
     public ReturnT<String> checkPublishPlan(String param) {
@@ -68,4 +92,108 @@ public class XxlJobService {
                         + "<at user_id=\"all\">所有人</at> ");
     }
 
+    /**
+     * XXL-Job handler that migrates crawler root source ids for a single day.
+     * <p>
+     * Loads the {@link GetOffVideoCrawler} rows returned by
+     * {@code getByPublishTimeBetween(dayStart, dayStart + 86400)}, collects their distinct
+     * trace ids, loads the matching {@link LongArticlesVideo} rows and hands each one to
+     * {@link #processCrawlerEachData(LongArticlesVideo)}.
+     *
+     * @param param optional day in {@code yyyyMMdd} format; when blank, the day before today is used
+     * @return {@link ReturnT#SUCCESS} when the run completes (including when there is nothing to
+     *         migrate), {@link ReturnT#FAIL} when an exception escapes the loading phase
+     */
+    @XxlJob("migrateCrawlerRootSourceId")
+    public ReturnT<String> migrateCrawlerRootSourceId(String param) {
+        try {
+            // NOTE(review): getBeforeDayStart returns epoch seconds; confirm that
+            // getStartOfDay(param, pattern) returns the same unit.
+            long timeStamp = DateUtils.getBeforeDayStart(1);
+            if (StringUtils.hasText(param)) {
+                timeStamp = DateUtils.getStartOfDay(param, "yyyyMMdd");
+            }
+            List<GetOffVideoCrawler> getOffVideoCrawlerList = getOffVideoCrawlerRepository.getByPublishTimeBetween(timeStamp, timeStamp + 86400);
+            // Drop null/blank trace ids so they never reach the IN clause.
+            List<String> traceIds = getOffVideoCrawlerList.stream()
+                    .map(GetOffVideoCrawler::getTraceId)
+                    .filter(traceId -> StringUtils.hasText(traceId))
+                    .distinct()
+                    .collect(Collectors.toList());
+            if (traceIds.isEmpty()) {
+                // An empty IN list is at best a wasted query and at worst invalid SQL.
+                log.info("migrateCrawlerRootSourceId no traceId found, timeStamp: {}", timeStamp);
+                return ReturnT.SUCCESS;
+            }
+            List<LongArticlesVideo> longArticlesVideoList = longArticlesVideoRepository.getByTraceIdIn(traceIds);
+            for (LongArticlesVideo longArticlesVideo : longArticlesVideoList) {
+                processCrawlerEachData(longArticlesVideo);
+            }
+        } catch (Exception e) {
+            log.error("migrateCrawlerRootSourceId exception: {}", e.getMessage(), e);
+            return ReturnT.FAIL;
+        }
+        return ReturnT.SUCCESS;
+    }
+
+    /**
+     * Processes each data tuple and updates the rootSourceId.
+     */
+    public void processCrawlerEachData(LongArticlesVideo longArticlesVideo) {
+        try {
+            String[] results = {longArticlesVideo.getResult1(), longArticlesVideo.getResult2(), longArticlesVideo.getResult3()};
+            List<LongArticlesRootSourceId> saveList = new ArrayList<>();
+            for (String result : results) {
+                if (result != null && !result.isEmpty()) {
+                    try {
+                        JSONObject jsonNode = JSONObject.parseObject(result);
+                        String productionPath = jsonNode.getString("productionPath");
+                        String rootSourceId = productionPath.split("rootSourceId%3D")[1];
+                        String videoId = productionPath.split("videos%3Fid%3D")[1].split("%26su%")[0];
+                        LongArticlesRootSourceId saveItem = new LongArticlesRootSourceId();
+                        saveItem.setRootSourceId(rootSourceId);
+                        saveItem.setAccountName(longArticlesVideo.getAccountName());
+                        saveItem.setGhId(longArticlesVideo.getGhId());
+                        saveItem.setArticleTitle(jsonNode.getString("productionName"));
+                        saveItem.setRequestTime(longArticlesVideo.getRequestTimeStamp());
+                        saveItem.setTraceId(longArticlesVideo.getTraceId());
+                        saveItem.setPushType(2);
+                        saveItem.setVideoId(Long.valueOf(videoId));
+                        saveList.add(saveItem);
+                    } catch (Exception e) {
+                        log.error("processCrawlerEachData LongArticlesRootSourceId saveError: {}", result, e);
+                    }
+                }
+            }
+            if (CollectionUtil.isNotEmpty(saveList)) {
+                crawlerBaseMapper.batchInsertLongArticlesRootSourceId(saveList);
+            }
+        } catch (Exception e) {
+            log.error("Error processCrawlerEachData: {}", JSONObject.toJSONString(longArticlesVideo), e);
+        }
+    }
+
+    /**
+     * XXL-Job handler that migrates article root source ids for a single day.
+     * <p>
+     * Loads the {@link GetOffVideoArticle} rows returned by
+     * {@code getByPublishTimeBetween(dayStart, dayStart + 86400)}, collects their distinct
+     * trace ids, loads the matching {@link LongArticlesMatchVideo} rows and hands each one to
+     * {@link #processArticleEachData(LongArticlesMatchVideo)}.
+     *
+     * @param param optional day in {@code yyyyMMdd} format; when blank, the day before today is used
+     * @return {@link ReturnT#SUCCESS} when the run completes (including when there is nothing to
+     *         migrate), {@link ReturnT#FAIL} when an exception escapes the loading phase
+     */
+    @XxlJob("migrateArticleRootSourceId")
+    public ReturnT<String> migrateArticleRootSourceId(String param) {
+        try {
+            // NOTE(review): getBeforeDayStart returns epoch seconds; confirm that
+            // getStartOfDay(param, pattern) returns the same unit.
+            long timeStamp = DateUtils.getBeforeDayStart(1);
+            if (StringUtils.hasText(param)) {
+                timeStamp = DateUtils.getStartOfDay(param, "yyyyMMdd");
+            }
+            List<GetOffVideoArticle> getOffVideoArticleList = getOffVideoArticleRepository.getByPublishTimeBetween(timeStamp, timeStamp + 86400);
+            // Drop null/blank trace ids so they never reach the IN clause.
+            List<String> traceIds = getOffVideoArticleList.stream()
+                    .map(GetOffVideoArticle::getTraceId)
+                    .filter(traceId -> StringUtils.hasText(traceId))
+                    .distinct()
+                    .collect(Collectors.toList());
+            if (traceIds.isEmpty()) {
+                // An empty IN list is at best a wasted query and at worst invalid SQL.
+                log.info("migrateArticleRootSourceId no traceId found, timeStamp: {}", timeStamp);
+                return ReturnT.SUCCESS;
+            }
+            List<LongArticlesMatchVideo> longArticlesMatchVideoList = longArticlesMatchVideoRepository.getByTraceIdIn(traceIds);
+            for (LongArticlesMatchVideo longArticlesMatchVideo : longArticlesMatchVideoList) {
+                processArticleEachData(longArticlesMatchVideo);
+            }
+        } catch (Exception e) {
+            log.error("migrateArticleRootSourceId exception: {}", e.getMessage(), e);
+            return ReturnT.FAIL;
+        }
+        return ReturnT.SUCCESS;
+    }
+
+    /**
+     * Builds {@link LongArticlesRootSourceId} rows from the JSON-array response of one
+     * {@link LongArticlesMatchVideo} record and batch-inserts them.
+     * <p>
+     * A record with a missing or empty response is skipped. Each array element is parsed on
+     * its own: an element that cannot be parsed is logged and skipped so that the remaining
+     * elements of the record are still saved, matching
+     * {@link #processCrawlerEachData(LongArticlesVideo)}. Any other failure is logged and
+     * swallowed, so this method does not throw.
+     *
+     * @param longArticlesMatchVideo the match-video record to extract rows from
+     */
+    public void processArticleEachData(LongArticlesMatchVideo longArticlesMatchVideo) {
+        try {
+            List<String> results = JSONArray.parseArray(longArticlesMatchVideo.getResponse(), String.class);
+            // parseArray yields null for a null response; nothing to migrate in that case.
+            if (CollectionUtil.isEmpty(results)) {
+                return;
+            }
+            List<LongArticlesRootSourceId> saveList = new ArrayList<>();
+            for (String result : results) {
+                try {
+                    JSONObject jsonNode = JSONObject.parseObject(result);
+                    if (jsonNode == null) {
+                        log.warn("processArticleEachData empty response item, traceId: {}", longArticlesMatchVideo.getTraceId());
+                        continue;
+                    }
+                    LongArticlesRootSourceId saveItem = new LongArticlesRootSourceId();
+                    saveItem.setRootSourceId(jsonNode.getString("rootSourceId"));
+                    saveItem.setAccountName(longArticlesMatchVideo.getAccountName());
+                    saveItem.setGhId(longArticlesMatchVideo.getGhId());
+                    saveItem.setRequestTime(longArticlesMatchVideo.getRequestTimestamp());
+                    saveItem.setTraceId(longArticlesMatchVideo.getTraceId());
+                    saveItem.setPushType(2);
+                    saveItem.setVideoId(jsonNode.getLong("videoId"));
+                    saveList.add(saveItem);
+                } catch (Exception e) {
+                    // Isolate the failure to this element instead of dropping the whole record.
+                    log.error("processArticleEachData LongArticlesRootSourceId saveError: {}", result, e);
+                }
+            }
+            if (CollectionUtil.isNotEmpty(saveList)) {
+                crawlerBaseMapper.batchInsertLongArticlesRootSourceId(saveList);
+            }
+        } catch (Exception e) {
+            log.error("Error processArticleEachData: {}", JSONObject.toJSONString(longArticlesMatchVideo), e);
+        }
+    }
+
 }

+ 10 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/util/DateUtils.java

@@ -215,4 +215,14 @@ public final class DateUtils {
         calendar.set(Calendar.MILLISECOND, 0);
         return calendar.getTime().getTime();
     }
+
+    public static Long getBeforeDayStart(int days) {
+        Calendar calendar = Calendar.getInstance();
+        calendar.set(Calendar.HOUR_OF_DAY, 0);
+        calendar.set(Calendar.MINUTE, 0);
+        calendar.set(Calendar.SECOND, 0);
+        calendar.set(Calendar.MILLISECOND, 0);
+        calendar.add(Calendar.DAY_OF_MONTH, -days);
+        return calendar.getTime().getTime() / 1000;
+    }
 }

+ 7 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/DataDashboardController.java

@@ -29,4 +29,11 @@ public class DataDashboardController {
         }).start();
     }
 
+    @GetMapping("/export/firstContentScore")
+    public void firstContentScoreExport(String dateStr) {
+        new Thread(() -> {
+            service.firstContentScoreExport(dateStr);
+        }).start();
+    }
+
 }

+ 10 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/XxlJobController.java

@@ -18,4 +18,14 @@ public class XxlJobController {
     public void checkPublishPlan() {
         service.checkPublishPlan(null);
     }
+
+    /**
+     * Manually triggers the crawler root-source-id migration job.
+     *
+     * @param dateStr forwarded unchanged as the job parameter
+     */
+    @GetMapping("/migrateCrawlerRootSourceId")
+    public void migrateCrawlerRootSourceId(String dateStr) {
+        // The value returned by the job is not propagated to the HTTP response.
+        service.migrateCrawlerRootSourceId(dateStr);
+    }
+
+    /**
+     * Manually triggers the article root-source-id migration job.
+     *
+     * @param dateStr forwarded unchanged as the job parameter
+     */
+    @GetMapping("/migrateArticleRootSourceId")
+    public void migrateArticleRootSourceId(String dateStr) {
+        // The value returned by the job is not propagated to the HTTP response.
+        service.migrateArticleRootSourceId(dateStr);
+    }
 }

+ 10 - 0
long-article-recommend-service/src/main/resources/mapper/crawler/CrawlerBaseMapper.xml

@@ -10,4 +10,14 @@
         </foreach>
     </insert>
 
+    <insert id="batchInsertLongArticlesRootSourceId"> <!-- Single multi-row INSERT: one VALUES tuple per element of the "list" parameter. -->
+        INSERT INTO long_articles_root_source_id (rootSourceId, accountName, ghId, articleTitle, requestTime, trace_id,
+                                                  push_type, video_id)
+        VALUES
+        <foreach collection="list" item="item" separator=",">
+            (#{item.rootSourceId}, #{item.accountName}, #{item.ghId}, #{item.articleTitle}, #{item.requestTime},
+             #{item.traceId}, #{item.pushType}, #{item.videoId})
+        </foreach>
+    </insert>
+
 </mapper>