|
@@ -1,27 +1,24 @@
|
|
|
package com.tzld.longarticle.recommend.server.service.recommend;
|
|
|
|
|
|
-import cn.hutool.core.io.resource.ResourceUtil;
|
|
|
-import com.alibaba.fastjson.JSONObject;
|
|
|
-import com.alibaba.fastjson.TypeReference;
|
|
|
-import com.tzld.longarticle.recommend.server.common.HttpPoolFactory;
|
|
|
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatSortStrategy;
|
|
|
import com.tzld.longarticle.recommend.server.model.vo.IdNameVO;
|
|
|
+import com.tzld.longarticle.recommend.server.model.vo.aigc.ProduceContentListItemVO;
|
|
|
+import com.tzld.longarticle.recommend.server.model.vo.aigc.ProducePlanDetailVO;
|
|
|
+import com.tzld.longarticle.recommend.server.model.vo.aigc.ProducePlanInputSourceParam;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.aigc.AIGCCrawlerPlanSaveService;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.aigc.AIGCProduceContentListService;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.aigc.AIGCProducePlanDetailService;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.aigc.AIGCProducePlanSaveService;
|
|
|
import com.tzld.longarticle.recommend.server.repository.longArticle.DatastatSortStrategyRepository;
|
|
|
import com.tzld.longarticle.recommend.server.util.DateUtils;
|
|
|
import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
-import org.apache.http.HttpEntity;
|
|
|
-import org.apache.http.StatusLine;
|
|
|
-import org.apache.http.client.methods.CloseableHttpResponse;
|
|
|
-import org.apache.http.client.methods.HttpPost;
|
|
|
-import org.apache.http.entity.StringEntity;
|
|
|
-import org.apache.http.impl.client.CloseableHttpClient;
|
|
|
-import org.apache.http.util.EntityUtils;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.util.StringUtils;
|
|
|
|
|
|
import java.net.URLDecoder;
|
|
|
-import java.nio.charset.StandardCharsets;
|
|
|
import java.util.*;
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
@@ -31,27 +28,33 @@ public class ArticlePromotionService {
|
|
|
|
|
|
@Autowired
|
|
|
DatastatSortStrategyRepository datastatSortStrategyRepository;
|
|
|
+ @Autowired
|
|
|
+ AIGCCrawlerPlanSaveService aigcCrawlerPlanSaveService;
|
|
|
+ @Autowired
|
|
|
+ AIGCProducePlanDetailService aigcProducePlanDetailService;
|
|
|
+ @Autowired
|
|
|
+ AIGCProducePlanSaveService aigcProducePlanSaveService;
|
|
|
+ @Autowired
|
|
|
+ AIGCProduceContentListService aigcProduceContentListService;
|
|
|
+
|
|
|
+ @ApolloJsonValue("${articlePromotionProduceConfig:{}}")
|
|
|
+ private Map<String, Map<String, Map<String, String>>> produceConfig;
|
|
|
|
|
|
- private final CloseableHttpClient client = HttpPoolFactory.aigcPool();
|
|
|
- private static List<String> contentPoolType = Arrays.asList("autoArticlePoolLevel1", "autoArticlePoolLevel3", "autoArticlePoolLevel4");
|
|
|
+ private final List<String> contentPoolType = Arrays.asList("autoArticlePoolLevel1", "autoArticlePoolLevel3", "autoArticlePoolLevel4");
|
|
|
|
|
|
public void articlePromotion(String pos, String way, String accountNickName, String tag,
|
|
|
Integer viewCountFilter, Double viewCountRateFilter, List<Integer> positionFilter) {
|
|
|
String today = DateUtils.getCurrentDateStr("yyyyMMdd");
|
|
|
-
|
|
|
// 获取内部表现
|
|
|
List<DatastatSortStrategy> list = datastatSortStrategyRepository.getByViewCountGreaterThanEqualAndReadRateGreaterThanEqualAndPositionIn(
|
|
|
viewCountFilter, viewCountRateFilter, positionFilter);
|
|
|
log.info("优质{}文章数量: {}", accountNickName, list.size());
|
|
|
-
|
|
|
List<DatastatSortStrategy> distinct = filterSameTitle(list);
|
|
|
distinct.sort(Comparator.comparing(DatastatSortStrategy::getDateStr, Comparator.reverseOrder()));
|
|
|
log.info("优质{}文章数量(去重后): {}", accountNickName, distinct.size());
|
|
|
-
|
|
|
addUrlListToAccount(accountNickName, distinct, pos, way, today, tag);
|
|
|
}
|
|
|
|
|
|
-
|
|
|
private List<DatastatSortStrategy> filterSameTitle(List<DatastatSortStrategy> list) {
|
|
|
List<DatastatSortStrategy> result = new ArrayList<>();
|
|
|
List<String> titles = new ArrayList<>();
|
|
@@ -71,148 +74,116 @@ public class ArticlePromotionService {
|
|
|
private void addUrlListToAccount(String accountNickName, List<DatastatSortStrategy> list, String pos, String way,
|
|
|
String today, String tag) {
|
|
|
List<String> urlList = list.stream().map(DatastatSortStrategy::getLink).collect(Collectors.toList());
|
|
|
- // 获取生产配置
|
|
|
- Map<String, Map<String, Map<String, String>>> produceConfig = getProduceConfig();
|
|
|
if (!produceConfig.containsKey(accountNickName)) {
|
|
|
log.info("account_nickname not in produceConfig: " + accountNickName);
|
|
|
String planName = String.format("%d_%s_%s_%s【%s】_%s", list.size(), today, accountNickName, pos, way, today);
|
|
|
- createArticleUrlPlan(planName, urlList, tag);
|
|
|
+ aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, urlList, tag);
|
|
|
return;
|
|
|
}
|
|
|
-
|
|
|
String produceId = produceConfig.get(accountNickName).get(pos).get(way).trim();
|
|
|
- List<Map<String, String>> contentList = getProduceContentList(accountNickName, pos, way);
|
|
|
-
|
|
|
+ List<ProduceContentListItemVO> contentList = getProduceContentList(accountNickName, pos, way);
|
|
|
// 获取已访问的标题和URL
|
|
|
List<String> visitedTitleList = contentList.stream()
|
|
|
- .flatMap(content -> Arrays.stream(new String[]{content.get("referContentTitle"), content.get("title")}))
|
|
|
- .distinct()
|
|
|
- .collect(Collectors.toList());
|
|
|
-
|
|
|
- Set<String> visitedUrlIdList = contentList.stream()
|
|
|
- .map(content -> getUrlId(content.get("referContentLink")))
|
|
|
+ .flatMap(content -> Arrays.stream(new String[]{content.getReferContentTitle(), content.getTitle()}))
|
|
|
+ .distinct().collect(Collectors.toList());
|
|
|
+ Set<String> visitedUrlIdList = contentList.stream().map(content -> getUrlId(content.getReferContentLink()))
|
|
|
.collect(Collectors.toSet());
|
|
|
|
|
|
- List<String> thisUrlIdList = urlList.stream()
|
|
|
- .map(this::getUrlId)
|
|
|
- .collect(Collectors.toList());
|
|
|
-
|
|
|
// 筛选URL和标题
|
|
|
List<String> filterUrlList = new ArrayList<>();
|
|
|
- List<String> filterTitleList = new ArrayList<>();
|
|
|
- for (int i = 0; i < list.size(); i++) {
|
|
|
- DatastatSortStrategy item = list.get(i);
|
|
|
+ for (DatastatSortStrategy item : list) {
|
|
|
String url = item.getLink();
|
|
|
- String urlId = thisUrlIdList.get(i);
|
|
|
+ String urlId = getUrlId(item.getLink());
|
|
|
String title = item.getTitle();
|
|
|
String wxSn = item.getWxSn();
|
|
|
-
|
|
|
if (visitedUrlIdList.contains(urlId)) {
|
|
|
continue;
|
|
|
}
|
|
|
if (TitleSimilarCheckUtil.isDuplicateContent(title, visitedTitleList, TitleSimilarCheckUtil.ARTICLE_PROMOTION_THRESHOLD)) {
|
|
|
continue;
|
|
|
}
|
|
|
-
|
|
|
filterUrlList.add(url);
|
|
|
- filterTitleList.add(title);
|
|
|
-
|
|
|
- // 调用爬虫 detail 接口并保存数据
|
|
|
+ // todo 调用爬虫 detail 接口并保存数据
|
|
|
Map<String, String> detail = getArticleDetail(url);
|
|
|
if (detail != null) {
|
|
|
String level = pos.equals("【1】") ? contentPoolType.get(0) : contentPoolType.get(1);
|
|
|
saveArticlePoolPromotionSource(detail.get("channel_content_id"), wxSn, title, level);
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
if (filterUrlList.isEmpty()) {
|
|
|
log.info("url_list empty: " + accountNickName + ", " + pos + ", " + way);
|
|
|
return;
|
|
|
}
|
|
|
-
|
|
|
int urlLen = filterUrlList.size();
|
|
|
String planName = String.format("%d_%s_%s_%s【%s】_%s", urlLen, today, accountNickName, pos, way, today);
|
|
|
log.info("url_len: " + list.size() + ", " + urlLen);
|
|
|
-
|
|
|
- IdNameVO<String> planInfo = createArticleUrlPlan(planName, filterUrlList, tag);
|
|
|
- if (produceId == null || produceId.isEmpty()) {
|
|
|
+ IdNameVO<String> planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, filterUrlList, tag);
|
|
|
+ if (produceId.isEmpty()) {
|
|
|
log.info("{}, {}, produce plan not exist: {}, {}, {}", planInfo.getName(), planInfo.getId(), accountNickName, pos, way);
|
|
|
return;
|
|
|
}
|
|
|
-
|
|
|
articleAddDependPlan(produceId, planInfo.getId(), planInfo.getName());
|
|
|
}
|
|
|
|
|
|
- private Map<String, Map<String, Map<String, String>>> getProduceConfig() {
|
|
|
- // 返回生产配置的逻辑
|
|
|
- String json = ResourceUtil.readUtf8Str("file/ProduceConfig.json");
|
|
|
- return JSONObject.parseObject(json,
|
|
|
- new TypeReference<Map<String, Map<String, Map<String, String>>>>() {
|
|
|
- });
|
|
|
+ private List<ProduceContentListItemVO> getProduceContentList(String accountNickName, String pos, String way) {
|
|
|
+ List<String> planIdList = getProducePlanIdList(accountNickName, pos, way);
|
|
|
+ Map<String, Object> contentData = getProduceContentListByPlanIdList(planIdList);
|
|
|
+ return (List<ProduceContentListItemVO>) contentData.get("contentList");
|
|
|
}
|
|
|
|
|
|
- private IdNameVO<String> createArticleUrlPlan(String planName, List<String> urlList, String tag) {
|
|
|
- IdNameVO<String> result = new IdNameVO<>();
|
|
|
- if (urlList == null || urlList.isEmpty()) {
|
|
|
- return null;
|
|
|
+ public List<String> getProducePlanIdList(String accountNickname, String pos, String way) {
|
|
|
+ List<String> res = new ArrayList<>();
|
|
|
+ if (!produceConfig.containsKey(accountNickname)) {
|
|
|
+ return res;
|
|
|
}
|
|
|
-
|
|
|
- // 从配置中读取模板文件路径
|
|
|
- String templateFile = ResourceUtil.readUtf8Str("file/长文_抓取计划_根据url列表抓取.json");
|
|
|
- if (templateFile == null) {
|
|
|
- return null;
|
|
|
+ Map<String, Map<String, String>> accountConfig = produceConfig.get(accountNickname);
|
|
|
+ for (String posKey : accountConfig.keySet()) {
|
|
|
+ if (pos != null && !pos.equals(posKey)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ Map<String, String> posConfig = accountConfig.get(posKey);
|
|
|
+ for (String wayKey : posConfig.keySet()) {
|
|
|
+ if (way != null && !way.equals(wayKey)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ String planId = posConfig.get(wayKey).trim();
|
|
|
+ if (!planId.isEmpty()) {
|
|
|
+ res.add(planId);
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
+ return res;
|
|
|
+ }
|
|
|
|
|
|
- // 读取模板文件内容
|
|
|
- JSONObject template = JSONObject.parseObject(templateFile);
|
|
|
-
|
|
|
- String apiUrl = template.getString("api_url");
|
|
|
- JSONObject data = template.getJSONObject("data");
|
|
|
-
|
|
|
- // 设置请求参数
|
|
|
- JSONObject params = data.getJSONObject("params");
|
|
|
- params.put("name", planName);
|
|
|
- if (tag != null) {
|
|
|
- params.put("planTag", tag);
|
|
|
+ public Map<String, Object> getProduceContentListByPlanIdList(List<String> planIdList) {
|
|
|
+ Map<String, Object> result = new HashMap<>();
|
|
|
+ if (planIdList.isEmpty()) {
|
|
|
+ System.out.println("getProduceContentListByPlanIdList: planIdList empty");
|
|
|
+ result.put("contentList", new ArrayList<>());
|
|
|
+ result.put("totalCnt", 0);
|
|
|
+ return result;
|
|
|
}
|
|
|
- params.put("inputModeValues", urlList);
|
|
|
-
|
|
|
- // 将数据转为 JSON 格式
|
|
|
- String requestData = data.toString();
|
|
|
- try {
|
|
|
- HttpPost httpPost = new HttpPost(apiUrl);
|
|
|
- StringEntity stringEntity = new StringEntity(requestData, StandardCharsets.UTF_8);
|
|
|
- httpPost.setHeader("Content-Type", "application/json;charset=UTF-8");
|
|
|
- httpPost.setEntity(stringEntity);
|
|
|
- CloseableHttpResponse response = client.execute(httpPost);
|
|
|
- StatusLine statusLine = response.getStatusLine();
|
|
|
- if (statusLine.getStatusCode() == 200) {
|
|
|
- HttpEntity responseEntity = response.getEntity();
|
|
|
- if (Objects.nonNull(responseEntity)) {
|
|
|
- String responseBody = EntityUtils.toString(responseEntity, "UTF-8");
|
|
|
- JSONObject jsonObject = JSONObject.parseObject(responseBody);
|
|
|
- if (jsonObject.getInteger("code") == 0) {
|
|
|
- JSONObject responseData = jsonObject.getJSONObject("data");
|
|
|
- String planId = responseData.getString("id");
|
|
|
- String returnedPlanName = responseData.getString("name");
|
|
|
- log.info("plan_name: {}, plan_id: {}", returnedPlanName, planId);
|
|
|
- result.setId(planId);
|
|
|
- result.setName(returnedPlanName);
|
|
|
- }
|
|
|
- }
|
|
|
+ int pageSize = 500;
|
|
|
+ List<Integer> produceStatus = Arrays.asList(1,2,3,4,5,6);
|
|
|
+ Map<String, Object> rawData = aigcProduceContentListService.list(planIdList, 1, 1, produceStatus);
|
|
|
+ int totalCnt = (int) rawData.get("totalCnt");
|
|
|
+ int pageNumMax = totalCnt / pageSize;
|
|
|
+ List<ProduceContentListItemVO> allContent = new ArrayList<>();
|
|
|
+ for (int i = 0; i <= pageNumMax; i++) {
|
|
|
+ Map<String, Object> pageData = aigcProduceContentListService.list(planIdList, i + 1, pageSize, produceStatus);
|
|
|
+ allContent.addAll((Collection<? extends ProduceContentListItemVO>) pageData.get("contentList"));
|
|
|
+ }
|
|
|
+ List<ProduceContentListItemVO> filteredContent = new ArrayList<>();
|
|
|
+ for (ProduceContentListItemVO content : allContent) {
|
|
|
+ if (StringUtils.hasText(content.getTitle())) {
|
|
|
+ filteredContent.add(content);
|
|
|
}
|
|
|
- } catch (Exception e) {
|
|
|
- log.error("createArticleUrlPlan error", e);
|
|
|
}
|
|
|
-
|
|
|
+ result.put("contentList", filteredContent);
|
|
|
+ result.put("totalCnt", filteredContent.size());
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- private List<Map<String, String>> getProduceContentList(String accountNickName, String pos, String way) {
|
|
|
- // 返回生产内容列表的逻辑
|
|
|
- return new ArrayList<>();
|
|
|
- }
|
|
|
-
|
|
|
public String getUrlId(String url) {
|
|
|
if (url == null) {
|
|
|
return null;
|
|
@@ -231,7 +202,6 @@ public class ArticlePromotionService {
|
|
|
String sn = params.get("sn");
|
|
|
String mid = params.get("mid");
|
|
|
String idx = params.get("idx");
|
|
|
-
|
|
|
if (biz != null && sn != null && mid != null && idx != null) {
|
|
|
return String.format("biz=%s_mid=%s_idx=%s_sn=%s", biz, mid, idx, sn);
|
|
|
}
|
|
@@ -243,7 +213,7 @@ public class ArticlePromotionService {
|
|
|
}
|
|
|
|
|
|
// 辅助方法:解析查询参数
|
|
|
- private static Map<String, String> parseQueryString(String url) {
|
|
|
+ private Map<String, String> parseQueryString(String url) {
|
|
|
// 示例解析逻辑
|
|
|
Map<String, String> params = new java.util.HashMap<>();
|
|
|
if (url.contains("?")) {
|
|
@@ -269,9 +239,24 @@ public class ArticlePromotionService {
|
|
|
}
|
|
|
|
|
|
private void articleAddDependPlan(String produceId, String planId, String planName) {
|
|
|
- // 添加依赖计划的逻辑
|
|
|
- log.info("Added dependency: " + produceId + " -> " + planId + " (" + planName + ")");
|
|
|
+ // 获取生产计划的详细信息
|
|
|
+ ProducePlanDetailVO detail = aigcProducePlanDetailService.articleGetProducePlanDetail(produceId);
|
|
|
+ if (detail == null) {
|
|
|
+ log.info("Failed to fetch produce plan detail.");
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ // 获取依赖计划 ID 列表
|
|
|
+ List<ProducePlanInputSourceParam> inputSources = detail.getInputSourceGroups().get(0).getInputSources();
|
|
|
+ List<String> dependPlanIds = new ArrayList<>();
|
|
|
+ for (ProducePlanInputSourceParam inputSource : inputSources) {
|
|
|
+ dependPlanIds.add(inputSource.getInputSourceValue());
|
|
|
+ }
|
|
|
+ // 如果计划 ID 已存在,直接返回
|
|
|
+ if (dependPlanIds.contains(planId)) {
|
|
|
+ log.info("depend_plan_id exist: {}", planId);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ aigcProducePlanSaveService.save(planName, planId, detail);
|
|
|
}
|
|
|
|
|
|
-
|
|
|
}
|