|
|
@@ -26,8 +26,6 @@ import java.io.BufferedReader;
|
|
|
import java.io.IOException;
|
|
|
import java.io.InputStream;
|
|
|
import java.io.InputStreamReader;
|
|
|
-import java.math.BigDecimal;
|
|
|
-import java.math.RoundingMode;
|
|
|
import java.util.*;
|
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
|
import java.util.concurrent.CountDownLatch;
|
|
|
@@ -896,103 +894,28 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
}};
|
|
|
}
|
|
|
|
|
|
- /**
|
|
|
- * Precomputed cache of bucket-value strings, to avoid repeated calls to String.valueOf.
|
|
|
- * Bucket values usually range from 0.01 to 1.0 in steps of 0.01 (assuming at most 100 buckets).
|
|
|
- * Built with BigDecimal for precision; entries have trailing zeros stripped (plain notation).
|
|
|
- */
|
|
|
- private static final String[] BUCKET_VALUE_CACHE;
|
|
|
- private static final int BUCKET_CACHE_SIZE = 101; // indices 0-100 correspond to 0.00-1.00
|
|
|
-
|
|
|
- static {
|
|
|
- BUCKET_VALUE_CACHE = new String[BUCKET_CACHE_SIZE];
|
|
|
- for (int i = 0; i < BUCKET_CACHE_SIZE; i++) {
|
|
|
- // Use BigDecimal for precision: i / 100 at scale 6, rounded HALF_UP
|
|
|
- BigDecimal bd = BigDecimal.valueOf(i).divide(BigDecimal.valueOf(100), 6, RoundingMode.HALF_UP);
|
|
|
- BUCKET_VALUE_CACHE[i] = bd.stripTrailingZeros().toPlainString();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * Converts a bucket result (bucketIndex / bucketNum) to a string, preferring the precomputed cache.
|
|
|
- * @param bucketIndex bucket index (1-based)
|
|
|
- * @param bucketNum total number of buckets
|
|
|
- * @return string representation of the bucket value
|
|
|
- */
|
|
|
- private String bucketValueToString(int bucketIndex, double bucketNum) {
|
|
|
- // Compute the bucket value: bucketIndex / bucketNum
|
|
|
- // Use BigDecimal for precision (scale 8, rounded HALF_UP)
|
|
|
- BigDecimal value = BigDecimal.valueOf(bucketIndex).divide(BigDecimal.valueOf(bucketNum), 8, RoundingMode.HALF_UP);
|
|
|
-
|
|
|
- // Try the cache (applies when the value is an integer multiple of 0.01)
|
|
|
- double doubleValue = value.doubleValue();
|
|
|
- int cacheIndex = (int) Math.round(doubleValue * 100);
|
|
|
- if (cacheIndex >= 0 && cacheIndex < BUCKET_CACHE_SIZE) {
|
|
|
- // Verify that the cached value really matches (within a 1E-6 tolerance)
|
|
|
- double cachedValue = cacheIndex / 100.0;
|
|
|
- if (Math.abs(doubleValue - cachedValue) < 1E-6) {
|
|
|
- return BUCKET_VALUE_CACHE[cacheIndex];
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // Cache miss: build the string from the BigDecimal (trailing zeros stripped, plain notation)
|
|
|
- return value.stripTrailingZeros().toPlainString();
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * Leniently parses a double: returns 0.0 for null or empty input, otherwise delegates to Double.parseDouble.
|
|
|
- * Despite the name there is no separate fast path; a NumberFormatException also yields 0.0.
|
|
|
- */
|
|
|
- private double fastParseDouble(String s) {
|
|
|
- if (s == null || s.isEmpty()) {
|
|
|
- return 0.0;
|
|
|
- }
|
|
|
-
|
|
|
- try {
|
|
|
- // Parse directly with the standard library parser
|
|
|
- // (original author's note: most feature values are simple decimals such as "0.123", "1.5", etc.)
|
|
|
- return Double.parseDouble(s);
|
|
|
- } catch (NumberFormatException e) {
|
|
|
- return 0.0;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
private Map<String, String> featureBucket(Map<String, String> featureMap) {
|
|
|
// 使用 HashMap 替代 ConcurrentHashMap,分桶操作是单线程的
|
|
|
Map<String, String> newFeatureMap = new HashMap<>(featureMap.size());
|
|
|
for (Map.Entry<String, String> entry : featureMap.entrySet()) {
|
|
|
try {
|
|
|
String name = entry.getKey();
|
|
|
- String value = entry.getValue();
|
|
|
-
|
|
|
- // 稀疏特征直接复制
|
|
|
if (this.sparseFeatureSet.contains(name)) {
|
|
|
- if (value != null) {
|
|
|
- newFeatureMap.put(name, value);
|
|
|
+ if (entry.getValue() != null) {
|
|
|
+ newFeatureMap.put(name, entry.getValue());
|
|
|
}
|
|
|
continue;
|
|
|
}
|
|
|
-
|
|
|
- // 空值跳过
|
|
|
- if (value == null || value.isEmpty()) {
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- double score = fastParseDouble(value);
|
|
|
-
|
|
|
+ double score = Double.parseDouble(entry.getValue());
|
|
|
// 注意:0值、不在分桶文件中的特征,会被过滤掉。
|
|
|
if (score > 1E-8) {
|
|
|
- double[] buckets = this.bucketsMap.get(name);
|
|
|
- Double bucketNum = this.bucketsLen.get(name);
|
|
|
-
|
|
|
- if (buckets != null && bucketNum != null) {
|
|
|
- int position = ExtractorUtils.findInsertPosition(buckets, score);
|
|
|
- // 使用优化的字符串转换方法
|
|
|
- String scoreNewStr = bucketValueToString(position + 1, bucketNum);
|
|
|
- newFeatureMap.put(name, scoreNewStr);
|
|
|
+ if (this.bucketsMap.containsKey(name) && this.bucketsLen.containsKey(name)) {
|
|
|
+ double[] buckets = this.bucketsMap.get(name);
|
|
|
+ double bucketNum = this.bucketsLen.get(name);
|
|
|
+ Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0);
|
|
|
+ newFeatureMap.put(name, String.valueOf(scoreNew));
|
|
|
} else {
|
|
|
- // 不在分桶文件中的特征,保持原值
|
|
|
- newFeatureMap.put(name, value);
|
|
|
+ newFeatureMap.put(name, String.valueOf(score));
|
|
|
}
|
|
|
}
|
|
|
} catch (Exception e) {
|