ExtractorUtils.java 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. package examples.extractor;
  2. import examples.utils.SimilarityUtils;
  3. import java.time.Instant;
  4. import java.time.LocalDate;
  5. import java.time.LocalDateTime;
  6. import java.time.ZoneId;
  7. import java.time.format.DateTimeFormatter;
  8. import java.time.temporal.ChronoUnit;
  9. import java.util.ArrayList;
  10. import java.util.List;
  11. import java.util.Map;
  12. public class ExtractorUtils {
  13. public static Double division(String s1, String s2, Map<String, String> maps) {
  14. double rate = 0.0;
  15. if (maps.containsKey(s1) && maps.containsKey(s2)) {
  16. Double d1 = Double.valueOf(maps.get(s1));
  17. if (isDoubleEqualToZero(d1)) {
  18. return rate;
  19. }
  20. Double d2 = Double.valueOf(maps.get(s2));
  21. rate = d2 / d1;
  22. }
  23. return rate;
  24. }
  25. public static Double divisionDouble(Double d1, Double d2) {
  26. double rate = 0.0;
  27. if (isDoubleEqualToZero(d1)) {
  28. return rate;
  29. }
  30. rate = d2 / d1;
  31. return rate;
  32. }
  33. public static boolean isDoubleEqualToZero(double value) {
  34. final double epsilon = 1e-10; // 定义一个很小的误差范围
  35. // 判断value是否在误差范围内
  36. return Math.abs(value) < epsilon;
  37. }
  38. public static double calculateVariance(List<Double> numbers) {
  39. double average = numbers.stream()
  40. .mapToDouble(Double::doubleValue)
  41. .average()
  42. .orElse(0.0);
  43. double squaredDiffSum = numbers.stream()
  44. .mapToDouble(Double::doubleValue)
  45. .map(x -> Math.pow(x - average, 2))
  46. .average()
  47. .orElse(0.0);
  48. return squaredDiffSum;
  49. }
  50. public static double calculateAverage(List<Double> numbers) {
  51. if (numbers == null || numbers.isEmpty()) {
  52. return 0.0;
  53. }
  54. return numbers.stream()
  55. .mapToDouble(Number::doubleValue)
  56. .average()
  57. .orElse(0.0);
  58. }
  59. public static List<Double> calculateDifferences(List<Double> numbers) {
  60. List<Double> differences = new ArrayList<>();
  61. for (int i = 0; i < numbers.size() - 1; i++) {
  62. Double diff = 0.0;
  63. if (!isDoubleEqualToZero(numbers.get(i))) {
  64. diff = (numbers.get(i + 1) - numbers.get(i)) / numbers.get(i);
  65. }
  66. differences.add(diff);
  67. }
  68. return differences;
  69. }
  70. public static List<String> generateHourStrings(String timeString, int N) {
  71. LocalDateTime dateTime = LocalDateTime.parse(timeString, DateTimeFormatter.ofPattern("yyyyMMddHH"));
  72. List<String> hourStrings = new ArrayList<>();
  73. for (int i = 0; i < N; i++) {
  74. hourStrings.add(dateTime.minusHours(i).format(DateTimeFormatter.ofPattern("yyyyMMddHH")));
  75. }
  76. return hourStrings;
  77. }
  78. public static String subtractHours(String inputDateTime, int hoursToSubtract) {
  79. DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyyMMddHH");
  80. LocalDateTime dateTime = LocalDateTime.parse(inputDateTime, formatter);
  81. LocalDateTime subtractedDateTime = dateTime.minusHours(hoursToSubtract);
  82. return subtractedDateTime.format(formatter);
  83. }
  84. // 针对0-1的数字,进行分桶。
  85. public static Integer ceilLogRate(Double key) {
  86. double bucket = Math.ceil(
  87. Math.pow(key, 0.2) * 100
  88. );
  89. if (bucket > 300) {
  90. bucket = 300;
  91. }
  92. if (bucket < 0) {
  93. bucket = 0;
  94. }
  95. return (int) bucket;
  96. }
  97. // 针对大于1的数字,进行分桶。
  98. public static int bucketCnt(Double key) {
  99. long bucket = Math.round(Math.log((key * 10 + 1.0)) * 10);
  100. if (bucket > 300) {
  101. bucket = 300;
  102. }
  103. if (bucket < 0) {
  104. bucket = 0;
  105. }
  106. return (int) bucket;
  107. }
  108. public static int findInsertPosition(double[] sortedArray, double target) {
  109. int low = 0;
  110. int high = sortedArray.length - 1;
  111. while (low <= high) {
  112. int mid = low + (high - low) / 2;
  113. double midValue = sortedArray[mid];
  114. if (midValue < target) {
  115. low = mid + 1;
  116. } else if (midValue > target) {
  117. high = mid - 1;
  118. } else {
  119. // 找到相等的值,尝试在右侧寻找插入点
  120. while (mid < sortedArray.length - 1 && sortedArray[mid + 1] == target) {
  121. mid++;
  122. }
  123. return mid + 1; // 返回当前mid的下一个位置作为插入点
  124. }
  125. }
  126. return low; // 返回low作为插入点
  127. }
  128. public static Double[] funcC34567ForTagsNew(String tags, String title) {
  129. String[] tagsList = tags.split(",");
  130. int d1 = 0;
  131. List<String> d2 = new ArrayList<>();
  132. double d3 = 0.0;
  133. double d4 = 0.0;
  134. for (String tag : tagsList) {
  135. if (title.contains(tag)) {
  136. d1++;
  137. d2.add(tag);
  138. }
  139. double score = SimilarityUtils.word2VecSimilarity(tag, title);
  140. if (score > d3) {
  141. d3 = score;
  142. }
  143. d4 += score;
  144. }
  145. d4 = (tagsList.length > 0) ? d4 / tagsList.length : d4;
  146. // 使用数组来返回多个值
  147. Double[] result = {(double) d1, d3, d4};
  148. return result;
  149. }
  150. public static double reciprocal(double num) {
  151. if (num == 0) {
  152. return 0;
  153. }
  154. return 1.0 / (num + 1);
  155. }
  156. public static long getDaysBetween(long timestamp1, long timestamp2) {
  157. if (timestamp1 == 0 || timestamp2 == 0) {
  158. return 0;
  159. }
  160. Instant instant1 = Instant.ofEpochSecond(timestamp1);
  161. Instant instant2 = Instant.ofEpochSecond(timestamp2);
  162. LocalDate date1 = instant1.atZone(ZoneId.systemDefault()).toLocalDate();
  163. LocalDate date2 = instant2.atZone(ZoneId.systemDefault()).toLocalDate();
  164. return ChronoUnit.DAYS.between(date1, date2);
  165. }
  166. public static int getHourByTimestamp(long timestamp) {
  167. return LocalDateTime
  168. .ofInstant(Instant.ofEpochSecond(timestamp), ZoneId.systemDefault())
  169. .getHour() + 1;
  170. }
  171. public static int getDayOfWeekByTimestamp(long timestamp) {
  172. return LocalDateTime
  173. .ofInstant(Instant.ofEpochSecond(timestamp), ZoneId.systemDefault())
  174. .getDayOfWeek()
  175. .getValue();
  176. }
  177. public static void main(String[] args) {
  178. double[] sortedArray = {1.0, 2.0, 4.0, 4.0, 6.0};
  179. double target = 0.0;
  180. System.out.println(findInsertPosition(sortedArray, target));
  181. // System.out.println(ceilLogRate(0.0002));
  182. // System.out.println(ceilLogRate(0.01));
  183. // System.out.println(ceilLogRate(0.2));
  184. // System.out.println(ceilLogRate(4.));
  185. // System.out.println(bucketCnt(1.));
  186. // System.out.println(bucketCnt(20.));
  187. // System.out.println(bucketCnt(500.));
  188. // System.out.println(bucketCnt(50000.));
  189. // System.out.println(generateHourStrings("2024011603", 5));
  190. }
  191. }