# wxindex_trend.py (6.3 KB)
  1. """微信指数趋势计算工具。"""
  2. from __future__ import annotations
  3. import math
  4. from typing import Any
  5. MIN_TREND_POINTS = 4
  6. MAX_TREND_POINTS = 7
  7. UP_FIT_CHANGE_RATE = 0.04
  8. UP_WINDOW_CHANGE_RATE = 0.02
  9. DOWN_FIT_CHANGE_RATE = -0.04
  10. DOWN_WINDOW_CHANGE_RATE = -0.02
  11. HEAT_RISING_OVERALL_CHANGE_RATE = 0.15
  12. HEAT_RISING_WINDOW_CHANGE_RATE = 0.12
  13. HEAT_RISING_ADJACENT_UP_RATIO = 0.65
  14. HEAT_RISING_RECENT_DROP_RATE = -0.15
  15. HEAT_SPIKE_BASELINE_FLOOR = 50_000.0
  16. HEAT_SPIKE_RATIO = 2.5
  17. def _median(values: list[float]) -> float:
  18. if not values:
  19. return 0.0
  20. ordered = sorted(values)
  21. mid = len(ordered) // 2
  22. if len(ordered) % 2:
  23. return ordered[mid]
  24. return (ordered[mid - 1] + ordered[mid]) / 2
  25. def _extract_recent_scores(
  26. series: list[dict[str, Any]],
  27. *,
  28. max_points: int = MAX_TREND_POINTS,
  29. ) -> list[float]:
  30. scored_rows: list[tuple[str, int, float]] = []
  31. for index, row in enumerate(series):
  32. if not isinstance(row, dict):
  33. continue
  34. try:
  35. score = float(row.get("total_score"))
  36. except (TypeError, ValueError):
  37. continue
  38. if math.isnan(score) or score < 0:
  39. continue
  40. ymd = str(row.get("ymd") or "").strip()
  41. scored_rows.append((ymd, index, score))
  42. scored_rows.sort(key=lambda item: (item[0], item[1]))
  43. point_limit = max(max_points, 1)
  44. return [score for _, _, score in scored_rows[-point_limit:]]
  45. def _theil_sen_slope(values: list[float]) -> float:
  46. slopes: list[float] = []
  47. for start_index, start_value in enumerate(values):
  48. for end_index in range(start_index + 1, len(values)):
  49. slopes.append((values[end_index] - start_value) / (end_index - start_index))
  50. return _median(slopes)
  51. def extract_sorted_scores(
  52. series: list[dict[str, Any]],
  53. *,
  54. max_points: int | None = None,
  55. ) -> list[float]:
  56. point_limit = (
  57. max_points if max_points is not None and max_points > 0 else MAX_TREND_POINTS
  58. )
  59. return _extract_recent_scores(series, max_points=point_limit)
  60. def is_wxindex_rising_scores(
  61. scores: list[float],
  62. *,
  63. min_points: int = MIN_TREND_POINTS,
  64. ) -> bool:
  65. """判断热度序列是否呈持续上涨趋势(原流程 Theil-Sen 规则)。"""
  66. if len(scores) < min_points:
  67. return False
  68. log_scores = [math.log1p(score) for score in scores]
  69. slope = _theil_sen_slope(log_scores)
  70. fit_change_rate = math.expm1(slope * (len(log_scores) - 1))
  71. early_count = min(3, len(scores))
  72. late_count = min(3, len(scores))
  73. early_avg = sum(scores[:early_count]) / early_count
  74. late_avg = sum(scores[-late_count:]) / late_count
  75. window_change_rate = (late_avg - early_avg) / max(early_avg, 1.0)
  76. return (
  77. fit_change_rate >= UP_FIT_CHANGE_RATE
  78. and window_change_rate >= UP_WINDOW_CHANGE_RATE
  79. )
  80. def _has_significant_recent_drop(
  81. scores: list[float],
  82. *,
  83. drop_rate_threshold: float = HEAT_RISING_RECENT_DROP_RATE,
  84. ) -> bool:
  85. """最近一天热度相较昨天或前天是否出现大幅度下降。"""
  86. if len(scores) < 2:
  87. return False
  88. latest = scores[-1]
  89. prior_scores = [scores[-2]]
  90. if len(scores) >= 3:
  91. prior_scores.append(scores[-3])
  92. for prior in prior_scores:
  93. change_rate = (latest - prior) / max(prior, 1.0)
  94. if change_rate <= drop_rate_threshold:
  95. return True
  96. return False
  97. def is_wxindex_heat_rising_scores(
  98. scores: list[float],
  99. *,
  100. min_points: int = 7,
  101. overall_change_rate: float = HEAT_RISING_OVERALL_CHANGE_RATE,
  102. window_change_rate_threshold: float = HEAT_RISING_WINDOW_CHANGE_RATE,
  103. adjacent_up_ratio: float = HEAT_RISING_ADJACENT_UP_RATIO,
  104. recent_drop_rate_threshold: float = HEAT_RISING_RECENT_DROP_RATE,
  105. ) -> bool:
  106. """热度模式任务:判断区间内是否持续上涨。"""
  107. if len(scores) < min_points:
  108. return False
  109. first_score = scores[0]
  110. last_score = scores[-1]
  111. overall_change_rate_value = (last_score - first_score) / max(first_score, 1.0)
  112. if overall_change_rate_value < overall_change_rate:
  113. return False
  114. early_avg = sum(scores[:3]) / 3
  115. late_avg = sum(scores[-3:]) / 3
  116. window_change_rate = (late_avg - early_avg) / max(early_avg, 1.0)
  117. if window_change_rate < window_change_rate_threshold:
  118. return False
  119. if last_score <= first_score:
  120. return False
  121. if _has_significant_recent_drop(
  122. scores,
  123. drop_rate_threshold=recent_drop_rate_threshold,
  124. ):
  125. return False
  126. adjacent_total = len(scores) - 1
  127. if adjacent_total <= 0:
  128. return False
  129. adjacent_up_count = sum(
  130. 1 for index in range(adjacent_total) if scores[index + 1] > scores[index]
  131. )
  132. return adjacent_up_count / adjacent_total >= adjacent_up_ratio
  133. def is_wxindex_spike_scores(
  134. scores: list[float],
  135. *,
  136. spike_days: int = 3,
  137. min_points: int = 7,
  138. spike_ratio: float = HEAT_SPIKE_RATIO,
  139. baseline_floor: float = HEAT_SPIKE_BASELINE_FLOOR,
  140. ) -> bool:
  141. """判断最近 N 天热度是否相对前期突然暴涨。"""
  142. if len(scores) < min_points or spike_days <= 0 or len(scores) <= spike_days:
  143. return False
  144. baseline = scores[:-spike_days]
  145. recent = scores[-spike_days:]
  146. baseline_avg = sum(baseline) / len(baseline)
  147. recent_avg = sum(recent) / len(recent)
  148. effective_baseline = max(baseline_avg, baseline_floor)
  149. if recent_avg / effective_baseline < spike_ratio:
  150. return False
  151. baseline_max = max(baseline)
  152. return recent[-1] > baseline_max and recent_avg > baseline_avg
  153. def calc_wxindex_trend(series: list[dict[str, Any]]) -> str:
  154. """按最近 7 天整体走势计算趋势,避免被最后一天波动误导。"""
  155. scores = _extract_recent_scores(series)
  156. if len(scores) < MIN_TREND_POINTS:
  157. return "未知"
  158. if is_wxindex_rising_scores(scores, min_points=MIN_TREND_POINTS):
  159. return "上升"
  160. log_scores = [math.log1p(score) for score in scores]
  161. slope = _theil_sen_slope(log_scores)
  162. fit_change_rate = math.expm1(slope * (len(log_scores) - 1))
  163. early_avg = sum(scores[:3]) / 3
  164. late_avg = sum(scores[-3:]) / 3
  165. window_change_rate = (late_avg - early_avg) / max(early_avg, 1.0)
  166. if (
  167. fit_change_rate <= DOWN_FIT_CHANGE_RATE
  168. and window_change_rate <= DOWN_WINDOW_CHANGE_RATE
  169. ):
  170. return "下降"
  171. return "平稳"