|
@@ -28,6 +28,35 @@ def get_fetch_start_ymd_from_event(
|
|
|
return start_date.strftime("%Y%m%d")
|
|
return start_date.strftime("%Y%m%d")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_fetch_end_ymd_from_event(
    event_created_at: datetime,
    *,
    forward_days: int = WXINDEX_WORDS_UPDATE_WINDOW_DAYS,
) -> str:
    """Return the right edge of the data window as a ``yyyymmdd`` string.

    The edge is the calendar date of the event's creation time (after it has
    been passed through ``normalize_event_created_at``) shifted forward by
    ``forward_days`` days.
    """
    created_on = normalize_event_created_at(event_created_at).date()
    window_close = created_on + timedelta(days=forward_days)
    return window_close.strftime("%Y%m%d")
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def get_fetch_ymd_bounds_from_event(
    event_created_at: datetime,
    *,
    lookback_days: int = WXINDEX_WORDS_LOOKBACK_DAYS,
    forward_days: int = WXINDEX_WORDS_UPDATE_WINDOW_DAYS,
) -> tuple[str, str]:
    """Return the ``(start, end)`` bounds of the fetch window for an event.

    The start comes from ``get_fetch_start_ymd_from_event`` (using
    ``lookback_days``) and the end from ``get_fetch_end_ymd_from_event``
    (using ``forward_days``); both are returned exactly as those helpers
    produce them.
    """
    window_start = get_fetch_start_ymd_from_event(
        event_created_at,
        lookback_days=lookback_days,
    )
    window_end = get_fetch_end_ymd_from_event(
        event_created_at,
        forward_days=forward_days,
    )
    return window_start, window_end
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def get_word_data_window_ymd_bounds(
|
|
def get_word_data_window_ymd_bounds(
|
|
|
event_created_at: datetime,
|
|
event_created_at: datetime,
|
|
|
*,
|
|
*,
|
|
@@ -168,6 +197,70 @@ def word_meets_max_score_threshold(
|
|
|
return max_score > min_max_score
|
|
return max_score > min_max_score
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def filter_scores_in_ymd_window(
    scores: list[dict[str, Any]],
    *,
    start_ymd: str,
    end_ymd: str,
) -> list[dict[str, Any]]:
    """Keep the score rows dated inside ``[start_ymd, end_ymd]``, oldest first.

    A row's date is read from its ``"ymd"`` key, falling back to ``"dt"``,
    then converted to ``str`` and stripped. Dates are compared as plain
    strings, so the bounds and the row dates are expected to share one
    fixed-width format (the callers in this file use ``yyyymmdd``).

    Args:
        scores: Candidate rows; entries that are not dicts are ignored.
        start_ymd: Inclusive lower bound.
        end_ymd: Inclusive upper bound.

    Returns:
        The original row objects (not copies) whose date falls in the window,
        sorted ascending by date. An empty list when either bound is blank.
    """
    lower = str(start_ymd or "").strip()
    upper = str(end_ymd or "").strip()
    if not lower or not upper:
        return []

    # Pair each surviving row with its normalised date so that the ordering
    # uses exactly the same key as the window test. Sorting on the raw value
    # would misplace rows whose date carries surrounding whitespace.
    dated_rows: list[tuple[str, dict[str, Any]]] = []
    for row in scores:
        if not isinstance(row, dict):
            continue
        day = str(row.get("ymd") or row.get("dt") or "").strip()
        if day and lower <= day <= upper:
            dated_rows.append((day, row))

    # Sort on the date only; the sort is stable, so rows sharing a date keep
    # their input order.
    dated_rows.sort(key=lambda pair: pair[0])
    return [row for _, row in dated_rows]
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def word_has_high_score_in_window(
    scores: list[dict[str, Any]],
    *,
    start_ymd: str,
    end_ymd: str,
    min_score: float = WXINDEX_WORDS_MIN_MAX_SCORE,
) -> bool:
    """Report whether any in-window row has a WeChat index strictly above ``min_score``.

    Rows are first narrowed with ``filter_scores_in_ymd_window``. A row whose
    ``"total_score"`` is missing or cannot be converted to ``float`` is
    ignored rather than treated as an error.
    """

    def numeric_score(row: dict[str, Any]) -> float | None:
        # Only the conversion is guarded; the threshold comparison happens
        # outside so it is never silently swallowed.
        try:
            return float(row["total_score"])
        except (TypeError, ValueError, KeyError):
            return None

    in_window = filter_scores_in_ymd_window(
        scores,
        start_ymd=start_ymd,
        end_ymd=end_ymd,
    )
    for row in in_window:
        value = numeric_score(row)
        if value is not None and value > min_score:
            return True
    return False
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+def merge_wxindex_score_series(
|
|
|
|
|
+ *series_list: list[dict[str, Any]],
|
|
|
|
|
+) -> list[dict[str, Any]]:
|
|
|
|
|
+ merged: dict[str, dict[str, Any]] = {}
|
|
|
|
|
+ for series in series_list:
|
|
|
|
|
+ for item in series:
|
|
|
|
|
+ if not isinstance(item, dict):
|
|
|
|
|
+ continue
|
|
|
|
|
+ ymd = str(item.get("ymd") or item.get("dt") or "").strip()
|
|
|
|
|
+ if not ymd:
|
|
|
|
|
+ continue
|
|
|
|
|
+ try:
|
|
|
|
|
+ total_score = float(item["total_score"])
|
|
|
|
|
+ except (TypeError, ValueError, KeyError):
|
|
|
|
|
+ continue
|
|
|
|
|
+ merged[ymd] = {"ymd": ymd, "total_score": total_score}
|
|
|
|
|
+ return sorted(merged.values(), key=lambda row: row["ymd"])
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def get_word_score_bounds(
|
|
def get_word_score_bounds(
|
|
|
scores: list[dict[str, Any]],
|
|
scores: list[dict[str, Any]],
|
|
|
) -> tuple[str | None, str | None]:
|
|
) -> tuple[str | None, str | None]:
|
|
@@ -362,6 +455,152 @@ def refresh_stale_wxindex_words(
|
|
|
return summary
|
|
return summary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def sync_wxindex_words_from_meta(
    repository: HotContentRepository,
    api_client: JsonApiClient,
    api_url: str,
    *,
    end_ymd: str | None = None,
    dry_run: bool = False,
    verbose: bool = False,
) -> dict[str, Any]:
    """
    Synchronise hot_content_wxindex_words against hot_content_wxindex_word_meta.

    1. Remove words that have no meta entry.
    2. Remove dates outside the window [fetch_start_ymd, fetch_end_ymd].
    3. Back-fill dates missing inside the window (including historical
       stretches where fetch_start lies before yesterday, e.g. before 20260615).

    With ``dry_run`` the two removal steps only count the affected rows and
    no API call or save is made; words that would be fetched are still
    tallied under ``"refreshed"``. ``verbose`` prints one line per word.

    Returns a summary dict of counters describing what was (or would be) done.
    """
    target_end = end_ymd or get_wxindex_end_ymd()

    # Cleanup phase: counting in a dry run, deleting otherwise.
    if dry_run:
        orphan_rows = repository.count_wxindex_words_without_meta()
        outside_rows = repository.count_wxindex_words_outside_event_window()
    else:
        orphan_rows = repository.delete_wxindex_words_without_meta()
        outside_rows = repository.delete_wxindex_words_outside_event_window()

    meta_rows = repository.list_all_wxindex_word_meta()
    summary: dict[str, Any] = {
        "target_end_ymd": target_end,
        "meta_count": len(meta_rows),
        "deleted_without_meta_rows": orphan_rows,
        "deleted_outside_window_rows": outside_rows,
        "words_need_refresh": 0,
        "refreshed": 0,
        "inserted_rows": 0,
        "skipped_rows": 0,
        "fetch_failed": 0,
        "api_empty": 0,
        "no_new_range": 0,
        "dry_run": dry_run,
    }

    for entry in meta_rows:
        name = str(entry.get("name") or "").strip()
        fetch_start = str(entry.get("fetch_start_ymd") or "").strip()
        fetch_end = str(entry.get("fetch_end_ymd") or "").strip()
        # Incomplete meta rows are skipped without touching any counter.
        if not (name and fetch_start and fetch_end):
            continue

        # Never ask the API for dates beyond the overall target end.
        api_end = min(fetch_end, target_end)
        if fetch_start > api_end:
            summary["no_new_range"] += 1
            if verbose:
                print(
                    f"skip out-of-range word={name} "
                    f"window={fetch_start}~{fetch_end} api_end={api_end}"
                )
            continue

        existing = repository.list_wxindex_word_scores(name)
        gap = get_supplement_fetch_range(
            existing,
            end_ymd=api_end,
            start_ymd=fetch_start,
        )
        if gap is None:
            summary["no_new_range"] += 1
            if verbose:
                print(f"skip complete word={name} window={fetch_start}~{fetch_end}")
            continue

        summary["words_need_refresh"] += 1
        start_ymd, range_end = gap
        if start_ymd > range_end:
            # NOTE(review): this word has already been counted in
            # "words_need_refresh" just above, so it lands in both counters.
            summary["no_new_range"] += 1
            if verbose:
                print(f"skip up-to-date word={name}")
            continue

        if dry_run:
            summary["refreshed"] += 1
            if verbose:
                print(
                    f"[dry-run] would fetch word={name} "
                    f"{start_ymd}->{range_end} "
                    f"save_window={fetch_start}~{fetch_end}"
                )
            continue

        # Fetch + save are best-effort per word: any failure is tallied and
        # the loop moves on to the next word.
        try:
            fetched = fetch_wxindex_scores(
                api_client,
                api_url,
                keyword=name,
                start_ymd=start_ymd,
                end_ymd=range_end,
            )
            # Only rows inside the word's own window are ever persisted.
            window_scores = filter_scores_in_ymd_window(
                fetched,
                start_ymd=fetch_start,
                end_ymd=fetch_end,
            )
            if not window_scores:
                summary["api_empty"] += 1
                if verbose:
                    print(
                        f"api empty word={name} fetch={start_ymd}->{range_end} "
                        f"window={fetch_start}~{fetch_end}"
                    )
                continue

            inserted, skipped = repository.save_wxindex_daily_scores(
                name=name,
                scores=window_scores,
            )
        except Exception as exc:
            summary["fetch_failed"] += 1
            if verbose:
                print(f"sync failed word={name}: {exc}")
            continue

        if inserted <= 0:
            # NOTE(review): rows were returned but none inserted; this is
            # tallied as "api_empty" and `skipped` is not added to
            # "skipped_rows" on this path.
            summary["api_empty"] += 1
            if verbose:
                print(
                    f"no new rows word={name} fetch={start_ymd}->{range_end} "
                    f"api_rows={len(window_scores)} skipped={skipped}"
                )
        else:
            summary["refreshed"] += 1
            summary["inserted_rows"] += inserted
            summary["skipped_rows"] += skipped
            if verbose:
                print(
                    f"synced word={name} fetch={start_ymd}->{range_end} "
                    f"inserted={inserted} skipped={skipped}"
                )

    return summary
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def cleanup_low_max_wxindex_words(
|
|
def cleanup_low_max_wxindex_words(
|
|
|
repository: HotContentRepository,
|
|
repository: HotContentRepository,
|
|
|
*,
|
|
*,
|
|
@@ -440,11 +679,13 @@ def try_register_wxindex_word_meta(
|
|
|
return None, "expired"
|
|
return None, "expired"
|
|
|
|
|
|
|
|
fetch_start_ymd = get_fetch_start_ymd_from_event(normalized_event_at)
|
|
fetch_start_ymd = get_fetch_start_ymd_from_event(normalized_event_at)
|
|
|
|
|
+ fetch_end_ymd = get_fetch_end_ymd_from_event(normalized_event_at)
|
|
|
if dry_run:
|
|
if dry_run:
|
|
|
return {
|
|
return {
|
|
|
"name": name,
|
|
"name": name,
|
|
|
"event_created_at": normalized_event_at,
|
|
"event_created_at": normalized_event_at,
|
|
|
"fetch_start_ymd": fetch_start_ymd,
|
|
"fetch_start_ymd": fetch_start_ymd,
|
|
|
|
|
+ "fetch_end_ymd": fetch_end_ymd,
|
|
|
}, "dry_run"
|
|
}, "dry_run"
|
|
|
|
|
|
|
|
if existing and update_if_exists:
|
|
if existing and update_if_exists:
|
|
@@ -452,6 +693,7 @@ def try_register_wxindex_word_meta(
|
|
|
name=name,
|
|
name=name,
|
|
|
event_created_at=normalized_event_at,
|
|
event_created_at=normalized_event_at,
|
|
|
fetch_start_ymd=fetch_start_ymd,
|
|
fetch_start_ymd=fetch_start_ymd,
|
|
|
|
|
+ fetch_end_ymd=fetch_end_ymd,
|
|
|
)
|
|
)
|
|
|
meta = repository.get_wxindex_word_meta(name)
|
|
meta = repository.get_wxindex_word_meta(name)
|
|
|
if meta is None:
|
|
if meta is None:
|
|
@@ -465,17 +707,18 @@ def try_register_wxindex_word_meta(
|
|
|
name=name,
|
|
name=name,
|
|
|
event_created_at=normalized_event_at,
|
|
event_created_at=normalized_event_at,
|
|
|
fetch_start_ymd=fetch_start_ymd,
|
|
fetch_start_ymd=fetch_start_ymd,
|
|
|
|
|
+ fetch_end_ymd=fetch_end_ymd,
|
|
|
)
|
|
)
|
|
|
return meta, "registered"
|
|
return meta, "registered"
|
|
|
|
|
|
|
|
|
|
|
|
|
-def fix_wxindex_word_meta_fetch_start_ymd(
|
|
|
|
|
|
|
+def fix_wxindex_word_meta_fetch_bounds(
|
|
|
repository: HotContentRepository,
|
|
repository: HotContentRepository,
|
|
|
*,
|
|
*,
|
|
|
dry_run: bool = False,
|
|
dry_run: bool = False,
|
|
|
verbose: bool = False,
|
|
verbose: bool = False,
|
|
|
) -> dict[str, int]:
|
|
) -> dict[str, int]:
|
|
|
- """按 event_created_at 往前 7 天,批量修正 meta.fetch_start_ymd。"""
|
|
|
|
|
|
|
+ """按 event_created_at 修正 meta.fetch_start_ymd / fetch_end_ymd。"""
|
|
|
rows = repository.list_all_wxindex_word_meta()
|
|
rows = repository.list_all_wxindex_word_meta()
|
|
|
summary = {
|
|
summary = {
|
|
|
"total": len(rows),
|
|
"total": len(rows),
|
|
@@ -486,10 +729,11 @@ def fix_wxindex_word_meta_fetch_start_ymd(
|
|
|
name = str(row.get("name") or "").strip()
|
|
name = str(row.get("name") or "").strip()
|
|
|
event_created_at = row.get("event_created_at")
|
|
event_created_at = row.get("event_created_at")
|
|
|
old_fetch_start = str(row.get("fetch_start_ymd") or "").strip()
|
|
old_fetch_start = str(row.get("fetch_start_ymd") or "").strip()
|
|
|
|
|
+ old_fetch_end = str(row.get("fetch_end_ymd") or "").strip()
|
|
|
if not name or event_created_at is None:
|
|
if not name or event_created_at is None:
|
|
|
continue
|
|
continue
|
|
|
- new_fetch_start = get_fetch_start_ymd_from_event(event_created_at)
|
|
|
|
|
- if new_fetch_start == old_fetch_start:
|
|
|
|
|
|
|
+ new_fetch_start, new_fetch_end = get_fetch_ymd_bounds_from_event(event_created_at)
|
|
|
|
|
+ if new_fetch_start == old_fetch_start and new_fetch_end == old_fetch_end:
|
|
|
summary["unchanged"] += 1
|
|
summary["unchanged"] += 1
|
|
|
continue
|
|
continue
|
|
|
if dry_run:
|
|
if dry_run:
|
|
@@ -497,24 +741,40 @@ def fix_wxindex_word_meta_fetch_start_ymd(
|
|
|
if verbose:
|
|
if verbose:
|
|
|
print(
|
|
print(
|
|
|
f"[dry-run] word={name} "
|
|
f"[dry-run] word={name} "
|
|
|
- f"event_created_at={event_created_at} "
|
|
|
|
|
- f"{old_fetch_start} -> {new_fetch_start}"
|
|
|
|
|
|
|
+ f"start {old_fetch_start}->{new_fetch_start} "
|
|
|
|
|
+ f"end {old_fetch_end}->{new_fetch_end}"
|
|
|
)
|
|
)
|
|
|
continue
|
|
continue
|
|
|
- repository.update_wxindex_word_meta_fetch_start(
|
|
|
|
|
|
|
+ repository.update_wxindex_word_meta(
|
|
|
name=name,
|
|
name=name,
|
|
|
|
|
+ event_created_at=event_created_at,
|
|
|
fetch_start_ymd=new_fetch_start,
|
|
fetch_start_ymd=new_fetch_start,
|
|
|
|
|
+ fetch_end_ymd=new_fetch_end,
|
|
|
)
|
|
)
|
|
|
summary["updated"] += 1
|
|
summary["updated"] += 1
|
|
|
if verbose:
|
|
if verbose:
|
|
|
print(
|
|
print(
|
|
|
f"updated word={name} "
|
|
f"updated word={name} "
|
|
|
- f"event_created_at={event_created_at} "
|
|
|
|
|
- f"{old_fetch_start} -> {new_fetch_start}"
|
|
|
|
|
|
|
+ f"start {old_fetch_start}->{new_fetch_start} "
|
|
|
|
|
+ f"end {old_fetch_end}->{new_fetch_end}"
|
|
|
)
|
|
)
|
|
|
return summary
|
|
return summary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fix_wxindex_word_meta_fetch_start_ymd(
    repository: HotContentRepository,
    *,
    dry_run: bool = False,
    verbose: bool = False,
) -> dict[str, int]:
    """Entry point kept under its earlier name; forwards to ``fix_wxindex_word_meta_fetch_bounds``.

    All arguments are passed through unchanged and the delegate's summary
    dict is returned as-is.
    """
    outcome = fix_wxindex_word_meta_fetch_bounds(
        repository,
        dry_run=dry_run,
        verbose=verbose,
    )
    return outcome
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def cleanup_wxindex_words_outside_event_window(
|
|
def cleanup_wxindex_words_outside_event_window(
|
|
|
repository: HotContentRepository,
|
|
repository: HotContentRepository,
|
|
|
*,
|
|
*,
|
|
@@ -734,7 +994,8 @@ def ensure_word_full_scores(
|
|
|
"""
|
|
"""
|
|
|
获取词微信指数并入库。
|
|
获取词微信指数并入库。
|
|
|
|
|
|
|
|
- - 表中已有数据但无 meta:若在 7 天窗口内,自动补 meta 并继续更新
|
|
|
|
|
|
|
+ - meta 表:窗口内存在指数 > 10 万才写入/更新
|
|
|
|
|
+ - wxindex_words:仅保留 [fetch_start_ymd, fetch_end_ymd] 区间内数据
|
|
|
- 超过 7 天窗口:不再更新
|
|
- 超过 7 天窗口:不再更新
|
|
|
|
|
|
|
|
返回 (scores, action)。
|
|
返回 (scores, action)。
|
|
@@ -744,17 +1005,86 @@ def ensure_word_full_scores(
|
|
|
return [], "empty"
|
|
return [], "empty"
|
|
|
|
|
|
|
|
target_end = end_ymd or get_wxindex_end_ymd()
|
|
target_end = end_ymd or get_wxindex_end_ymd()
|
|
|
- fetch_start_ymd = get_wxindex_fetch_start_ymd()
|
|
|
|
|
stored_scores = repository.list_wxindex_word_scores(word)
|
|
stored_scores = repository.list_wxindex_word_scores(word)
|
|
|
meta = repository.get_wxindex_word_meta(word)
|
|
meta = repository.get_wxindex_word_meta(word)
|
|
|
|
|
|
|
|
- should_register_meta = meta is None
|
|
|
|
|
|
|
+ should_register_meta = meta is None and event_created_at is not None
|
|
|
should_update_meta = (
|
|
should_update_meta = (
|
|
|
update_meta_if_exists
|
|
update_meta_if_exists
|
|
|
and meta is not None
|
|
and meta is not None
|
|
|
and event_created_at is not None
|
|
and event_created_at is not None
|
|
|
)
|
|
)
|
|
|
|
|
+
|
|
|
|
|
+ fetch_start_ymd: str | None = None
|
|
|
|
|
+ fetch_end_ymd: str | None = None
|
|
|
|
|
+
|
|
|
|
|
+ if event_created_at is not None:
|
|
|
|
|
+ normalized_event_at = normalize_event_created_at(event_created_at)
|
|
|
|
|
+ if not include_expired and not is_word_update_active(normalized_event_at):
|
|
|
|
|
+ if meta is None:
|
|
|
|
|
+ return stored_scores, "expired"
|
|
|
|
|
+ if not should_update_meta:
|
|
|
|
|
+ return stored_scores, "expired"
|
|
|
|
|
+ fetch_start_ymd, fetch_end_ymd = get_fetch_ymd_bounds_from_event(
|
|
|
|
|
+ normalized_event_at
|
|
|
|
|
+ )
|
|
|
|
|
+ elif meta is not None:
|
|
|
|
|
+ fetch_start_ymd = str(meta.get("fetch_start_ymd") or "").strip()
|
|
|
|
|
+ fetch_end_ymd = str(meta.get("fetch_end_ymd") or "").strip()
|
|
|
|
|
+ if not fetch_end_ymd:
|
|
|
|
|
+ fetch_end_ymd = get_fetch_end_ymd_from_event(meta["event_created_at"])
|
|
|
|
|
+ if not include_expired and not is_word_update_active(meta["event_created_at"]):
|
|
|
|
|
+ return stored_scores, "expired"
|
|
|
|
|
+ else:
|
|
|
|
|
+ return stored_scores, "legacy"
|
|
|
|
|
+
|
|
|
|
|
+ if not fetch_start_ymd or not fetch_end_ymd:
|
|
|
|
|
+ return stored_scores, "legacy"
|
|
|
|
|
+
|
|
|
|
|
+ api_end_ymd = min(fetch_end_ymd, target_end)
|
|
|
|
|
+ fetch_range = None if force_refresh else get_supplement_fetch_range(
|
|
|
|
|
+ stored_scores,
|
|
|
|
|
+ end_ymd=api_end_ymd,
|
|
|
|
|
+ start_ymd=fetch_start_ymd,
|
|
|
|
|
+ )
|
|
|
|
|
+ if fetch_range is None and stored_scores and meta is not None and not should_update_meta:
|
|
|
|
|
+ merged_scores = merge_wxindex_score_series(stored_scores)
|
|
|
|
|
+ window_scores = filter_scores_in_ymd_window(
|
|
|
|
|
+ merged_scores,
|
|
|
|
|
+ start_ymd=fetch_start_ymd,
|
|
|
|
|
+ end_ymd=fetch_end_ymd,
|
|
|
|
|
+ )
|
|
|
|
|
+ return window_scores, "cached"
|
|
|
|
|
+
|
|
|
|
|
+ if dry_run:
|
|
|
|
|
+ return [], "dry_run"
|
|
|
|
|
+
|
|
|
|
|
+ had_data = bool(stored_scores)
|
|
|
|
|
+ start_ymd, fetch_end_ymd_api = fetch_range or (fetch_start_ymd, api_end_ymd)
|
|
|
|
|
+ api_scores: list[dict[str, Any]] = []
|
|
|
|
|
+ if fetch_range is not None or not stored_scores or force_refresh:
|
|
|
|
|
+ api_scores = fetch_wxindex_scores(
|
|
|
|
|
+ api_client,
|
|
|
|
|
+ api_url,
|
|
|
|
|
+ keyword=word,
|
|
|
|
|
+ start_ymd=start_ymd,
|
|
|
|
|
+ end_ymd=fetch_end_ymd_api,
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ merged_scores = merge_wxindex_score_series(stored_scores, api_scores)
|
|
|
|
|
+ window_scores = filter_scores_in_ymd_window(
|
|
|
|
|
+ merged_scores,
|
|
|
|
|
+ start_ymd=fetch_start_ymd,
|
|
|
|
|
+ end_ymd=fetch_end_ymd,
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
if should_register_meta or should_update_meta:
|
|
if should_register_meta or should_update_meta:
|
|
|
|
|
+ if not word_has_high_score_in_window(
|
|
|
|
|
+ window_scores,
|
|
|
|
|
+ start_ymd=fetch_start_ymd,
|
|
|
|
|
+ end_ymd=fetch_end_ymd,
|
|
|
|
|
+ ):
|
|
|
|
|
+ return stored_scores, "below_threshold"
|
|
|
meta, register_reason = try_register_wxindex_word_meta(
|
|
meta, register_reason = try_register_wxindex_word_meta(
|
|
|
repository,
|
|
repository,
|
|
|
word=word,
|
|
word=word,
|
|
@@ -767,57 +1097,38 @@ def ensure_word_full_scores(
|
|
|
if register_reason == "expired":
|
|
if register_reason == "expired":
|
|
|
return stored_scores, "expired"
|
|
return stored_scores, "expired"
|
|
|
return stored_scores, "legacy"
|
|
return stored_scores, "legacy"
|
|
|
- if dry_run and register_reason == "dry_run":
|
|
|
|
|
- return [], "dry_run"
|
|
|
|
|
|
|
+ elif meta is None:
|
|
|
|
|
+ return stored_scores, "below_threshold"
|
|
|
|
|
|
|
|
if meta is None:
|
|
if meta is None:
|
|
|
return stored_scores, "legacy"
|
|
return stored_scores, "legacy"
|
|
|
|
|
|
|
|
- if not include_expired and not is_word_update_active(meta["event_created_at"]):
|
|
|
|
|
- return stored_scores, "expired"
|
|
|
|
|
-
|
|
|
|
|
- word_start_ymd = str(meta.get("fetch_start_ymd") or fetch_start_ymd)
|
|
|
|
|
- fetch_range = None if force_refresh else get_supplement_fetch_range(
|
|
|
|
|
- stored_scores,
|
|
|
|
|
- end_ymd=target_end,
|
|
|
|
|
- start_ymd=word_start_ymd,
|
|
|
|
|
- )
|
|
|
|
|
- if fetch_range is None and stored_scores:
|
|
|
|
|
- return stored_scores, "cached"
|
|
|
|
|
-
|
|
|
|
|
- if dry_run:
|
|
|
|
|
- return [], "dry_run"
|
|
|
|
|
-
|
|
|
|
|
- had_data = bool(stored_scores)
|
|
|
|
|
- start_ymd, fetch_end_ymd = fetch_range or (word_start_ymd, target_end)
|
|
|
|
|
- api_scores = fetch_wxindex_scores(
|
|
|
|
|
- api_client,
|
|
|
|
|
- api_url,
|
|
|
|
|
- keyword=word,
|
|
|
|
|
- start_ymd=start_ymd,
|
|
|
|
|
- end_ymd=fetch_end_ymd,
|
|
|
|
|
- )
|
|
|
|
|
- if not api_scores:
|
|
|
|
|
|
|
+ if not api_scores and not window_scores:
|
|
|
return stored_scores, "api_empty"
|
|
return stored_scores, "api_empty"
|
|
|
|
|
|
|
|
- if not had_data and not word_meets_max_score_threshold(
|
|
|
|
|
- api_scores,
|
|
|
|
|
- min_max_score=WXINDEX_WORDS_MIN_MAX_SCORE,
|
|
|
|
|
|
|
+ if not had_data and not word_has_high_score_in_window(
|
|
|
|
|
+ window_scores,
|
|
|
|
|
+ start_ymd=fetch_start_ymd,
|
|
|
|
|
+ end_ymd=fetch_end_ymd,
|
|
|
):
|
|
):
|
|
|
return [], "below_threshold"
|
|
return [], "below_threshold"
|
|
|
|
|
|
|
|
inserted, _skipped = repository.save_wxindex_daily_scores(
|
|
inserted, _skipped = repository.save_wxindex_daily_scores(
|
|
|
name=word,
|
|
name=word,
|
|
|
- scores=api_scores,
|
|
|
|
|
|
|
+ scores=window_scores,
|
|
|
|
|
+ )
|
|
|
|
|
+ final_scores = filter_scores_in_ymd_window(
|
|
|
|
|
+ repository.list_wxindex_word_scores(word),
|
|
|
|
|
+ start_ymd=fetch_start_ymd,
|
|
|
|
|
+ end_ymd=fetch_end_ymd,
|
|
|
)
|
|
)
|
|
|
- final_scores = repository.list_wxindex_word_scores(word)
|
|
|
|
|
if inserted > 0:
|
|
if inserted > 0:
|
|
|
action = "updated" if had_data else "inserted"
|
|
action = "updated" if had_data else "inserted"
|
|
|
elif final_scores:
|
|
elif final_scores:
|
|
|
action = "cached"
|
|
action = "cached"
|
|
|
else:
|
|
else:
|
|
|
action = "api_empty"
|
|
action = "api_empty"
|
|
|
- return final_scores or api_scores, action
|
|
|
|
|
|
|
+ return final_scores or window_scores, action
|
|
|
|
|
|
|
|
|
|
|
|
|
def sync_words_from_trend_json(
|
|
def sync_words_from_trend_json(
|