|
|
@@ -5,7 +5,17 @@ from __future__ import annotations
|
|
|
import hashlib
|
|
|
from typing import Any
|
|
|
|
|
|
-from app.hot_content.demand_export import ITEM_TYPE_ELEMENT, ITEM_TYPE_PHRASE
|
|
|
+from app.hot_content.demand_export import (
|
|
|
+ ITEM_TYPE_ELEMENT,
|
|
|
+ ITEM_TYPE_PHRASE,
|
|
|
+ attach_wxindex_metadata,
|
|
|
+ build_demand_export_rows,
|
|
|
+)
|
|
|
+from app.hot_content.demand_quality import (
|
|
|
+ attach_quality_scores_to_export_rows,
|
|
|
+ build_feature_combo_text,
|
|
|
+ quality_passed,
|
|
|
+)
|
|
|
|
|
|
TITLE_RETAIN_POINT_CATEGORIES = frozenset({"灵感点", "目的点"})
|
|
|
TYPE_FEATURE_POINT = "特征点"
|
|
|
@@ -68,16 +78,102 @@ def _has_matched_demand(row: dict[str, Any]) -> bool:
|
|
|
return bool(str(row.get("matched_demand") or "").strip())
|
|
|
|
|
|
|
|
|
+def _export_row_passes_quality(
|
|
|
+ row: dict[str, Any],
|
|
|
+ *,
|
|
|
+ export_rows: list[dict[str, Any]],
|
|
|
+ event_sense_json: dict[str, Any] | None,
|
|
|
+ senior_fit_json: dict[str, Any] | None,
|
|
|
+ event_threshold: float,
|
|
|
+ senior_threshold: float,
|
|
|
+) -> bool:
|
|
|
+ item_type = str(row.get("item_type") or "")
|
|
|
+ if item_type == ITEM_TYPE_PHRASE:
|
|
|
+ return quality_passed(
|
|
|
+ demand_type=TYPE_PHRASE,
|
|
|
+ demand_text=str(row.get("item_text") or "").strip(),
|
|
|
+ event_sense_json=event_sense_json,
|
|
|
+ senior_fit_json=senior_fit_json,
|
|
|
+ event_threshold=event_threshold,
|
|
|
+ senior_threshold=senior_threshold,
|
|
|
+ )
|
|
|
+ if item_type == ITEM_TYPE_ELEMENT and _has_matched_demand(row):
|
|
|
+ feature_combo = build_feature_combo_text(export_rows)
|
|
|
+ if not feature_combo:
|
|
|
+ return False
|
|
|
+ return quality_passed(
|
|
|
+ demand_type=TYPE_FEATURE_POINT,
|
|
|
+ demand_text=feature_combo,
|
|
|
+ event_sense_json=event_sense_json,
|
|
|
+ senior_fit_json=senior_fit_json,
|
|
|
+ event_threshold=event_threshold,
|
|
|
+ senior_threshold=senior_threshold,
|
|
|
+ )
|
|
|
+ return False
|
|
|
+
|
|
|
+
|
|
|
def is_export_row_as_demand(
|
|
|
row: dict[str, Any],
|
|
|
export_rows: list[dict[str, Any]],
|
|
|
*,
|
|
|
wxindex_threshold: float,
|
|
|
+ event_sense_json: dict[str, Any] | None = None,
|
|
|
+ senior_fit_json: dict[str, Any] | None = None,
|
|
|
+ event_threshold: float = 0.0,
|
|
|
+ senior_threshold: float = 0.0,
|
|
|
) -> int:
|
|
|
- """是否与 ODPS 需求同步规则一致:标题保留且该行有匹配需求。返回 0/1。"""
|
|
|
+ """是否与 ODPS 需求同步规则一致:标题保留、质量达标且该行有匹配需求。返回 0/1。"""
|
|
|
if not _should_retain_title(export_rows, wxindex_threshold=wxindex_threshold):
|
|
|
return 0
|
|
|
- return 1 if _has_matched_demand(row) else 0
|
|
|
+ if not _has_matched_demand(row):
|
|
|
+ return 0
|
|
|
+ if event_sense_json is not None or senior_fit_json is not None:
|
|
|
+ if not _export_row_passes_quality(
|
|
|
+ row,
|
|
|
+ export_rows=export_rows,
|
|
|
+ event_sense_json=event_sense_json,
|
|
|
+ senior_fit_json=senior_fit_json,
|
|
|
+ event_threshold=event_threshold,
|
|
|
+ senior_threshold=senior_threshold,
|
|
|
+ ):
|
|
|
+ return 0
|
|
|
+ return 1
|
|
|
+
|
|
|
+
|
|
|
+def _build_export_rows_for_record(
|
|
|
+ record: dict[str, Any],
|
|
|
+ *,
|
|
|
+ wxindex_threshold: float,
|
|
|
+ event_sense_json: dict[str, Any] | None,
|
|
|
+ senior_fit_json: dict[str, Any] | None,
|
|
|
+ event_threshold: float,
|
|
|
+ senior_threshold: float,
|
|
|
+) -> list[dict[str, Any]]:
|
|
|
+ match_result = record.get("contribution_demand_match_json")
|
|
|
+ if not isinstance(match_result, dict):
|
|
|
+ return []
|
|
|
+ contribution_points = record.get("contribution_points_json")
|
|
|
+ trend_json = record.get("wxindex_trend_json")
|
|
|
+ export_rows = attach_wxindex_metadata(
|
|
|
+ build_demand_export_rows(
|
|
|
+ match_result,
|
|
|
+ contribution_points=(
|
|
|
+ contribution_points if isinstance(contribution_points, dict) else None
|
|
|
+ ),
|
|
|
+ trend_json=trend_json if isinstance(trend_json, dict) else None,
|
|
|
+ ),
|
|
|
+ trend_json if isinstance(trend_json, dict) else None,
|
|
|
+ wxindex_threshold=wxindex_threshold,
|
|
|
+ event_sense_json=event_sense_json,
|
|
|
+ senior_fit_json=senior_fit_json,
|
|
|
+ event_threshold=event_threshold,
|
|
|
+ senior_threshold=senior_threshold,
|
|
|
+ )
|
|
|
+ return attach_quality_scores_to_export_rows(
|
|
|
+ export_rows,
|
|
|
+ event_sense_json=event_sense_json,
|
|
|
+ senior_fit_json=senior_fit_json,
|
|
|
+ )
|
|
|
|
|
|
|
|
|
def build_hive_rows_for_record(
|
|
|
@@ -87,36 +183,47 @@ def build_hive_rows_for_record(
|
|
|
strategy: str,
|
|
|
partition_dt: str,
|
|
|
wxindex_threshold: float,
|
|
|
+ event_sense_json: dict[str, Any] | None = None,
|
|
|
+ senior_fit_json: dict[str, Any] | None = None,
|
|
|
+ event_threshold: float = 0.0,
|
|
|
+ senior_threshold: float = 0.0,
|
|
|
) -> list[dict[str, Any]]:
|
|
|
if not _should_retain_title(export_rows, wxindex_threshold=wxindex_threshold):
|
|
|
return []
|
|
|
|
|
|
weight = _record_wxindex_score(export_rows) / WEIGHT_DIVISOR
|
|
|
-
|
|
|
- element_texts = _dedupe_texts(
|
|
|
- [
|
|
|
- str(row.get("item_text") or "").strip()
|
|
|
- for row in export_rows
|
|
|
- if str(row.get("item_type") or "") == ITEM_TYPE_ELEMENT and _has_matched_demand(row)
|
|
|
- ]
|
|
|
- )
|
|
|
+ feature_combo = build_feature_combo_text(export_rows)
|
|
|
phrase_texts = _dedupe_texts(
|
|
|
[
|
|
|
str(row.get("item_text") or "").strip()
|
|
|
for row in export_rows
|
|
|
if str(row.get("item_type") or "") == ITEM_TYPE_PHRASE
|
|
|
and _has_matched_demand(row)
|
|
|
+ and quality_passed(
|
|
|
+ demand_type=TYPE_PHRASE,
|
|
|
+ demand_text=str(row.get("item_text") or "").strip(),
|
|
|
+ event_sense_json=event_sense_json,
|
|
|
+ senior_fit_json=senior_fit_json,
|
|
|
+ event_threshold=event_threshold,
|
|
|
+ senior_threshold=senior_threshold,
|
|
|
+ )
|
|
|
]
|
|
|
)
|
|
|
|
|
|
hive_rows: list[dict[str, Any]] = []
|
|
|
- if element_texts:
|
|
|
- demand_name = " ".join(element_texts)
|
|
|
+ if feature_combo and quality_passed(
|
|
|
+ demand_type=TYPE_FEATURE_POINT,
|
|
|
+ demand_text=feature_combo,
|
|
|
+ event_sense_json=event_sense_json,
|
|
|
+ senior_fit_json=senior_fit_json,
|
|
|
+ event_threshold=event_threshold,
|
|
|
+ senior_threshold=senior_threshold,
|
|
|
+ ):
|
|
|
hive_rows.append(
|
|
|
_build_hive_row(
|
|
|
record_id=record_id,
|
|
|
strategy=strategy,
|
|
|
- demand_name=demand_name,
|
|
|
+ demand_name=feature_combo,
|
|
|
weight=weight,
|
|
|
demand_type=TYPE_FEATURE_POINT,
|
|
|
partition_dt=partition_dt,
|
|
|
@@ -137,12 +244,51 @@ def build_hive_rows_for_record(
|
|
|
return hive_rows
|
|
|
|
|
|
|
|
|
+def build_hive_rows_for_odps_record(
|
|
|
+ record: dict[str, Any],
|
|
|
+ *,
|
|
|
+ strategy: str,
|
|
|
+ partition_dt: str,
|
|
|
+ wxindex_threshold: float,
|
|
|
+ event_threshold: float,
|
|
|
+ senior_threshold: float,
|
|
|
+) -> list[dict[str, Any]]:
|
|
|
+ event_sense_json = record.get("demand_event_sense_json")
|
|
|
+ senior_fit_json = record.get("demand_senior_fit_json")
|
|
|
+ if not isinstance(event_sense_json, dict):
|
|
|
+ event_sense_json = {}
|
|
|
+ if not isinstance(senior_fit_json, dict):
|
|
|
+ senior_fit_json = {}
|
|
|
+
|
|
|
+ export_rows = _build_export_rows_for_record(
|
|
|
+ record,
|
|
|
+ wxindex_threshold=wxindex_threshold,
|
|
|
+ event_sense_json=event_sense_json,
|
|
|
+ senior_fit_json=senior_fit_json,
|
|
|
+ event_threshold=event_threshold,
|
|
|
+ senior_threshold=senior_threshold,
|
|
|
+ )
|
|
|
+ return build_hive_rows_for_record(
|
|
|
+ export_rows,
|
|
|
+ record_id=int(record.get("id") or 0),
|
|
|
+ strategy=strategy,
|
|
|
+ partition_dt=partition_dt,
|
|
|
+ wxindex_threshold=wxindex_threshold,
|
|
|
+ event_sense_json=event_sense_json,
|
|
|
+ senior_fit_json=senior_fit_json,
|
|
|
+ event_threshold=event_threshold,
|
|
|
+ senior_threshold=senior_threshold,
|
|
|
+ )
|
|
|
+
|
|
|
+
|
|
|
def build_hive_rows_from_export_groups(
|
|
|
export_groups: list[dict[str, Any]],
|
|
|
*,
|
|
|
strategy: str,
|
|
|
partition_dt: str,
|
|
|
wxindex_threshold: float,
|
|
|
+ event_threshold: float = 0.0,
|
|
|
+ senior_threshold: float = 0.0,
|
|
|
) -> list[dict[str, Any]]:
|
|
|
rows: list[dict[str, Any]] = []
|
|
|
seen_demand_ids: set[str] = set()
|
|
|
@@ -153,12 +299,49 @@ def build_hive_rows_from_export_groups(
|
|
|
record_id = int(group.get("record_id") or 0)
|
|
|
if record_id <= 0:
|
|
|
continue
|
|
|
+ event_sense_json = group.get("demand_event_sense_json")
|
|
|
+ senior_fit_json = group.get("demand_senior_fit_json")
|
|
|
for hive_row in build_hive_rows_for_record(
|
|
|
export_rows,
|
|
|
record_id=record_id,
|
|
|
strategy=strategy,
|
|
|
partition_dt=partition_dt,
|
|
|
wxindex_threshold=wxindex_threshold,
|
|
|
+ event_sense_json=event_sense_json if isinstance(event_sense_json, dict) else None,
|
|
|
+ senior_fit_json=senior_fit_json if isinstance(senior_fit_json, dict) else None,
|
|
|
+ event_threshold=event_threshold,
|
|
|
+ senior_threshold=senior_threshold,
|
|
|
+ ):
|
|
|
+ demand_id = str(hive_row["demand_id"])
|
|
|
+ if demand_id in seen_demand_ids:
|
|
|
+ continue
|
|
|
+ seen_demand_ids.add(demand_id)
|
|
|
+ rows.append(hive_row)
|
|
|
+ return rows
|
|
|
+
|
|
|
+
|
|
|
+def build_hive_rows_from_odps_records(
|
|
|
+ records: list[dict[str, Any]],
|
|
|
+ *,
|
|
|
+ strategy: str,
|
|
|
+ partition_dt: str,
|
|
|
+ wxindex_threshold: float,
|
|
|
+ event_threshold: float,
|
|
|
+ senior_threshold: float,
|
|
|
+) -> list[dict[str, Any]]:
|
|
|
+ rows: list[dict[str, Any]] = []
|
|
|
+ seen_demand_ids: set[str] = set()
|
|
|
+ for record in records:
|
|
|
+ record_id = int(record.get("id") or 0)
|
|
|
+ if record_id <= 0:
|
|
|
+ continue
|
|
|
+ for hive_row in build_hive_rows_for_odps_record(
|
|
|
+ record,
|
|
|
+ strategy=strategy,
|
|
|
+ partition_dt=partition_dt,
|
|
|
+ wxindex_threshold=wxindex_threshold,
|
|
|
+ event_threshold=event_threshold,
|
|
|
+ senior_threshold=senior_threshold,
|
|
|
):
|
|
|
demand_id = str(hive_row["demand_id"])
|
|
|
if demand_id in seen_demand_ids:
|