|
|
@@ -127,8 +127,8 @@ def write_dwd_multi_demand_pool_di_to_hive(rows: list[dict]) -> int:
|
|
|
|
|
|
分区与 demand_id 的日期均为中国时区当天(yyyymmdd),不使用行内 dt 字段。
|
|
|
执行两次 INSERT(同表、同分区),策略不同:
|
|
|
- 1) 当下供需gap: demand_name=merge_leve2+' '+name, demand_id=md5(strategy+demand_name+dt)
|
|
|
- 2) 当下供需gap-分词: demand_name=name, demand_id=md5(strategy+name+dt)
|
|
|
+ 1) 当下供需gap: demand_name=merge_leve2+' '+name, demand_id=md5(strategy+demand_name+type+dt)
|
|
|
+ 2) 当下供需gap-分词: demand_name=name, demand_id=md5(strategy+name+品类+type+dt)
|
|
|
"""
|
|
|
if not rows:
|
|
|
return 0
|
|
|
@@ -155,7 +155,9 @@ def write_dwd_multi_demand_pool_di_to_hive(rows: list[dict]) -> int:
|
|
|
extend_json = json.dumps({"品类": merge_leve2}, ensure_ascii=False)
|
|
|
|
|
|
demand_name_gap = f"{merge_leve2} {name}"
|
|
|
- demand_id_gap = hashlib.md5(f"{_STRATEGY_GAP}{demand_name_gap}{china_today}".encode("utf-8")).hexdigest()
|
|
|
+ demand_id_gap = hashlib.md5(
|
|
|
+ f"{_STRATEGY_GAP}{demand_name_gap}{type_str}{china_today}".encode("utf-8")
|
|
|
+ ).hexdigest()
|
|
|
gap_parts.append(
|
|
|
_build_hive_select_part(
|
|
|
_STRATEGY_GAP, demand_id_gap, demand_name_gap,
|
|
|
@@ -163,7 +165,9 @@ def write_dwd_multi_demand_pool_di_to_hive(rows: list[dict]) -> int:
|
|
|
)
|
|
|
)
|
|
|
|
|
|
- demand_id_fenci = hashlib.md5(f"{_STRATEGY_GAP_FENCI}{name}{china_today}".encode("utf-8")).hexdigest()
|
|
|
+ demand_id_fenci = hashlib.md5(
|
|
|
+ f"{_STRATEGY_GAP_FENCI}{name}{merge_leve2}{type_str}{china_today}".encode("utf-8")
|
|
|
+ ).hexdigest()
|
|
|
fenci_parts.append(
|
|
|
_build_hive_select_part(
|
|
|
_STRATEGY_GAP_FENCI, demand_id_fenci, name,
|