瀏覽代碼

修改demand_id拼接方式

xueyiming 1 周之前
父節點
當前提交
45f870b86a
共有 1 個文件被更改,包括 8 次插入和 4 次刪除
  1. 8 4
      examples/demand/data_query_tools.py

+ 8 - 4
examples/demand/data_query_tools.py

@@ -127,8 +127,8 @@ def write_dwd_multi_demand_pool_di_to_hive(rows: list[dict]) -> int:
 
     分区与 demand_id 的日期均为中国时区当天(yyyymmdd),不使用行内 dt 字段。
     执行两次 INSERT(同表、同分区),策略不同:
-    1) 当下供需gap: demand_name=merge_leve2+' '+name, demand_id=md5(strategy+demand_name+dt)
-    2) 当下供需gap-分词: demand_name=name, demand_id=md5(strategy+name+dt)
+    1) 当下供需gap: demand_name=merge_leve2+' '+name, demand_id=md5(strategy+demand_name+type+dt)
+    2) 当下供需gap-分词: demand_name=name, demand_id=md5(strategy+name+品类+type+dt)
     """
     if not rows:
         return 0
@@ -155,7 +155,9 @@ def write_dwd_multi_demand_pool_di_to_hive(rows: list[dict]) -> int:
         extend_json = json.dumps({"品类": merge_leve2}, ensure_ascii=False)
 
         demand_name_gap = f"{merge_leve2} {name}"
-        demand_id_gap = hashlib.md5(f"{_STRATEGY_GAP}{demand_name_gap}{china_today}".encode("utf-8")).hexdigest()
+        demand_id_gap = hashlib.md5(
+            f"{_STRATEGY_GAP}{demand_name_gap}{type_str}{china_today}".encode("utf-8")
+        ).hexdigest()
         gap_parts.append(
             _build_hive_select_part(
                 _STRATEGY_GAP, demand_id_gap, demand_name_gap,
@@ -163,7 +165,9 @@ def write_dwd_multi_demand_pool_di_to_hive(rows: list[dict]) -> int:
             )
         )
 
-        demand_id_fenci = hashlib.md5(f"{_STRATEGY_GAP_FENCI}{name}{china_today}".encode("utf-8")).hexdigest()
+        demand_id_fenci = hashlib.md5(
+            f"{_STRATEGY_GAP_FENCI}{name}{merge_leve2}{type_str}{china_today}".encode("utf-8")
+        ).hexdigest()
         fenci_parts.append(
             _build_hive_select_part(
                 _STRATEGY_GAP_FENCI, demand_id_fenci, name,