Browse Source

Update data_fields_v3 and inspect_features

StrayWarrior 3 days ago
parent
commit
a1db8f641b
2 changed files with 17 additions and 7 deletions
  1. 3 0
      data_fields_v3.config
  2. 14 7
      inspect_features.py

+ 3 - 0
data_fields_v3.config

@@ -745,3 +745,6 @@ user_skuid_click_30d BIGINT
 user_skuid_conver_3d BIGINT
 user_skuid_conver_7d BIGINT
 user_skuid_conver_30d BIGINT
+is_weekday BIGINT
+day_of_the_week BIGINT
+user_conver_ad_class STRING

+ 14 - 7
inspect_features.py

@@ -39,7 +39,8 @@ sparse_features = [
     "user_adverid_conver_3d", "user_adverid_conver_7d", "user_adverid_conver_30d",
     "user_skuid_view_3d", "user_skuid_view_7d", "user_skuid_view_30d",
     "user_skuid_click_3d", "user_skuid_click_7d", "user_skuid_click_30d",
-    "user_skuid_conver_3d", "user_skuid_conver_7d", "user_skuid_conver_30d"
+    "user_skuid_conver_3d", "user_skuid_conver_7d", "user_skuid_conver_30d",
+    "user_conver_ad_class"
 ]
 
 int_features = [
@@ -60,10 +61,10 @@ def get_data():
     dense_features = [name.strip().lower() for name in dense_features]
     feature_names = ','.join(dense_features + sparse_features)
 
-    partitions = "dt in ('20250620')"
+    partitions = "dt in ('20250709')"
     sql = f''' SELECT {feature_names},has_conversion
            FROM loghubods.ad_easyrec_train_realtime_data_v3_sampled_temp
-           WHERE {partitions} AND adverid = '598'
+           WHERE {partitions} AND adverid = '523'
     '''
            # AND ts BETWEEN unix_timestamp('2025-05-14 17:40:00') AND unix_timestamp('2025-05-14 18:00:00')
     data_query_hash = hashlib.sha1(sql.encode("utf-8")).hexdigest()[0:8]
@@ -137,7 +138,7 @@ def clear_feature(df, column):
     df[column] = zero_value
     return df
 
-def build_req(df):
+def build_req(df, save_req=None):
     feature_names = df.columns.tolist()
     batch_size = len(df)
     req = TFRequest('serving_default')
@@ -149,6 +150,9 @@ def build_req(df):
             values = [bytes(x, 'utf-8') for x in values]
         req.add_feed(name, [batch_size], tf_type, values)
     req.add_fetch('probs')
+    if save_req:
+        with open(save_req, "wb") as f:
+            f.write(req.to_string())
     return req
 
 def predict_by_batches(df, batch_size = 512):
@@ -188,8 +192,10 @@ def permutate_feature_and_predict(df):
 
 
 def clear_feature_by_prefix_and_predict(df):
-    feature_prefix_list = ["actionstatic","adid","adverid","apptype","b2","b3","b4","b5","b6","b7","b8","brand","cate1","cate2","cid","city","clickall","converall","cpa","creative","ctcvr","ctr","cvr","d1","e1","e2","ecpm","has","hour","incomeall","is","profession","region","root","timediff","title","user","vid","viewall"
-]
+    feature_prefix_list = [
+        # "actionstatic","adid","adverid","apptype","b2","b3","b4","b5","b6","b7","b8","brand","cate1","cate2","cid","city","clickall","converall","cpa","creative","ctcvr","ctr","cvr","d1","e1","e2","ecpm","has","hour","incomeall","is","profession","region","root","timediff","title","user","vid","viewall",
+        "user_conver_ad_class"
+    ]
     base_scores = client.predict(build_req(df)).response.outputs['probs'].float_val
     base_scores = np.array(base_scores)
     base_scores = base_scores / (base_scores + (1 - base_scores) / 0.04)
@@ -289,5 +295,6 @@ if __name__ == '__main__':
     # print(df[['vid', 'cid', 'adid', 'adverid', 'apptype', 'hour', 'hour_quarter', 'is_first_layer']])
     # clear_feature_and_predict(df)
     # permutate_feature_and_predict(df)
-    clear_feature_by_prefix_and_predict(df)
+    # clear_feature_by_prefix_and_predict(df)
+    # scores = client.predict(build_req(df, 'warmup_widedeep_v12.bin')).response.outputs['probs'].float_val