|
@@ -39,7 +39,8 @@ sparse_features = [
|
|
"user_adverid_conver_3d", "user_adverid_conver_7d", "user_adverid_conver_30d",
|
|
"user_adverid_conver_3d", "user_adverid_conver_7d", "user_adverid_conver_30d",
|
|
"user_skuid_view_3d", "user_skuid_view_7d", "user_skuid_view_30d",
|
|
"user_skuid_view_3d", "user_skuid_view_7d", "user_skuid_view_30d",
|
|
"user_skuid_click_3d", "user_skuid_click_7d", "user_skuid_click_30d",
|
|
"user_skuid_click_3d", "user_skuid_click_7d", "user_skuid_click_30d",
|
|
- "user_skuid_conver_3d", "user_skuid_conver_7d", "user_skuid_conver_30d"
|
|
|
|
|
|
+ "user_skuid_conver_3d", "user_skuid_conver_7d", "user_skuid_conver_30d",
|
|
|
|
+ "user_conver_ad_class"
|
|
]
|
|
]
|
|
|
|
|
|
int_features = [
|
|
int_features = [
|
|
@@ -60,10 +61,10 @@ def get_data():
|
|
dense_features = [name.strip().lower() for name in dense_features]
|
|
dense_features = [name.strip().lower() for name in dense_features]
|
|
feature_names = ','.join(dense_features + sparse_features)
|
|
feature_names = ','.join(dense_features + sparse_features)
|
|
|
|
|
|
- partitions = "dt in ('20250620')"
|
|
|
|
|
|
+ partitions = "dt in ('20250709')"
|
|
sql = f''' SELECT {feature_names},has_conversion
|
|
sql = f''' SELECT {feature_names},has_conversion
|
|
FROM loghubods.ad_easyrec_train_realtime_data_v3_sampled_temp
|
|
FROM loghubods.ad_easyrec_train_realtime_data_v3_sampled_temp
|
|
- WHERE {partitions} AND adverid = '598'
|
|
|
|
|
|
+ WHERE {partitions} AND adverid = '523'
|
|
'''
|
|
'''
|
|
# AND ts BETWEEN unix_timestamp('2025-05-14 17:40:00') AND unix_timestamp('2025-05-14 18:00:00')
|
|
# AND ts BETWEEN unix_timestamp('2025-05-14 17:40:00') AND unix_timestamp('2025-05-14 18:00:00')
|
|
data_query_hash = hashlib.sha1(sql.encode("utf-8")).hexdigest()[0:8]
|
|
data_query_hash = hashlib.sha1(sql.encode("utf-8")).hexdigest()[0:8]
|
|
@@ -137,7 +138,7 @@ def clear_feature(df, column):
|
|
df[column] = zero_value
|
|
df[column] = zero_value
|
|
return df
|
|
return df
|
|
|
|
|
|
-def build_req(df):
|
|
|
|
|
|
+def build_req(df, save_req=None):
|
|
feature_names = df.columns.tolist()
|
|
feature_names = df.columns.tolist()
|
|
batch_size = len(df)
|
|
batch_size = len(df)
|
|
req = TFRequest('serving_default')
|
|
req = TFRequest('serving_default')
|
|
@@ -149,6 +150,9 @@ def build_req(df):
|
|
values = [bytes(x, 'utf-8') for x in values]
|
|
values = [bytes(x, 'utf-8') for x in values]
|
|
req.add_feed(name, [batch_size], tf_type, values)
|
|
req.add_feed(name, [batch_size], tf_type, values)
|
|
req.add_fetch('probs')
|
|
req.add_fetch('probs')
|
|
|
|
+ if save_req:
|
|
|
|
+ with open(save_req, "wb") as f:
|
|
|
|
+ f.write(req.to_string())
|
|
return req
|
|
return req
|
|
|
|
|
|
def predict_by_batches(df, batch_size = 512):
|
|
def predict_by_batches(df, batch_size = 512):
|
|
@@ -188,8 +192,10 @@ def permutate_feature_and_predict(df):
|
|
|
|
|
|
|
|
|
|
def clear_feature_by_prefix_and_predict(df):
|
|
def clear_feature_by_prefix_and_predict(df):
|
|
- feature_prefix_list = ["actionstatic","adid","adverid","apptype","b2","b3","b4","b5","b6","b7","b8","brand","cate1","cate2","cid","city","clickall","converall","cpa","creative","ctcvr","ctr","cvr","d1","e1","e2","ecpm","has","hour","incomeall","is","profession","region","root","timediff","title","user","vid","viewall"
|
|
|
|
-]
|
|
|
|
|
|
+ feature_prefix_list = [
|
|
|
|
+ # "actionstatic","adid","adverid","apptype","b2","b3","b4","b5","b6","b7","b8","brand","cate1","cate2","cid","city","clickall","converall","cpa","creative","ctcvr","ctr","cvr","d1","e1","e2","ecpm","has","hour","incomeall","is","profession","region","root","timediff","title","user","vid","viewall",
|
|
|
|
+ "user_conver_ad_class"
|
|
|
|
+ ]
|
|
base_scores = client.predict(build_req(df)).response.outputs['probs'].float_val
|
|
base_scores = client.predict(build_req(df)).response.outputs['probs'].float_val
|
|
base_scores = np.array(base_scores)
|
|
base_scores = np.array(base_scores)
|
|
base_scores = base_scores / (base_scores + (1 - base_scores) / 0.04)
|
|
base_scores = base_scores / (base_scores + (1 - base_scores) / 0.04)
|
|
@@ -289,5 +295,6 @@ if __name__ == '__main__':
|
|
# print(df[['vid', 'cid', 'adid', 'adverid', 'apptype', 'hour', 'hour_quarter', 'is_first_layer']])
|
|
# print(df[['vid', 'cid', 'adid', 'adverid', 'apptype', 'hour', 'hour_quarter', 'is_first_layer']])
|
|
# clear_feature_and_predict(df)
|
|
# clear_feature_and_predict(df)
|
|
# permutate_feature_and_predict(df)
|
|
# permutate_feature_and_predict(df)
|
|
- clear_feature_by_prefix_and_predict(df)
|
|
|
|
|
|
+ # clear_feature_by_prefix_and_predict(df)
|
|
|
|
+ # scores = client.predict(build_req(df, 'warmup_widedeep_v12.bin')).response.outputs['probs'].float_val
|
|
|
|
|