|
@@ -1,202 +0,0 @@
|
|
-import paddle
|
|
|
|
-import paddle.nn as nn
|
|
|
|
-import paddle.nn.functional as F
|
|
|
|
-import math
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
class WideDeepLayer(nn.Layer):
    """Wide & Deep network built with Paddle static-graph ops.

    The wide part is a single linear layer over the dense features; the deep
    part embeds each sparse slot, concatenates all embeddings with the dense
    features and pushes them through an MLP. The two parts are summed and
    squashed with a sigmoid into a click probability.

    Args:
        sparse_feature_number: embedding vocabulary size (rows per table).
        sparse_feature_dim: embedding width.
        dense_feature_dim: number of dense input features.
        num_field: number of sparse slots (embedding tables).
        layer_sizes: hidden-layer sizes of the deep MLP.
    """

    def __init__(self, sparse_feature_number, sparse_feature_dim,
                 dense_feature_dim, num_field, layer_sizes):
        super(WideDeepLayer, self).__init__()
        self.sparse_feature_number = sparse_feature_number
        self.sparse_feature_dim = sparse_feature_dim
        self.dense_feature_dim = dense_feature_dim
        self.num_field = num_field
        self.layer_sizes = layer_sizes

    def forward(self, sparse_inputs, dense_inputs):
        """Build the forward graph.

        Args:
            sparse_inputs: list of int64 id tensors, one per sparse slot.
            dense_inputs: float32 tensor of shape [batch, dense_feature_dim].

        Returns:
            Sigmoid probability tensor of shape [batch, 1].
        """
        # Wide part: one linear layer over the dense features.
        # NOTE: the original code also created 'wide_w'/'wide_b' parameters
        # via paddle.static.create_parameter, but they were never referenced
        # by any op (the fc below creates its own parameters), so those dead
        # creations are removed here.
        wide_output = paddle.static.nn.fc(
            x=dense_inputs,
            size=1,
            weight_attr=paddle.ParamAttr(
                initializer=paddle.nn.initializer.TruncatedNormal(
                    mean=0.0, std=1.0 / math.sqrt(self.dense_feature_dim))),
            bias_attr=paddle.ParamAttr(
                initializer=paddle.nn.initializer.Constant(0.0))
        )

        # Deep part: embed every sparse slot, flatten each to
        # [batch, sparse_feature_dim].
        sparse_embs = []
        for i, s_input in enumerate(sparse_inputs):
            emb = paddle.static.nn.embedding(
                input=s_input,
                size=[self.sparse_feature_number, self.sparse_feature_dim],
                param_attr=paddle.ParamAttr(
                    name=f"embedding_{i}",
                    initializer=paddle.nn.initializer.Uniform()))
            emb = paddle.reshape(emb, shape=[-1, self.sparse_feature_dim])
            sparse_embs.append(emb)

        # Concatenate all embeddings with the dense features.
        deep_output = paddle.concat(x=sparse_embs + [dense_inputs], axis=1)

        # MLP layer plan: hidden layers use relu, the final layer is linear.
        sizes = [self.sparse_feature_dim * self.num_field + self.dense_feature_dim] + self.layer_sizes + [1]
        acts = ["relu" for _ in range(len(self.layer_sizes))] + [None]

        for i in range(len(sizes) - 1):
            deep_output = paddle.static.nn.fc(
                x=deep_output,
                size=sizes[i + 1],
                activation=acts[i],
                weight_attr=paddle.ParamAttr(
                    name=f'fc_{i}_w',
                    # scale init by fan-in to keep activations stable
                    initializer=paddle.nn.initializer.Normal(
                        std=1.0 / math.sqrt(sizes[i]))),
                bias_attr=paddle.ParamAttr(
                    name=f'fc_{i}_b',
                    initializer=paddle.nn.initializer.Constant(0.0))
            )

        # Sum the two towers and map to a probability.
        prediction = paddle.add(x=wide_output, y=deep_output)
        pred = F.sigmoid(prediction)
        return pred
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
class WideDeepModel:
    """Assembles the static-graph wide&deep training program.

    Declares the feed inputs, builds the :class:`WideDeepLayer` network,
    registers streaming accuracy / AUC / MAE / MSE / RMSE metrics (with
    persistable accumulator state recorded in ``self._metrics``) and sets
    the log-loss objective on ``self.loss``.

    ``self._metrics`` maps metric name to
    ``{"result": tensor, "state": {state_name: (var, dtype)}}``.
    """

    def __init__(self, sparse_feature_number=1000001, sparse_inputs_slots=27,
                 sparse_feature_dim=10, dense_input_dim=13, fc_sizes=None):
        # fc_sizes previously defaulted to a mutable list literal
        # ([400, 400, 400]) shared across instances; keep the same effective
        # default without the shared object.
        self.sparse_feature_number = sparse_feature_number
        self.sparse_inputs_slots = sparse_inputs_slots
        self.sparse_feature_dim = sparse_feature_dim
        self.dense_input_dim = dense_input_dim
        self.fc_sizes = [400, 400, 400] if fc_sizes is None else fc_sizes

        self._metrics = {}

    def acc_metrics(self, pred, label):
        """Register a streaming (all-batches) accuracy metric.

        NOTE(review): mae_metrics/mse_metrics also create a persistable
        global var named "total_cnt"; in static mode same-named vars refer
        to the same storage, so the counter is incremented by every metric
        that uses it — confirm this denominator sharing is intended.
        """
        correct_cnt = paddle.static.create_global_var(
            name="right_cnt", persistable=True, dtype='float32', shape=[1], value=0)
        total_cnt = paddle.static.create_global_var(
            name="total_cnt", persistable=True, dtype='float32', shape=[1], value=0)

        # batch size as a tensor, so it tracks dynamic batch dimensions
        batch_cnt = paddle.sum(
            paddle.full(shape=[paddle.shape(label)[0], 1], fill_value=1.0))
        batch_accuracy = paddle.static.accuracy(input=pred, label=label)
        batch_correct = batch_cnt * batch_accuracy

        paddle.assign(correct_cnt + batch_correct, correct_cnt)
        paddle.assign(total_cnt + batch_cnt, total_cnt)
        accuracy = correct_cnt / total_cnt

        self._metrics["acc"] = {}
        self._metrics["acc"]["result"] = accuracy
        self._metrics["acc"]["state"] = {
            "total": (total_cnt, "float32"), "correct": (correct_cnt, "float32")}

    def auc_metrics(self, pred, label):
        """Register a streaming AUC metric (2-column class probabilities)."""
        auc, batch_auc, [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg] = paddle.static.auc(input=pred,
                                                                                                 label=label,
                                                                                                 num_thresholds=2**12,
                                                                                                 slide_steps=20)

        self._metrics["auc"] = {}
        self._metrics["auc"]["result"] = auc
        self._metrics["auc"]["state"] = {"stat_pos": (
            stat_pos, "int64"), "stat_neg": (stat_neg, "int64")}

    def mae_metrics(self, pred, label):
        """Register a streaming mean-absolute-error metric."""
        abserr = paddle.static.create_global_var(
            name="abserr", persistable=True, dtype='float32', shape=[1], value=0)
        total_cnt = paddle.static.create_global_var(
            name="total_cnt", persistable=True, dtype='float32', shape=[1], value=0)

        batch_cnt = paddle.sum(
            paddle.full(shape=[paddle.shape(label)[0], 1], fill_value=1.0))
        batch_abserr = paddle.nn.functional.l1_loss(
            pred, label, reduction='sum')

        paddle.assign(abserr + batch_abserr, abserr)
        paddle.assign(total_cnt + batch_cnt, total_cnt)
        mae = abserr / total_cnt

        self._metrics["mae"] = {}
        self._metrics["mae"]["result"] = mae
        self._metrics["mae"]["state"] = {
            "total": (total_cnt, "float32"), "abserr": (abserr, "float32")}

    def mse_metrics(self, pred, label):
        """Register streaming MSE and RMSE metrics (shared accumulators)."""
        sqrerr = paddle.static.create_global_var(
            name="sqrerr", persistable=True, dtype='float32', shape=[1], value=0)
        total_cnt = paddle.static.create_global_var(
            name="total_cnt", persistable=True, dtype='float32', shape=[1], value=0)

        batch_cnt = paddle.sum(
            paddle.full(shape=[paddle.shape(label)[0], 1], fill_value=1.0))
        batch_sqrerr = paddle.nn.functional.mse_loss(
            pred, label, reduction='sum')

        paddle.assign(sqrerr + batch_sqrerr, sqrerr)
        paddle.assign(total_cnt + batch_cnt, total_cnt)
        mse = sqrerr / total_cnt
        rmse = paddle.sqrt(mse)

        self._metrics["mse"] = {}
        self._metrics["mse"]["result"] = mse
        self._metrics["mse"]["state"] = {
            "total": (total_cnt, "float32"), "sqrerr": (sqrerr, "float32")}

        self._metrics["rmse"] = {}
        self._metrics["rmse"]["result"] = rmse
        self._metrics["rmse"]["state"] = {
            "total": (total_cnt, "float32"), "sqrerr": (sqrerr, "float32")}

    def net(self, is_train=True):
        """Build the full program: inputs, network, metrics and loss.

        Side effects: sets ``self.inputs`` (feed list), populates
        ``self._metrics`` and sets ``self.loss``.
        """
        dense_input = paddle.static.data(name="dense_input", shape=[
            None, self.dense_input_dim], dtype="float32")

        # Slots C1..C(sparse_inputs_slots-1); one id column per slot.
        sparse_inputs = [
            paddle.static.data(name="C" + str(i),
                               shape=[None, 1],
                               lod_level=1,
                               dtype="int64") for i in range(1, self.sparse_inputs_slots)
        ]

        label_input = paddle.static.data(
            name="label", shape=[None, 1], dtype="int64")

        self.inputs = [dense_input] + sparse_inputs + [label_input]

        wide_deep_model = WideDeepLayer(self.sparse_feature_number, self.sparse_feature_dim,
                                        self.dense_input_dim, self.sparse_inputs_slots - 1, self.fc_sizes)

        pred = wide_deep_model.forward(sparse_inputs, dense_input)
        # Two-column [P(neg), P(pos)] form required by paddle.static.auc.
        predict_2d = paddle.concat(x=[1 - pred, pred], axis=1)
        label_float = paddle.cast(label_input, dtype="float32")

        with paddle.utils.unique_name.guard():
            # NOTE(review): accuracy is fed the 1-column sigmoid output;
            # paddle.static.accuracy does a top-k over class columns, so
            # predict_2d may be the intended input here — verify.
            self.acc_metrics(pred, label_input)
            self.auc_metrics(predict_2d, label_input)
            self.mae_metrics(pred, label_float)
            self.mse_metrics(pred, label_float)

        # loss: binary log loss on the sigmoid output
        cost = paddle.nn.functional.log_loss(input=pred, label=label_float)
        avg_cost = paddle.mean(x=cost)
        self.loss = avg_cost
|
|
|