# static_model_lod.py
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  14. import math
  15. import paddle
  16. from net import DNNLayer, StaticDNNLayer
  17. class StaticModel():
  18. def __init__(self, config):
  19. self.cost = None
  20. self.infer_target_var = None
  21. self.config = config
  22. self._init_hyper_parameters()
  23. def _init_hyper_parameters(self):
  24. self.is_distributed = False
  25. self.distributed_embedding = False
  26. if self.config.get("hyper_parameters.distributed_embedding", 0) == 1:
  27. self.distributed_embedding = True
  28. self.sparse_feature_number = self.config.get(
  29. "hyper_parameters.sparse_feature_number")
  30. self.sparse_feature_dim = self.config.get(
  31. "hyper_parameters.sparse_feature_dim")
  32. self.sparse_inputs_slots = self.config.get(
  33. "hyper_parameters.sparse_inputs_slots")
  34. self.dense_input_dim = self.config.get(
  35. "hyper_parameters.dense_input_dim")
  36. self.learning_rate = self.config.get(
  37. "hyper_parameters.optimizer.learning_rate")
  38. self.fc_sizes = self.config.get("hyper_parameters.fc_sizes")
  39. def create_feeds(self, is_infer=False):
  40. dense_input = paddle.static.data(
  41. name="dense_input",
  42. shape=[None, self.dense_input_dim],
  43. dtype="float32")
  44. sparse_input_ids = [
  45. paddle.static.data(
  46. name="C" + str(i), shape=[None, 1], lod_level=1, dtype="int64")
  47. for i in range(1, self.sparse_inputs_slots)
  48. ]
  49. label = paddle.static.data(
  50. name="label", shape=[None, 1], dtype="int64")
  51. feeds_list = [label] + sparse_input_ids + [dense_input]
  52. return feeds_list
  53. def net(self, input, is_infer=False):
  54. self.label_input = input[0]
  55. self.sparse_inputs = input[1:self.sparse_inputs_slots]
  56. self.dense_input = input[-1]
  57. sparse_number = self.sparse_inputs_slots - 1
  58. def embedding_layer(input):
  59. if self.distributed_embedding:
  60. emb = paddle.static.nn.sparse_embedding(
  61. input=input,
  62. size=[
  63. self.sparse_feature_number, self.sparse_feature_dim
  64. ],
  65. param_attr=paddle.ParamAttr(
  66. name="SparseFeatFactors",
  67. initializer=paddle.nn.initializer.Uniform()))
  68. else:
  69. paddle.static.Print(input)
  70. emb = paddle.static.nn.embedding(
  71. input=input,
  72. is_sparse=True,
  73. is_distributed=self.is_distributed,
  74. size=[
  75. self.sparse_feature_number, self.sparse_feature_dim
  76. ],
  77. param_attr=paddle.ParamAttr(
  78. name="SparseFeatFactors",
  79. initializer=paddle.nn.initializer.Uniform()))
  80. emb_sum = paddle.static.nn.sequence_pool(
  81. input=emb, pool_type='sum')
  82. sequeeze_emb_sum = paddle.squeeze(emb_sum, axis=1)
  83. #return emb_sum
  84. return sequeeze_emb_sum
  85. sparse_embs = list(map(embedding_layer, self.sparse_inputs))
  86. dnn_model = StaticDNNLayer(
  87. self.sparse_feature_number, self.sparse_feature_dim,
  88. self.dense_input_dim, sparse_number, self.fc_sizes)
  89. raw_predict_2d = dnn_model.forward(sparse_embs, self.dense_input)
  90. predict_2d = paddle.nn.functional.softmax(raw_predict_2d)
  91. self.predict = predict_2d
  92. auc, batch_auc, _ = paddle.static.auc(input=self.predict,
  93. label=self.label_input,
  94. num_thresholds=2**12,
  95. slide_steps=20)
  96. self.inference_target_var = auc
  97. if is_infer:
  98. fetch_dict = {'auc': auc}
  99. return fetch_dict
  100. cost = paddle.nn.functional.cross_entropy(
  101. input=raw_predict_2d, label=self.label_input)
  102. avg_cost = paddle.mean(x=cost)
  103. self._cost = avg_cost
  104. fetch_dict = {'cost': avg_cost, 'auc': auc}
  105. return fetch_dict
  106. def create_optimizer(self, strategy=None):
  107. optimizer = paddle.optimizer.Adam(
  108. learning_rate=self.learning_rate, lazy_mode=True)
  109. if strategy != None:
  110. import paddle.distributed.fleet as fleet
  111. optimizer = fleet.distributed_optimizer(optimizer, strategy)
  112. optimizer.minimize(self._cost)
  113. def infer_net(self, input):
  114. return self.net(input, is_infer=True)