@@ -1,101 +1,146 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 import numpy as np
 
-
 class DSSMLayer(nn.Layer):
-    def __init__(self, trigram_d, neg_num, slice_end, hidden_layers,
-                 hidden_acts):
+    def __init__(self, feature_nums=[5, 5, 5, 5, 5], embedding_dim=8,
+                 output_dim=16, hidden_layers=[40, 32],
+                 hidden_acts=["relu", "relu"]):
         super(DSSMLayer, self).__init__()
-
-        self.hidden_layers = [trigram_d] + hidden_layers
+
+        self.feature_num = len(feature_nums)
+        self.embedding_dim = embedding_dim
+        self.output_dim = output_dim
+        # The first layer takes the concatenation of all feature embeddings,
+        # so its input width is feature_num * embedding_dim.
+        self.hidden_layers = ([self.feature_num * embedding_dim]
+                              + hidden_layers + [output_dim])
         self.hidden_acts = hidden_acts
-        self.slice_end = slice_end
+
+        # One embedding table per feature; feature_nums[i] is the vocabulary
+        # size of the i-th categorical feature.
+        self.left_embeddings = nn.LayerList([
+            nn.Embedding(
+                num_embeddings=feature_nums[i],
+                embedding_dim=embedding_dim,
+                weight_attr=paddle.ParamAttr(
+                    initializer=paddle.nn.initializer.XavierNormal()
+                )
+            ) for i in range(self.feature_num)
+        ])
+
+        self.right_embeddings = nn.LayerList([
+            nn.Embedding(
+                num_embeddings=feature_nums[i],
+                embedding_dim=embedding_dim,
+                weight_attr=paddle.ParamAttr(
+                    initializer=paddle.nn.initializer.XavierNormal()
+                )
+            ) for i in range(self.feature_num)
+        ])
 
-        self._query_layers = []
+        # Left video tower
+        self._left_tower = []
         for i in range(len(self.hidden_layers) - 1):
             linear = paddle.nn.Linear(
                 in_features=self.hidden_layers[i],
                 out_features=self.hidden_layers[i + 1],
                 weight_attr=paddle.ParamAttr(
-                    initializer=paddle.nn.initializer.XavierNormal(
-                        fan_in=self.hidden_layers[i],
-                        fan_out=self.hidden_layers[i + 1])),
+                    initializer=paddle.nn.initializer.XavierNormal()
+                ),
                 bias_attr=paddle.ParamAttr(
-                    initializer=paddle.nn.initializer.XavierNormal(
-                        fan_in=self.hidden_layers[i],
-                        fan_out=self.hidden_layers[i + 1])))
-            self.add_sublayer('query_linear_%d' % i, linear)
-            self._query_layers.append(linear)
-            if self.hidden_acts[i] == "relu":
+                    # Zero-initialize biases.
+                    initializer=paddle.nn.initializer.Constant(value=0.0)
+                )
+            )
+            self.add_sublayer('left_linear_%d' % i, linear)
+            self._left_tower.append(linear)
+
+            # Guard against hidden_acts being shorter than the layer list.
+            if i < len(hidden_acts) and self.hidden_acts[i] == "relu":
                 act = paddle.nn.ReLU()
-                self.add_sublayer('query_act_%d' % i, act)
-                self._query_layers.append(act)
+                self.add_sublayer('left_act_%d' % i, act)
+                self._left_tower.append(act)
 
-        self._doc_layers = []
+        # Right video tower
+        self._right_tower = []
         for i in range(len(self.hidden_layers) - 1):
             linear = paddle.nn.Linear(
                 in_features=self.hidden_layers[i],
                 out_features=self.hidden_layers[i + 1],
                 weight_attr=paddle.ParamAttr(
-                    initializer=paddle.nn.initializer.XavierNormal(
-                        fan_in=self.hidden_layers[i],
-                        fan_out=self.hidden_layers[i + 1])),
+                    initializer=paddle.nn.initializer.XavierNormal()
+                ),
                 bias_attr=paddle.ParamAttr(
-                    initializer=paddle.nn.initializer.XavierNormal(
-                        fan_in=self.hidden_layers[i],
-                        fan_out=self.hidden_layers[i + 1])))
-            self.add_sublayer('pos_linear_%d' % i, linear)
-            self._doc_layers.append(linear)
-            if self.hidden_acts[i] == "relu":
+                    initializer=paddle.nn.initializer.Constant(value=0.0)
+                )
+            )
+            self.add_sublayer('right_linear_%d' % i, linear)
+            self._right_tower.append(linear)
+
+            if i < len(hidden_acts) and self.hidden_acts[i] == "relu":
                 act = paddle.nn.ReLU()
-                self.add_sublayer('pos_act_%d' % i, act)
-                self._doc_layers.append(act)
+                self.add_sublayer('right_act_%d' % i, act)
+                self._right_tower.append(act)
+
+    def _process_features(self, features, embeddings):
+        # Look up an embedding for each sparse feature column.
+        embedded_features = []
+        for i in range(self.feature_num):
+            # The i-th feature column, shape [batch_size, 1].
+            feature = paddle.slice(
+                features, axes=[1], starts=[i], ends=[i + 1])
+            feature = paddle.cast(feature, dtype='int64')
+            # Lookup result has shape [batch_size, 1, embedding_dim].
+            embedded = embeddings[i](feature)
+            embedded_features.append(embedded)
+
+        # Concatenate along the feature axis:
+        # shape [batch_size, feature_num, embedding_dim].
+        return paddle.concat(embedded_features, axis=1)
+
+    def forward(self, left_features, right_features):
+        # Encode both videos, then score the pair by cosine similarity.
+        left_vec, right_vec = self.get_vectors(left_features, right_features)
+        sim_score = F.cosine_similarity(
+            left_vec, right_vec, axis=1).reshape([-1, 1])
+        return sim_score, left_vec, right_vec
 
-    def forward(self, input_data, is_infer):
-        query_fc = input_data[0]
-        for n_layer in self._query_layers:
-            query_fc = n_layer(query_fc)
-        self.query_fc = query_fc
+    def get_vectors(self, left_features, right_features):
+        """Return the output_dim-dimensional vectors for both videos."""
+        # Left video: embed, flatten, then run the left tower.
+        left_embedded = self._process_features(left_features,
+                                               self.left_embeddings)
+        left_vec = paddle.reshape(
+            left_embedded, [-1, self.feature_num * self.embedding_dim])
+        for layer in self._left_tower:
+            left_vec = layer(left_vec)
-        doc_pos_fc = input_data[1]
-        for n_layer in self._doc_layers:
-            doc_pos_fc = n_layer(doc_pos_fc)
-        self.doc_pos_fc = doc_pos_fc
-        self.params = [self._query_layers[-2].bias]
+
+        # Right video: same pipeline with the right tower.
+        right_embedded = self._process_features(right_features,
+                                                self.right_embeddings)
+        right_vec = paddle.reshape(
+            right_embedded, [-1, self.feature_num * self.embedding_dim])
+        for layer in self._right_tower:
+            right_vec = layer(right_vec)
-        R_Q_D_p = F.cosine_similarity(
-            query_fc, doc_pos_fc, axis=1).reshape([-1, 1])
-        if is_infer:
-            return R_Q_D_p, paddle.ones(shape=[self.slice_end, 1])
+
+        # L2-normalize so dot products equal cosine similarity downstream.
+        left_vec = F.normalize(left_vec, p=2, axis=1)
+        right_vec = F.normalize(right_vec, p=2, axis=1)
+
+        return left_vec, right_vec
-
-        R_Q_D_ns = []
-        for i in range(len(input_data) - 2):
-            doc_neg_fc_i = input_data[i + 2]
-            for n_layer in self._doc_layers:
-                doc_neg_fc_i = n_layer(doc_neg_fc_i)
-            R_Q_D_n = F.cosine_similarity(
-                query_fc, doc_neg_fc_i, axis=1).reshape([-1, 1])
-            R_Q_D_ns.append(R_Q_D_n)
-        concat_Rs = paddle.concat(x=[R_Q_D_p] + R_Q_D_ns, axis=1)
-        prob = F.softmax(concat_Rs, axis=1)
-        hit_prob = paddle.slice(
-            prob, axes=[0, 1], starts=[0, 0], ends=[self.slice_end, 1])
-        return R_Q_D_p, hit_prob
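
After this change, DSSMLayer consumes two tensors of raw categorical feature ids rather than pre-built trigram vectors. A minimal smoke test of the new interface, assuming the defaults above (five features per video, each with a vocabulary of 5) and that `DSSMLayer` is defined as in this diff; `model`, `left`, and `right` are hypothetical names:

```python
import paddle

model = DSSMLayer()
batch_size = 4
# Each video is a row of 5 integer feature ids in [0, 5).
left = paddle.randint(low=0, high=5, shape=[batch_size, 5])
right = paddle.randint(low=0, high=5, shape=[batch_size, 5])

sim_score, left_vec, right_vec = model(left, right)
print(sim_score.shape)  # [4, 1], cosine similarity in [-1, 1]
print(left_vec.shape)   # [4, 16], L2-normalized tower output
```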
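The diff also drops the old softmax-over-negatives loss and returns only the raw cosine score, so the training objective now lives outside the model. This patch does not show how the score is consumed; one hedged possibility, assuming binary match labels of shape [batch_size, 1], is to rescale the score to [0, 1] and apply binary cross-entropy:

```python
import paddle
import paddle.nn.functional as F

# Sketch only: `model`, `left`, `right` as in the snippet above; `labels`
# is a hypothetical float32 tensor of 0/1 match labels, shape [batch, 1].
labels = paddle.to_tensor([[1.0], [0.0], [1.0], [0.0]])

sim_score, _, _ = model(left, right)  # cosine score in [-1, 1]
prob = (sim_score + 1.0) / 2.0        # map to [0, 1]
loss = F.binary_cross_entropy(prob, labels)
loss.backward()
```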