@@ -1,72 +1,141 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 import numpy as np
 
-
 class DSSMLayer(nn.Layer):
-    def __init__(self, trigram_d, neg_num, slice_end, hidden_layers,
-                 hidden_acts):
+    def __init__(self, feature_num=5, embedding_dim=8, output_dim=16,
+                 hidden_layers=[64, 32], hidden_acts=["relu", "relu"]):
         super(DSSMLayer, self).__init__()
-
-        self.hidden_layers = [trigram_d] + hidden_layers
+
+        self.feature_num = feature_num
+        self.embedding_dim = embedding_dim
+        self.output_dim = output_dim
+        # The first layer's input dimension is the concatenation of all feature embeddings
+        self.hidden_layers = [feature_num * embedding_dim] + hidden_layers + [output_dim]
         self.hidden_acts = hidden_acts
-        self.slice_end = slice_end
 
-        self._query_layers = []
+        # Create an embedding layer for each feature
+        self.left_embeddings = nn.LayerList([
+            nn.Linear(
+                1,
+                embedding_dim,
+                weight_attr=paddle.ParamAttr(
+                    initializer=paddle.nn.initializer.XavierNormal()
+                ),
+                bias_attr=paddle.ParamAttr(
+                    initializer=paddle.nn.initializer.Constant(value=0.0)
+                )
+            ) for _ in range(feature_num)
+        ])
+
+        self.right_embeddings = nn.LayerList([
+            nn.Linear(
+                1,
+                embedding_dim,
+                weight_attr=paddle.ParamAttr(
+                    initializer=paddle.nn.initializer.XavierNormal()
+                ),
+                bias_attr=paddle.ParamAttr(
+                    initializer=paddle.nn.initializer.Constant(value=0.0)
+                )
+            ) for _ in range(feature_num)
+        ])
+
+        # Left video tower
+        self._left_tower = []
         for i in range(len(self.hidden_layers) - 1):
             linear = paddle.nn.Linear(
                 in_features=self.hidden_layers[i],
                 out_features=self.hidden_layers[i + 1],
                 weight_attr=paddle.ParamAttr(
-                    initializer=paddle.nn.initializer.XavierNormal(
-                        fan_in=self.hidden_layers[i],
-                        fan_out=self.hidden_layers[i + 1])),
+                    initializer=paddle.nn.initializer.XavierNormal()
+                ),
                 bias_attr=paddle.ParamAttr(
-                    initializer=paddle.nn.initializer.XavierNormal(
-                        fan_in=self.hidden_layers[i],
-                        fan_out=self.hidden_layers[i + 1])))
-            self.add_sublayer('query_linear_%d' % i, linear)
-            self._query_layers.append(linear)
-            if self.hidden_acts[i] == "relu":
+                    initializer=paddle.nn.initializer.Constant(value=0.0)
+                )
+            )
+            self.add_sublayer('left_linear_%d' % i, linear)
+            self._left_tower.append(linear)
+
+            if i < len(hidden_acts) and self.hidden_acts[i] == "relu":
                 act = paddle.nn.ReLU()
-                self.add_sublayer('query_act_%d' % i, act)
-                self._query_layers.append(act)
+                self.add_sublayer('left_act_%d' % i, act)
+                self._left_tower.append(act)
 
-        self._doc_layers = []
+        # Right video tower
+        self._right_tower = []
         for i in range(len(self.hidden_layers) - 1):
             linear = paddle.nn.Linear(
                 in_features=self.hidden_layers[i],
                 out_features=self.hidden_layers[i + 1],
                 weight_attr=paddle.ParamAttr(
-                    initializer=paddle.nn.initializer.XavierNormal(
-                        fan_in=self.hidden_layers[i],
-                        fan_out=self.hidden_layers[i + 1])),
+                    initializer=paddle.nn.initializer.XavierNormal()
+                ),
                 bias_attr=paddle.ParamAttr(
-                    initializer=paddle.nn.initializer.XavierNormal(
-                        fan_in=self.hidden_layers[i],
-                        fan_out=self.hidden_layers[i + 1])))
-            self.add_sublayer('pos_linear_%d' % i, linear)
-            self._doc_layers.append(linear)
-            if self.hidden_acts[i] == "relu":
+                    initializer=paddle.nn.initializer.Constant(value=0.0)
+                )
+            )
+            self.add_sublayer('right_linear_%d' % i, linear)
+            self._right_tower.append(linear)
+
+            if i < len(hidden_acts) and self.hidden_acts[i] == "relu":
                 act = paddle.nn.ReLU()
-                self.add_sublayer('pos_act_%d' % i, act)
-                self._doc_layers.append(act)
+                self.add_sublayer('right_act_%d' % i, act)
+                self._right_tower.append(act)
+
+    def _process_features(self, features, embeddings):
+        # Convert each feature into an embedding
+        embedded_features = []
+        for i in range(self.feature_num):
+            feature = paddle.slice(
+                features,
+                axes=[1],
+                starts=[i],
+                ends=[i + 1]
+            )
+            embedded = embeddings[i](feature)
+            embedded_features.append(embedded)
+
+        # Concatenate all the embeddings
+        return paddle.concat(embedded_features, axis=1)
+
+    def forward(self, left_features, right_features):
+        # Get the feature representations of the two videos
+        left_vec, right_vec = self.get_vectors(left_features, right_features)
+
+        # Compute the similarity
+        sim_score = F.cosine_similarity(
+            left_vec,
+            right_vec,
+            axis=1
+        ).reshape([-1, 1])
+
+        return sim_score, left_vec, right_vec
+
+    def get_vectors(self, left_features, right_features):
+        """Get the output_dim-dimensional feature vectors of the two videos."""
+        # Process the left video's features
+        left_embedded = self._process_features(left_features, self.left_embeddings)
+        left_vec = left_embedded
+        for layer in self._left_tower:
+            left_vec = layer(left_vec)
+
+        # Process the right video's features
+        right_embedded = self._process_features(right_features, self.right_embeddings)
+        right_vec = right_embedded
+        for layer in self._right_tower:
+            right_vec = layer(right_vec)
+
+        # Ensure the outputs are L2-normalized
+        left_vec = F.normalize(left_vec, p=2, axis=1)
+        right_vec = F.normalize(right_vec, p=2, axis=1)
+
+        return left_vec, right_vec
+
-    def forward(self, input_data, is_infer):
-        query_fc = input_data[0]
-        for n_layer in self._query_layers:
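
For a quick sanity check of the new interface, here is a minimal usage sketch (hypothetical, not part of the patch; it assumes the constructor defaults feature_num=5, embedding_dim=8, output_dim=16 and feeds random placeholder inputs):

    import paddle

    # Build the two-tower model with its default sizes.
    model = DSSMLayer()

    # Random stand-ins for each video's feature_num scalar features,
    # shaped [batch, feature_num].
    left = paddle.rand([4, 5])
    right = paddle.rand([4, 5])

    sim_score, left_vec, right_vec = model(left, right)
    print(sim_score.shape)  # [4, 1]: one cosine similarity per pair
    print(left_vec.shape)   # [4, 16]: L2-normalized tower output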