# net.py (9.0 KB)


import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
  5. class DSSMLayer(nn.Layer):
  6. def __init__(self, feature_nums=[5,5,5,5,5], embedding_dim=8, output_dim=16,
  7. hidden_layers=[40, 32], hidden_acts=["relu", "relu"]):
  8. super(DSSMLayer, self).__init__()
  9. tag_features_dict = {
  10. "vid":3407301,
  11. "cate1":42,
  12. "cate2":67,
  13. "video_style":6517,
  14. "valid_time":728,
  15. "captions_color":656,
  16. "audience_age_group":65,
  17. "audience_value_type":61,
  18. "font_size":49,
  19. "cover_persons_num":44,
  20. "audience_gender":37,
  21. "sentiment_tendency":11,
  22. "video_type":8,
  23. "background_music_type":6,
  24. "captions":3,
  25. "has_end_credit_guide":2
  26. }
  27. self.stat_features_num = 47
  28. self.stat_features_num_embeddings = 100
  29. self.feature_num = len(list(tag_features_dict.values()))
  30. feature_nums = list(tag_features_dict.values())
  31. self.embedding_dim = embedding_dim
  32. self.output_dim = output_dim
  33. # 第一层的输入维度是所有特征的embedding拼接
  34. self.hidden_layers = [self.feature_num * embedding_dim + 47*3*embedding_dim] + hidden_layers + [output_dim]
  35. self.hidden_acts = hidden_acts
  36. # 为每个特征创建对应维度的Embedding层
  37. self.left_embeddings = nn.LayerList()
  38. for i in range(self.feature_num): # 视频的静态特征
  39. layer = nn.Embedding(
  40. num_embeddings=feature_nums[i],
  41. embedding_dim=embedding_dim,
  42. weight_attr=paddle.ParamAttr(
  43. initializer=paddle.nn.initializer.XavierNormal()
  44. )
  45. )
  46. self.left_embeddings.append(layer)
  47. for i in range(self.stat_features_num): #长短期和品类的表现 分桶后映射成index
  48. layer = nn.Embedding(
  49. num_embeddings=self.stat_features_num_embeddings,
  50. embedding_dim=embedding_dim,
  51. weight_attr=paddle.ParamAttr(
  52. initializer=paddle.nn.initializer.XavierNormal()
  53. )
  54. )
  55. self.left_embeddings.append(layer)
  56. for i in range(self.stat_features_num*2): # 长短期和品类的表现,hive表的原始值映射到0-1之间的小数 + hive表的原始值
  57. layer = nn.Linear(
  58. in_features=1,
  59. out_features=embedding_dim,
  60. weight_attr=paddle.ParamAttr(
  61. initializer=paddle.nn.initializer.XavierNormal()
  62. )
  63. )
  64. self.left_embeddings.append(layer)
  65. self.right_embeddings = nn.LayerList()
  66. for i in range(self.feature_num): # 视频的静态特征
  67. layer = nn.Embedding(
  68. num_embeddings=feature_nums[i],
  69. embedding_dim=embedding_dim,
  70. weight_attr=paddle.ParamAttr(
  71. initializer=paddle.nn.initializer.XavierNormal()
  72. )
  73. )
  74. self.right_embeddings.append(layer)
  75. for i in range(self.stat_features_num): #长短期和品类的表现 分桶后映射成index
  76. layer = nn.Embedding(
  77. num_embeddings=self.stat_features_num_embeddings,
  78. embedding_dim=embedding_dim,
  79. weight_attr=paddle.ParamAttr(
  80. initializer=paddle.nn.initializer.XavierNormal()
  81. )
  82. )
  83. self.right_embeddings.append(layer)
  84. for i in range(self.stat_features_num*2): # 长短期和品类的表现,hive表的原始值映射到0-1之间的小数 + hive表的原始值
  85. layer = nn.Linear(
  86. in_features=1,
  87. out_features=embedding_dim,
  88. weight_attr=paddle.ParamAttr(
  89. initializer=paddle.nn.initializer.XavierNormal()
  90. )
  91. )
  92. self.right_embeddings.append(layer)
  93. # 左视频塔
  94. self._left_tower = []
  95. for i in range(len(self.hidden_layers) - 1):
  96. linear = paddle.nn.Linear(
  97. in_features=self.hidden_layers[i],
  98. out_features=self.hidden_layers[i + 1],
  99. weight_attr=paddle.ParamAttr(
  100. initializer=paddle.nn.initializer.XavierNormal()
  101. ),
  102. bias_attr=paddle.ParamAttr(
  103. initializer=paddle.nn.initializer.Constant(value=0.0)
  104. )
  105. )
  106. self.add_sublayer('left_linear_%d' % i, linear)
  107. self._left_tower.append(linear)
  108. if i < len(hidden_acts) and self.hidden_acts[i] == "relu":
  109. act = paddle.nn.ReLU()
  110. self.add_sublayer('left_act_%d' % i, act)
  111. self._left_tower.append(act)
  112. # 右视频塔
  113. self._right_tower = []
  114. for i in range(len(self.hidden_layers) - 1):
  115. linear = paddle.nn.Linear(
  116. in_features=self.hidden_layers[i],
  117. out_features=self.hidden_layers[i + 1],
  118. weight_attr=paddle.ParamAttr(
  119. initializer=paddle.nn.initializer.XavierNormal()
  120. ),
  121. bias_attr=paddle.ParamAttr(
  122. initializer=paddle.nn.initializer.Constant(value=0.0)
  123. )
  124. )
  125. self.add_sublayer('right_linear_%d' % i, linear)
  126. self._right_tower.append(linear)
  127. if i < len(hidden_acts) and self.hidden_acts[i] == "relu":
  128. act = paddle.nn.ReLU()
  129. self.add_sublayer('right_act_%d' % i, act)
  130. self._right_tower.append(act)
  131. def _process_features(self, features, embeddings):
  132. # 将每个特征转换为embedding
  133. embedded_features = []
  134. for i in range(self.feature_num):
  135. feature = paddle.slice(
  136. features,
  137. axes=[1],
  138. starts=[i],
  139. ends=[i+1]
  140. )
  141. feature = paddle.cast(feature, dtype='int64')
  142. embedded = embeddings[i](feature)
  143. embedded_features.append(embedded)
  144. # 将所有embedding连接起来
  145. return paddle.concat(embedded_features, axis=1)
  146. def forward(self, left_features, right_features, is_infer=False):
  147. if is_infer:
  148. left_vec = self.get_leftvectors(left_features)
  149. return left_vec
  150. else:
  151. # 获取两个视频的特征表示
  152. left_vec, right_vec = self.get_vectors(left_features, right_features)
  153. # 计算相似度
  154. sim_score = F.cosine_similarity(
  155. left_vec,
  156. right_vec,
  157. axis=1
  158. ).reshape([-1, 1])
  159. return sim_score, left_vec, right_vec
  160. def get_leftvectors(self, left_features):
  161. """获取两个视频的16维特征向量"""
  162. # 处理左视频特征
  163. left_embedded = self._process_features(left_features, self.left_embeddings)
  164. # left_vec = left_embedded
  165. left_vec = paddle.reshape(left_embedded, [-1, self.feature_num * self.embedding_dim])
  166. for i, layer in enumerate(self._left_tower):
  167. left_vec = layer(left_vec)
  168. return left_vec
  169. def get_rightvectors(self,right_features):
  170. # 处理右视频特征
  171. right_embedded = self._process_features(right_features, self.right_embeddings)
  172. # right_vec = right_embedded
  173. right_vec = paddle.reshape(right_embedded, [-1, self.feature_num * self.embedding_dim])
  174. for layer in self._right_tower:
  175. right_vec = layer(right_vec)
  176. # 确保输出是L2归一化的
  177. left_vec = F.normalize(left_vec, p=2, axis=1)
  178. right_vec = F.normalize(right_vec, p=2, axis=1)
  179. return right_vec
  180. def get_vectors(self, left_features, right_features):
  181. """获取两个视频的16维特征向量"""
  182. # 处理左视频特征
  183. left_embedded = self._process_features(left_features, self.left_embeddings)
  184. # left_vec = left_embedded
  185. left_vec = paddle.reshape(left_embedded, [-1, self.feature_num * self.embedding_dim])
  186. for i, layer in enumerate(self._left_tower):
  187. left_vec = layer(left_vec)
  188. # 处理右视频特征
  189. right_embedded = self._process_features(right_features, self.right_embeddings)
  190. # right_vec = right_embedded
  191. right_vec = paddle.reshape(right_embedded, [-1, self.feature_num * self.embedding_dim])
  192. for layer in self._right_tower:
  193. right_vec = layer(right_vec)
  194. # 确保输出是L2归一化的
  195. left_vec = F.normalize(left_vec, p=2, axis=1)
  196. right_vec = F.normalize(right_vec, p=2, axis=1)
  197. return left_vec, right_vec