liqian 2 éve
szülő
commit
d46edde21c
3 módosított fájl, 23 hozzáadás és 11 törlés
  1. 3 0
      config.py
  2. 13 11
      videos_similarity.py
  3. 7 0
      videos_similarity_task.sh

+ 3 - 0
config.py

@@ -261,6 +261,9 @@ class BaseConfig(object):
     RECALL_POSITION1_KEY_NAME = 'com.weiqu.video.recall.hot.apptype.h.item.score.position.1'
     RECALL_POSITION2_KEY_NAME = 'com.weiqu.video.recall.hot.apptype.h.item.score.position.2'
 
+    # Redis key prefix for the related-recommendation list of Zuijingqi (最惊奇) movie videos; full key format: com.weiqu.movie.relevant.list.item.{videoId}
+    MOVIE_RELEVANT_LIST_KEY_NAME_PREFIX = 'com.weiqu.movie.relevant.list.item.'
+
 
 class DevelopmentConfig(BaseConfig):
     """开发环境配置"""

+ 13 - 11
videos_similarity.py

@@ -5,6 +5,7 @@ import datetime
 import pandas as pd
 from odps import ODPS
 from utils import filter_video_status
+from db_helper import RedisHelper
 from config import set_config
 from log import Log
 
@@ -79,7 +80,7 @@ def cos_dist(vec1, vec2):
 
 
 def get_movie_video_top_list():
-    sql = "select videoid, title from videoods.movie_store_video_top_list;"
+    sql = "select videoid, title from videoods.movie_store_video_top_list where returns > 5;"
     data_df = get_data_from_odps(project='videoods', sql=sql)
     data_df = data_df.fillna(0)
     data_df['videoid'] = data_df['videoid'].astype(int)
@@ -114,6 +115,7 @@ def get_sim_videos():
 
 
 def similarity_rank(movie_videos, sim_videos):
+    redis_helper = RedisHelper()
     sim_result = []
     for video_id, title in movie_videos.items():
         item_sim_list = []
@@ -128,22 +130,22 @@ def similarity_rank(movie_videos, sim_videos):
                 item_sim_list.append(item_sim)
         item_sim_list.sort(key=lambda x: x['dist'], reverse=True)
         sim_result.extend(item_sim_list[:4])
-                # sim_result.append(item_sim)
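+        # Store this video's related list (vid -> dist) in a Redis zset, replacing any existing key; expires after 24 hours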
+        key_name = f"{config_.MOVIE_RELEVANT_LIST_KEY_NAME_PREFIX}{video_id}"
+        relevant_data = dict()
+        for item in item_sim_list:
+            relevant_data[item['vid']] = item['dist']
+        if redis_helper.key_exists(key_name=key_name):
+            redis_helper.del_keys(key_name=key_name)
+        redis_helper.add_data_with_zset(key_name=key_name, data=relevant_data, expire_time=24*3600)
+
     dist_df = pd.DataFrame(sim_result, columns=['top_video_id', 'title', 'vid', 'title1', 'dist'])
     dist_df.to_csv('./data/videos_dist.csv', index=False)
-        # sim_result[video_id] = item_sim
-        # print(video_id, item_sim)
 
 
 if __name__ == '__main__':
-    # str_list = ['S手的生活.2020', '花X道Z', '肉Y不能.法语中字', '窃YU无罪']
-    # s1 = "杀手的生活"
-    # for s2 in str_list:
-    #     vec1, vec2 = get_word_vector(s1, s2)
-    #     dist1 = cos_dist(vec1, vec2)
-    #     print(dist1)
     movie_videos = get_movie_video_top_list()
     sim_videos = get_sim_videos()
-    print(len(movie_videos), len(sim_videos))
+    log_.info(f"movie_videos count = {len(movie_videos)}, sim_videos count = {len(sim_videos)}")
     similarity_rank(movie_videos=movie_videos, sim_videos=sim_videos)
 

+ 7 - 0
videos_similarity_task.sh

@@ -0,0 +1,7 @@
+source /etc/profile
+echo $ROV_OFFLINE_ENV
+if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
+    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/videos_similarity.py
+elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
+    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/videos_similarity.py
+fi