|
@@ -79,6 +79,14 @@ def cal_score(df):
|
|
|
# ctr = lastonehour_play/(lastonehour_view+1000), 对ctr限最大值:K2 = 0.6 if ctr > 0.6 else ctr
|
|
|
# score = sharerate * backrate * LOG(lastonehour_return+1) * K2
|
|
|
|
|
|
+ # 视频状态过滤
|
|
|
+ video_ids = [int(video_id) for video_id in df['videoid']]
|
|
|
+ filtered_result = filter_video_status(video_ids=video_ids)
|
|
|
+ filter_videos = set(video_ids) - set(filtered_result)
|
|
|
+ # Drop every row whose video id failed the status filter.
+ # A vectorised boolean mask is required here: int() cannot be applied to a
+ # whole Series and the `in` operator yields a single bool, not a row mask.
+ # This also replaces the append + drop_duplicates(keep=False) anti-join,
+ # which relied on DataFrame.append (removed in pandas 2.0) and would also
+ # discard videos that legitimately occur in more than one input row.
+ df = df[~df['videoid'].astype(int).isin(filter_videos)]
|
|
|
+ # 计算score
|
|
|
df = df.fillna(0)
|
|
|
df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
|
|
|
df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
|
|
@@ -202,7 +210,7 @@ def predict_test(app_type_list, count):
|
|
|
sql = "SELECT id FROM wx_video ORDER BY id DESC LIMIT 40000;"
|
|
|
mysql_helper = MysqlHelper(mysql_info=config_.MYSQL_INFO)
|
|
|
data = mysql_helper.get_data(sql=sql)
|
|
|
- video_ids = [video[0] for video in data]
|
|
|
+ video_ids = [int(video[0]) for video in data]
|
|
|
# 视频状态过滤
|
|
|
filtered_videos = filter_video_status(video_ids)
|
|
|
log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
|