|
@@ -78,24 +78,24 @@ def basic_cal(df):
|
|
|
df['return_back'] = df.apply(lambda x:1 if x['weighted_retrn']> 0 else 0,axis=1)
|
|
|
return df
|
|
|
|
|
|
-def today_view_category(df):
|
|
|
+def today_view_category(predict_data):
|
|
|
### Bucket the current day's exposure (view count) into three levels and the next three days' exposure into 3 levels; add a Category feature
|
|
|
- data_test1_view1 = df.loc[predict_data['day1viewcount_rank'] > 10000]['day1viewcount'].mean()
|
|
|
- data_test1_view2 = df.loc[(predict_data['day1viewcount_rank'] > 3000)&(predict_data['day1viewcount_rank'] <= 10000)]['day1viewcount'].mean()
|
|
|
- data_test1_view3 = df.loc[(predict_data['day1viewcount_rank'] > 1000)&(predict_data['day1viewcount_rank'] <= 3000)]['day1viewcount'].mean()
|
|
|
- data_test1_view4 = df.loc[(predict_data['day1viewcount_rank'] > 300)&(predict_data['day1viewcount_rank'] <= 1000)]['day1viewcount'].mean()
|
|
|
- data_test1_view5 = df.loc[(predict_data['day1viewcount_rank'] > 100)&(predict_data['day1viewcount_rank'] <= 300)]['day1viewcount'].mean()
|
|
|
- data_test1_view6 = df.loc[(predict_data['day1viewcount_rank'] > 30)&(predict_data['day1viewcount_rank'] <= 100)]['day1viewcount'].mean()
|
|
|
- data_test1_view7 = df.loc[(predict_data['day1viewcount_rank'] > 0)&(predict_data['day1viewcount_rank'] <= 30)]['day1viewcount'].mean()
|
|
|
+ data_test1_view1 = predict_data.loc[predict_data['day1viewcount_rank'] > 10000]['day1viewcount'].mean()
|
|
|
+ data_test1_view2 = predict_data.loc[(predict_data['day1viewcount_rank'] > 3000)&(predict_data['day1viewcount_rank'] <= 10000)]['day1viewcount'].mean()
|
|
|
+ data_test1_view3 = predict_data.loc[(predict_data['day1viewcount_rank'] > 1000)&(predict_data['day1viewcount_rank'] <= 3000)]['day1viewcount'].mean()
|
|
|
+ data_test1_view4 = predict_data.loc[(predict_data['day1viewcount_rank'] > 300)&(predict_data['day1viewcount_rank'] <= 1000)]['day1viewcount'].mean()
|
|
|
+ data_test1_view5 = predict_data.loc[(predict_data['day1viewcount_rank'] > 100)&(predict_data['day1viewcount_rank'] <= 300)]['day1viewcount'].mean()
|
|
|
+ data_test1_view6 = predict_data.loc[(predict_data['day1viewcount_rank'] > 30)&(predict_data['day1viewcount_rank'] <= 100)]['day1viewcount'].mean()
|
|
|
+ data_test1_view7 = predict_data.loc[(predict_data['day1viewcount_rank'] > 0)&(predict_data['day1viewcount_rank'] <= 30)]['day1viewcount'].mean()
|
|
|
|
|
|
- df.loc[df['day1viewcount_rank'] > 10000, 'todyviewcount'] = data_test1_view1
|
|
|
- df.loc[(predict_data['day1viewcount_rank'] > 3000)&(predict_data['day1viewcount_rank'] <= 10000), 'todyviewcount'] = data_test1_view2
|
|
|
- df.loc[(predict_data['day1viewcount_rank'] > 1000)&(predict_data['day1viewcount_rank'] <= 3000), 'todyviewcount'] = data_test1_view3
|
|
|
- df.loc[(predict_data['day1viewcount_rank'] > 300)&(predict_data['day1viewcount_rank'] <= 1000), 'todyviewcount'] = data_test1_view4
|
|
|
- df.loc[(predict_data['day1viewcount_rank'] > 100)&(predict_data['day1viewcount_rank'] <= 300), 'todyviewcount'] = data_test1_view5
|
|
|
- df.loc[(predict_data['day1viewcount_rank'] > 30)&(predict_data['day1viewcount_rank'] <= 100), 'todyviewcount'] = data_test1_view6
|
|
|
- df.loc[(predict_data['day1viewcount_rank'] > 0)&(predict_data['day1viewcount_rank'] <= 30), 'todyviewcount'] = data_test1_view7
|
|
|
- return df
|
|
|
+ predict_data.loc[predict_data['day1viewcount_rank'] > 10000, 'todyviewcount'] = data_test1_view1
|
|
|
+ predict_data.loc[(predict_data['day1viewcount_rank'] > 3000)&(predict_data['day1viewcount_rank'] <= 10000), 'todyviewcount'] = data_test1_view2
|
|
|
+ predict_data.loc[(predict_data['day1viewcount_rank'] > 1000)&(predict_data['day1viewcount_rank'] <= 3000), 'todyviewcount'] = data_test1_view3
|
|
|
+ predict_data.loc[(predict_data['day1viewcount_rank'] > 300)&(predict_data['day1viewcount_rank'] <= 1000), 'todyviewcount'] = data_test1_view4
|
|
|
+ predict_data.loc[(predict_data['day1viewcount_rank'] > 100)&(predict_data['day1viewcount_rank'] <= 300), 'todyviewcount'] = data_test1_view5
|
|
|
+ predict_data.loc[(predict_data['day1viewcount_rank'] > 30)&(predict_data['day1viewcount_rank'] <= 100), 'todyviewcount'] = data_test1_view6
|
|
|
+ predict_data.loc[(predict_data['day1viewcount_rank'] > 0)&(predict_data['day1viewcount_rank'] <= 30), 'todyviewcount'] = data_test1_view7
|
|
|
+ return predict_data
|
|
|
|
|
|
def dataprepare(df_pre):
|
|
|
# Feed the features in directly, without adding cross features.
|
|
@@ -152,7 +152,7 @@ def process_train_predict_data():
|
|
|
train_data = getdatasample(train_day, 30, 'rov_feature_add_v1')
|
|
|
predict_data = getdatasample(predict_day, 1, 'rov_predict_table_add_v1')
|
|
|
#TODO save temp
|
|
|
-
|
|
|
+
|
|
|
train_data = basic_cal(train_data)
|
|
|
predict_data = basic_cal(predict_data)
|
|
|
|