baichongyang 3 lat temu
rodzic
commit
df151d1b18
1 zmienionych plików z 17 dodań i 17 usunięć
  1. 17 17
      rov_train.py

+ 17 - 17
rov_train.py

@@ -78,24 +78,24 @@ def basic_cal(df):
     df['return_back'] = df.apply(lambda x:1 if x['weighted_retrn']> 0 else 0,axis=1)
     return df 
 
-def today_view_category(df):
+def today_view_category(predict_data):
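 ### Bucket the current day's view count by rank and add the bucket mean as a category feature (original note: three levels for today and three for the next three days; the code below uses seven rank ranges)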
-    data_test1_view1 =   df.loc[predict_data['day1viewcount_rank'] > 10000]['day1viewcount'].mean()
-    data_test1_view2 =   df.loc[(predict_data['day1viewcount_rank'] > 3000)&(predict_data['day1viewcount_rank'] <= 10000)]['day1viewcount'].mean()
-    data_test1_view3 =   df.loc[(predict_data['day1viewcount_rank'] > 1000)&(predict_data['day1viewcount_rank'] <= 3000)]['day1viewcount'].mean()
-    data_test1_view4 =   df.loc[(predict_data['day1viewcount_rank'] > 300)&(predict_data['day1viewcount_rank'] <= 1000)]['day1viewcount'].mean()
-    data_test1_view5 =   df.loc[(predict_data['day1viewcount_rank'] > 100)&(predict_data['day1viewcount_rank'] <= 300)]['day1viewcount'].mean()
-    data_test1_view6 =   df.loc[(predict_data['day1viewcount_rank'] > 30)&(predict_data['day1viewcount_rank'] <= 100)]['day1viewcount'].mean()
-    data_test1_view7 =   df.loc[(predict_data['day1viewcount_rank'] > 0)&(predict_data['day1viewcount_rank'] <= 30)]['day1viewcount'].mean()
+    data_test1_view1 =   predict_data.loc[predict_data['day1viewcount_rank'] > 10000]['day1viewcount'].mean()
+    data_test1_view2 =   predict_data.loc[(predict_data['day1viewcount_rank'] > 3000)&(predict_data['day1viewcount_rank'] <= 10000)]['day1viewcount'].mean()
+    data_test1_view3 =   predict_data.loc[(predict_data['day1viewcount_rank'] > 1000)&(predict_data['day1viewcount_rank'] <= 3000)]['day1viewcount'].mean()
+    data_test1_view4 =   predict_data.loc[(predict_data['day1viewcount_rank'] > 300)&(predict_data['day1viewcount_rank'] <= 1000)]['day1viewcount'].mean()
+    data_test1_view5 =   predict_data.loc[(predict_data['day1viewcount_rank'] > 100)&(predict_data['day1viewcount_rank'] <= 300)]['day1viewcount'].mean()
+    data_test1_view6 =   predict_data.loc[(predict_data['day1viewcount_rank'] > 30)&(predict_data['day1viewcount_rank'] <= 100)]['day1viewcount'].mean()
+    data_test1_view7 =   predict_data.loc[(predict_data['day1viewcount_rank'] > 0)&(predict_data['day1viewcount_rank'] <= 30)]['day1viewcount'].mean()
     
-    df.loc[df['day1viewcount_rank'] > 10000, 'todyviewcount'] = data_test1_view1
-    df.loc[(predict_data['day1viewcount_rank'] > 3000)&(predict_data['day1viewcount_rank'] <= 10000), 'todyviewcount'] = data_test1_view2
-    df.loc[(predict_data['day1viewcount_rank'] > 1000)&(predict_data['day1viewcount_rank'] <= 3000), 'todyviewcount'] = data_test1_view3
-    df.loc[(predict_data['day1viewcount_rank'] > 300)&(predict_data['day1viewcount_rank'] <= 1000), 'todyviewcount'] = data_test1_view4
-    df.loc[(predict_data['day1viewcount_rank'] > 100)&(predict_data['day1viewcount_rank'] <= 300), 'todyviewcount'] = data_test1_view5
-    df.loc[(predict_data['day1viewcount_rank'] > 30)&(predict_data['day1viewcount_rank'] <= 100), 'todyviewcount'] = data_test1_view6
-    df.loc[(predict_data['day1viewcount_rank'] > 0)&(predict_data['day1viewcount_rank'] <= 30), 'todyviewcount'] = data_test1_view7
-    return df
+    predict_data.loc[predict_data['day1viewcount_rank'] > 10000, 'todyviewcount'] = data_test1_view1
+    predict_data.loc[(predict_data['day1viewcount_rank'] > 3000)&(predict_data['day1viewcount_rank'] <= 10000), 'todyviewcount'] = data_test1_view2
+    predict_data.loc[(predict_data['day1viewcount_rank'] > 1000)&(predict_data['day1viewcount_rank'] <= 3000), 'todyviewcount'] = data_test1_view3
+    predict_data.loc[(predict_data['day1viewcount_rank'] > 300)&(predict_data['day1viewcount_rank'] <= 1000), 'todyviewcount'] = data_test1_view4
+    predict_data.loc[(predict_data['day1viewcount_rank'] > 100)&(predict_data['day1viewcount_rank'] <= 300), 'todyviewcount'] = data_test1_view5
+    predict_data.loc[(predict_data['day1viewcount_rank'] > 30)&(predict_data['day1viewcount_rank'] <= 100), 'todyviewcount'] = data_test1_view6
+    predict_data.loc[(predict_data['day1viewcount_rank'] > 0)&(predict_data['day1viewcount_rank'] <= 30), 'todyviewcount'] = data_test1_view7
+    return predict_data
 
 def dataprepare(df_pre):
     #  Feed the features in directly, without adding cross features.
@@ -152,7 +152,7 @@ def process_train_predict_data():
     train_data = getdatasample(train_day, 30, 'rov_feature_add_v1')
     predict_data = getdatasample(predict_day, 1, 'rov_predict_table_add_v1')
     #TODO save temp
-    
+
     train_data = basic_cal(train_data)
     predict_data = basic_cal(predict_data)