|
@@ -0,0 +1,593 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 2,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import pandas as pd"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 3,
|
|
|
+ "metadata": {
|
|
|
+ "scrolled": true
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "133\n",
|
|
|
+ " Unnamed: 0 feature_name data_count 0_count 0_rate \\\n",
|
|
|
+ "0 0 stage_one_retrn 319586 290427 0.908760 \n",
|
|
|
+ "1 1 stage_two_retrn 319586 305954 0.957345 \n",
|
|
|
+ "2 2 stage_four_retrn 319586 314081 0.982775 \n",
|
|
|
+ "3 3 stage_three_retrn 319586 311375 0.974307 \n",
|
|
|
+ "4 4 stage_two_return_added 319586 290970 0.910459 \n",
|
|
|
+ ".. ... ... ... ... ... \n",
|
|
|
+ "128 128 videocategory6 319586 316051 0.988939 \n",
|
|
|
+ "129 129 videocategory1 319586 314240 0.983272 \n",
|
|
|
+ "130 130 videocategory49 319586 309823 0.969451 \n",
|
|
|
+ "131 131 videocategory45 319586 319323 0.999177 \n",
|
|
|
+ "132 132 videocategory7 319586 316632 0.990757 \n",
|
|
|
+ "\n",
|
|
|
+ " mean_0.25 mean_0.5 mean_0.75 mean_1 var_0.25 ... \\\n",
|
|
|
+ "0 0.000000 0.000000 0.000000 29.053141 0.000000 ... \n",
|
|
|
+ "1 0.000000 0.000000 0.000000 18.705926 0.000000 ... \n",
|
|
|
+ "2 0.000000 0.000000 0.000000 8.048241 0.000000 ... \n",
|
|
|
+ "3 0.000000 0.000000 0.000000 12.135716 0.000000 ... \n",
|
|
|
+ "4 -43.642152 -21.821076 -14.547445 -10.347215 233092.343014 ... \n",
|
|
|
+ ".. ... ... ... ... ... ... \n",
|
|
|
+ "128 0.000000 0.000000 0.000000 0.011061 0.000000 ... \n",
|
|
|
+ "129 0.000000 0.000000 0.000000 0.016728 0.000000 ... \n",
|
|
|
+ "130 0.000000 0.000000 0.000000 0.030549 0.000000 ... \n",
|
|
|
+ "131 0.000000 0.000000 0.000000 0.000823 0.000000 ... \n",
|
|
|
+ "132 0.000000 0.000000 0.000000 0.009243 0.000000 ... \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_mean_0.75 non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n",
|
|
|
+ "0 7.139735 318.425769 1.459960e-01 2.504982e+00 \n",
|
|
|
+ "1 21.390416 438.538146 1.029324e+00 2.315790e+01 \n",
|
|
|
+ "2 53.258658 467.230699 7.457763e+00 2.379205e+02 \n",
|
|
|
+ "3 40.048709 472.342589 2.707221e+00 9.688260e+01 \n",
|
|
|
+ "4 -162.274612 -115.558604 2.398539e+06 1.253440e+06 \n",
|
|
|
+ ".. ... ... ... ... \n",
|
|
|
+ "128 1.000000 1.000000 0.000000e+00 0.000000e+00 \n",
|
|
|
+ "129 1.000000 1.000000 0.000000e+00 0.000000e+00 \n",
|
|
|
+ "130 1.000000 1.000000 0.000000e+00 0.000000e+00 \n",
|
|
|
+ "131 1.000000 1.000000 0.000000e+00 0.000000e+00 \n",
|
|
|
+ "132 1.000000 1.000000 0.000000e+00 0.000000e+00 \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_var_0.75 non_zero_var_1 non_zero_std_0.25 non_zero_std_0.5 \\\n",
|
|
|
+ "0 70.237904 4.695676e+06 0.382094 1.582714 \n",
|
|
|
+ "1 788.731488 4.739503e+06 1.014556 4.812266 \n",
|
|
|
+ "2 3900.825806 3.281403e+06 2.730890 15.424672 \n",
|
|
|
+ "3 2589.586846 3.965567e+06 1.645364 9.842896 \n",
|
|
|
+ "4 848453.781924 6.494582e+05 1548.721605 1119.571313 \n",
|
|
|
+ ".. ... ... ... ... \n",
|
|
|
+ "128 0.000000 0.000000e+00 0.000000 0.000000 \n",
|
|
|
+ "129 0.000000 0.000000e+00 0.000000 0.000000 \n",
|
|
|
+ "130 0.000000 0.000000e+00 0.000000 0.000000 \n",
|
|
|
+ "131 0.000000 0.000000e+00 0.000000 0.000000 \n",
|
|
|
+ "132 0.000000 0.000000e+00 0.000000 0.000000 \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_std_0.75 non_zero_std_1 \n",
|
|
|
+ "0 8.380806 2166.950796 \n",
|
|
|
+ "1 28.084364 2177.040037 \n",
|
|
|
+ "2 62.456591 1811.464241 \n",
|
|
|
+ "3 50.887983 1991.373137 \n",
|
|
|
+ "4 921.115510 805.889696 \n",
|
|
|
+ ".. ... ... \n",
|
|
|
+ "128 0.000000 0.000000 \n",
|
|
|
+ "129 0.000000 0.000000 \n",
|
|
|
+ "130 0.000000 0.000000 \n",
|
|
|
+ "131 0.000000 0.000000 \n",
|
|
|
+ "132 0.000000 0.000000 \n",
|
|
|
+ "\n",
|
|
|
+ "[133 rows x 29 columns]\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "train_file_path_20220218 = './data/train_data_monitor_20220218.csv'\n",
|
|
|
+ "df_train_20220218 = pd.read_csv(train_file_path_20220218)\n",
|
|
|
+ "print(len(df_train_20220218))\n",
|
|
|
+ "print(df_train_20220218)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 4,
|
|
|
+ "metadata": {
|
|
|
+ "scrolled": true
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ " Unnamed: 0 feature_name data_count 0_count 0_rate \\\n",
|
|
|
+ "0 0 stage_one_retrn 319586 290427 0.908760 \n",
|
|
|
+ "1 1 stage_two_retrn 319586 305954 0.957345 \n",
|
|
|
+ "2 2 stage_four_retrn 319586 314081 0.982775 \n",
|
|
|
+ "3 3 stage_three_retrn 319586 311375 0.974307 \n",
|
|
|
+ "4 4 stage_two_return_added 319586 290970 0.910459 \n",
|
|
|
+ "5 5 stage_three_return_added 319586 306146 0.957946 \n",
|
|
|
+ "6 6 stage_four_return_added 319586 311477 0.974627 \n",
|
|
|
+ "7 7 stage_two_return_ratio 319586 290970 0.910459 \n",
|
|
|
+ "8 8 stage_four_return_ratio 319586 311477 0.974627 \n",
|
|
|
+ "9 9 stage_three_return_ratio 319586 306146 0.957946 \n",
|
|
|
+ "\n",
|
|
|
+ " mean_0.25 mean_0.5 mean_0.75 mean_1 var_0.25 ... \\\n",
|
|
|
+ "0 0.000000 0.000000 0.000000 29.053141 0.000000 ... \n",
|
|
|
+ "1 0.000000 0.000000 0.000000 18.705926 0.000000 ... \n",
|
|
|
+ "2 0.000000 0.000000 0.000000 8.048241 0.000000 ... \n",
|
|
|
+ "3 0.000000 0.000000 0.000000 12.135716 0.000000 ... \n",
|
|
|
+ "4 -43.642152 -21.821076 -14.547445 -10.347215 233092.343014 ... \n",
|
|
|
+ "5 -27.887705 -13.943853 -9.295941 -6.570210 99191.736492 ... \n",
|
|
|
+ "6 -17.560559 -8.780280 -5.853544 -4.087476 43690.700442 ... \n",
|
|
|
+ "7 -inf -inf -inf -inf NaN ... \n",
|
|
|
+ "8 -inf -inf -inf -inf NaN ... \n",
|
|
|
+ "9 -inf -inf -inf -inf NaN ... \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_mean_0.75 non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n",
|
|
|
+ "0 7.139735 318.425769 1.459960e-01 2.504982e+00 \n",
|
|
|
+ "1 21.390416 438.538146 1.029324e+00 2.315790e+01 \n",
|
|
|
+ "2 53.258658 467.230699 7.457763e+00 2.379205e+02 \n",
|
|
|
+ "3 40.048709 472.342589 2.707221e+00 9.688260e+01 \n",
|
|
|
+ "4 -162.274612 -115.558604 2.398539e+06 1.253440e+06 \n",
|
|
|
+ "5 -220.820454 -156.231176 1.976463e+06 1.080257e+06 \n",
|
|
|
+ "6 -230.444919 -161.092613 1.321688e+06 7.500555e+05 \n",
|
|
|
+ "7 -inf -inf NaN NaN \n",
|
|
|
+ "8 -inf -inf NaN NaN \n",
|
|
|
+ "9 -inf -inf NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_var_0.75 non_zero_var_1 non_zero_std_0.25 non_zero_std_0.5 \\\n",
|
|
|
+ "0 70.237904 4.695676e+06 0.382094 1.582714 \n",
|
|
|
+ "1 788.731488 4.739503e+06 1.014556 4.812266 \n",
|
|
|
+ "2 3900.825806 3.281403e+06 2.730890 15.424672 \n",
|
|
|
+ "3 2589.586846 3.965567e+06 1.645364 9.842896 \n",
|
|
|
+ "4 848453.781924 6.494582e+05 1548.721605 1119.571313 \n",
|
|
|
+ "5 743546.359590 5.877715e+05 1405.867249 1039.354097 \n",
|
|
|
+ "6 524894.428157 4.317385e+05 1149.646842 866.057452 \n",
|
|
|
+ "7 NaN NaN NaN NaN \n",
|
|
|
+ "8 NaN NaN NaN NaN \n",
|
|
|
+ "9 NaN NaN NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_std_0.75 non_zero_std_1 \n",
|
|
|
+ "0 8.380806 2166.950796 \n",
|
|
|
+ "1 28.084364 2177.040037 \n",
|
|
|
+ "2 62.456591 1811.464241 \n",
|
|
|
+ "3 50.887983 1991.373137 \n",
|
|
|
+ "4 921.115510 805.889696 \n",
|
|
|
+ "5 862.291343 766.662541 \n",
|
|
|
+ "6 724.495982 657.068119 \n",
|
|
|
+ "7 NaN NaN \n",
|
|
|
+ "8 NaN NaN \n",
|
|
|
+ "9 NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ "[10 rows x 29 columns]\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "print(df_train_20220218.head(10))"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 5,
|
|
|
+ "metadata": {
|
|
|
+ "scrolled": true
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ " Unnamed: 0 feature_name data_count 0_count 0_rate \\\n",
|
|
|
+ "0 0 stage_one_retrn 308675 279287 0.904793 \n",
|
|
|
+ "1 1 stage_two_retrn 308675 294930 0.955471 \n",
|
|
|
+ "2 2 stage_four_retrn 308675 303106 0.981958 \n",
|
|
|
+ "3 3 stage_three_retrn 308675 300388 0.973153 \n",
|
|
|
+ "4 4 stage_two_return_added 308675 279831 0.906555 \n",
|
|
|
+ "5 5 stage_three_return_added 308675 295119 0.956083 \n",
|
|
|
+ "6 6 stage_four_return_added 308675 300487 0.973474 \n",
|
|
|
+ "7 7 stage_two_return_ratio 308675 279831 0.906555 \n",
|
|
|
+ "8 8 stage_four_return_ratio 308675 300487 0.973474 \n",
|
|
|
+ "9 9 stage_three_return_ratio 308675 295119 0.956083 \n",
|
|
|
+ "\n",
|
|
|
+ " mean_0.25 mean_0.5 mean_0.75 mean_1 var_0.25 ... \\\n",
|
|
|
+ "0 0.000000 0.000000 0.000000 30.684182 0.000000 ... \n",
|
|
|
+ "1 0.000000 0.000000 0.000000 19.802935 0.000000 ... \n",
|
|
|
+ "2 0.000000 0.000000 0.000000 8.577209 0.000000 ... \n",
|
|
|
+ "3 0.000000 0.000000 0.000000 12.867560 0.000000 ... \n",
|
|
|
+ "4 -45.941933 -22.970966 -15.313978 -10.881247 246586.644725 ... \n",
|
|
|
+ "5 -29.466509 -14.733254 -9.822170 -6.935375 105535.713482 ... \n",
|
|
|
+ "6 -18.534321 -9.267160 -6.178107 -4.290351 46148.960740 ... \n",
|
|
|
+ "7 -inf -inf -inf -inf NaN ... \n",
|
|
|
+ "8 -inf -inf -inf -inf NaN ... \n",
|
|
|
+ "9 -inf -inf -inf -inf NaN ... \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_mean_0.75 non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n",
|
|
|
+ "0 7.207377 322.289370 1.471082e-01 2.535730e+00 \n",
|
|
|
+ "1 21.718983 444.719607 1.050753e+00 2.365002e+01 \n",
|
|
|
+ "2 54.492219 475.412103 7.670916e+00 2.505415e+02 \n",
|
|
|
+ "3 40.826094 479.292144 2.763735e+00 1.006501e+02 \n",
|
|
|
+ "4 -163.690302 -116.446020 2.431524e+06 1.270892e+06 \n",
|
|
|
+ "5 -223.428600 -157.920994 2.012587e+06 1.100527e+06 \n",
|
|
|
+ "6 -232.626343 -161.739619 1.332624e+06 7.570373e+05 \n",
|
|
|
+ "7 -inf -inf NaN NaN \n",
|
|
|
+ "8 -inf -inf NaN NaN \n",
|
|
|
+ "9 -inf -inf NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_var_0.75 non_zero_var_1 non_zero_std_0.25 non_zero_std_0.5 \\\n",
|
|
|
+ "0 72.045508 4.778534e+06 0.383547 1.592398 \n",
|
|
|
+ "1 817.447948 4.818667e+06 1.025062 4.863129 \n",
|
|
|
+ "2 4099.125209 3.347202e+06 2.769642 15.828503 \n",
|
|
|
+ "3 2697.035876 4.017723e+06 1.662449 10.032454 \n",
|
|
|
+ "4 860314.590137 6.589329e+05 1559.334576 1127.338453 \n",
|
|
|
+ "5 757621.619017 5.989774e+05 1418.656878 1049.060053 \n",
|
|
|
+ "6 530023.047128 4.410375e+05 1154.393260 870.078894 \n",
|
|
|
+ "7 NaN NaN NaN NaN \n",
|
|
|
+ "8 NaN NaN NaN NaN \n",
|
|
|
+ "9 NaN NaN NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_std_0.75 non_zero_std_1 \n",
|
|
|
+ "0 8.487963 2185.985905 \n",
|
|
|
+ "1 28.591047 2195.146262 \n",
|
|
|
+ "2 64.024411 1829.535988 \n",
|
|
|
+ "3 51.932994 2004.425852 \n",
|
|
|
+ "4 927.531450 811.746798 \n",
|
|
|
+ "5 870.414625 773.936274 \n",
|
|
|
+ "6 728.026818 664.106568 \n",
|
|
|
+ "7 NaN NaN \n",
|
|
|
+ "8 NaN NaN \n",
|
|
|
+ "9 NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ "[10 rows x 29 columns]\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "train_file_path_20220219 = './data/train_data_monitor_20220219.csv'\n",
|
|
|
+ "df_train_20220219 = pd.read_csv(train_file_path_20220219)\n",
|
|
|
+ "print(df_train_20220219.head(10))"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 20,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ " 0_rate mean_1 var_1 non_zero_mean_0.25 non_zero_mean_0.5 \\\n",
|
|
|
+ "0 0.004365 0.056140 0.061938 0.001468 0.004284 \n",
|
|
|
+ "1 0.001957 0.058645 0.061745 0.008027 0.009626 \n",
|
|
|
+ "2 0.000831 0.065725 0.069314 0.008695 0.021213 \n",
|
|
|
+ "3 0.001185 0.060305 0.059503 0.010793 0.016691 \n",
|
|
|
+ "4 0.004288 -0.051611 0.058754 -0.008783 -0.008744 \n",
|
|
|
+ "5 0.001944 -0.055579 0.064221 -0.011894 -0.011860 \n",
|
|
|
+ "6 0.001183 -0.049633 0.067102 -0.008996 -0.009480 \n",
|
|
|
+ "7 0.004288 NaN NaN NaN NaN \n",
|
|
|
+ "8 0.001183 NaN NaN NaN NaN \n",
|
|
|
+ "9 0.001944 NaN NaN NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_mean_0.75 non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n",
|
|
|
+ "0 0.009474 0.012133 0.007618 0.012275 \n",
|
|
|
+ "1 0.015361 0.014096 0.020818 0.021251 \n",
|
|
|
+ "2 0.023162 0.017510 0.028581 0.053047 \n",
|
|
|
+ "3 0.019411 0.014713 0.020875 0.038888 \n",
|
|
|
+ "4 -0.008724 -0.007679 0.013752 0.013923 \n",
|
|
|
+ "5 -0.011811 -0.010816 0.018277 0.018764 \n",
|
|
|
+ "6 -0.009466 -0.004016 0.008274 0.009308 \n",
|
|
|
+ "7 NaN NaN NaN NaN \n",
|
|
|
+ "8 NaN NaN NaN NaN \n",
|
|
|
+ "9 NaN NaN NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_var_0.75 non_zero_var_1 feature_name \n",
|
|
|
+ "0 0.025735 0.017646 stage_one_retrn \n",
|
|
|
+ "1 0.036408 0.016703 stage_two_retrn \n",
|
|
|
+ "2 0.050835 0.020052 stage_four_retrn \n",
|
|
|
+ "3 0.041493 0.013152 stage_three_retrn \n",
|
|
|
+ "4 0.013979 0.014589 stage_two_return_added \n",
|
|
|
+ "5 0.018930 0.019065 stage_three_return_added \n",
|
|
|
+ "6 0.009771 0.021539 stage_four_return_added \n",
|
|
|
+ "7 NaN NaN stage_two_return_ratio \n",
|
|
|
+ "8 NaN NaN stage_four_return_ratio \n",
|
|
|
+ "9 NaN NaN stage_three_return_ratio \n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "feature_top10_20220219 = df_train_20220219.head(10)['feature_name']\n",
|
|
|
+ "monitor_list = ['0_rate', 'mean_1', 'var_1', \n",
|
|
|
+ " 'non_zero_mean_0.25', 'non_zero_mean_0.5', 'non_zero_mean_0.75', 'non_zero_mean_1',\n",
|
|
|
+ " 'non_zero_var_0.25', 'non_zero_var_0.5', 'non_zero_var_0.75', 'non_zero_var_1']\n",
|
|
|
+ "diff_20220218 = df_train_20220218.loc[df_train_20220218['feature_name'].isin(feature_top10_20220219)][monitor_list]\n",
|
|
|
+ "diff_20220219 = df_train_20220219.loc[df_train_20220219['feature_name'].isin(feature_top10_20220219)][monitor_list]\n",
|
|
|
+ "diff = abs(diff_20220219-diff_20220218)/diff_20220218\n",
|
|
|
+ "diff['feature_name'] = feature_top10_20220219\n",
|
|
|
+ "print(diff)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 30,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def cal_diff(yesterday_file, today_file):\n",
|
|
|
+ " df_train_yesterday = pd.read_csv(yesterday_file)\n",
|
|
|
+ " df_train_today = pd.read_csv(today_file)\n",
|
|
|
+ "\n",
|
|
|
+ " feature_top10_today = df_train_today.head(10)['feature_name']\n",
|
|
|
+ " monitor_list = ['0_rate', 'mean_1', 'var_1', \n",
|
|
|
+ " 'non_zero_mean_0.25', 'non_zero_mean_0.5', 'non_zero_mean_0.75', 'non_zero_mean_1',\n",
|
|
|
+ " 'non_zero_var_0.25', 'non_zero_var_0.5', 'non_zero_var_0.75', 'non_zero_var_1']\n",
|
|
|
+ " df_yesterday = df_train_yesterday.loc[df_train_yesterday['feature_name'].isin(feature_top10_today)][monitor_list]\n",
|
|
|
+ " df_today = df_train_today.loc[df_train_today['feature_name'].isin(feature_top10_today)][monitor_list]\n",
|
|
|
+ " diff = abs(df_today - df_yesterday) / df_yesterday\n",
|
|
|
+ " diff['feature_name'] = feature_top10_today\n",
|
|
|
+ " print(diff)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 31,
|
|
|
+ "metadata": {
|
|
|
+ "scrolled": true
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ " 0_rate mean_1 var_1 non_zero_mean_0.25 non_zero_mean_0.5 \\\n",
|
|
|
+ "0 0.005002 0.057325 0.071410 0.000330 0.000139 \n",
|
|
|
+ "1 0.002172 0.061239 0.110287 0.000205 0.001382 \n",
|
|
|
+ "2 0.000856 0.076648 0.379979 0.002546 0.002176 \n",
|
|
|
+ "3 0.001281 0.066219 0.211559 0.006632 0.003632 \n",
|
|
|
+ "4 0.004924 -0.050201 0.103066 -0.008135 -0.007913 \n",
|
|
|
+ "5 0.002149 -0.052000 0.083131 -0.009921 -0.009605 \n",
|
|
|
+ "6 0.001273 -0.045370 0.046743 -0.001187 -0.001560 \n",
|
|
|
+ "7 0.004924 NaN NaN NaN NaN \n",
|
|
|
+ "8 0.001273 NaN NaN NaN NaN \n",
|
|
|
+ "9 0.002149 NaN NaN NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_mean_0.75 non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n",
|
|
|
+ "0 0.000849 0.009346 0.001695 0.001666 \n",
|
|
|
+ "1 0.003023 0.013982 0.001716 0.002233 \n",
|
|
|
+ "2 0.011587 0.028726 0.002981 0.003943 \n",
|
|
|
+ "3 0.007318 0.018900 0.021120 0.000492 \n",
|
|
|
+ "4 -0.007917 -0.002322 0.032027 0.031292 \n",
|
|
|
+ "5 -0.009624 -0.004975 0.019300 0.019249 \n",
|
|
|
+ "6 -0.001606 -0.001296 0.006401 0.005717 \n",
|
|
|
+ "7 NaN NaN NaN NaN \n",
|
|
|
+ "8 NaN NaN NaN NaN \n",
|
|
|
+ "9 NaN NaN NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_var_0.75 non_zero_var_1 feature_name \n",
|
|
|
+ "0 0.001764 0.022971 stage_one_retrn \n",
|
|
|
+ "1 0.008437 0.062215 stage_two_retrn \n",
|
|
|
+ "2 0.036278 0.335875 stage_four_retrn \n",
|
|
|
+ "3 0.019741 0.164520 stage_three_retrn \n",
|
|
|
+ "4 0.031083 0.053767 stage_two_return_added \n",
|
|
|
+ "5 0.019300 0.035786 stage_three_return_added \n",
|
|
|
+ "6 0.005394 0.000239 stage_four_return_added \n",
|
|
|
+ "7 NaN NaN stage_two_return_ratio \n",
|
|
|
+ "8 NaN NaN stage_four_return_ratio \n",
|
|
|
+ "9 NaN NaN stage_three_return_ratio \n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "file_20220219 = './data/train_data_monitor_20220219.csv'\n",
|
|
|
+ "file_20220220 = './data/train_data_monitor_20220220.csv'\n",
|
|
|
+ "cal_diff(yesterday_file=file_20220219, today_file=file_20220220)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 26,
|
|
|
+ "metadata": {
|
|
|
+ "scrolled": true
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ " 0_rate mean_1 var_1 non_zero_mean_0.25 non_zero_mean_0.5 \\\n",
|
|
|
+ "0 0.005572 0.063716 0.206425 0.000137 0.002426 \n",
|
|
|
+ "1 0.002313 0.072997 0.489410 0.007128 0.002891 \n",
|
|
|
+ "2 0.000955 0.110413 1.329780 0.001553 0.009829 \n",
|
|
|
+ "3 0.001368 0.090244 0.962106 0.002813 0.002741 \n",
|
|
|
+ "4 0.005481 -0.046648 0.111135 -0.003523 -0.003452 \n",
|
|
|
+ "5 0.002273 -0.040563 0.062435 -0.001561 -0.001249 \n",
|
|
|
+ "6 0.001349 -0.048717 0.061383 -0.000450 -0.000628 \n",
|
|
|
+ "7 0.005481 NaN NaN NaN NaN \n",
|
|
|
+ "8 0.020957 NaN NaN NaN NaN \n",
|
|
|
+ "9 0.017709 NaN NaN NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_mean_0.75 non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n",
|
|
|
+ "0 0.004514 0.012775 0.000702 0.008503 \n",
|
|
|
+ "1 0.002557 0.024518 0.018237 0.000353 \n",
|
|
|
+ "2 0.010414 0.057912 0.010849 0.018093 \n",
|
|
|
+ "3 0.003311 0.040968 0.000559 0.006996 \n",
|
|
|
+ "4 -0.003437 -0.003670 0.022244 0.021602 \n",
|
|
|
+ "5 -0.001314 -0.006318 0.001523 0.001560 \n",
|
|
|
+ "6 -0.000693 -0.001422 0.019594 0.017566 \n",
|
|
|
+ "7 NaN NaN NaN NaN \n",
|
|
|
+ "8 NaN NaN NaN NaN \n",
|
|
|
+ "9 NaN NaN NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_var_0.75 non_zero_var_1 feature_name \n",
|
|
|
+ "0 0.013407 0.151157 stage_one_retrn \n",
|
|
|
+ "1 0.005393 0.436318 stage_two_retrn \n",
|
|
|
+ "2 0.020078 1.277436 stage_four_retrn \n",
|
|
|
+ "3 0.006591 0.912625 stage_three_retrn \n",
|
|
|
+ "4 0.021383 0.058964 stage_two_return_added \n",
|
|
|
+ "5 0.001633 0.015707 stage_three_return_added \n",
|
|
|
+ "6 0.016856 0.014209 stage_four_return_added \n",
|
|
|
+ "7 NaN NaN stage_two_return_ratio \n",
|
|
|
+ "8 NaN NaN stage_three_return_ratio \n",
|
|
|
+ "9 NaN NaN stage_four_return_ratio \n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "file_20220220 = './data/train_data_monitor_20220220.csv'\n",
|
|
|
+ "file_20220221 = './data/train_data_monitor_20220221.csv'\n",
|
|
|
+ "cal_diff(yesterday_file=file_20220220, today_file=file_20220221)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 55,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def cal_diff2(yesterday_file, today_file, feature_top=-1):\n",
|
|
|
+ " df_train_yesterday = pd.read_csv(yesterday_file, index_col='feature_name')\n",
|
|
|
+ " df_train_today = pd.read_csv(today_file, index_col='feature_name')\n",
|
|
|
+ "\n",
|
|
|
+ " feature_top_today = df_train_today.index.values[:feature_top]\n",
|
|
|
+ "# print(feature_top10_today)\n",
|
|
|
+ " monitor_list = ['0_rate', 'mean_1', 'var_1', \n",
|
|
|
+ " 'non_zero_mean_0.25', 'non_zero_mean_0.5', 'non_zero_mean_0.75', 'non_zero_mean_1',\n",
|
|
|
+ " 'non_zero_var_0.25', 'non_zero_var_0.5', 'non_zero_var_0.75', 'non_zero_var_1']\n",
|
|
|
+ " df_yesterday = df_train_yesterday.loc[feature_top_today, monitor_list]\n",
|
|
|
+ "# print(df_yesterday)\n",
|
|
|
+ " df_today = df_train_today.loc[feature_top_today, monitor_list]\n",
|
|
|
+ "# print(df_today)\n",
|
|
|
+ " diff = abs(df_today - df_yesterday) / df_yesterday\n",
|
|
|
+ " return diff"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 58,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ " 0_rate mean_1 var_1 non_zero_mean_0.25 \\\n",
|
|
|
+ "feature_name \n",
|
|
|
+ "stage_one_retrn 0.005002 0.057325 0.071410 0.000330 \n",
|
|
|
+ "stage_two_retrn 0.002172 0.061239 0.110287 0.000205 \n",
|
|
|
+ "stage_four_retrn 0.000856 0.076648 0.379979 0.002546 \n",
|
|
|
+ "stage_three_retrn 0.001281 0.066219 0.211559 0.006632 \n",
|
|
|
+ "stage_two_return_added 0.004924 -0.050201 0.103066 -0.008135 \n",
|
|
|
+ "... ... ... ... ... \n",
|
|
|
+ "videocategory6 0.000860 0.086727 0.085941 0.000000 \n",
|
|
|
+ "videocategory5 0.001115 0.022561 0.021471 0.000000 \n",
|
|
|
+ "videocategory11 0.001181 0.068037 0.066936 0.000000 \n",
|
|
|
+ "videocategory12 0.000831 0.089856 0.089100 0.000000 \n",
|
|
|
+ "videocategory45 0.000051 0.064570 0.064522 0.000000 \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_mean_0.5 non_zero_mean_0.75 \\\n",
|
|
|
+ "feature_name \n",
|
|
|
+ "stage_one_retrn 0.000139 0.000849 \n",
|
|
|
+ "stage_two_retrn 0.001382 0.003023 \n",
|
|
|
+ "stage_four_retrn 0.002176 0.011587 \n",
|
|
|
+ "stage_three_retrn 0.003632 0.007318 \n",
|
|
|
+ "stage_two_return_added -0.007913 -0.007917 \n",
|
|
|
+ "... ... ... \n",
|
|
|
+ "videocategory6 0.000000 0.000000 \n",
|
|
|
+ "videocategory5 0.000000 0.000000 \n",
|
|
|
+ "videocategory11 0.000000 0.000000 \n",
|
|
|
+ "videocategory12 0.000000 0.000000 \n",
|
|
|
+ "videocategory45 0.000000 0.000000 \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n",
|
|
|
+ "feature_name \n",
|
|
|
+ "stage_one_retrn 0.009346 0.001695 0.001666 \n",
|
|
|
+ "stage_two_retrn 0.013982 0.001716 0.002233 \n",
|
|
|
+ "stage_four_retrn 0.028726 0.002981 0.003943 \n",
|
|
|
+ "stage_three_retrn 0.018900 0.021120 0.000492 \n",
|
|
|
+ "stage_two_return_added -0.002322 0.032027 0.031292 \n",
|
|
|
+ "... ... ... ... \n",
|
|
|
+ "videocategory6 0.000000 NaN NaN \n",
|
|
|
+ "videocategory5 0.000000 NaN NaN \n",
|
|
|
+ "videocategory11 0.000000 NaN NaN \n",
|
|
|
+ "videocategory12 0.000000 NaN NaN \n",
|
|
|
+ "videocategory45 0.000000 NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ " non_zero_var_0.75 non_zero_var_1 \n",
|
|
|
+ "feature_name \n",
|
|
|
+ "stage_one_retrn 0.001764 0.022971 \n",
|
|
|
+ "stage_two_retrn 0.008437 0.062215 \n",
|
|
|
+ "stage_four_retrn 0.036278 0.335875 \n",
|
|
|
+ "stage_three_retrn 0.019741 0.164520 \n",
|
|
|
+ "stage_two_return_added 0.031083 0.053767 \n",
|
|
|
+ "... ... ... \n",
|
|
|
+ "videocategory6 NaN NaN \n",
|
|
|
+ "videocategory5 NaN NaN \n",
|
|
|
+ "videocategory11 NaN NaN \n",
|
|
|
+ "videocategory12 NaN NaN \n",
|
|
|
+ "videocategory45 NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ "[132 rows x 11 columns]\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "file_20220219 = './data/train_data_monitor_20220219.csv'\n",
|
|
|
+ "file_20220220 = './data/train_data_monitor_20220220.csv'\n",
|
|
|
+ "diff = cal_diff2(yesterday_file=file_20220219, today_file=file_20220220, feature_top=-1)\n",
|
|
|
+ "print(diff)\n",
|
|
|
+ "diff.to_csv('./data/diff_20220219_20220220.csv')"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.8.5"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 4
|
|
|
+}
|