{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from utils import send_msg_to_feishu" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "133\n", " Unnamed: 0 feature_name data_count 0_count 0_rate \\\n", "0 0 stage_one_retrn 319586 290427 0.908760 \n", "1 1 stage_two_retrn 319586 305954 0.957345 \n", "2 2 stage_four_retrn 319586 314081 0.982775 \n", "3 3 stage_three_retrn 319586 311375 0.974307 \n", "4 4 stage_two_return_added 319586 290970 0.910459 \n", ".. ... ... ... ... ... \n", "128 128 videocategory6 319586 316051 0.988939 \n", "129 129 videocategory1 319586 314240 0.983272 \n", "130 130 videocategory49 319586 309823 0.969451 \n", "131 131 videocategory45 319586 319323 0.999177 \n", "132 132 videocategory7 319586 316632 0.990757 \n", "\n", " mean_0.25 mean_0.5 mean_0.75 mean_1 var_0.25 ... \\\n", "0 0.000000 0.000000 0.000000 29.053141 0.000000 ... \n", "1 0.000000 0.000000 0.000000 18.705926 0.000000 ... \n", "2 0.000000 0.000000 0.000000 8.048241 0.000000 ... \n", "3 0.000000 0.000000 0.000000 12.135716 0.000000 ... \n", "4 -43.642152 -21.821076 -14.547445 -10.347215 233092.343014 ... \n", ".. ... ... ... ... ... ... \n", "128 0.000000 0.000000 0.000000 0.011061 0.000000 ... \n", "129 0.000000 0.000000 0.000000 0.016728 0.000000 ... \n", "130 0.000000 0.000000 0.000000 0.030549 0.000000 ... \n", "131 0.000000 0.000000 0.000000 0.000823 0.000000 ... \n", "132 0.000000 0.000000 0.000000 0.009243 0.000000 ... \n", "\n", " non_zero_mean_0.75 non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n", "0 7.139735 318.425769 1.459960e-01 2.504982e+00 \n", "1 21.390416 438.538146 1.029324e+00 2.315790e+01 \n", "2 53.258658 467.230699 7.457763e+00 2.379205e+02 \n", "3 40.048709 472.342589 2.707221e+00 9.688260e+01 \n", "4 -162.274612 -115.558604 2.398539e+06 1.253440e+06 \n", ".. ... ... ... ... \n", "128 1.000000 1.000000 0.000000e+00 0.000000e+00 \n", "129 1.000000 1.000000 0.000000e+00 0.000000e+00 \n", "130 1.000000 1.000000 0.000000e+00 0.000000e+00 \n", "131 1.000000 1.000000 0.000000e+00 0.000000e+00 \n", "132 1.000000 1.000000 0.000000e+00 0.000000e+00 \n", "\n", " non_zero_var_0.75 non_zero_var_1 non_zero_std_0.25 non_zero_std_0.5 \\\n", "0 70.237904 4.695676e+06 0.382094 1.582714 \n", "1 788.731488 4.739503e+06 1.014556 4.812266 \n", "2 3900.825806 3.281403e+06 2.730890 15.424672 \n", "3 2589.586846 3.965567e+06 1.645364 9.842896 \n", "4 848453.781924 6.494582e+05 1548.721605 1119.571313 \n", ".. ... ... ... ... \n", "128 0.000000 0.000000e+00 0.000000 0.000000 \n", "129 0.000000 0.000000e+00 0.000000 0.000000 \n", "130 0.000000 0.000000e+00 0.000000 0.000000 \n", "131 0.000000 0.000000e+00 0.000000 0.000000 \n", "132 0.000000 0.000000e+00 0.000000 0.000000 \n", "\n", " non_zero_std_0.75 non_zero_std_1 \n", "0 8.380806 2166.950796 \n", "1 28.084364 2177.040037 \n", "2 62.456591 1811.464241 \n", "3 50.887983 1991.373137 \n", "4 921.115510 805.889696 \n", ".. ... ... \n", "128 0.000000 0.000000 \n", "129 0.000000 0.000000 \n", "130 0.000000 0.000000 \n", "131 0.000000 0.000000 \n", "132 0.000000 0.000000 \n", "\n", "[133 rows x 29 columns]\n" ] } ], "source": [ "train_file_path_20220218 = './data/train_data_monitor_20220218.csv'\n", "df_train_20220218 = pd.read_csv(train_file_path_20220218)\n", "print(len(df_train_20220218))\n", "print(df_train_20220218)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Unnamed: 0 feature_name data_count 0_count 0_rate \\\n", "0 0 stage_one_retrn 319586 290427 0.908760 \n", "1 1 stage_two_retrn 319586 305954 0.957345 \n", "2 2 stage_four_retrn 319586 314081 0.982775 \n", "3 3 stage_three_retrn 319586 311375 0.974307 \n", "4 4 stage_two_return_added 319586 290970 0.910459 \n", "5 5 stage_three_return_added 319586 306146 0.957946 \n", "6 6 stage_four_return_added 319586 311477 0.974627 \n", "7 7 stage_two_return_ratio 319586 290970 0.910459 \n", "8 8 stage_four_return_ratio 319586 311477 0.974627 \n", "9 9 stage_three_return_ratio 319586 306146 0.957946 \n", "\n", " mean_0.25 mean_0.5 mean_0.75 mean_1 var_0.25 ... \\\n", "0 0.000000 0.000000 0.000000 29.053141 0.000000 ... \n", "1 0.000000 0.000000 0.000000 18.705926 0.000000 ... \n", "2 0.000000 0.000000 0.000000 8.048241 0.000000 ... \n", "3 0.000000 0.000000 0.000000 12.135716 0.000000 ... \n", "4 -43.642152 -21.821076 -14.547445 -10.347215 233092.343014 ... \n", "5 -27.887705 -13.943853 -9.295941 -6.570210 99191.736492 ... \n", "6 -17.560559 -8.780280 -5.853544 -4.087476 43690.700442 ... \n", "7 -inf -inf -inf -inf NaN ... \n", "8 -inf -inf -inf -inf NaN ... \n", "9 -inf -inf -inf -inf NaN ... \n", "\n", " non_zero_mean_0.75 non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n", "0 7.139735 318.425769 1.459960e-01 2.504982e+00 \n", "1 21.390416 438.538146 1.029324e+00 2.315790e+01 \n", "2 53.258658 467.230699 7.457763e+00 2.379205e+02 \n", "3 40.048709 472.342589 2.707221e+00 9.688260e+01 \n", "4 -162.274612 -115.558604 2.398539e+06 1.253440e+06 \n", "5 -220.820454 -156.231176 1.976463e+06 1.080257e+06 \n", "6 -230.444919 -161.092613 1.321688e+06 7.500555e+05 \n", "7 -inf -inf NaN NaN \n", "8 -inf -inf NaN NaN \n", "9 -inf -inf NaN NaN \n", "\n", " non_zero_var_0.75 non_zero_var_1 non_zero_std_0.25 non_zero_std_0.5 \\\n", "0 70.237904 4.695676e+06 0.382094 1.582714 \n", "1 788.731488 4.739503e+06 1.014556 4.812266 \n", "2 3900.825806 3.281403e+06 2.730890 15.424672 \n", "3 2589.586846 3.965567e+06 1.645364 9.842896 \n", "4 848453.781924 6.494582e+05 1548.721605 1119.571313 \n", "5 743546.359590 5.877715e+05 1405.867249 1039.354097 \n", "6 524894.428157 4.317385e+05 1149.646842 866.057452 \n", "7 NaN NaN NaN NaN \n", "8 NaN NaN NaN NaN \n", "9 NaN NaN NaN NaN \n", "\n", " non_zero_std_0.75 non_zero_std_1 \n", "0 8.380806 2166.950796 \n", "1 28.084364 2177.040037 \n", "2 62.456591 1811.464241 \n", "3 50.887983 1991.373137 \n", "4 921.115510 805.889696 \n", "5 862.291343 766.662541 \n", "6 724.495982 657.068119 \n", "7 NaN NaN \n", "8 NaN NaN \n", "9 NaN NaN \n", "\n", "[10 rows x 29 columns]\n" ] } ], "source": [ "print(df_train_20220218.head(10))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Unnamed: 0 feature_name data_count 0_count 0_rate \\\n", "0 0 stage_one_retrn 308675 279287 0.904793 \n", "1 1 stage_two_retrn 308675 294930 0.955471 \n", "2 2 stage_four_retrn 308675 303106 0.981958 \n", "3 3 stage_three_retrn 308675 300388 0.973153 \n", "4 4 stage_two_return_added 308675 279831 0.906555 \n", "5 5 stage_three_return_added 308675 295119 0.956083 \n", "6 6 stage_four_return_added 308675 300487 0.973474 \n", "7 7 stage_two_return_ratio 308675 279831 0.906555 \n", "8 8 stage_four_return_ratio 308675 300487 0.973474 \n", "9 9 stage_three_return_ratio 308675 295119 0.956083 \n", "\n", " mean_0.25 mean_0.5 mean_0.75 mean_1 var_0.25 ... \\\n", "0 0.000000 0.000000 0.000000 30.684182 0.000000 ... \n", "1 0.000000 0.000000 0.000000 19.802935 0.000000 ... \n", "2 0.000000 0.000000 0.000000 8.577209 0.000000 ... \n", "3 0.000000 0.000000 0.000000 12.867560 0.000000 ... \n", "4 -45.941933 -22.970966 -15.313978 -10.881247 246586.644725 ... \n", "5 -29.466509 -14.733254 -9.822170 -6.935375 105535.713482 ... \n", "6 -18.534321 -9.267160 -6.178107 -4.290351 46148.960740 ... \n", "7 -inf -inf -inf -inf NaN ... \n", "8 -inf -inf -inf -inf NaN ... \n", "9 -inf -inf -inf -inf NaN ... \n", "\n", " non_zero_mean_0.75 non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n", "0 7.207377 322.289370 1.471082e-01 2.535730e+00 \n", "1 21.718983 444.719607 1.050753e+00 2.365002e+01 \n", "2 54.492219 475.412103 7.670916e+00 2.505415e+02 \n", "3 40.826094 479.292144 2.763735e+00 1.006501e+02 \n", "4 -163.690302 -116.446020 2.431524e+06 1.270892e+06 \n", "5 -223.428600 -157.920994 2.012587e+06 1.100527e+06 \n", "6 -232.626343 -161.739619 1.332624e+06 7.570373e+05 \n", "7 -inf -inf NaN NaN \n", "8 -inf -inf NaN NaN \n", "9 -inf -inf NaN NaN \n", "\n", " non_zero_var_0.75 non_zero_var_1 non_zero_std_0.25 non_zero_std_0.5 \\\n", "0 72.045508 4.778534e+06 0.383547 1.592398 \n", "1 817.447948 4.818667e+06 1.025062 4.863129 \n", "2 4099.125209 3.347202e+06 2.769642 15.828503 \n", "3 2697.035876 4.017723e+06 1.662449 10.032454 \n", "4 860314.590137 6.589329e+05 1559.334576 1127.338453 \n", "5 757621.619017 5.989774e+05 1418.656878 1049.060053 \n", "6 530023.047128 4.410375e+05 1154.393260 870.078894 \n", "7 NaN NaN NaN NaN \n", "8 NaN NaN NaN NaN \n", "9 NaN NaN NaN NaN \n", "\n", " non_zero_std_0.75 non_zero_std_1 \n", "0 8.487963 2185.985905 \n", "1 28.591047 2195.146262 \n", "2 64.024411 1829.535988 \n", "3 51.932994 2004.425852 \n", "4 927.531450 811.746798 \n", "5 870.414625 773.936274 \n", "6 728.026818 664.106568 \n", "7 NaN NaN \n", "8 NaN NaN \n", "9 NaN NaN \n", "\n", "[10 rows x 29 columns]\n" ] } ], "source": [ "train_file_path_20220219 = './data/train_data_monitor_20220219.csv'\n", "df_train_20220219 = pd.read_csv(train_file_path_20220219)\n", "print(df_train_20220219.head(10))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0_rate mean_1 var_1 non_zero_mean_0.25 non_zero_mean_0.5 \\\n", "0 0.004365 0.056140 0.061938 0.001468 0.004284 \n", "1 0.001957 0.058645 0.061745 0.008027 0.009626 \n", "2 0.000831 0.065725 0.069314 0.008695 0.021213 \n", "3 0.001185 0.060305 0.059503 0.010793 0.016691 \n", "4 0.004288 -0.051611 0.058754 -0.008783 -0.008744 \n", "5 0.001944 -0.055579 0.064221 -0.011894 -0.011860 \n", "6 0.001183 -0.049633 0.067102 -0.008996 -0.009480 \n", "7 0.004288 NaN NaN NaN NaN \n", "8 0.001183 NaN NaN NaN NaN \n", "9 0.001944 NaN NaN NaN NaN \n", "\n", " non_zero_mean_0.75 non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n", "0 0.009474 0.012133 0.007618 0.012275 \n", "1 0.015361 0.014096 0.020818 0.021251 \n", "2 0.023162 0.017510 0.028581 0.053047 \n", "3 0.019411 0.014713 0.020875 0.038888 \n", "4 -0.008724 -0.007679 0.013752 0.013923 \n", "5 -0.011811 -0.010816 0.018277 0.018764 \n", "6 -0.009466 -0.004016 0.008274 0.009308 \n", "7 NaN NaN NaN NaN \n", "8 NaN NaN NaN NaN \n", "9 NaN NaN NaN NaN \n", "\n", " non_zero_var_0.75 non_zero_var_1 feature_name \n", "0 0.025735 0.017646 stage_one_retrn \n", "1 0.036408 0.016703 stage_two_retrn \n", "2 0.050835 0.020052 stage_four_retrn \n", "3 0.041493 0.013152 stage_three_retrn \n", "4 0.013979 0.014589 stage_two_return_added \n", "5 0.018930 0.019065 stage_three_return_added \n", "6 0.009771 0.021539 stage_four_return_added \n", "7 NaN NaN stage_two_return_ratio \n", "8 NaN NaN stage_four_return_ratio \n", "9 NaN NaN stage_three_return_ratio \n" ] } ], "source": [ "feature_top10_20220219 = df_train_20220219.head(10)['feature_name']\n", "monitor_list = ['0_rate', 'mean_1', 'var_1', \n", " 'non_zero_mean_0.25', 'non_zero_mean_0.5', 'non_zero_mean_0.75', 'non_zero_mean_1',\n", " 'non_zero_var_0.25', 'non_zero_var_0.5', 'non_zero_var_0.75', 'non_zero_var_1']\n", "diff_20220218 = df_train_20220218.loc[df_train_20220218['feature_name'].isin(feature_top10_20220219)][monitor_list]\n", "diff_20220219 = df_train_20220219.loc[df_train_20220219['feature_name'].isin(feature_top10_20220219)][monitor_list]\n", "diff = abs(diff_20220219-diff_20220218)/diff_20220218\n", "diff['feature_name'] = feature_top10_20220219\n", "print(diff)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "def cal_diff(yesterday_file, today_file):\n", " df_train_yesterday = pd.read_csv(yesterday_file)\n", " df_train_today = pd.read_csv(today_file)\n", "\n", " feature_top10_today = df_train_today.head(10)['feature_name']\n", " monitor_list = ['0_rate', 'mean_1', 'var_1', \n", " 'non_zero_mean_0.25', 'non_zero_mean_0.5', 'non_zero_mean_0.75', 'non_zero_mean_1',\n", " 'non_zero_var_0.25', 'non_zero_var_0.5', 'non_zero_var_0.75', 'non_zero_var_1']\n", " df_yesterday = df_train_yesterday.loc[df_train_yesterday['feature_name'].isin(feature_top10_today)][monitor_list]\n", " df_today = df_train_today.loc[df_train_today['feature_name'].isin(feature_top10_today)][monitor_list]\n", " diff = abs(df_today - df_yesterday) / df_yesterday\n", " diff['feature_name'] = feature_top10_today\n", " print(diff)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0_rate mean_1 var_1 non_zero_mean_0.25 non_zero_mean_0.5 \\\n", "0 0.005002 0.057325 0.071410 0.000330 0.000139 \n", "1 0.002172 0.061239 0.110287 0.000205 0.001382 \n", "2 0.000856 0.076648 0.379979 0.002546 0.002176 \n", "3 0.001281 0.066219 0.211559 0.006632 0.003632 \n", "4 0.004924 -0.050201 0.103066 -0.008135 -0.007913 \n", "5 0.002149 -0.052000 0.083131 -0.009921 -0.009605 \n", "6 0.001273 -0.045370 0.046743 -0.001187 -0.001560 \n", "7 0.004924 NaN NaN NaN NaN \n", "8 0.001273 NaN NaN NaN NaN \n", "9 0.002149 NaN NaN NaN NaN \n", "\n", " non_zero_mean_0.75 non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n", "0 0.000849 0.009346 0.001695 0.001666 \n", "1 0.003023 0.013982 0.001716 0.002233 \n", "2 0.011587 0.028726 0.002981 0.003943 \n", "3 0.007318 0.018900 0.021120 0.000492 \n", "4 -0.007917 -0.002322 0.032027 0.031292 \n", "5 -0.009624 -0.004975 0.019300 0.019249 \n", "6 -0.001606 -0.001296 0.006401 0.005717 \n", "7 NaN NaN NaN NaN \n", "8 NaN NaN NaN NaN \n", "9 NaN NaN NaN NaN \n", "\n", " non_zero_var_0.75 non_zero_var_1 feature_name \n", "0 0.001764 0.022971 stage_one_retrn \n", "1 0.008437 0.062215 stage_two_retrn \n", "2 0.036278 0.335875 stage_four_retrn \n", "3 0.019741 0.164520 stage_three_retrn \n", "4 0.031083 0.053767 stage_two_return_added \n", "5 0.019300 0.035786 stage_three_return_added \n", "6 0.005394 0.000239 stage_four_return_added \n", "7 NaN NaN stage_two_return_ratio \n", "8 NaN NaN stage_four_return_ratio \n", "9 NaN NaN stage_three_return_ratio \n" ] } ], "source": [ "file_20220219 = './data/train_data_monitor_20220219.csv'\n", "file_20220220 = './data/train_data_monitor_20220220.csv'\n", "cal_diff(yesterday_file=file_20220219, today_file=file_20220220)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0_rate mean_1 var_1 non_zero_mean_0.25 non_zero_mean_0.5 \\\n", "0 0.005572 0.063716 0.206425 0.000137 0.002426 \n", "1 0.002313 0.072997 0.489410 0.007128 0.002891 \n", "2 0.000955 0.110413 1.329780 0.001553 0.009829 \n", "3 0.001368 0.090244 0.962106 0.002813 0.002741 \n", "4 0.005481 -0.046648 0.111135 -0.003523 -0.003452 \n", "5 0.002273 -0.040563 0.062435 -0.001561 -0.001249 \n", "6 0.001349 -0.048717 0.061383 -0.000450 -0.000628 \n", "7 0.005481 NaN NaN NaN NaN \n", "8 0.020957 NaN NaN NaN NaN \n", "9 0.017709 NaN NaN NaN NaN \n", "\n", " non_zero_mean_0.75 non_zero_mean_1 non_zero_var_0.25 non_zero_var_0.5 \\\n", "0 0.004514 0.012775 0.000702 0.008503 \n", "1 0.002557 0.024518 0.018237 0.000353 \n", "2 0.010414 0.057912 0.010849 0.018093 \n", "3 0.003311 0.040968 0.000559 0.006996 \n", "4 -0.003437 -0.003670 0.022244 0.021602 \n", "5 -0.001314 -0.006318 0.001523 0.001560 \n", "6 -0.000693 -0.001422 0.019594 0.017566 \n", "7 NaN NaN NaN NaN \n", "8 NaN NaN NaN NaN \n", "9 NaN NaN NaN NaN \n", "\n", " non_zero_var_0.75 non_zero_var_1 feature_name \n", "0 0.013407 0.151157 stage_one_retrn \n", "1 0.005393 0.436318 stage_two_retrn \n", "2 0.020078 1.277436 stage_four_retrn \n", "3 0.006591 0.912625 stage_three_retrn \n", "4 0.021383 0.058964 stage_two_return_added \n", "5 0.001633 0.015707 stage_three_return_added \n", "6 0.016856 0.014209 stage_four_return_added \n", "7 NaN NaN stage_two_return_ratio \n", "8 NaN NaN stage_three_return_ratio \n", "9 NaN NaN stage_four_return_ratio \n" ] } ], "source": [ "file_20220220 = './data/train_data_monitor_20220220.csv'\n", "file_20220221 = './data/train_data_monitor_20220221.csv'\n", "cal_diff(yesterday_file=file_20220220, today_file=file_20220221)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def cal_diff2(yesterday_file, today_file, feature_top=-1):\n", " df_train_yesterday = pd.read_csv(yesterday_file, index_col='feature_name')\n", " df_train_today = pd.read_csv(today_file, index_col='feature_name')\n", " \n", " # 计算数据偏移量\n", " feature_top_today = df_train_today.index.values[:feature_top]\n", " # print(feature_top10_today)\n", " monitor_list = ['0_rate', 'mean_1', 'var_1', \n", " 'non_zero_mean_0.25', 'non_zero_mean_0.5', 'non_zero_mean_0.75', 'non_zero_mean_1',\n", " 'non_zero_var_0.25', 'non_zero_var_0.5', 'non_zero_var_0.75', 'non_zero_var_1']\n", " df_yesterday = df_train_yesterday.loc[feature_top_today, monitor_list]\n", " # print(df_yesterday)\n", " df_today = df_train_today.loc[feature_top_today, monitor_list]\n", " # print(df_today)\n", " diff = abs(df_today - df_yesterday) / df_yesterday\n", " \n", " # 获取偏移超过 10%的特征\n", " offset_feature_list = []\n", " for column in diff.columns.to_list():\n", " temp_df = diff.loc[diff[column]>=0.1]\n", " if not temp_df.empty:\n", " offset_feature_list.append(temp_df)\n", " offset_feature_df = pd.concat(offset_feature_list)\n", " # 去重\n", " offset_feature_df.drop_duplicates(inplace=True)\n", " \n", " return offset_feature_df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0_rate mean_1 \\\n", "feature_name \n", "stage_four_retrn 0.000955 0.110413 \n", "videocategory2 0.010831 0.111064 \n", "videocategory10 0.001226 0.159772 \n", "videocategory6 0.001242 0.137218 \n", "videocategory8 0.003189 0.126149 \n", "videocategory12 0.001257 0.149417 \n", "videocategory45 0.000074 0.101004 \n", "stage_one_retrn 0.005572 0.063716 \n", "stage_two_retrn 0.002313 0.072997 \n", "stage_three_retrn 0.001368 0.090244 \n", "stage_two_return_added 0.005481 -0.046648 \n", "four_up_return_day1_view_day1_return_count 0.000415 0.082808 \n", "four_up_return_div_three_return_day1 0.001412 0.048325 \n", "day1playcount 0.026017 0.067653 \n", "todyviewcount_rank NaN 0.058987 \n", "usercategory8 0.011316 0.039797 \n", "usercategory49 0.002495 0.031561 \n", "day3viewcount 0.009367 0.058399 \n", "day7viewcount 0.023594 0.061726 \n", "usercategory12 0.013911 0.020124 \n", "usercategory6 0.001753 0.006368 \n", "usercategory45 0.043287 0.056887 \n", "usercategory10 0.026096 0.017610 \n", "usercategory85 0.035717 0.003181 \n", "usercategory1 0.019829 0.009141 \n", "four_up_return_day3_view_day3_return_count 0.000957 0.068258 \n", "usercategory7 0.050857 0.025389 \n", "all_return_day1_return_count 0.007139 0.062472 \n", "usercategory9 0.030080 0.025230 \n", "three_return_day1_return_count 0.007083 0.062605 \n", "four_up_return_day7_return_count 0.001563 0.056201 \n", "all_return_day14_return_count 0.008692 0.052830 \n", "todyviewcount NaN 0.054649 \n", "usercategory11 0.044032 0.022018 \n", "usercategory4 0.006735 0.022629 \n", "four_up_return_day14_return_count 0.001364 0.046675 \n", "four_up_return_day1_return_count 0.001428 0.062099 \n", "four_up_return_day3_return_count 0.001563 0.052683 \n", "four_up_return_day3_view_day3_return_div_three_d3 0.000957 0.053065 \n", "day3ctr 0.032330 0.015493 \n", "day7ctr 0.034606 0.018660 \n", "\n", " var_1 \\\n", "feature_name \n", "stage_four_retrn 1.329780 \n", "videocategory2 0.101436 \n", "videocategory10 0.158741 \n", "videocategory6 0.136146 \n", "videocategory8 0.123362 \n", "videocategory12 0.148348 \n", "videocategory45 0.100937 \n", "stage_one_retrn 0.206425 \n", "stage_two_retrn 0.489410 \n", "stage_three_retrn 0.962106 \n", "stage_two_return_added 0.111135 \n", "four_up_return_day1_view_day1_return_count 0.256227 \n", "four_up_return_div_three_return_day1 0.504287 \n", "day1playcount 0.108513 \n", "todyviewcount_rank 0.172902 \n", "usercategory8 0.040935 \n", "usercategory49 0.034497 \n", "day3viewcount 0.075310 \n", "day7viewcount 0.081014 \n", "usercategory12 0.044976 \n", "usercategory6 0.009099 \n", "usercategory45 0.068209 \n", "usercategory10 0.018733 \n", "usercategory85 0.013926 \n", "usercategory1 0.009071 \n", "four_up_return_day3_view_day3_return_count 0.054460 \n", "usercategory7 0.042184 \n", "all_return_day1_return_count 0.063919 \n", "usercategory9 0.040968 \n", "three_return_day1_return_count 0.072129 \n", "four_up_return_day7_return_count 0.049085 \n", "all_return_day14_return_count 0.061986 \n", "todyviewcount 0.076444 \n", "usercategory11 0.006161 \n", "usercategory4 0.033476 \n", "four_up_return_day14_return_count 0.033090 \n", "four_up_return_day1_return_count 0.045627 \n", "four_up_return_day3_return_count 0.039399 \n", "four_up_return_day3_view_day3_return_div_three_d3 0.049843 \n", "day3ctr 0.059119 \n", "day7ctr 0.050853 \n", "\n", " non_zero_mean_0.25 \\\n", "feature_name \n", "stage_four_retrn 0.001553 \n", "videocategory2 0.000000 \n", "videocategory10 0.000000 \n", "videocategory6 0.000000 \n", "videocategory8 0.000000 \n", "videocategory12 0.000000 \n", "videocategory45 0.000000 \n", "stage_one_retrn 0.000137 \n", "stage_two_retrn 0.007128 \n", "stage_three_retrn 0.002813 \n", "stage_two_return_added -0.003523 \n", "four_up_return_day1_view_day1_return_count 0.002859 \n", "four_up_return_div_three_return_day1 0.016367 \n", "day1playcount 0.008248 \n", "todyviewcount_rank 0.034262 \n", "usercategory8 0.144831 \n", "usercategory49 0.145017 \n", "day3viewcount 0.378388 \n", "day7viewcount 0.369795 \n", "usercategory12 0.027332 \n", "usercategory6 0.078629 \n", "usercategory45 0.055002 \n", "usercategory10 0.049888 \n", "usercategory85 0.014034 \n", "usercategory1 0.009520 \n", "four_up_return_day3_view_day3_return_count 0.036363 \n", "usercategory7 0.031458 \n", "all_return_day1_return_count 0.006241 \n", "usercategory9 0.062283 \n", "three_return_day1_return_count 0.007088 \n", "four_up_return_day7_return_count 0.032509 \n", "all_return_day14_return_count 0.018621 \n", "todyviewcount 0.000000 \n", "usercategory11 0.004512 \n", "usercategory4 0.039397 \n", "four_up_return_day14_return_count 0.035060 \n", "four_up_return_day1_return_count 0.005887 \n", "four_up_return_day3_return_count 0.024137 \n", "four_up_return_day3_view_day3_return_div_three_d3 0.021652 \n", "day3ctr 0.007768 \n", "day7ctr 0.012601 \n", "\n", " non_zero_mean_0.5 \\\n", "feature_name \n", "stage_four_retrn 0.009829 \n", "videocategory2 0.000000 \n", "videocategory10 0.000000 \n", "videocategory6 0.000000 \n", "videocategory8 0.000000 \n", "videocategory12 0.000000 \n", "videocategory45 0.000000 \n", "stage_one_retrn 0.002426 \n", "stage_two_retrn 0.002891 \n", "stage_three_retrn 0.002741 \n", "stage_two_return_added -0.003452 \n", "four_up_return_day1_view_day1_return_count 0.002179 \n", "four_up_return_div_three_return_day1 0.019310 \n", "day1playcount 0.027277 \n", "todyviewcount_rank 0.034269 \n", "usercategory8 0.135420 \n", "usercategory49 0.115789 \n", "day3viewcount 0.121670 \n", "day7viewcount 0.104674 \n", "usercategory12 0.055497 \n", "usercategory6 0.042390 \n", "usercategory45 0.054150 \n", "usercategory10 0.012099 \n", "usercategory85 0.047790 \n", "usercategory1 0.020109 \n", "four_up_return_day3_view_day3_return_count 0.029564 \n", "usercategory7 0.058902 \n", "all_return_day1_return_count 0.006836 \n", "usercategory9 0.095772 \n", "three_return_day1_return_count 0.008354 \n", "four_up_return_day7_return_count 0.040166 \n", "all_return_day14_return_count 0.023581 \n", "todyviewcount 0.054714 \n", "usercategory11 0.043522 \n", "usercategory4 0.043823 \n", "four_up_return_day14_return_count 0.054255 \n", "four_up_return_day1_return_count 0.024934 \n", "four_up_return_day3_return_count 0.033559 \n", "four_up_return_day3_view_day3_return_div_three_d3 0.018049 \n", "day3ctr 0.003958 \n", "day7ctr 0.007342 \n", "\n", " non_zero_mean_0.75 \\\n", "feature_name \n", "stage_four_retrn 0.010414 \n", "videocategory2 0.000000 \n", "videocategory10 0.000000 \n", "videocategory6 0.000000 \n", "videocategory8 0.000000 \n", "videocategory12 0.000000 \n", "videocategory45 0.000000 \n", "stage_one_retrn 0.004514 \n", "stage_two_retrn 0.002557 \n", "stage_three_retrn 0.003311 \n", "stage_two_return_added -0.003437 \n", "four_up_return_day1_view_day1_return_count 0.000774 \n", "four_up_return_div_three_return_day1 0.016906 \n", "day1playcount 0.022199 \n", "todyviewcount_rank 0.034629 \n", "usercategory8 0.083957 \n", "usercategory49 0.065647 \n", "day3viewcount 0.078234 \n", "day7viewcount 0.066762 \n", "usercategory12 0.030142 \n", "usercategory6 0.013628 \n", "usercategory45 0.040219 \n", "usercategory10 0.003313 \n", "usercategory85 0.050542 \n", "usercategory1 0.000484 \n", "four_up_return_day3_view_day3_return_count 0.030331 \n", "usercategory7 0.041463 \n", "all_return_day1_return_count 0.012683 \n", "usercategory9 0.058713 \n", "three_return_day1_return_count 0.013850 \n", "four_up_return_day7_return_count 0.054221 \n", "all_return_day14_return_count 0.026809 \n", "todyviewcount 0.092298 \n", "usercategory11 0.035500 \n", "usercategory4 0.042675 \n", "four_up_return_day14_return_count 0.062747 \n", "four_up_return_day1_return_count 0.041597 \n", "four_up_return_day3_return_count 0.051668 \n", "four_up_return_day3_view_day3_return_div_three_d3 0.014849 \n", "day3ctr 0.001072 \n", "day7ctr 0.003971 \n", "\n", " non_zero_mean_1 \\\n", "feature_name \n", "stage_four_retrn 0.057912 \n", "videocategory2 0.000000 \n", "videocategory10 0.000000 \n", "videocategory6 0.000000 \n", "videocategory8 0.000000 \n", "videocategory12 0.000000 \n", "videocategory45 0.000000 \n", "stage_one_retrn 0.012775 \n", "stage_two_retrn 0.024518 \n", "stage_three_retrn 0.040968 \n", "stage_two_return_added -0.003670 \n", "four_up_return_day1_view_day1_return_count 0.025033 \n", "four_up_return_div_three_return_day1 0.017316 \n", "day1playcount 0.022780 \n", "todyviewcount_rank 0.058987 \n", "usercategory8 0.042837 \n", "usercategory49 0.032021 \n", "day3viewcount 0.052805 \n", "day7viewcount 0.050658 \n", "usercategory12 0.014068 \n", "usercategory6 0.005199 \n", "usercategory45 0.033315 \n", "usercategory10 0.008161 \n", "usercategory85 0.005018 \n", "usercategory1 0.005178 \n", "four_up_return_day3_view_day3_return_count 0.017286 \n", "usercategory7 0.032458 \n", "all_return_day1_return_count 0.026116 \n", "usercategory9 0.028566 \n", "three_return_day1_return_count 0.025082 \n", "four_up_return_day7_return_count 0.036746 \n", "all_return_day14_return_count 0.028682 \n", "todyviewcount 0.054649 \n", "usercategory11 0.008586 \n", "usercategory4 0.023515 \n", "four_up_return_day14_return_count 0.031379 \n", "four_up_return_day1_return_count 0.033956 \n", "four_up_return_day3_return_count 0.030638 \n", "four_up_return_day3_view_day3_return_div_three_d3 0.002818 \n", "day3ctr 0.011206 \n", "day7ctr 0.007188 \n", "\n", " non_zero_var_0.25 \\\n", "feature_name \n", "stage_four_retrn 0.010849 \n", "videocategory2 NaN \n", "videocategory10 NaN \n", "videocategory6 NaN \n", "videocategory8 NaN \n", "videocategory12 NaN \n", "videocategory45 NaN \n", "stage_one_retrn 0.000702 \n", "stage_two_retrn 0.018237 \n", "stage_three_retrn 0.000559 \n", "stage_two_return_added 0.022244 \n", "four_up_return_day1_view_day1_return_count 0.008815 \n", "four_up_return_div_three_return_day1 0.036767 \n", "day1playcount 0.012271 \n", "todyviewcount_rank 0.067377 \n", "usercategory8 0.379763 \n", "usercategory49 0.576893 \n", "day3viewcount 0.557694 \n", "day7viewcount 0.883487 \n", "usercategory12 0.130214 \n", "usercategory6 0.139570 \n", "usercategory45 0.427017 \n", "usercategory10 0.166045 \n", "usercategory85 0.169806 \n", "usercategory1 0.114492 \n", "four_up_return_day3_view_day3_return_count 0.102537 \n", "usercategory7 0.122846 \n", "all_return_day1_return_count 0.124121 \n", "usercategory9 0.209519 \n", "three_return_day1_return_count 0.645306 \n", "four_up_return_day7_return_count 0.143251 \n", "all_return_day14_return_count 0.134059 \n", "todyviewcount NaN \n", "usercategory11 0.009885 \n", "usercategory4 0.030476 \n", "four_up_return_day14_return_count 0.097179 \n", "four_up_return_day1_return_count 0.024356 \n", "four_up_return_day3_return_count 0.093090 \n", "four_up_return_day3_view_day3_return_div_three_d3 0.019762 \n", "day3ctr 0.000536 \n", "day7ctr 0.001180 \n", "\n", " non_zero_var_0.5 \\\n", "feature_name \n", "stage_four_retrn 0.018093 \n", "videocategory2 NaN \n", "videocategory10 NaN \n", "videocategory6 NaN \n", "videocategory8 NaN \n", "videocategory12 NaN \n", "videocategory45 NaN \n", "stage_one_retrn 0.008503 \n", "stage_two_retrn 0.000353 \n", "stage_three_retrn 0.006996 \n", "stage_two_return_added 0.021602 \n", "four_up_return_day1_view_day1_return_count 0.009560 \n", "four_up_return_div_three_return_day1 0.041840 \n", "day1playcount 0.091721 \n", "todyviewcount_rank 0.067377 \n", "usercategory8 0.209333 \n", "usercategory49 0.159956 \n", "day3viewcount 0.032570 \n", "day7viewcount 0.074300 \n", "usercategory12 0.152885 \n", "usercategory6 0.012217 \n", "usercategory45 0.131324 \n", "usercategory10 0.020356 \n", "usercategory85 0.117835 \n", "usercategory1 0.013757 \n", "four_up_return_day3_view_day3_return_count 0.060346 \n", "usercategory7 0.127237 \n", "all_return_day1_return_count 0.017365 \n", "usercategory9 0.183533 \n", "three_return_day1_return_count 0.027203 \n", "four_up_return_day7_return_count 0.089129 \n", "all_return_day14_return_count 0.059231 \n", "todyviewcount 0.234774 \n", "usercategory11 0.136667 \n", "usercategory4 0.106830 \n", "four_up_return_day14_return_count 0.121594 \n", "four_up_return_day1_return_count 0.085318 \n", "four_up_return_day3_return_count 0.079053 \n", "four_up_return_day3_view_day3_return_div_three_d3 0.032172 \n", "day3ctr 0.011389 \n", "day7ctr 0.013882 \n", "\n", " non_zero_var_0.75 \\\n", "feature_name \n", "stage_four_retrn 0.020078 \n", "videocategory2 NaN \n", "videocategory10 NaN \n", "videocategory6 NaN \n", "videocategory8 NaN \n", "videocategory12 NaN \n", "videocategory45 NaN \n", "stage_one_retrn 0.013407 \n", "stage_two_retrn 0.005393 \n", "stage_three_retrn 0.006591 \n", "stage_two_return_added 0.021383 \n", "four_up_return_day1_view_day1_return_count 0.006945 \n", "four_up_return_div_three_return_day1 0.032526 \n", "day1playcount 0.041539 \n", "todyviewcount_rank 0.069366 \n", "usercategory8 0.100997 \n", "usercategory49 0.066917 \n", "day3viewcount 0.033151 \n", "day7viewcount 0.059949 \n", "usercategory12 0.010870 \n", "usercategory6 0.022983 \n", "usercategory45 0.043284 \n", "usercategory10 0.004968 \n", "usercategory85 0.111731 \n", "usercategory1 0.018887 \n", "four_up_return_day3_view_day3_return_count 0.061611 \n", "usercategory7 0.060318 \n", "all_return_day1_return_count 0.033697 \n", "usercategory9 0.072907 \n", "three_return_day1_return_count 0.036757 \n", "four_up_return_day7_return_count 0.120658 \n", "all_return_day14_return_count 0.056375 \n", "todyviewcount 0.215959 \n", "usercategory11 0.051559 \n", "usercategory4 0.073398 \n", "four_up_return_day14_return_count 0.132839 \n", "four_up_return_day1_return_count 0.100219 \n", "four_up_return_day3_return_count 0.118647 \n", "four_up_return_day3_view_day3_return_div_three_d3 0.025368 \n", "day3ctr 0.019178 \n", "day7ctr 0.019550 \n", "\n", " non_zero_var_1 \n", "feature_name \n", "stage_four_retrn 1.277436 \n", "videocategory2 NaN \n", "videocategory10 NaN \n", "videocategory6 NaN \n", "videocategory8 NaN \n", "videocategory12 NaN \n", "videocategory45 NaN \n", "stage_one_retrn 0.151157 \n", "stage_two_retrn 0.436318 \n", "stage_three_retrn 0.912625 \n", "stage_two_return_added 0.058964 \n", "four_up_return_day1_view_day1_return_count 0.203595 \n", "four_up_return_div_three_return_day1 0.466700 \n", "day1playcount 0.062169 \n", "todyviewcount_rank 0.172902 \n", "usercategory8 0.040812 \n", "usercategory49 0.033942 \n", "day3viewcount 0.069517 \n", "day7viewcount 0.069719 \n", "usercategory12 0.040111 \n", "usercategory6 0.007871 \n", "usercategory45 0.045161 \n", "usercategory10 0.010546 \n", "usercategory85 0.007461 \n", "usercategory1 0.005524 \n", "four_up_return_day3_view_day3_return_count 0.003348 \n", "usercategory7 0.048708 \n", "all_return_day1_return_count 0.027357 \n", "usercategory9 0.043999 \n", "three_return_day1_return_count 0.034166 \n", "four_up_return_day7_return_count 0.029382 \n", "all_return_day14_return_count 0.037552 \n", "todyviewcount 0.076444 \n", "usercategory11 0.005929 \n", "usercategory4 0.034067 \n", "four_up_return_day14_return_count 0.017569 \n", "four_up_return_day1_return_count 0.017424 \n", "four_up_return_day3_return_count 0.017275 \n", "four_up_return_day3_view_day3_return_div_three_d3 0.104871 \n", "day3ctr 0.114604 \n", "day7ctr 0.102625 \n" ] } ], "source": [ "file_20220221 = './data/train_data_monitor_20220221.csv'\n", "file_20220220 = './data/train_data_monitor_20220220.csv'\n", "diff = cal_diff2(yesterday_file=file_20220220, today_file=file_20220221, feature_top=-1)\n", "print(diff)\n", "# diff.to_csv('./data/diff_20220219_20220220.csv')" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 0_rate | \n", "mean_1 | \n", "var_1 | \n", "non_zero_mean_0.25 | \n", "non_zero_mean_0.5 | \n", "non_zero_mean_0.75 | \n", "non_zero_mean_1 | \n", "non_zero_var_0.25 | \n", "non_zero_var_0.5 | \n", "non_zero_var_0.75 | \n", "non_zero_var_1 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
feature_name | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
stage_four_retrn | \n", "0.000955 | \n", "0.110413 | \n", "1.329780 | \n", "0.001553 | \n", "0.009829 | \n", "0.010414 | \n", "0.057912 | \n", "0.010849 | \n", "0.018093 | \n", "0.020078 | \n", "1.277436 | \n", "
videocategory2 | \n", "0.010831 | \n", "0.111064 | \n", "0.101436 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
videocategory10 | \n", "0.001226 | \n", "0.159772 | \n", "0.158741 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
videocategory6 | \n", "0.001242 | \n", "0.137218 | \n", "0.136146 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
videocategory8 | \n", "0.003189 | \n", "0.126149 | \n", "0.123362 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
videocategory12 | \n", "0.001257 | \n", "0.149417 | \n", "0.148348 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
videocategory45 | \n", "0.000074 | \n", "0.101004 | \n", "0.100937 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "