liqian 3 rokov pred
rodič
commit
256e9f3e1c
1 zmenil súbory, kde vykonal 593 pridanie a 0 odobranie
  1. 593 0
      data_monitor.ipynb

+ 593 - 0
data_monitor.ipynb

@@ -0,0 +1,593 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "133\n",
+      "     Unnamed: 0            feature_name  data_count  0_count    0_rate  \\\n",
+      "0             0         stage_one_retrn      319586   290427  0.908760   \n",
+      "1             1         stage_two_retrn      319586   305954  0.957345   \n",
+      "2             2        stage_four_retrn      319586   314081  0.982775   \n",
+      "3             3       stage_three_retrn      319586   311375  0.974307   \n",
+      "4             4  stage_two_return_added      319586   290970  0.910459   \n",
+      "..          ...                     ...         ...      ...       ...   \n",
+      "128         128          videocategory6      319586   316051  0.988939   \n",
+      "129         129          videocategory1      319586   314240  0.983272   \n",
+      "130         130         videocategory49      319586   309823  0.969451   \n",
+      "131         131         videocategory45      319586   319323  0.999177   \n",
+      "132         132          videocategory7      319586   316632  0.990757   \n",
+      "\n",
+      "     mean_0.25   mean_0.5  mean_0.75     mean_1       var_0.25  ...  \\\n",
+      "0     0.000000   0.000000   0.000000  29.053141       0.000000  ...   \n",
+      "1     0.000000   0.000000   0.000000  18.705926       0.000000  ...   \n",
+      "2     0.000000   0.000000   0.000000   8.048241       0.000000  ...   \n",
+      "3     0.000000   0.000000   0.000000  12.135716       0.000000  ...   \n",
+      "4   -43.642152 -21.821076 -14.547445 -10.347215  233092.343014  ...   \n",
+      "..         ...        ...        ...        ...            ...  ...   \n",
+      "128   0.000000   0.000000   0.000000   0.011061       0.000000  ...   \n",
+      "129   0.000000   0.000000   0.000000   0.016728       0.000000  ...   \n",
+      "130   0.000000   0.000000   0.000000   0.030549       0.000000  ...   \n",
+      "131   0.000000   0.000000   0.000000   0.000823       0.000000  ...   \n",
+      "132   0.000000   0.000000   0.000000   0.009243       0.000000  ...   \n",
+      "\n",
+      "     non_zero_mean_0.75  non_zero_mean_1  non_zero_var_0.25  non_zero_var_0.5  \\\n",
+      "0              7.139735       318.425769       1.459960e-01      2.504982e+00   \n",
+      "1             21.390416       438.538146       1.029324e+00      2.315790e+01   \n",
+      "2             53.258658       467.230699       7.457763e+00      2.379205e+02   \n",
+      "3             40.048709       472.342589       2.707221e+00      9.688260e+01   \n",
+      "4           -162.274612      -115.558604       2.398539e+06      1.253440e+06   \n",
+      "..                  ...              ...                ...               ...   \n",
+      "128            1.000000         1.000000       0.000000e+00      0.000000e+00   \n",
+      "129            1.000000         1.000000       0.000000e+00      0.000000e+00   \n",
+      "130            1.000000         1.000000       0.000000e+00      0.000000e+00   \n",
+      "131            1.000000         1.000000       0.000000e+00      0.000000e+00   \n",
+      "132            1.000000         1.000000       0.000000e+00      0.000000e+00   \n",
+      "\n",
+      "     non_zero_var_0.75  non_zero_var_1  non_zero_std_0.25  non_zero_std_0.5  \\\n",
+      "0            70.237904    4.695676e+06           0.382094          1.582714   \n",
+      "1           788.731488    4.739503e+06           1.014556          4.812266   \n",
+      "2          3900.825806    3.281403e+06           2.730890         15.424672   \n",
+      "3          2589.586846    3.965567e+06           1.645364          9.842896   \n",
+      "4        848453.781924    6.494582e+05        1548.721605       1119.571313   \n",
+      "..                 ...             ...                ...               ...   \n",
+      "128           0.000000    0.000000e+00           0.000000          0.000000   \n",
+      "129           0.000000    0.000000e+00           0.000000          0.000000   \n",
+      "130           0.000000    0.000000e+00           0.000000          0.000000   \n",
+      "131           0.000000    0.000000e+00           0.000000          0.000000   \n",
+      "132           0.000000    0.000000e+00           0.000000          0.000000   \n",
+      "\n",
+      "     non_zero_std_0.75  non_zero_std_1  \n",
+      "0             8.380806     2166.950796  \n",
+      "1            28.084364     2177.040037  \n",
+      "2            62.456591     1811.464241  \n",
+      "3            50.887983     1991.373137  \n",
+      "4           921.115510      805.889696  \n",
+      "..                 ...             ...  \n",
+      "128           0.000000        0.000000  \n",
+      "129           0.000000        0.000000  \n",
+      "130           0.000000        0.000000  \n",
+      "131           0.000000        0.000000  \n",
+      "132           0.000000        0.000000  \n",
+      "\n",
+      "[133 rows x 29 columns]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Read the 20220218 monitor CSV, then print its row count followed by the frame itself.\n",
+    "train_file_path_20220218 = './data/train_data_monitor_20220218.csv'\n",
+    "df_train_20220218 = pd.read_csv(filepath_or_buffer=train_file_path_20220218)\n",
+    "print(df_train_20220218.shape[0])\n",
+    "print(df_train_20220218)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   Unnamed: 0              feature_name  data_count  0_count    0_rate  \\\n",
+      "0           0           stage_one_retrn      319586   290427  0.908760   \n",
+      "1           1           stage_two_retrn      319586   305954  0.957345   \n",
+      "2           2          stage_four_retrn      319586   314081  0.982775   \n",
+      "3           3         stage_three_retrn      319586   311375  0.974307   \n",
+      "4           4    stage_two_return_added      319586   290970  0.910459   \n",
+      "5           5  stage_three_return_added      319586   306146  0.957946   \n",
+      "6           6   stage_four_return_added      319586   311477  0.974627   \n",
+      "7           7    stage_two_return_ratio      319586   290970  0.910459   \n",
+      "8           8   stage_four_return_ratio      319586   311477  0.974627   \n",
+      "9           9  stage_three_return_ratio      319586   306146  0.957946   \n",
+      "\n",
+      "   mean_0.25   mean_0.5  mean_0.75     mean_1       var_0.25  ...  \\\n",
+      "0   0.000000   0.000000   0.000000  29.053141       0.000000  ...   \n",
+      "1   0.000000   0.000000   0.000000  18.705926       0.000000  ...   \n",
+      "2   0.000000   0.000000   0.000000   8.048241       0.000000  ...   \n",
+      "3   0.000000   0.000000   0.000000  12.135716       0.000000  ...   \n",
+      "4 -43.642152 -21.821076 -14.547445 -10.347215  233092.343014  ...   \n",
+      "5 -27.887705 -13.943853  -9.295941  -6.570210   99191.736492  ...   \n",
+      "6 -17.560559  -8.780280  -5.853544  -4.087476   43690.700442  ...   \n",
+      "7       -inf       -inf       -inf       -inf            NaN  ...   \n",
+      "8       -inf       -inf       -inf       -inf            NaN  ...   \n",
+      "9       -inf       -inf       -inf       -inf            NaN  ...   \n",
+      "\n",
+      "   non_zero_mean_0.75  non_zero_mean_1  non_zero_var_0.25  non_zero_var_0.5  \\\n",
+      "0            7.139735       318.425769       1.459960e-01      2.504982e+00   \n",
+      "1           21.390416       438.538146       1.029324e+00      2.315790e+01   \n",
+      "2           53.258658       467.230699       7.457763e+00      2.379205e+02   \n",
+      "3           40.048709       472.342589       2.707221e+00      9.688260e+01   \n",
+      "4         -162.274612      -115.558604       2.398539e+06      1.253440e+06   \n",
+      "5         -220.820454      -156.231176       1.976463e+06      1.080257e+06   \n",
+      "6         -230.444919      -161.092613       1.321688e+06      7.500555e+05   \n",
+      "7                -inf             -inf                NaN               NaN   \n",
+      "8                -inf             -inf                NaN               NaN   \n",
+      "9                -inf             -inf                NaN               NaN   \n",
+      "\n",
+      "   non_zero_var_0.75  non_zero_var_1  non_zero_std_0.25  non_zero_std_0.5  \\\n",
+      "0          70.237904    4.695676e+06           0.382094          1.582714   \n",
+      "1         788.731488    4.739503e+06           1.014556          4.812266   \n",
+      "2        3900.825806    3.281403e+06           2.730890         15.424672   \n",
+      "3        2589.586846    3.965567e+06           1.645364          9.842896   \n",
+      "4      848453.781924    6.494582e+05        1548.721605       1119.571313   \n",
+      "5      743546.359590    5.877715e+05        1405.867249       1039.354097   \n",
+      "6      524894.428157    4.317385e+05        1149.646842        866.057452   \n",
+      "7                NaN             NaN                NaN               NaN   \n",
+      "8                NaN             NaN                NaN               NaN   \n",
+      "9                NaN             NaN                NaN               NaN   \n",
+      "\n",
+      "   non_zero_std_0.75  non_zero_std_1  \n",
+      "0           8.380806     2166.950796  \n",
+      "1          28.084364     2177.040037  \n",
+      "2          62.456591     1811.464241  \n",
+      "3          50.887983     1991.373137  \n",
+      "4         921.115510      805.889696  \n",
+      "5         862.291343      766.662541  \n",
+      "6         724.495982      657.068119  \n",
+      "7                NaN             NaN  \n",
+      "8                NaN             NaN  \n",
+      "9                NaN             NaN  \n",
+      "\n",
+      "[10 rows x 29 columns]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Print the first ten rows of the 20220218 frame.\n",
+    "print(df_train_20220218.iloc[:10])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   Unnamed: 0              feature_name  data_count  0_count    0_rate  \\\n",
+      "0           0           stage_one_retrn      308675   279287  0.904793   \n",
+      "1           1           stage_two_retrn      308675   294930  0.955471   \n",
+      "2           2          stage_four_retrn      308675   303106  0.981958   \n",
+      "3           3         stage_three_retrn      308675   300388  0.973153   \n",
+      "4           4    stage_two_return_added      308675   279831  0.906555   \n",
+      "5           5  stage_three_return_added      308675   295119  0.956083   \n",
+      "6           6   stage_four_return_added      308675   300487  0.973474   \n",
+      "7           7    stage_two_return_ratio      308675   279831  0.906555   \n",
+      "8           8   stage_four_return_ratio      308675   300487  0.973474   \n",
+      "9           9  stage_three_return_ratio      308675   295119  0.956083   \n",
+      "\n",
+      "   mean_0.25   mean_0.5  mean_0.75     mean_1       var_0.25  ...  \\\n",
+      "0   0.000000   0.000000   0.000000  30.684182       0.000000  ...   \n",
+      "1   0.000000   0.000000   0.000000  19.802935       0.000000  ...   \n",
+      "2   0.000000   0.000000   0.000000   8.577209       0.000000  ...   \n",
+      "3   0.000000   0.000000   0.000000  12.867560       0.000000  ...   \n",
+      "4 -45.941933 -22.970966 -15.313978 -10.881247  246586.644725  ...   \n",
+      "5 -29.466509 -14.733254  -9.822170  -6.935375  105535.713482  ...   \n",
+      "6 -18.534321  -9.267160  -6.178107  -4.290351   46148.960740  ...   \n",
+      "7       -inf       -inf       -inf       -inf            NaN  ...   \n",
+      "8       -inf       -inf       -inf       -inf            NaN  ...   \n",
+      "9       -inf       -inf       -inf       -inf            NaN  ...   \n",
+      "\n",
+      "   non_zero_mean_0.75  non_zero_mean_1  non_zero_var_0.25  non_zero_var_0.5  \\\n",
+      "0            7.207377       322.289370       1.471082e-01      2.535730e+00   \n",
+      "1           21.718983       444.719607       1.050753e+00      2.365002e+01   \n",
+      "2           54.492219       475.412103       7.670916e+00      2.505415e+02   \n",
+      "3           40.826094       479.292144       2.763735e+00      1.006501e+02   \n",
+      "4         -163.690302      -116.446020       2.431524e+06      1.270892e+06   \n",
+      "5         -223.428600      -157.920994       2.012587e+06      1.100527e+06   \n",
+      "6         -232.626343      -161.739619       1.332624e+06      7.570373e+05   \n",
+      "7                -inf             -inf                NaN               NaN   \n",
+      "8                -inf             -inf                NaN               NaN   \n",
+      "9                -inf             -inf                NaN               NaN   \n",
+      "\n",
+      "   non_zero_var_0.75  non_zero_var_1  non_zero_std_0.25  non_zero_std_0.5  \\\n",
+      "0          72.045508    4.778534e+06           0.383547          1.592398   \n",
+      "1         817.447948    4.818667e+06           1.025062          4.863129   \n",
+      "2        4099.125209    3.347202e+06           2.769642         15.828503   \n",
+      "3        2697.035876    4.017723e+06           1.662449         10.032454   \n",
+      "4      860314.590137    6.589329e+05        1559.334576       1127.338453   \n",
+      "5      757621.619017    5.989774e+05        1418.656878       1049.060053   \n",
+      "6      530023.047128    4.410375e+05        1154.393260        870.078894   \n",
+      "7                NaN             NaN                NaN               NaN   \n",
+      "8                NaN             NaN                NaN               NaN   \n",
+      "9                NaN             NaN                NaN               NaN   \n",
+      "\n",
+      "   non_zero_std_0.75  non_zero_std_1  \n",
+      "0           8.487963     2185.985905  \n",
+      "1          28.591047     2195.146262  \n",
+      "2          64.024411     1829.535988  \n",
+      "3          51.932994     2004.425852  \n",
+      "4         927.531450      811.746798  \n",
+      "5         870.414625      773.936274  \n",
+      "6         728.026818      664.106568  \n",
+      "7                NaN             NaN  \n",
+      "8                NaN             NaN  \n",
+      "9                NaN             NaN  \n",
+      "\n",
+      "[10 rows x 29 columns]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Read the 20220219 monitor CSV and print its first ten rows.\n",
+    "train_file_path_20220219 = './data/train_data_monitor_20220219.csv'\n",
+    "df_train_20220219 = pd.read_csv(filepath_or_buffer=train_file_path_20220219)\n",
+    "print(df_train_20220219.iloc[:10])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "     0_rate    mean_1     var_1  non_zero_mean_0.25  non_zero_mean_0.5  \\\n",
+      "0  0.004365  0.056140  0.061938            0.001468           0.004284   \n",
+      "1  0.001957  0.058645  0.061745            0.008027           0.009626   \n",
+      "2  0.000831  0.065725  0.069314            0.008695           0.021213   \n",
+      "3  0.001185  0.060305  0.059503            0.010793           0.016691   \n",
+      "4  0.004288 -0.051611  0.058754           -0.008783          -0.008744   \n",
+      "5  0.001944 -0.055579  0.064221           -0.011894          -0.011860   \n",
+      "6  0.001183 -0.049633  0.067102           -0.008996          -0.009480   \n",
+      "7  0.004288       NaN       NaN                 NaN                NaN   \n",
+      "8  0.001183       NaN       NaN                 NaN                NaN   \n",
+      "9  0.001944       NaN       NaN                 NaN                NaN   \n",
+      "\n",
+      "   non_zero_mean_0.75  non_zero_mean_1  non_zero_var_0.25  non_zero_var_0.5  \\\n",
+      "0            0.009474         0.012133           0.007618          0.012275   \n",
+      "1            0.015361         0.014096           0.020818          0.021251   \n",
+      "2            0.023162         0.017510           0.028581          0.053047   \n",
+      "3            0.019411         0.014713           0.020875          0.038888   \n",
+      "4           -0.008724        -0.007679           0.013752          0.013923   \n",
+      "5           -0.011811        -0.010816           0.018277          0.018764   \n",
+      "6           -0.009466        -0.004016           0.008274          0.009308   \n",
+      "7                 NaN              NaN                NaN               NaN   \n",
+      "8                 NaN              NaN                NaN               NaN   \n",
+      "9                 NaN              NaN                NaN               NaN   \n",
+      "\n",
+      "   non_zero_var_0.75  non_zero_var_1              feature_name  \n",
+      "0           0.025735        0.017646           stage_one_retrn  \n",
+      "1           0.036408        0.016703           stage_two_retrn  \n",
+      "2           0.050835        0.020052          stage_four_retrn  \n",
+      "3           0.041493        0.013152         stage_three_retrn  \n",
+      "4           0.013979        0.014589    stage_two_return_added  \n",
+      "5           0.018930        0.019065  stage_three_return_added  \n",
+      "6           0.009771        0.021539   stage_four_return_added  \n",
+      "7                NaN             NaN    stage_two_return_ratio  \n",
+      "8                NaN             NaN   stage_four_return_ratio  \n",
+      "9                NaN             NaN  stage_three_return_ratio  \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Relative change of the monitored columns between the 20220218 and 20220219 frames,\n",
+    "# for the first ten features listed in the 20220219 frame.\n",
+    "feature_top10_20220219 = df_train_20220219.head(10)['feature_name']\n",
+    "monitor_list = ['0_rate', 'mean_1', 'var_1',\n",
+    "                'non_zero_mean_0.25', 'non_zero_mean_0.5', 'non_zero_mean_0.75', 'non_zero_mean_1',\n",
+    "                'non_zero_var_0.25', 'non_zero_var_0.5', 'non_zero_var_0.75', 'non_zero_var_1']\n",
+    "# Match rows by feature name instead of by row position, so a different row order in the\n",
+    "# two files cannot pair up unrelated features. Assumes feature_name is unique per file.\n",
+    "top10_names = feature_top10_20220219.tolist()\n",
+    "diff_20220218 = df_train_20220218.set_index('feature_name').reindex(top10_names)[monitor_list]\n",
+    "diff_20220219 = df_train_20220219.set_index('feature_name').reindex(top10_names)[monitor_list]\n",
+    "# Divide by the magnitude of the baseline so the ratio stays non-negative for negative baselines.\n",
+    "diff = (diff_20220219 - diff_20220218).abs() / diff_20220218.abs()\n",
+    "diff = diff.reset_index(drop=True)\n",
+    "diff['feature_name'] = top10_names\n",
+    "print(diff)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def cal_diff(yesterday_file, today_file):\n",
+    "    \"\"\"Print the relative day-over-day change for the first ten features of today's file.\n",
+    "\n",
+    "    Rows are matched by feature name, so the two files may list their features in a\n",
+    "    different order; a feature that is missing from yesterday's file gives a NaN row.\n",
+    "    Assumes feature_name values are unique within each file.\n",
+    "\n",
+    "    Args:\n",
+    "        yesterday_file: path of the CSV used as the baseline.\n",
+    "        today_file: path of the CSV compared against the baseline.\n",
+    "\n",
+    "    Returns:\n",
+    "        None. The table of |today - yesterday| / |yesterday| is printed.\n",
+    "    \"\"\"\n",
+    "    df_train_yesterday = pd.read_csv(yesterday_file)\n",
+    "    df_train_today = pd.read_csv(today_file)\n",
+    "\n",
+    "    feature_top10_today = df_train_today.head(10)['feature_name'].tolist()\n",
+    "    monitor_list = ['0_rate', 'mean_1', 'var_1',\n",
+    "                    'non_zero_mean_0.25', 'non_zero_mean_0.5', 'non_zero_mean_0.75', 'non_zero_mean_1',\n",
+    "                    'non_zero_var_0.25', 'non_zero_var_0.5', 'non_zero_var_0.75', 'non_zero_var_1']\n",
+    "    # Align both days on the feature name; subtracting frames that only share a positional\n",
+    "    # index compares unrelated features whenever the row order differs between the files.\n",
+    "    df_yesterday = df_train_yesterday.set_index('feature_name').reindex(feature_top10_today)[monitor_list]\n",
+    "    df_today = df_train_today.set_index('feature_name').reindex(feature_top10_today)[monitor_list]\n",
+    "    # Use the magnitude of the baseline so a negative baseline does not flip the sign of the ratio.\n",
+    "    diff = (df_today - df_yesterday).abs() / df_yesterday.abs()\n",
+    "    # Restore the positional index and the trailing feature_name column for printing.\n",
+    "    diff = diff.reset_index(drop=True)\n",
+    "    diff['feature_name'] = feature_top10_today\n",
+    "    print(diff)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "     0_rate    mean_1     var_1  non_zero_mean_0.25  non_zero_mean_0.5  \\\n",
+      "0  0.005002  0.057325  0.071410            0.000330           0.000139   \n",
+      "1  0.002172  0.061239  0.110287            0.000205           0.001382   \n",
+      "2  0.000856  0.076648  0.379979            0.002546           0.002176   \n",
+      "3  0.001281  0.066219  0.211559            0.006632           0.003632   \n",
+      "4  0.004924 -0.050201  0.103066           -0.008135          -0.007913   \n",
+      "5  0.002149 -0.052000  0.083131           -0.009921          -0.009605   \n",
+      "6  0.001273 -0.045370  0.046743           -0.001187          -0.001560   \n",
+      "7  0.004924       NaN       NaN                 NaN                NaN   \n",
+      "8  0.001273       NaN       NaN                 NaN                NaN   \n",
+      "9  0.002149       NaN       NaN                 NaN                NaN   \n",
+      "\n",
+      "   non_zero_mean_0.75  non_zero_mean_1  non_zero_var_0.25  non_zero_var_0.5  \\\n",
+      "0            0.000849         0.009346           0.001695          0.001666   \n",
+      "1            0.003023         0.013982           0.001716          0.002233   \n",
+      "2            0.011587         0.028726           0.002981          0.003943   \n",
+      "3            0.007318         0.018900           0.021120          0.000492   \n",
+      "4           -0.007917        -0.002322           0.032027          0.031292   \n",
+      "5           -0.009624        -0.004975           0.019300          0.019249   \n",
+      "6           -0.001606        -0.001296           0.006401          0.005717   \n",
+      "7                 NaN              NaN                NaN               NaN   \n",
+      "8                 NaN              NaN                NaN               NaN   \n",
+      "9                 NaN              NaN                NaN               NaN   \n",
+      "\n",
+      "   non_zero_var_0.75  non_zero_var_1              feature_name  \n",
+      "0           0.001764        0.022971           stage_one_retrn  \n",
+      "1           0.008437        0.062215           stage_two_retrn  \n",
+      "2           0.036278        0.335875          stage_four_retrn  \n",
+      "3           0.019741        0.164520         stage_three_retrn  \n",
+      "4           0.031083        0.053767    stage_two_return_added  \n",
+      "5           0.019300        0.035786  stage_three_return_added  \n",
+      "6           0.005394        0.000239   stage_four_return_added  \n",
+      "7                NaN             NaN    stage_two_return_ratio  \n",
+      "8                NaN             NaN   stage_four_return_ratio  \n",
+      "9                NaN             NaN  stage_three_return_ratio  \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Baseline: the 20220219 file; compared file: the 20220220 file.\n",
+    "file_20220219 = './data/train_data_monitor_20220219.csv'\n",
+    "file_20220220 = './data/train_data_monitor_20220220.csv'\n",
+    "cal_diff(file_20220219, file_20220220)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "     0_rate    mean_1     var_1  non_zero_mean_0.25  non_zero_mean_0.5  \\\n",
+      "0  0.005572  0.063716  0.206425            0.000137           0.002426   \n",
+      "1  0.002313  0.072997  0.489410            0.007128           0.002891   \n",
+      "2  0.000955  0.110413  1.329780            0.001553           0.009829   \n",
+      "3  0.001368  0.090244  0.962106            0.002813           0.002741   \n",
+      "4  0.005481 -0.046648  0.111135           -0.003523          -0.003452   \n",
+      "5  0.002273 -0.040563  0.062435           -0.001561          -0.001249   \n",
+      "6  0.001349 -0.048717  0.061383           -0.000450          -0.000628   \n",
+      "7  0.005481       NaN       NaN                 NaN                NaN   \n",
+      "8  0.020957       NaN       NaN                 NaN                NaN   \n",
+      "9  0.017709       NaN       NaN                 NaN                NaN   \n",
+      "\n",
+      "   non_zero_mean_0.75  non_zero_mean_1  non_zero_var_0.25  non_zero_var_0.5  \\\n",
+      "0            0.004514         0.012775           0.000702          0.008503   \n",
+      "1            0.002557         0.024518           0.018237          0.000353   \n",
+      "2            0.010414         0.057912           0.010849          0.018093   \n",
+      "3            0.003311         0.040968           0.000559          0.006996   \n",
+      "4           -0.003437        -0.003670           0.022244          0.021602   \n",
+      "5           -0.001314        -0.006318           0.001523          0.001560   \n",
+      "6           -0.000693        -0.001422           0.019594          0.017566   \n",
+      "7                 NaN              NaN                NaN               NaN   \n",
+      "8                 NaN              NaN                NaN               NaN   \n",
+      "9                 NaN              NaN                NaN               NaN   \n",
+      "\n",
+      "   non_zero_var_0.75  non_zero_var_1              feature_name  \n",
+      "0           0.013407        0.151157           stage_one_retrn  \n",
+      "1           0.005393        0.436318           stage_two_retrn  \n",
+      "2           0.020078        1.277436          stage_four_retrn  \n",
+      "3           0.006591        0.912625         stage_three_retrn  \n",
+      "4           0.021383        0.058964    stage_two_return_added  \n",
+      "5           0.001633        0.015707  stage_three_return_added  \n",
+      "6           0.016856        0.014209   stage_four_return_added  \n",
+      "7                NaN             NaN    stage_two_return_ratio  \n",
+      "8                NaN             NaN  stage_three_return_ratio  \n",
+      "9                NaN             NaN   stage_four_return_ratio  \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Baseline: the 20220220 file; compared file: the 20220221 file.\n",
+    "file_20220220 = './data/train_data_monitor_20220220.csv'\n",
+    "file_20220221 = './data/train_data_monitor_20220221.csv'\n",
+    "cal_diff(file_20220220, file_20220221)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def cal_diff2(yesterday_file, today_file, feature_top=-1):\n",
+    "    \"\"\"Return the relative day-over-day change of the monitored statistics.\n",
+    "\n",
+    "    Both CSV files are read with feature_name as the index and rows are matched by\n",
+    "    feature name. Assumes feature_name values are unique within each file.\n",
+    "\n",
+    "    Args:\n",
+    "        yesterday_file: path of the CSV used as the baseline.\n",
+    "        today_file: path of the CSV compared against the baseline.\n",
+    "        feature_top: number of leading features of today_file to compare. None or any\n",
+    "            negative value (including the default -1) means all features.\n",
+    "\n",
+    "    Returns:\n",
+    "        DataFrame indexed by feature name with one column per monitored statistic,\n",
+    "        holding |today - yesterday| / |yesterday|. A feature that is missing from\n",
+    "        yesterday_file gives a NaN row.\n",
+    "    \"\"\"\n",
+    "    df_train_yesterday = pd.read_csv(yesterday_file, index_col='feature_name')\n",
+    "    df_train_today = pd.read_csv(today_file, index_col='feature_name')\n",
+    "\n",
+    "    feature_top_today = df_train_today.index.values\n",
+    "    # Slicing with [:-1] would silently drop the last feature, so only a non-negative\n",
+    "    # limit is applied; None or a negative value keeps every feature.\n",
+    "    if feature_top is not None and feature_top >= 0:\n",
+    "        feature_top_today = feature_top_today[:feature_top]\n",
+    "    monitor_list = ['0_rate', 'mean_1', 'var_1',\n",
+    "                    'non_zero_mean_0.25', 'non_zero_mean_0.5', 'non_zero_mean_0.75', 'non_zero_mean_1',\n",
+    "                    'non_zero_var_0.25', 'non_zero_var_0.5', 'non_zero_var_0.75', 'non_zero_var_1']\n",
+    "    # reindex (rather than .loc) yields NaN rows for features that are new today instead of raising.\n",
+    "    df_yesterday = df_train_yesterday.reindex(feature_top_today)[monitor_list]\n",
+    "    df_today = df_train_today.loc[feature_top_today, monitor_list]\n",
+    "    # Use the magnitude of the baseline so a negative baseline does not flip the sign of the ratio.\n",
+    "    diff = (df_today - df_yesterday).abs() / df_yesterday.abs()\n",
+    "    return diff.rename_axis('feature_name')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                          0_rate    mean_1     var_1  non_zero_mean_0.25  \\\n",
+      "feature_name                                                               \n",
+      "stage_one_retrn         0.005002  0.057325  0.071410            0.000330   \n",
+      "stage_two_retrn         0.002172  0.061239  0.110287            0.000205   \n",
+      "stage_four_retrn        0.000856  0.076648  0.379979            0.002546   \n",
+      "stage_three_retrn       0.001281  0.066219  0.211559            0.006632   \n",
+      "stage_two_return_added  0.004924 -0.050201  0.103066           -0.008135   \n",
+      "...                          ...       ...       ...                 ...   \n",
+      "videocategory6          0.000860  0.086727  0.085941            0.000000   \n",
+      "videocategory5          0.001115  0.022561  0.021471            0.000000   \n",
+      "videocategory11         0.001181  0.068037  0.066936            0.000000   \n",
+      "videocategory12         0.000831  0.089856  0.089100            0.000000   \n",
+      "videocategory45         0.000051  0.064570  0.064522            0.000000   \n",
+      "\n",
+      "                        non_zero_mean_0.5  non_zero_mean_0.75  \\\n",
+      "feature_name                                                    \n",
+      "stage_one_retrn                  0.000139            0.000849   \n",
+      "stage_two_retrn                  0.001382            0.003023   \n",
+      "stage_four_retrn                 0.002176            0.011587   \n",
+      "stage_three_retrn                0.003632            0.007318   \n",
+      "stage_two_return_added          -0.007913           -0.007917   \n",
+      "...                                   ...                 ...   \n",
+      "videocategory6                   0.000000            0.000000   \n",
+      "videocategory5                   0.000000            0.000000   \n",
+      "videocategory11                  0.000000            0.000000   \n",
+      "videocategory12                  0.000000            0.000000   \n",
+      "videocategory45                  0.000000            0.000000   \n",
+      "\n",
+      "                        non_zero_mean_1  non_zero_var_0.25  non_zero_var_0.5  \\\n",
+      "feature_name                                                                   \n",
+      "stage_one_retrn                0.009346           0.001695          0.001666   \n",
+      "stage_two_retrn                0.013982           0.001716          0.002233   \n",
+      "stage_four_retrn               0.028726           0.002981          0.003943   \n",
+      "stage_three_retrn              0.018900           0.021120          0.000492   \n",
+      "stage_two_return_added        -0.002322           0.032027          0.031292   \n",
+      "...                                 ...                ...               ...   \n",
+      "videocategory6                 0.000000                NaN               NaN   \n",
+      "videocategory5                 0.000000                NaN               NaN   \n",
+      "videocategory11                0.000000                NaN               NaN   \n",
+      "videocategory12                0.000000                NaN               NaN   \n",
+      "videocategory45                0.000000                NaN               NaN   \n",
+      "\n",
+      "                        non_zero_var_0.75  non_zero_var_1  \n",
+      "feature_name                                               \n",
+      "stage_one_retrn                  0.001764        0.022971  \n",
+      "stage_two_retrn                  0.008437        0.062215  \n",
+      "stage_four_retrn                 0.036278        0.335875  \n",
+      "stage_three_retrn                0.019741        0.164520  \n",
+      "stage_two_return_added           0.031083        0.053767  \n",
+      "...                                   ...             ...  \n",
+      "videocategory6                        NaN             NaN  \n",
+      "videocategory5                        NaN             NaN  \n",
+      "videocategory11                       NaN             NaN  \n",
+      "videocategory12                       NaN             NaN  \n",
+      "videocategory45                       NaN             NaN  \n",
+      "\n",
+      "[132 rows x 11 columns]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Run cal_diff2 on the 20220219 and 20220220 files, print the result and write it to CSV.\n",
+    "file_20220219 = './data/train_data_monitor_20220219.csv'\n",
+    "file_20220220 = './data/train_data_monitor_20220220.csv'\n",
+    "diff = cal_diff2(file_20220219, file_20220220, -1)\n",
+    "print(diff)\n",
+    "diff.to_csv(path_or_buf='./data/diff_20220219_20220220.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}