|
@@ -15,6 +15,7 @@ from sklearn.linear_model import LogisticRegression, LinearRegression
|
|
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
|
|
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
|
|
from sklearn.metrics import mean_squared_error, r2_score
|
|
from sklearn.metrics import mean_squared_error, r2_score
|
|
import statsmodels.api as sm
|
|
import statsmodels.api as sm
|
|
|
|
+import numpy as np
|
|
from .consts import category_name_map, reverse_category_name_map
|
|
from .consts import category_name_map, reverse_category_name_map
|
|
|
|
|
|
class CategoryRegressionV1:
|
|
class CategoryRegressionV1:
|
|
@@ -41,8 +42,10 @@ class CategoryRegressionV1:
|
|
# print(debias_selection[['account_name', 'read_avg_rate', 'read_avg_rate1']])
|
|
# print(debias_selection[['account_name', 'read_avg_rate', 'read_avg_rate1']])
|
|
df = df.drop(debias_selection.index)
|
|
df = df.drop(debias_selection.index)
|
|
|
|
|
|
- df['read_avg_rate'] = df['read_avg_rate'].clip(upper=1.4)
|
|
|
|
- df['view_count_rate'] = df['view_count_rate'].clip(upper=1.3)
|
|
|
|
|
|
+ def clip_func(x):
|
|
|
|
+ return x if x < 1.4 else 0.7 * np.log(x) + 1.165
|
|
|
|
+ df['read_avg_rate'] = df['read_avg_rate'].apply(clip_func)
|
|
|
|
+ df['view_count_rate'] = df['view_count_rate'].apply(clip_func)
|
|
df['days_decrease'] = df['first_pub_interval'] * (-0.2 / 120)
|
|
df['days_decrease'] = df['first_pub_interval'] * (-0.2 / 120)
|
|
# df['ClassY'] = df['read_avg_rate'] > 1
|
|
# df['ClassY'] = df['read_avg_rate'] > 1
|
|
df['RegressionY'] = df['read_avg_rate']
|
|
df['RegressionY'] = df['read_avg_rate']
|