|
@@ -28,7 +28,7 @@ NIGHT_ACCOUNTS = ('gh_12523d39d809','gh_df4a630c04db','gh_f67df16f4670','gh_ca44
|
|
def prepare_raw_data(dt_begin, dt_end):
|
|
def prepare_raw_data(dt_begin, dt_end):
|
|
data_fields = ['dt', 'gh_id', 'account_name', 'title', 'similarity',
|
|
data_fields = ['dt', 'gh_id', 'account_name', 'title', 'similarity',
|
|
'view_count_rate', 'category', 'read_avg',
|
|
'view_count_rate', 'category', 'read_avg',
|
|
- 'read_avg_rate', 'first_pub_interval']
|
|
|
|
|
|
+ 'read_avg_rate', 'first_pub_interval', '`index`']
|
|
fields_str = ','.join(data_fields)
|
|
fields_str = ','.join(data_fields)
|
|
db_manager = MySQLManager(Config().MYSQL_LONG_ARTICLES)
|
|
db_manager = MySQLManager(Config().MYSQL_LONG_ARTICLES)
|
|
night_accounts_condition = str(NIGHT_ACCOUNTS)
|
|
night_accounts_condition = str(NIGHT_ACCOUNTS)
|
|
@@ -42,6 +42,7 @@ def prepare_raw_data(dt_begin, dt_end):
|
|
"""
|
|
"""
|
|
rows = db_manager.select(sql)
|
|
rows = db_manager.select(sql)
|
|
df = pd.DataFrame(rows, columns=data_fields)
|
|
df = pd.DataFrame(rows, columns=data_fields)
|
|
|
|
+ df.rename(columns={'`index`': 'index'}, inplace=True)
|
|
df = df.drop_duplicates(['dt', 'gh_id', 'title'])
|
|
df = df.drop_duplicates(['dt', 'gh_id', 'title'])
|
|
return df
|
|
return df
|
|
|
|
|