|
@@ -25,14 +25,15 @@ from config.dev import Config
|
|
def prepare_raw_data(dt_begin, dt_end):
|
|
def prepare_raw_data(dt_begin, dt_end):
|
|
data_fields = ['dt', 'gh_id', 'account_name', 'title', 'similarity',
|
|
data_fields = ['dt', 'gh_id', 'account_name', 'title', 'similarity',
|
|
'view_count_rate', 'category', 'read_avg',
|
|
'view_count_rate', 'category', 'read_avg',
|
|
- 'read_avg_rate']
|
|
|
|
|
|
+ 'read_avg_rate', 'first_pub_interval']
|
|
fields_str = ','.join(data_fields)
|
|
fields_str = ','.join(data_fields)
|
|
db_manager = MySQLManager(Config().MYSQL_LONG_ARTICLES)
|
|
db_manager = MySQLManager(Config().MYSQL_LONG_ARTICLES)
|
|
sql = f"""
|
|
sql = f"""
|
|
SELECT {fields_str} FROM datastat_score WHERE dt BETWEEN {dt_begin} AND {dt_end}
|
|
SELECT {fields_str} FROM datastat_score WHERE dt BETWEEN {dt_begin} AND {dt_end}
|
|
AND similarity > 0 AND category IS NOT NULL AND read_avg > 500
|
|
AND similarity > 0 AND category IS NOT NULL AND read_avg > 500
|
|
AND read_avg_rate BETWEEN 0 AND 3
|
|
AND read_avg_rate BETWEEN 0 AND 3
|
|
- AND `index` = 1
|
|
|
|
|
|
+ AND `index` in (1, 2)
|
|
|
|
+ AND FROM_UNIXTIME(coalesce(publish_timestamp, 0), '%H') < '15'
|
|
"""
|
|
"""
|
|
rows = db_manager.select(sql)
|
|
rows = db_manager.select(sql)
|
|
df = pd.DataFrame(rows, columns=data_fields)
|
|
df = pd.DataFrame(rows, columns=data_fields)
|