|
@@ -9,7 +9,7 @@ from datetime import datetime, timedelta
|
|
|
from argparse import ArgumentParser
|
|
|
from pymysql.cursors import DictCursor
|
|
|
|
|
|
-from applications.const import updateAccountReadAvgTaskConst
|
|
|
+from applications.const import UpdateAccountReadAvgTaskConst
|
|
|
from applications.db import DatabaseConnector
|
|
|
from applications.utils import fetch_account_fans
|
|
|
from applications.utils import fetch_publishing_account_list
|
|
@@ -19,7 +19,7 @@ from config import long_articles_config, denet_config, piaoquan_crawler_config
|
|
|
read_rate_table = "long_articles_read_rate"
|
|
|
read_avg_table = "account_avg_info_v3"
|
|
|
config = apolloConfig()
|
|
|
-const = updateAccountReadAvgTaskConst()
|
|
|
+const = UpdateAccountReadAvgTaskConst()
|
|
|
unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
|
|
|
touliu_accounts = set(json.loads(config.getConfigValue("touliu_gh_id_list")))
|
|
|
backup_account_fans = json.loads(config.getConfigValue("backup_account_fans"))
|
|
@@ -65,7 +65,7 @@ class UpdateAccountInfoVersion3(object):
|
|
|
do it
|
|
|
"""
|
|
|
# get fans dict from aigc
|
|
|
- fans_dict = fetch_account_fans(self.piaoquan_crawler_db_client, dt)
|
|
|
+ fans_dict = fetch_account_fans(self.denet_db_client, dt)
|
|
|
|
|
|
# get publishing account list from aigc
|
|
|
account_list = fetch_publishing_account_list(self.denet_db_client)
|
|
@@ -76,25 +76,26 @@ class UpdateAccountInfoVersion3(object):
|
|
|
for account in tqdm(account_list, desc=dt):
|
|
|
gh_id = account["gh_id"]
|
|
|
business_type = const.TOULIU if gh_id in touliu_accounts else const.ARTICLES_DAILY
|
|
|
- fans = fans_dict.get(gh_id, {}).get(dt, 0)
|
|
|
+ fans = fans_dict.get(gh_id, {}).get(dt, const.DEFAULT_FANS)
|
|
|
|
|
|
# use unauthorized account's fans if not found in aigc
|
|
|
if not fans:
|
|
|
- fans = int(unauthorized_account.get(gh_id, 0))
|
|
|
+ fans = int(unauthorized_account.get(gh_id, const.DEFAULT_FANS))
|
|
|
|
|
|
# use backup account's fans if not found in aigc
|
|
|
if not fans:
|
|
|
- fans = int(backup_account_fans.get(gh_id, 0))
|
|
|
+ fans = int(backup_account_fans.get(gh_id, const.DEFAULT_FANS))
|
|
|
|
|
|
if fans:
|
|
|
- for index in range(1, 9):
|
|
|
+ for index in const.ARTICLE_INDEX_LIST:
|
|
|
gh_id_position = "{}_{}".format(gh_id, index)
|
|
|
if read_rate_avg_dict.get(gh_id_position):
|
|
|
# fetch read rate avg
|
|
|
read_rate_avg = read_rate_avg_dict[gh_id_position]
|
|
|
# cal read avg
|
|
|
read_avg = fans * read_rate_avg
|
|
|
- print(read_rate_avg, read_avg)
|
|
|
+
|
|
|
+ # insert into database
|
|
|
insert_sql = f"""
|
|
|
insert into {read_avg_table}
|
|
|
(gh_id, position, update_time, account_name, fans, read_avg, like_avg, status, account_type, account_mode, account_source, account_status, business_type, read_rate_avg)
|
|
@@ -111,8 +112,8 @@ class UpdateAccountInfoVersion3(object):
|
|
|
account['account_name'],
|
|
|
fans,
|
|
|
read_avg,
|
|
|
- 0,
|
|
|
- 1,
|
|
|
+ const.DEFAULT_LIKE,
|
|
|
+ const.USING_STATUS,
|
|
|
account['account_type'],
|
|
|
account['mode_type'],
|
|
|
account['account_source'],
|
|
@@ -151,7 +152,7 @@ class UpdateAccountInfoVersion3(object):
|
|
|
self.piaoquan_crawler_db_client.save(
|
|
|
query=update_status_sql,
|
|
|
params=(
|
|
|
- 0, dt, account['gh_id'], index
|
|
|
+ const.NOT_USING_STATUS, dt, account['gh_id'], index
|
|
|
)
|
|
|
)
|
|
|
|
|
@@ -166,15 +167,15 @@ def main():
|
|
|
help="Run only once for date in format of %Y-%m-%d. \
|
|
|
If no specified, run as daily jobs.")
|
|
|
args = parser.parse_args()
|
|
|
- Up = UpdateAccountInfoVersion3()
|
|
|
+ update_account_read_avg_task = UpdateAccountInfoVersion3()
|
|
|
if args.run_date:
|
|
|
- Up.do_task_list(dt=args.run_date)
|
|
|
+ update_account_read_avg_task.do_task_list(dt=args.run_date)
|
|
|
else:
|
|
|
dt_object = datetime.fromtimestamp(int(time.time()))
|
|
|
one_day = timedelta(days=1)
|
|
|
yesterday = dt_object - one_day
|
|
|
yesterday_str = yesterday.strftime('%Y-%m-%d')
|
|
|
- Up.do_task_list(dt=yesterday_str)
|
|
|
+ update_account_read_avg_task.do_task_list(dt=yesterday_str)
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|