瀏覽代碼

Update run_category_model_v1: remove samples without view_count_rate

StrayWarrior 4 月之前
父節點
當前提交
20ad5a5af4
共有 1 個文件被更改,包括 1 次插入和 2 次删除
  1. 1 2
      run_category_model_v1.py

+ 1 - 2
run_category_model_v1.py

@@ -24,7 +24,6 @@ from config.dev import Config
 NIGHT_ACCOUNTS = ('gh_12523d39d809','gh_df4a630c04db','gh_f67df16f4670','gh_ca44517edda9','gh_a66c1316fd5e','gh_4242c478bbba','gh_60b0c23fcc7c','gh_33b3470784fc','gh_ec1bcb283daf','gh_234ab9ff490d','gh_7715a626a4c6','gh_1bfe1d257728','gh_9db5e3ac2c93','gh_9d1ae5f9ceac','gh_7208b813f16d','gh_e56ddf195d91','gh_a43aecffe81b','gh_d4a7d2ce54fd','gh_c2b458818b09','gh_349a57ef1c44','gh_89bfe54ad90f','gh_b929ed680b62','gh_f8e8a931ff56','gh_916f4fad5ce0','gh_0d7c5f4c38a9','gh_bceef3f747c2','gh_706456719017','gh_fd51a5e33fc6','gh_5372093f5fb0','gh_957ff8e08e1b','gh_64fc629d3ec2','gh_c8b69797912a','gh_6909b38ad95f','gh_1e69a1b4dc1a','gh_0763523103e4','gh_9b83a9ad7da0','gh_82b416f27698','gh_a60647e98cd9','gh_3ce2fa1956ea','gh_44127c197525','gh_06834aba13a5','gh_c33809af68bc','gh_82cf39ef616e','gh_a342ef23c48e','gh_c9cc1471af7d','gh_291ec369f017','gh_810a439f320a','gh_00f942061a0d','gh_7662653b0e77','gh_d192d757b606','gh_391702d26b3b','gh_3e90f421c974','gh_30d189fe56c7','gh_7ebfbbf675ee','gh_3f84c2b9a1a2','gh_bccbe3681e22','gh_005fc1cb4b73','gh_21d120007b64','gh_3d5f24fd3311','gh_3621aaa6c4a0','gh_aee2dca32701','gh_c25c6040c4b2','gh_641019d44876','gh_95ba63e5cf18','gh_efd90dcf48ac','gh_5e1464b76ff6','gh_5765f834684c','gh_81bec2f4f577','gh_401396006e13','gh_7c33726c5147','gh_bbd8a52ba98b','gh_f74ca3104604'
 )
 
-
 def prepare_raw_data(dt_begin, dt_end):
     data_fields = ['dt', 'gh_id', 'account_name', 'title', 'similarity',
                    'view_count_rate', 'category', 'read_avg',
@@ -35,7 +34,7 @@ def prepare_raw_data(dt_begin, dt_end):
     sql = f"""
         SELECT {fields_str} FROM datastat_score WHERE dt BETWEEN {dt_begin} AND {dt_end}
             AND similarity > 0 AND category IS NOT NULL AND read_avg > 500
-            AND read_avg_rate BETWEEN 0 AND 3
+            AND read_avg_rate BETWEEN 0 AND 3 AND view_count_rate > 0
             AND `index` in (1, 2)
             AND (FROM_UNIXTIME(coalesce(publish_timestamp, 0), '%H') < '15'
                 OR gh_id in {night_accounts_condition})