Browse Source

仅通过标题tag 分析全部数据

罗俊辉 1 year ago
parent
commit
10754c7644
1 changed file with 8 additions and 4 deletions
  1. 8 4
      data_process/update_mysql_data.py

+ 8 - 4
data_process/update_mysql_data.py

@@ -57,10 +57,14 @@ class DataProcessor(object):
             label_dt = generate_label_date(hourly_dt_str)
             label_obj = label_info.get(label_dt, {}).get(video_id)
             if label_obj:
-                label = int(label_obj["uplevel"]) if label_obj["uplevel"] else 0
-                # print(label)
+                total_return = label_obj.get('flowpool_return_users', 0)
+                total_view = label_obj.get('flowpool_distribute_view_times', 0)
+                if total_view == 0:
+                    label = None
+                else:
+                    label = total_return / total_view
             else:
-                label = 0
+                label = None
             return label, label_dt
 
         def process_info(item_):
@@ -74,7 +78,7 @@ class DataProcessor(object):
             # title = read_title(client=self.client, video_id=video_id)
             label, dt_daily = generate_label(str(video_id), hour_dt, label_info)
 
-            insert_sql = f"""UPDATE lightgbm_data set label = '{label}', daily_dt_str = '{dt_daily}' where video_id = '{video_id}';"""
+            insert_sql = f"""UPDATE lightgbm_data set orv_label = '{label}', daily_dt_str = '{dt_daily}' where video_id = '{video_id}';"""
             # print(insert_sql)
             self.client_spider.update(insert_sql)