liqian 2 年之前
父节点
当前提交
1559ab982a
共有 2 个文件被更改,包括 11 次插入 和 7 次删除
  1. 7 7
      user_group_update.py
  2. 4 0
      utils.py

+ 7 - 7
user_group_update.py

@@ -32,7 +32,7 @@ def to_redis(group, mid_list, class_key_list):
         for i in range(len(mid_list) // 100 + 1):
             # log_.info(f"i = {i}")
             mid_temp_list = mid_list[i * 100:(i + 1) * 100]
-            # print(mid_temp_list)
+            print(mid_temp_list)
             task_list = [
                 gevent.spawn(redis_helper.set_data_to_redis,
                              f"{config_.KEY_NAME_PREFIX_MID_GROUP}{class_key}:{mid}", group, 26 * 3600)
@@ -102,12 +102,12 @@ def update_user_group_to_redis(project, table, dt, app_type_list, features, ad_m
         process_mid_list = mids[i*step:(i+1)*step]
         pool.apply_async(func=to_redis2, args=(process_mid_list, mid_group_mapping, ad_mid_group_key_params))
 
-    for group, class_key_list in ad_mid_group_key_params.items():
-        mid_list = feature_df[group].tolist()
-        mid_list = list(set(mid_list))
-        mid_list = [mid for mid in mid_list if mid is not None]
-        # class_key_list = ad_mid_group_key_params.get(group)
-        pool.apply_async(func=to_redis, args=(group, mid_list, class_key_list))
+    # for group, class_key_list in ad_mid_group_key_params.items():
+    #     mid_list = feature_df[group].tolist()
+    #     mid_list = list(set(mid_list))
+    #     mid_list = [mid for mid in mid_list if mid is not None]
+    #     # class_key_list = ad_mid_group_key_params.get(group)
+    #     pool.apply_async(func=to_redis, args=(group, mid_list, class_key_list))
     pool.close()
     pool.join()
 

+ 4 - 0
utils.py

@@ -477,11 +477,15 @@ def get_feature_data(project, table, features, dt):
     """获取特征数据"""
     records = get_data_from_odps(date=dt, project=project, table=table)
     feature_data = []
+    i = 0
     for record in records:
+        if i >= 1000:
+            break
         item = {}
         for feature_name in features:
             item[feature_name] = record[feature_name]
         feature_data.append(item)
+        i += 1
     feature_df = pd.DataFrame(feature_data)
     return feature_df