Merge branch 'ad-recommend-20221018'

liqian, 2 years ago
commit 0883fb6abc

+ 13 - 0
ad_threshold_update_task.sh

@@ -0,0 +1,13 @@
+source /etc/profile
+echo $ROV_OFFLINE_ENV
+if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
+    cd /data2/rov-offline &&
+    /root/anaconda3/bin/python /data2/rov-offline/ad_users_data_update.py &&
+    /root/anaconda3/bin/python /data2/rov-offline/ad_video_data_update.py &&
+    /root/anaconda3/bin/python /data2/rov-offline/ad_user_video_predict.py
+elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
+    cd /data/rov-offline &&
+    /root/anaconda3/bin/python /data/rov-offline/ad_users_data_update.py &&
+    /root/anaconda3/bin/python /data/rov-offline/ad_video_data_update.py &&
+    /root/anaconda3/bin/python /data/rov-offline/ad_user_video_predict.py
+fi
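
A note on the `&&` chaining above: each updater runs only if the previous one exited with status 0, so a failure in ad_users_data_update.py stops the whole pipeline. A minimal Python equivalent (hypothetical sketch, not part of this commit; paths assume the test environment):

    import subprocess

    PYTHON = '/root/anaconda3/bin/python'
    SCRIPTS = ['ad_users_data_update.py', 'ad_video_data_update.py', 'ad_user_video_predict.py']

    for script in SCRIPTS:
        # check=True raises CalledProcessError on a non-zero exit,
        # stopping the chain just like `&&` does in the shell
        subprocess.run([PYTHON, script], cwd='/data2/rov-offline', check=True)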

+ 7 - 0
ad_user_data_update_task.sh

@@ -0,0 +1,7 @@
+source /etc/profile
+echo $ROV_OFFLINE_ENV
+if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
+    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/ad_users_data_update.py
+elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
+    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/ad_users_data_update.py
+fi

+ 105 - 0
ad_user_video_predict.py

@@ -0,0 +1,105 @@
+import datetime
+import os
+
+import numpy as np
+import pandas as pd
+from utils import get_feature_data, RedisHelper
+from config import set_config
+from log import Log
+config_, _ = set_config()
+log_ = Log()
+redis_helper = RedisHelper()
+
+
+def predict_user_group_share_rate(dt, app_type):
+    """Predict each user group's share rate when ads are shown"""
+    # Fetch user-group features
+    project = config_.ad_model_data['users_share_rate'].get('project')
+    table = config_.ad_model_data['users_share_rate'].get('table')
+    features = [
+        'apptype',
+        'group',
+        'sharerate_all',
+        'sharerate_ad'
+    ]
+    user_group_df = get_feature_data(project=project, table=table, features=features, dt=dt)
+    user_group_df['apptype'] = user_group_df['apptype'].astype(int)
+    # .copy() avoids pandas SettingWithCopyWarning on the filtered frame
+    user_group_df = user_group_df[user_group_df['apptype'] == app_type].copy()
+    user_group_df['sharerate_all'] = user_group_df['sharerate_all'].astype(float)
+    user_group_df['sharerate_ad'] = user_group_df['sharerate_ad'].astype(float)
+    # Share rate across all user groups over the last 30 days with ads shown
+    ad_all_group_share_rate = user_group_df[user_group_df['group'] == 'allmids']['sharerate_ad'].values[0]
+    user_group_df = user_group_df[user_group_df['group'] != 'allmids']
+    # Per-group share rate with ads; a zero sharerate_all yields inf, so clear it along with NaN
+    user_group_df['group_ad_share_rate'] = \
+        user_group_df['sharerate_ad'] * float(ad_all_group_share_rate) / user_group_df['sharerate_all']
+    user_group_df['group_ad_share_rate'] = \
+        user_group_df['group_ad_share_rate'].replace([np.inf, -np.inf], 0).fillna(0)
+    return user_group_df
+
+
+def predict_video_share_rate(dt, app_type):
+    """Predict each video's share rate when ads are shown"""
+    # Fetch video features
+    project = config_.ad_model_data['videos_share_rate'].get('project')
+    table = config_.ad_model_data['videos_share_rate'].get('table')
+    features = [
+        'apptype',
+        'videoid',
+        'sharerate_all',
+        'sharerate_ad'
+    ]
+    video_df = get_feature_data(project=project, table=table, features=features, dt=dt)
+    video_df['apptype'] = video_df['apptype'].astype(int)
+    # .copy() avoids pandas SettingWithCopyWarning on the filtered frame
+    video_df = video_df[video_df['apptype'] == app_type].copy()
+    video_df['sharerate_all'] = video_df['sharerate_all'].astype(float)
+    video_df['sharerate_ad'] = video_df['sharerate_ad'].astype(float)
+    # Share rate across all videos over the last 30 days with ads shown
+    ad_all_videos_share_rate = video_df[video_df['videoid'] == 'allvideos']['sharerate_ad'].values[0]
+    video_df = video_df[video_df['videoid'] != 'allvideos']
+    # Per-video share rate with ads; a zero sharerate_all yields inf, so clear it along with NaN
+    video_df['video_ad_share_rate'] = \
+        video_df['sharerate_ad'] * float(ad_all_videos_share_rate) / video_df['sharerate_all']
+    video_df['video_ad_share_rate'] = \
+        video_df['video_ad_share_rate'].replace([np.inf, -np.inf], 0).fillna(0)
+    return video_df
+
+
+def predict_ad_group_video():
+    now_date = datetime.datetime.today()
+    dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+    log_.info(f"dt = {dt}")
+    # Fetch user-group predictions
+    group_key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{dt}"
+    group_data = redis_helper.get_all_data_from_zset(key_name=group_key_name, with_scores=True)
+    if group_data is None:
+        log_.info("group data is None!")
+        return None
+    group_df = pd.DataFrame(data=group_data, columns=['group', 'group_ad_share_rate'])
+    group_df = group_df[group_df['group'] != 'mean_group']
+    log_.info(f"group_df count = {len(group_df)}")
+    # Fetch video predictions
+    video_key_name = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{dt}"
+    video_data = redis_helper.get_all_data_from_zset(key_name=video_key_name, with_scores=True)
+    if video_data is None:
+        log_.info("video data is None!")
+        return None
+    video_df = pd.DataFrame(data=video_data, columns=['videoid', 'video_ad_share_rate'])
+    # zset members come back as strings, so compare the -1 sentinel as a string
+    video_df = video_df[video_df['videoid'].astype(str) != '-1']
+    log_.info(f"video_df count = {len(video_df)}")
+    predict_df = video_df
+    threshold_data = {}
+    all_group_data = []
+    for index, item in group_df.iterrows():
+        predict_df[item['group']] = predict_df['video_ad_share_rate'] * item['group_ad_share_rate']
+        # Use each group's mean predicted rate as its threshold
+        threshold_data[item['group']] = predict_df[item['group']].mean()
+        all_group_data.extend(predict_df[item['group']].tolist())
+    threshold_data['mean_group'] = np.mean(all_group_data)
+    log_.info(f"threshold_data = {threshold_data}")
+    # Write thresholds to redis
+    for key, val in threshold_data.items():
+        key_name = f"{config_.KEY_NAME_PREFIX_AD_THRESHOLD}{key}"
+        redis_helper.set_data_to_redis(key_name=key_name, value=val, expire_time=2 * 24 * 3600)
+
+    os.makedirs('./data', exist_ok=True)
+    predict_df.to_csv('./data/ad_user_video_predict.csv')
+    return predict_df
+
+
+if __name__ == '__main__':
+    predict_df = predict_ad_group_video()
+
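
A toy walk-through of the threshold math above, with invented numbers: each group column of predict_df is the element-wise product of the per-video rate and that group's rate, and the group's threshold is the column mean (sketch; the group and video names are made up):

    import numpy as np
    import pandas as pd

    video_df = pd.DataFrame({'videoid': ['101', '102'],
                             'video_ad_share_rate': [0.02, 0.04]})
    group_df = pd.DataFrame({'group': ['return1mids', 'return2_3mids'],
                             'group_ad_share_rate': [1.5, 0.5]})

    thresholds, all_values = {}, []
    for _, item in group_df.iterrows():
        video_df[item['group']] = video_df['video_ad_share_rate'] * item['group_ad_share_rate']
        thresholds[item['group']] = video_df[item['group']].mean()
        all_values.extend(video_df[item['group']].tolist())
    thresholds['mean_group'] = np.mean(all_values)
    # thresholds == {'return1mids': 0.045, 'return2_3mids': 0.015, 'mean_group': 0.03}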

+ 89 - 0
ad_users_data_update.py

@@ -0,0 +1,89 @@
+import datetime
+import traceback
+from threading import Timer
+from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from config import set_config
+from log import Log
+config_, _ = set_config()
+log_ = Log()
+redis_helper = RedisHelper()
+
+features = [
+    'apptype',
+    'group',
+    'sharerate_all',
+    'sharerate_ad'
+]
+
+
+def predict_user_group_share_rate(project, table, dt, app_type):
+    """Predict each user group's share rate when ads are shown"""
+    # Fetch user-group features
+    user_group_df = get_feature_data(project=project, table=table, features=features, dt=dt)
+    user_group_df['apptype'] = user_group_df['apptype'].astype(int)
+    # .copy() avoids pandas SettingWithCopyWarning on the filtered frame
+    user_group_df = user_group_df[user_group_df['apptype'] == app_type].copy()
+    user_group_df['sharerate_all'].fillna(0, inplace=True)
+    user_group_df['sharerate_ad'].fillna(0, inplace=True)
+    user_group_df['sharerate_all'] = user_group_df['sharerate_all'].astype(float)
+    user_group_df['sharerate_ad'] = user_group_df['sharerate_ad'].astype(float)
+    # Share rate across all user groups over the last 30 days with ads shown
+    ad_all_group_share_rate = user_group_df[user_group_df['group'] == 'allmids']['sharerate_ad'].values[0]
+    user_group_df = user_group_df[user_group_df['group'] != 'allmids']
+    # Per-group share rate with ads; a zero sharerate_all yields inf, so clear it along with NaN
+    user_group_df['group_ad_share_rate'] = \
+        user_group_df['sharerate_ad'] * float(ad_all_group_share_rate) / user_group_df['sharerate_all']
+    user_group_df['group_ad_share_rate'] = user_group_df['group_ad_share_rate'] \
+        .replace([float('inf'), float('-inf')], 0).fillna(0)
+    # Write results to redis
+    key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{dt}"
+    redis_data = {}
+    for index, item in user_group_df.iterrows():
+        redis_data[item['group']] = item['group_ad_share_rate']
+    group_ad_share_rate_mean = user_group_df['group_ad_share_rate'].mean()
+    redis_data['mean_group'] = group_ad_share_rate_mean
+    if len(redis_data) > 0:
+        redis_helper.add_data_with_zset(key_name=key_name, data=redis_data, expire_time=2 * 24 * 3600)
+    return user_group_df
+
+
+def timer_check():
+    try:
+        app_type = config_.APP_TYPE['VLOG']
+        project = config_.ad_model_data['users_share_rate'].get('project')
+        table = config_.ad_model_data['users_share_rate'].get('table')
+        now_date = datetime.datetime.today()
+        dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+        log_.info(f"now_date: {dt}")
+        now_min = datetime.datetime.now().minute
+        # Check whether today's data is ready
+        data_count = data_check(project=project, table=table, dt=dt)
+        if data_count > 0:
+            log_.info(f"ad user group data count = {data_count}")
+            # Data is ready; run the update
+            predict_user_group_share_rate(project=project, table=table, dt=dt, app_type=app_type)
+            log_.info("ad user group data update end!")
+        elif now_min > 45:
+            log_.info('ad user group data is None!')
+            send_msg_to_feishu(
+                webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+                key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+                msg_text=f"rov-offline{config_.ENV_TEXT} - user group share rate data is not ready!"
+            )
+        else:
+            # Data not ready; check again in 1 minute
+            Timer(60, timer_check).start()
+
+    except Exception as e:
+        log_.error(f"user group share rate prediction update failed, exception: {e}, "
+                   f"traceback: {traceback.format_exc()}")
+        send_msg_to_feishu(
+            webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+            key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+            msg_text=f"rov-offline{config_.ENV_TEXT} - user group share rate prediction update failed\n"
+                     f"exception: {e}\n"
+                     f"traceback: {traceback.format_exc()}"
+        )
+
+
+if __name__ == '__main__':
+    timer_check()
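
The adjustment in predict_user_group_share_rate rescales a group's with-ads share rate by the ratio of the overall with-ads rate to the group's all-traffic rate: group_ad_share_rate = sharerate_ad * allmids_sharerate_ad / sharerate_all. A small sketch with invented numbers, showing why the inf guard matters:

    import pandas as pd

    df = pd.DataFrame({'group': ['g1', 'g2'],
                       'sharerate_all': [0.05, 0.0],  # g2 has no baseline traffic
                       'sharerate_ad': [0.04, 0.03]})
    ad_all_group_share_rate = 0.045  # the 'allmids' row's sharerate_ad

    df['group_ad_share_rate'] = df['sharerate_ad'] * ad_all_group_share_rate / df['sharerate_all']
    # g2: 0.03 * 0.045 / 0.0 -> inf, which fillna(0) alone would not catch
    df['group_ad_share_rate'] = df['group_ad_share_rate'] \
        .replace([float('inf'), float('-inf')], 0).fillna(0)
    # g1: 0.04 * 0.045 / 0.05 = 0.036; g2: 0.0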

+ 90 - 0
ad_video_data_update.py

@@ -0,0 +1,90 @@
+import datetime
+import traceback
+from threading import Timer
+from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from config import set_config
+from log import Log
+config_, _ = set_config()
+log_ = Log()
+redis_helper = RedisHelper()
+
+features = [
+    'apptype',
+    'videoid',
+    'sharerate_all',
+    'sharerate_ad'
+]
+
+
+def predict_video_share_rate(project, table, dt, app_type):
+    """Predict each video's share rate when ads are shown"""
+    # Fetch video features
+    video_df = get_feature_data(project=project, table=table, features=features, dt=dt)
+    video_df['apptype'] = video_df['apptype'].astype(int)
+    # .copy() avoids pandas SettingWithCopyWarning on the filtered frame
+    video_df = video_df[video_df['apptype'] == app_type].copy()
+    video_df['sharerate_all'].fillna(0, inplace=True)
+    video_df['sharerate_ad'].fillna(0, inplace=True)
+    video_df['sharerate_all'] = video_df['sharerate_all'].astype(float)
+    video_df['sharerate_ad'] = video_df['sharerate_ad'].astype(float)
+    # Share rate across all videos over the last 30 days with ads shown
+    ad_all_videos_share_rate = video_df[video_df['videoid'] == 'allvideos']['sharerate_ad'].values[0]
+    video_df = video_df[video_df['videoid'] != 'allvideos']
+    # Per-video share rate with ads; a zero sharerate_all yields inf, so clear it along with NaN
+    video_df['video_ad_share_rate'] = \
+        video_df['sharerate_ad'] * float(ad_all_videos_share_rate) / video_df['sharerate_all']
+    video_df['video_ad_share_rate'] = video_df['video_ad_share_rate'] \
+        .replace([float('inf'), float('-inf')], 0).fillna(0)
+    video_df = video_df[video_df['video_ad_share_rate'] != 0]
+    # Write results to redis; -1 is the sentinel member holding the overall mean
+    key_name = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{dt}"
+    redis_data = {}
+    for index, item in video_df.iterrows():
+        redis_data[int(item['videoid'])] = item['video_ad_share_rate']
+    video_ad_share_rate_mean = video_df['video_ad_share_rate'].mean()
+    redis_data[-1] = video_ad_share_rate_mean
+    if len(redis_data) > 0:
+        redis_helper.add_data_with_zset(key_name=key_name, data=redis_data, expire_time=2 * 24 * 3600)
+    return video_df
+
+
+def timer_check():
+    try:
+        app_type = config_.APP_TYPE['VLOG']
+        project = config_.ad_model_data['videos_share_rate'].get('project')
+        table = config_.ad_model_data['videos_share_rate'].get('table')
+        now_date = datetime.datetime.today()
+        dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+        log_.info(f"now_date: {dt}")
+        now_min = datetime.datetime.now().minute
+        # Check whether today's data is ready
+        data_count = data_check(project=project, table=table, dt=dt)
+        if data_count > 0:
+            log_.info(f"ad video data count = {data_count}")
+            # Data is ready; run the update
+            predict_video_share_rate(project=project, table=table, dt=dt, app_type=app_type)
+            log_.info("ad video data update end!")
+        elif now_min > 45:
+            log_.info('ad video data is None!')
+            send_msg_to_feishu(
+                webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+                key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+                msg_text=f"rov-offline{config_.ENV_TEXT} - video share rate data is not ready!"
+            )
+        else:
+            # Data not ready; check again in 1 minute
+            Timer(60, timer_check).start()
+
+    except Exception as e:
+        log_.error(f"video share rate prediction update failed, exception: {e}, "
+                   f"traceback: {traceback.format_exc()}")
+        send_msg_to_feishu(
+            webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+            key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+            msg_text=f"rov-offline{config_.ENV_TEXT} - video share rate prediction update failed\n"
+                     f"exception: {e}\n"
+                     f"traceback: {traceback.format_exc()}"
+        )
+
+
+if __name__ == '__main__':
+    timer_check()
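
Why ad_user_video_predict.py compares the sentinel as a string: zset members are stored as strings, so the -1 written here comes back as '-1' (or b'-1' without response decoding). RedisHelper's internals are not part of this commit; with plain redis-py the round trip would look like this (hypothetical sketch):

    import redis

    r = redis.Redis(host='localhost', port=6379, decode_responses=True)
    r.zadd('ad:video:predict:share:rate:20221018', {-1: 0.03, 101: 0.05})

    members = r.zrange('ad:video:predict:share:rate:20221018', 0, -1, withscores=True)
    # -> [('-1', 0.03), ('101', 0.05)] -- members are strings, not ints,
    #    so a filter like videoid != -1 would drop nothing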

+ 7 - 0
ad_video_data_update_task.sh

@@ -0,0 +1,7 @@
+source /etc/profile
+echo $ROV_OFFLINE_ENV
+if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
+    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/ad_video_data_update.py
+elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
+    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/ad_video_data_update.py
+fi

+ 30 - 1
config.py

@@ -581,6 +581,30 @@ class BaseConfig(object):
     # redis key prefix for religion video list update results; full format: 'religion:videos:item:{date}'
     KEY_NAME_PREFIX_RELIGION_VIDEOS = 'religion:videos:item:'
 
+    # Ad model data
+    ad_model_data = {
+        'user_group': {
+            'project': 'loghubods',
+            'table': 'user_share_return_admodel'
+        },
+        'users_share_rate': {
+            'project': 'loghubods',
+            'table': 'usergroup_sharerate_admodel'
+        },
+        'videos_share_rate': {
+            'project': 'loghubods',
+            'table': 'video_sharerate_admodel'
+        },
+    }
+    # redis key prefix for predicted share rates of user groups with ads; full format: ad:users:group:predict:share:rate:{date}
+    KEY_NAME_PREFIX_AD_GROUP = 'ad:users:group:predict:share:rate:'
+    # redis key prefix for predicted share rates of videos with ads; full format: ad:video:predict:share:rate:{date}
+    KEY_NAME_PREFIX_AD_VIDEO = 'ad:video:predict:share:rate:'
+    # redis key prefix for user group assignments; full format: mid:group:{mid}
+    KEY_NAME_PREFIX_MID_GROUP = 'mid:group:'
+    # redis key prefix for ad recommendation thresholds; full format: ad:threshold:{group}
+    KEY_NAME_PREFIX_AD_THRESHOLD = 'ad:threshold:'
+
 
 class DevelopmentConfig(BaseConfig):
     """开发环境配置"""
@@ -590,8 +614,13 @@ class DevelopmentConfig(BaseConfig):
     PROJECT_PATH = '/data2/rov-offline'
 
     # Test environment redis address
+    # REDIS_INFO = {
+    #     'host': 'r-bp1ps6my7lzg8rdhwx682.redis.rds.aliyuncs.com',
+    #     'port': 6379,
+    #     'password': 'Wqsd@2019',
+    # }
     REDIS_INFO = {
-        'host': 'r-bp1ps6my7lzg8rdhwx682.redis.rds.aliyuncs.com',
+        'host': 'r-bp16jexstuzih671tz.redis.rds.aliyuncs.com',
         'port': 6379,
         'password': 'Wqsd@2019',
     }
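
This commit only writes the keys defined above; the serving-side read path is not shown. A hypothetical consumer lookup, assuming string responses (get_ad_threshold and redis_client are illustrative names):

    def get_ad_threshold(redis_client, mid):
        # Resolve the mid to its group, falling back to the global mean,
        # then fetch that group's ad threshold
        group = redis_client.get(f"mid:group:{mid}") or 'mean_group'
        value = redis_client.get(f"ad:threshold:{group}")
        return float(value) if value is not None else None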

+ 3 - 3
db_helper.py

@@ -346,6 +346,6 @@ if __name__ == '__main__':
     # res = redis_helper.get_score_with_value(key, 90797)
     # print(res)
     # redis_helper.remove_value_from_set(key_name=config_.RELEVANT_TOP_VIDEOS_KEY_NAME, values=(8633849,))
-    data = redis_helper.get_all_data_from_zset(key_name="com.weiqu.video.recall.hot.item.score.20220622", with_scores=True)
-    print(data[:10])
-    print(len(data))
+    con = redis_helper.connect()
+    res = redis_helper.key_exists(key_name='eeew')
+    print(res)

+ 100 - 0
user_group_update.py

@@ -0,0 +1,100 @@
+import datetime
+import multiprocessing
+import time
+import traceback
+import gevent
+from threading import Timer
+from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from config import set_config
+from log import Log
+config_, _ = set_config()
+log_ = Log()
+redis_helper = RedisHelper()
+
+features = [
+    'apptype',
+    'return1mids',
+    'return2_3mids',
+    'return4_8mids',
+    'return9_24mids',
+    'return25_nmids',
+    'return0share1mids',
+    'return0share2_nmids'
+]
+
+
+def to_redis(group, mid_list):
+    log_.info(f"group = {group} update redis start ...")
+    start_time = time.time()
+    log_.info(f"mid count = {len(mid_list)}")
+    for i in range(len(mid_list) // 100 + 1):
+        # log_.info(f"i = {i}")
+        mid_temp_list = mid_list[i * 100:(i + 1) * 100]
+        task_list = [
+            gevent.spawn(redis_helper.set_data_to_redis,
+                         f"{config_.KEY_NAME_PREFIX_MID_GROUP}{mid}", group, 26 * 3600)
+            for mid in mid_temp_list
+        ]
+        gevent.joinall(task_list)
+    log_.info(f"group = {group}, mid count = {len(mid_list)}, update redis finished! "
+              f"execute time = {(time.time() - start_time) / 60}min")
+
+
+def update_user_group_to_redis(project, table, dt, app_type):
+    """Write each mid's group assignment to redis"""
+    # Fetch user grouping data
+    feature_df = get_feature_data(project=project, table=table, features=features, dt=dt)
+    feature_df['apptype'] = feature_df['apptype'].astype(int)
+    feature_df = feature_df[feature_df['apptype'] == app_type]
+    group_list = features[1:]
+    pool = multiprocessing.Pool(processes=len(group_list))
+    for group in group_list:
+        mid_list = feature_df[group].tolist()
+        mid_list = list(set(mid_list))
+        mid_list = [mid for mid in mid_list if mid is not None]
+        pool.apply_async(func=to_redis, args=(group, mid_list))
+    pool.close()
+    pool.join()
+
+
+def timer_check():
+    try:
+        app_type = config_.APP_TYPE['VLOG']
+        project = config_.ad_model_data['user_group'].get('project')
+        table = config_.ad_model_data['user_group'].get('table')
+        now_date = datetime.datetime.today()
+        dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+        log_.info(f"now_date: {dt}")
+        now_min = datetime.datetime.now().minute
+        # Check whether today's data is ready
+        data_count = data_check(project=project, table=table, dt=dt)
+        if data_count > 0:
+            log_.info(f"user group data count = {data_count}")
+            # Data is ready; run the update
+            update_user_group_to_redis(project=project, table=table, dt=dt, app_type=app_type)
+            log_.info("user group data update end!")
+        elif now_min > 45:
+            log_.info('user group data is None!')
+            send_msg_to_feishu(
+                webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+                key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+                msg_text=f"rov-offline{config_.ENV_TEXT} - user grouping data is not ready!"
+            )
+        else:
+            # Data not ready; check again in 1 minute
+            Timer(60, timer_check).start()
+
+    except Exception as e:
+        log_.error(f"user grouping data update failed, exception: {e}, traceback: {traceback.format_exc()}")
+        send_msg_to_feishu(
+            webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+            key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+            msg_text=f"rov-offline{config_.ENV_TEXT} - user grouping data update failed\n"
+                     f"exception: {e}\n"
+                     f"traceback: {traceback.format_exc()}"
+        )
+
+
+if __name__ == '__main__':
+    timer_check()
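
The fan-out in to_redis writes one key per mid in batches of 100 greenlets. A stripped-down version of that pattern (sketch; assumes a set function taking key, value, ttl like set_data_to_redis above):

    import gevent

    def in_batches(items, size=100):
        # yield successive slices of at most `size` items
        for i in range(0, len(items), size):
            yield items[i:i + size]

    def write_group(group, mids, set_fn, ttl=26 * 3600):
        for batch in in_batches(mids):
            jobs = [gevent.spawn(set_fn, f"mid:group:{mid}", group, ttl) for mid in batch]
            gevent.joinall(jobs)  # finish the whole batch before starting the next

range(0, len(items), size) also avoids the empty trailing batch that len(mid_list) // 100 + 1 produces when the list length is an exact multiple of 100.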

+ 7 - 0
user_group_update_task.sh

@@ -0,0 +1,7 @@
+source /etc/profile
+echo $ROV_OFFLINE_ENV
+if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
+    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/user_group_update.py
+elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
+    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/user_group_update.py
+fi

+ 40 - 0
utils.py

@@ -4,6 +4,7 @@ import os
 import requests
 import json
 import traceback
+import pandas as pd
 
 from odps import ODPS
 from config import set_config
@@ -426,6 +427,45 @@ def update_video_w_h_rate(video_ids, key_name):
         redis_helper.add_data_with_zset(key_name=key_name, data=info_data)
 
 
+def data_check(project, table, dt):
+    """检查数据是否准备好"""
+    odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project=project,
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
+        connect_timeout=3000,
+        read_timeout=500000,
+        pool_maxsize=1000,
+        pool_connections=1000
+    )
+
+    try:
+        check_res = check_table_partition_exits(date=dt, project=project, table=table)
+        if check_res:
+            sql = f'select * from {project}.{table} where dt = {dt}'
+            with odps.execute_sql(sql=sql).open_reader() as reader:
+                data_count = reader.count
+        else:
+            data_count = 0
+    except Exception:
+        data_count = 0
+    return data_count
+
+
+def get_feature_data(project, table, features, dt):
+    """获取特征数据"""
+    records = get_data_from_odps(date=dt, project=project, table=table)
+    feature_data = []
+    for record in records:
+        item = {}
+        for feature_name in features:
+            item[feature_name] = record[feature_name]
+        feature_data.append(item)
+    feature_df = pd.DataFrame(feature_data)
+    return feature_df
+
+
 if __name__ == '__main__':
     # data_test = [9.20273281e+03, 7.00795065e+03, 5.54813112e+03, 9.97402494e-01, 9.96402495e-01, 9.96402494e-01]
     # data_normalization(data_test)
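
One possible lightening of data_check above: reader.count on a `select *` result requires ODPS to produce the full result set just to report its size. If only the row count matters, pushing the aggregation into SQL is cheaper (hedged sketch reusing the same PyODPS client; quoting dt assumes the partition column is a string):

    def partition_row_count(odps, project, table, dt):
        # COUNT(1) runs the aggregation in ODPS instead of
        # materializing every row of the partition
        sql = f"select count(1) from {project}.{table} where dt = '{dt}'"
        with odps.execute_sql(sql).open_reader() as reader:
            for record in reader:
                return record[0]
        return 0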