# coding: utf-8
import sys
import pandas as pd

from tqdm import tqdm

from collections import defaultdict
# Module-level mapping whose missing keys default to '' (the result of str()).
# NOTE(review): nothing in the visible code populates or reads it, and the name
# is the same as the stdlib `types` module — confirm usage before touching it.
types = defaultdict(str)

# Item (video) columns handled as sparse features: get_item_features() turns
# each non-empty value into a "<column>#<value>" key with weight 1.0.
item_sparse_conf = [
    # Basic video features ('i_tag' and 'i_title' are currently disabled).
    'i_id', 'i_up_id', 'i_title_len', 'i_play_len', 'i_days_since_upload',
    # Video statistics, grouped by the window suffix in the column name.
    'i_1day_exp_cnt', 'i_1day_click_cnt', 'i_1day_share_cnt', 'i_1day_return_cnt',
    'i_3day_exp_cnt', 'i_3day_click_cnt', 'i_3day_share_cnt', 'i_3day_return_cnt',
    'i_7day_exp_cnt', 'i_7day_click_cnt', 'i_7day_share_cnt', 'i_7day_return_cnt',
    'i_3month_exp_cnt', 'i_3month_click_cnt', 'i_3month_share_cnt', 'i_3month_return_cnt',
]
# Item (video) columns handled as dense features: get_item_features() stores
# float(value) under the column name for each non-empty value.
item_dense_conf = [
    'i_ctr_1day', 'i_str_1day', 'i_rov_1day', 'i_ros_1day',
    'i_ctr_3day', 'i_str_3day', 'i_rov_3day', 'i_ros_3day',
    'i_ctr_7day', 'i_str_7day', 'i_rov_7day', 'i_ros_7day',
    'i_ctr_3month', 'i_str_3month', 'i_rov_3month', 'i_ros_3month',
]

# User/context columns handled as sparse features: get_user_features() turns
# each non-empty value into a "<column>#<value>" key with weight 1.0.
user_sparse_conf = [
    'u_brand', 'u_device', 'u_system', 'u_system_ver',
    'ctx_region', 'ctx_city',
    # User statistics.
    'u_cycle_bucket_7days', 'u_cycle_bucket_30days', 'u_share_bucket_30days',
    'u_1day_exp_cnt', 'u_1day_click_cnt', 'u_1day_share_cnt', 'u_1day_return_cnt',
    'u_3day_exp_cnt', 'u_3day_click_cnt', 'u_3day_share_cnt', 'u_3day_return_cnt',
    'u_7day_exp_cnt', 'u_7day_click_cnt', 'u_7day_share_cnt', 'u_7day_return_cnt',
    'u_3month_exp_cnt', 'u_3month_click_cnt', 'u_3month_share_cnt', 'u_3month_return_cnt',
]

# User columns handled as dense features: get_user_features() stores
# float(value) under the column name for each non-empty value.
user_dense_conf = [
    'u_ctr_1day', 'u_str_1day', 'u_rov_1day', 'u_ros_1day',
    'u_ctr_3day', 'u_str_3day', 'u_rov_3day', 'u_ros_3day',
    'u_ctr_7day', 'u_str_7day', 'u_rov_7day', 'u_ros_7day',
    'u_ctr_3month', 'u_str_3month', 'u_rov_3month', 'u_ros_3month',
]

def format_x(x):
    """Return *x* as a string with spaces removed and ':' replaced by '_'.

    ``None`` is mapped to the empty string; every other value goes through
    ``str()`` first.

    NOTE(review): only ``None`` counts as missing here. A float NaN is
    stringified to ``'nan'`` and therefore treated as a real value — confirm
    that is intended if rows come from pandas.
    """
    text = '' if x is None else str(x)
    without_spaces = text.replace(' ', '')
    return without_spaces.replace(':', '_')
def sparse_fea_2_feature(v, k):
    """Build the (key, weight) pair for one sparse feature.

    Both the column name *k* and the value *v* are normalised with
    ``format_x``. Returns ``("<k>#<v>", 1.0)``, or ``(None, None)`` when the
    normalised value is empty so the caller can discard it.
    """
    name = format_x(k)
    value = format_x(v)
    if not value:
        return (None, None)
    return (name + '#' + value, 1.0)

def dense_fea_2_feature(v, k):
    """Build the (key, value) pair for one dense feature.

    Both the column name *k* and the value *v* are normalised with
    ``format_x``. Returns ``(<k>, float(<v>))``, or ``(None, None)`` when the
    normalised value is empty so the caller can discard it.

    Raises:
        ValueError: if the normalised value is non-empty but ``float()``
            cannot parse it.
    """
    name = format_x(k)
    text = format_x(v)
    if not text:
        return (None, None)
    return (name, float(text))

def get_features(sparse_conf, dense_conf, row):
    """Collect sparse and dense features of *row* into a single dict.

    Args:
        sparse_conf: column names to encode with ``sparse_fea_2_feature``.
        dense_conf: column names to encode with ``dense_fea_2_feature``.
        row: any object indexable by column name (``row[name]``).

    Returns:
        A dict mapping feature key to float value. Sparse entries are added
        first, then dense ones; columns whose value is empty are left out.
    """
    features = {}
    for column in sparse_conf:
        key, weight = sparse_fea_2_feature(row[column], column)
        features[key] = weight
    for column in dense_conf:
        key, value = dense_fea_2_feature(row[column], column)
        features[key] = value
    # Empty values come back as (None, None); drop that placeholder entry.
    features.pop(None, None)
    return features

def get_item_features(row):
    """Return the feature dict of *row* built from the item column lists."""
    return get_features(
        sparse_conf=item_sparse_conf,
        dense_conf=item_dense_conf,
        row=row,
    )

def get_user_features(row):
    """Return the feature dict of *row* built from the user column lists."""
    return get_features(
        sparse_conf=user_sparse_conf,
        dense_conf=user_dense_conf,
        row=row,
    )
     
# Column name kept as the label; presumably the training target — it is not
# referenced in the visible code, so confirm against callers.
label_col = 'ui_is_out'

# Additional sparse column names (basic context features).
# NOTE(review): not referenced by the functions visible in this section.
# Disabled entries: 'u_id', 'ctx_day', and the cross features 'ui_is_out',
# 'playtime', 'ui_root_id', 'ui_share_id'.
sparse_fea_cols = [
    'ctx_apptype',
    'ctx_week',
    'ctx_hour',
]

# Additional dense column names; intentionally empty at present.
dense_fea_cols = []