# -*- coding: utf-8 -*-
"""Score libsvm-formatted feature rows with a logistic-regression model.

The model is a flat JSON object mapping feature name to weight; the
optional ``bias`` entry holds the intercept.
"""
import json
import math


def load_json(filename: str):
    """Return the parsed content of the JSON file *filename*.

    The file is read as UTF-8 regardless of the platform's locale, because
    feature names may contain non-ASCII text.
    """
    with open(filename, 'r', encoding='utf-8') as fin:
        return json.load(fin)


def wx(w_dict, kv):
    """Return ``weight * value`` for one ``(feature, value)`` pair.

    Features that are missing from *w_dict* get a weight of 0.0.
    """
    name, value = kv
    return w_dict.get(name, 0.0) * value


def sigmoid(x: float) -> float:
    """Return the logistic function ``1 / (1 + e**-x)``.

    For strongly negative *x* the intermediate ``e**-x`` does not fit in a
    float; the limiting value 0.0 is returned in that case instead of
    letting ``OverflowError`` escape.
    """
    try:
        return 1.0 / (1.0 + math.exp(-x))
    except OverflowError:
        return 0.0


def libsvm_row_to_features(row: str):
    """Parse one ``label name:value name:value ...`` row.

    Returns:
        A ``(label, features)`` tuple: the label as a string and a dict
        mapping each feature name to its float value. A blank row yields
        ``('', {})``.

    Raises:
        ValueError: if a feature token has no ``:`` or its value is not a
            valid float.
    """
    # Runs of spaces would otherwise produce empty tokens that cannot be
    # unpacked into a name/value pair.
    tokens = [token for token in row.strip().split(' ') if token]
    label = tokens[0] if tokens else ''
    features = {}
    for token in tokens[1:]:
        # Split on the last colon so that names containing ':' survive.
        name, value = token.rsplit(':', 1)
        features[name] = float(value)
    return label, features


class LrModel:
    """Logistic-regression scorer backed by a feature-to-weight dict."""

    def __init__(self, w_json_file):
        """Load the weight dict from the JSON file *w_json_file*."""
        self.w_dict = load_json(w_json_file)

    def predict_h(self, features):
        """Return the weighted sum of *features*, without the bias term."""
        return sum(wx(self.w_dict, kv) for kv in features.items())

    def predict(self, features):
        """Return ``sigmoid(weighted_sum + bias)`` for *features*.

        The bias is the ``'bias'`` entry of the weight dict, 0.0 if absent.
        """
        bias = self.w_dict.get('bias', 0.0)
        return sigmoid(self.predict_h(features) + bias)


def test():
    """Compare model scores for a few fixed rows against reference scores.

    Needs ``model/ad_out_v2_model_v1.day.json`` relative to the working
    directory. For the last row the reference is the score of an empty
    feature dict (presumably its feature names are absent from the model,
    so it should score like a bias-only input -- confirm against the model
    file).
    """
    lr_model = LrModel('model/ad_out_v2_model_v1.day.json')
    rows = [
        (0.279004, '0 u_brand#vivo:1 u_device#V1829A:1 u_system#Android:1 u_system_ver#Android10:1 i_id#17015839:1 i_up_id#24811642:1 i_title_len#5:1 i_play_len#8:1 i_days_since_upload#4:1 ctx_week#3:1 ctx_hour#8:1 ctx_region#山西:1 ctx_city#临汾:1 u_3month_exp_cnt#4:1 u_3month_click_cnt#4:1 u_3month_share_cnt#2:1 u_3month_return_cnt#6:1 i_1day_exp_cnt#16:1 i_1day_click_cnt#15:1 i_1day_share_cnt#12:1 i_1day_return_cnt#15:1 i_3day_exp_cnt#18:1 i_3day_click_cnt#17:1 i_3day_share_cnt#14:1 i_3day_return_cnt#15:1 i_7day_exp_cnt#18:1 i_7day_click_cnt#18:1 i_7day_share_cnt#14:1 i_7day_return_cnt#15:1 i_3month_exp_cnt#19:1 i_3month_click_cnt#18:1 i_3month_share_cnt#14:1 i_3month_return_cnt#16:1 u_ctr_3month:0.066667 u_str_3month:0.0375 u_rov_3month:0.283333 u_ros_3month:1.0 i_ctr_1day:0.070518 i_str_1day:0.007219 i_rov_1day:0.044376 i_ros_1day:0.871681 i_ctr_3day:0.070359 i_str_3day:0.007297 i_rov_3day:0.017242 i_ros_3day:0.335814 i_ctr_7day:0.070245 i_str_7day:0.007044 i_rov_7day:0.012268 i_ros_7day:0.247943 i_ctr_3month:0.06989 i_str_3month:0.007203 i_rov_3month:0.012624 i_ros_3month:0.250784'),
        (0.454255, '1 u_brand#vivo:1 u_device#V2230A:1 u_system#Android:1 u_system_ver#Android13:1 i_id#17141266:1 i_up_id#65303321:1 i_title_len#5:1 i_play_len#8:1 i_days_since_upload#3:1 ctx_week#3:1 ctx_hour#17:1 ctx_region#河北:1 ctx_city#邯郸:1 u_3month_exp_cnt#3:1 u_3month_click_cnt#3:1 i_1day_exp_cnt#19:1 i_1day_click_cnt#19:1 i_1day_share_cnt#16:1 i_1day_return_cnt#19:1 i_3day_exp_cnt#21:1 i_3day_click_cnt#21:1 i_3day_share_cnt#18:1 i_3day_return_cnt#20:1 i_7day_exp_cnt#22:1 i_7day_click_cnt#21:1 i_7day_share_cnt#18:1 i_7day_return_cnt#20:1 i_3month_exp_cnt#22:1 i_3month_click_cnt#21:1 i_3month_share_cnt#18:1 i_3month_return_cnt#20:1 u_ctr_3month:0.1 i_ctr_1day:0.078281 i_str_1day:0.009801 i_rov_1day:0.066601 i_ros_1day:0.868055 i_ctr_3day:0.076168 i_str_3day:0.010706 i_rov_3day:0.036384 i_ros_3day:0.446198 i_ctr_7day:0.07423 i_str_7day:0.011886 i_rov_7day:0.022732 i_ros_7day:0.257645 i_ctr_3month:0.07423 i_str_3month:0.011886 i_rov_3month:0.022732 i_ros_3month:0.257645'),
        (lr_model.predict({}), '0 dsiaod:1 dsaodadsa:1.2'),
    ]
    tolerance = 1e-5
    for std_score, row in rows:
        _label, features = libsvm_row_to_features(row)
        score = lr_model.predict(features)
        score_diff = std_score - score
        print(std_score, score, score_diff)
        assert abs(score_diff) < tolerance


# This is a script-style self-check that needs the model file on disk; the
# marker keeps pytest from collecting it when the module is star-imported.
test.__test__ = False


if __name__ == '__main__':
    test()