1 năm trước cách đây · 148e4e3151
--- a/feature.py
+++ b/feature.py
@@ -93,7 +93,9 @@ user_dense_conf = [
 
				 ]
			
 
				 
			
 
				 def format_x(x):
			
 
				-    return  str(x).replace(' ', '').replace(':', '_')
			
 
				+    if x is None:
			
 
				+        x = ''
			
 
				+    return str(x).replace(' ', '').replace(':', '_')
			
 
				 def sparse_fea_2_feature(v, k):
			
 
				     f_k = format_x(k)
			
 
				     f_v = format_x(v)
			
--- a/get_ad_out_sample_v2_item.py
+++ b/get_ad_out_sample_v2_item.py
@@ -16,6 +16,7 @@ from feature import get_item_features
 
				 from lr_model import LrModel
			
 
				 from utils import exe_sql
			
 
				 
			
 
				+
			
 
				 if __name__ == "__main__":
			
 
				     project = 'loghubods'
			
 
				     datetime = sys.argv[1]
			
@@ -214,22 +215,22 @@ from candidate_item
 
				     print('sql done')
			
 
				     # data.to_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t')
			
 
				     # data = pd.read_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t', dtype=str)
			
 
				-    data.fillna('', inplace=True)
			
 
				     model_key = 'ad_out_v2_model_v1.day'
			
 
				     lr_model = LrModel('model/{}.json'.format(model_key))
			
 
				     item_h_dict = {}
			
 
				     k_col = 'i_id'
			
 
				     dt = datetime
			
 
				-    key_name = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM}{model_key}:{dt}"
			
 
				-    print(key_name)
			
 
				-    for index, row in tqdm(data.iterrows()):
			
 
				-        k = row['i_id']
			
 
				-        item_features = get_item_features(row)
			
 
				-        item_h = lr_model.predict_h(item_features)
			
 
				-        item_h_dict[k] = item_h
			
 
				-        # print(item_features)
			
 
				-        # print(item_h)
			
 
				-    redis_helper.add_data_with_zset(key_name=key_name, data=item_h_dict, expire_time=2 * 24 * 3600)
			
 
				-    with open('{}.json'.format(key_name), 'w') as fout:
			
 
				+    key_name_prefix = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM}{model_key}:"
			
 
				+    print(key_name_prefix)
			
 
				+    with data.open_reader() as reader:
			
 
				+        for row in reader:
			
 
				+            k = row['i_id']
			
 
				+            item_features = get_item_features(row)
			
 
				+            item_h = lr_model.predict_h(item_features)
			
 
				+            redis_helper.set_data_to_redis(f"{key_name_prefix}:{k}", item_h, 28 * 3600)
			
 
				+            item_h_dict[k] = item_h
			
 
				+            # print(item_features)
			
 
				+            # print(item_h)
			
 
				+    with open('{}.json'.format(key_name_prefix), 'w') as fout:
			
 
				         json.dump(item_h_dict, fout, indent=2, ensure_ascii=False, sort_keys=True)
			
 
				 
			
--- a/utils.py
+++ b/utils.py
@@ -42,13 +42,8 @@ def exe_sql(project, sql, connect_timeout=3000, read_timeout=500000,
 
				         pool_maxsize=pool_maxsize,
			
 
				         pool_connections=pool_connections
			
 
				     )
			
 
				-    with odps.execute_sql(sql).open_reader() as reader:
			
 
				-        d = defaultdict(list)  #
			
 
				-        for record in reader:
			
 
				-            for res in record:
			
 
				-                d[res[0]].append(res[1])  #
			
 
				-        data = pd.DataFrame.from_dict(d, orient='columns', dtype=str)  #
			
 
				-    return data
			
 
				+    records = odps.execute_sql(sql)
			
 
				+    return records
			
 
				 
			
 
				 def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
			
 
				                        pool_maxsize=1000, pool_connections=1000):