yangxiaohui hai 1 ano
pai
achega
148e4e3151
Modificáronse 3 ficheiros con 18 adicións e 20 borrados
  1. 3 1
      feature.py
  2. 13 12
      get_ad_out_sample_v2_item.py
  3. 2 7
      utils.py

+ 3 - 1
feature.py

@@ -93,7 +93,9 @@ user_dense_conf = [
 ]
 
 def format_x(x):
-    return  str(x).replace(' ', '').replace(':', '_')
+    if x is None:
+        x = ''
+    return str(x).replace(' ', '').replace(':', '_')
 def sparse_fea_2_feature(v, k):
     f_k = format_x(k)
     f_v = format_x(v)

+ 13 - 12
get_ad_out_sample_v2_item.py

@@ -16,6 +16,7 @@ from feature import get_item_features
 from lr_model import LrModel
 from utils import exe_sql
 
+
 if __name__ == "__main__":
     project = 'loghubods'
     datetime = sys.argv[1]
@@ -214,22 +215,22 @@ from candidate_item
     print('sql done')
     # data.to_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t')
     # data = pd.read_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t', dtype=str)
-    data.fillna('', inplace=True)
     model_key = 'ad_out_v2_model_v1.day'
     lr_model = LrModel('model/{}.json'.format(model_key))
     item_h_dict = {}
     k_col = 'i_id'
     dt = datetime
-    key_name = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM}{model_key}:{dt}"
-    print(key_name)
-    for index, row in tqdm(data.iterrows()):
-        k = row['i_id']
-        item_features = get_item_features(row)
-        item_h = lr_model.predict_h(item_features)
-        item_h_dict[k] = item_h
-        # print(item_features)
-        # print(item_h)
-    redis_helper.add_data_with_zset(key_name=key_name, data=item_h_dict, expire_time=2 * 24 * 3600)
-    with open('{}.json'.format(key_name), 'w') as fout:
+    key_name_prefix = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM}{model_key}:"
+    print(key_name_prefix)
+    with data.open_reader() as reader:
+        for row in reader:
+            k = row['i_id']
+            item_features = get_item_features(row)
+            item_h = lr_model.predict_h(item_features)
+            redis_helper.set_data_to_redis(f"{key_name_prefix}:{k}", item_h, 28 * 3600)
+            item_h_dict[k] = item_h
+            # print(item_features)
+            # print(item_h)
+    with open('{}.json'.format(key_name_prefix), 'w') as fout:
         json.dump(item_h_dict, fout, indent=2, ensure_ascii=False, sort_keys=True)
 

+ 2 - 7
utils.py

@@ -42,13 +42,8 @@ def exe_sql(project, sql, connect_timeout=3000, read_timeout=500000,
         pool_maxsize=pool_maxsize,
         pool_connections=pool_connections
     )
-    with odps.execute_sql(sql).open_reader() as reader:
-        d = defaultdict(list)  #
-        for record in reader:
-            for res in record:
-                d[res[0]].append(res[1])  #
-        data = pd.DataFrame.from_dict(d, orient='columns', dtype=str)  #
-    return data
+    records = odps.execute_sql(sql)
+    return records
 
 def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
                        pool_maxsize=1000, pool_connections=1000):