Browse Source

重构lr_v1

yangxiaohui 1 year ago
parent
commit
0841898231
2 changed files with 22 additions and 33 deletions
  1. +2 -32  get_ad_out_sample_v2_item.py
  2. +20 -1  utils.py

+ 2 - 32
get_ad_out_sample_v2_item.py

@@ -1,13 +1,8 @@
 #coding utf-8
-from odps import ODPS
-import pandas as pd
-from collections import defaultdict
 from tqdm import tqdm
 import sys
-import requests
 import json
 
-import datetime
 import traceback
 from threading import Timer
 from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
@@ -19,32 +14,7 @@ redis_helper = RedisHelper()
 
 from feature import get_item_features
 from lr_model import LrModel
-
-odps = ODPS(
-        access_id='LTAIWYUujJAm7CbH',
-        secret_access_key='RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
-        project="loghubods",
-        endpoint='http://service.cn.maxcompute.aliyun.com/api')
-
-def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
-                       pool_maxsize=1000, pool_connections=1000):
-    """
-    :param pool_connections:
-    :return: records
-    """
-    records = odps.read_table(name=table, partition='dt=%s' % date)
-    return records
-
-def exe_sql(sql):
-    data = []
-    with odps.execute_sql(sql).open_reader() as reader:
-        d = defaultdict(list)  #
-        for record in reader:
-            for res in record:
-                d[res[0]].append(res[1])  # 
-        #data = pd.DataFrame.from_dict(d, orient='index', dtype=str).T  # 
-        data = pd.DataFrame.from_dict(d, orient='columns', dtype=str)  # 
-    return data
+from utils import exe_sql
 
 if __name__=="__main__":
     project = 'loghubods'
@@ -240,7 +210,7 @@ SELECT
 from candidate_item
     """.format(datetime=datetime)
     print(sql)
-    data = exe_sql(sql)
+    data = exe_sql(project, sql)
     print('sql done')
     #data.to_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t')
     #data = pd.read_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t', dtype=str)

+ 20 - 1
utils.py

@@ -10,6 +10,7 @@ from odps import ODPS
 from config import set_config
 from db_helper import HologresHelper, MysqlHelper, RedisHelper
 from log import Log
+from collections import defaultdict
 
 config_, env = set_config()
 log_ = Log()
@@ -29,7 +30,25 @@ def execute_sql_from_odps(project, sql, connect_timeout=3000, read_timeout=50000
     )
     records = odps.execute_sql(sql=sql)
     return records
-
+def exe_sql(project, sql, connect_timeout=3000, read_timeout=500000,
+                          pool_maxsize=1000, pool_connections=1000):
+    odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project=project,
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
+        connect_timeout=connect_timeout,
+        read_timeout=read_timeout,
+        pool_maxsize=pool_maxsize,
+        pool_connections=pool_connections
+    )
+    with odps.execute_sql(sql).open_reader() as reader:
+        d = defaultdict(list)  #
+        for record in reader:
+            for res in record:
+                d[res[0]].append(res[1])  #
+        data = pd.DataFrame.from_dict(d, orient='columns', dtype=str)  #
+    return data
 
 def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
                        pool_maxsize=1000, pool_connections=1000):