@@ -1,13 +1,8 @@
 #coding utf-8
-from odps import ODPS
-import pandas as pd
-from collections import defaultdict
 from tqdm import tqdm
 import sys
-import requests
 import json
 
-import datetime
 import traceback
 from threading import Timer
 from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
@@ -19,32 +14,7 @@ redis_helper = RedisHelper()
 
 from feature import get_item_features
 from lr_model import LrModel
-
-odps = ODPS(
-    access_id='LTAIWYUujJAm7CbH',
-    secret_access_key='RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
-    project="loghubods",
-    endpoint='http://service.cn.maxcompute.aliyun.com/api')
-
-def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
-                       pool_maxsize=1000, pool_connections=1000):
-    """
-    :param pool_connections:
-    :return: records
-    """
-    records = odps.read_table(name=table, partition='dt=%s' % date)
-    return records
-
-def exe_sql(sql):
-    data = []
-    with odps.execute_sql(sql).open_reader() as reader:
-        d = defaultdict(list)  #
-        for record in reader:
-            for res in record:
-                d[res[0]].append(res[1])  #
-        #data = pd.DataFrame.from_dict(d, orient='index', dtype=str).T  #
-        data = pd.DataFrame.from_dict(d, orient='columns', dtype=str)  #
-    return data
+from utils import exe_sql
 
 if __name__=="__main__":
     project = 'loghubods'
@@ -240,7 +210,7 @@ SELECT
 from candidate_item
 """.format(datetime=datetime)
     print(sql)
-    data = exe_sql(sql)
+    data = exe_sql(project, sql)
     print('sql done')
     #data.to_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t')
     #data = pd.read_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t', dtype=str)
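
This refactor consolidates the ODPS client into utils: the hard-coded access_id/secret_access_key pair, the unused get_data_from_odps, and the local exe_sql are deleted, and the call site now names the project explicitly via exe_sql(project, sql). The shared helper itself is not part of this diff; below is a minimal sketch of what it presumably looks like, reconstructed from the deleted implementation. The environment-variable names and the per-call client construction are assumptions, not the actual utils code.

# Assumed shape of utils.exe_sql -- not shown in this diff; reconstructed
# from the deleted local implementation, with the credentials moved out of
# the source file.
import os
from collections import defaultdict

import pandas as pd
from odps import ODPS

def exe_sql(project, sql):
    """Execute a MaxCompute SQL statement and return the result as a string DataFrame."""
    odps = ODPS(
        access_id=os.environ['ODPS_ACCESS_ID'],            # assumption: env vars, names illustrative
        secret_access_key=os.environ['ODPS_SECRET_KEY'],
        project=project,                                   # now supplied by the caller
        endpoint='http://service.cn.maxcompute.aliyun.com/api')
    with odps.execute_sql(sql).open_reader() as reader:
        d = defaultdict(list)
        for record in reader:
            for field, value in record:                    # records iterate as (column, value) pairs
                d[field].append(value)
    return pd.DataFrame.from_dict(d, orient='columns', dtype=str)

Centralizing the client also removes the access key that the deleted lines had committed in plain text, and making project a parameter lets the same helper serve scripts that query other MaxCompute projects.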