|
@@ -11,8 +11,7 @@ log_ = Log()
|
|
|
config_ = set_config()
|
|
|
|
|
|
|
|
|
-def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
|
|
|
- pool_maxsize=1000, pool_connections=1000):
|
|
|
+def get_data_from_odps(date, project, table):
|
|
|
"""
|
|
|
从odps获取数据
|
|
|
:param date: 日期 type-string '%Y%m%d'
|
|
@@ -28,11 +27,7 @@ def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=
|
|
|
access_id=config_.ODPS_CONFIG['ACCESSID'],
|
|
|
secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
|
|
|
project=project,
|
|
|
- endpoint=config_.ODPS_CONFIG['ENDPOINT'],
|
|
|
- connect_timeout=connect_timeout,
|
|
|
- read_timeout=read_timeout,
|
|
|
- pool_maxsize=pool_maxsize,
|
|
|
- pool_connections=pool_connections
|
|
|
+ endpoint=config_.ODPS_CONFIG['ENDPOINT']
|
|
|
)
|
|
|
records = odps.read_table(name=table, partition='dt=%s' % date)
|
|
|
return records
|
|
@@ -51,8 +46,7 @@ def get_feature_data(project, table, dt, features):
|
|
|
return feature_df
|
|
|
|
|
|
|
|
|
-def check_table_partition_exits(date, project, table, connect_timeout=3000, read_timeout=500000,
|
|
|
- pool_maxsize=1000, pool_connections=1000):
|
|
|
+def check_table_partition_exits(date, project, table):
|
|
|
"""
|
|
|
判断表中是否存在这个分区
|
|
|
:param date: 日期 type-string '%Y%m%d'
|
|
@@ -68,11 +62,7 @@ def check_table_partition_exits(date, project, table, connect_timeout=3000, read
|
|
|
access_id=config_.ODPS_CONFIG['ACCESSID'],
|
|
|
secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
|
|
|
project=project,
|
|
|
- endpoint=config_.ODPS_CONFIG['ENDPOINT'],
|
|
|
- connect_timeout=connect_timeout,
|
|
|
- read_timeout=read_timeout,
|
|
|
- pool_maxsize=pool_maxsize,
|
|
|
- pool_connections=pool_connections
|
|
|
+ endpoint=config_.ODPS_CONFIG['ENDPOINT']
|
|
|
)
|
|
|
t = odps.get_table(name=table)
|
|
|
return t.exist_partition(partition_spec=f'dt={date}')
|
|
@@ -84,15 +74,12 @@ def data_check(project, table, dt):
|
|
|
access_id=config_.ODPS_CONFIG['ACCESSID'],
|
|
|
secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
|
|
|
project=project,
|
|
|
- endpoint=config_.ODPS_CONFIG['ENDPOINT'],
|
|
|
- connect_timeout=3000,
|
|
|
- read_timeout=500000,
|
|
|
- pool_maxsize=1000,
|
|
|
- pool_connections=1000
|
|
|
+ endpoint=config_.ODPS_CONFIG['ENDPOINT']
|
|
|
)
|
|
|
|
|
|
try:
|
|
|
- check_res = check_table_partition_exits(date=dt, project=project, table=table)
|
|
|
+ check_res = check_table_partition_exits(
|
|
|
+ date=dt, project=project, table=table)
|
|
|
if check_res:
|
|
|
sql = f'select * from {project}.{table} where dt = {dt}'
|
|
|
with odps.execute_sql(sql=sql).open_reader() as reader:
|
|
@@ -113,7 +100,8 @@ def request_post(request_url, headers, request_data):
|
|
|
:return: res_data json格式
|
|
|
"""
|
|
|
try:
|
|
|
- response = requests.post(url=request_url, json=request_data, headers=headers)
|
|
|
+ response = requests.post(
|
|
|
+ url=request_url, json=request_data, headers=headers)
|
|
|
|
|
|
if response.status_code == 200:
|
|
|
res_data = json.loads(response.text)
|
|
@@ -121,7 +109,8 @@ def request_post(request_url, headers, request_data):
|
|
|
else:
|
|
|
return None
|
|
|
except Exception as e:
|
|
|
- log_.error('url: {}, exception: {}, traceback: {}'.format(request_url, e, traceback.format_exc()))
|
|
|
+ log_.error('url: {}, exception: {}, traceback: {}'.format(
|
|
|
+ request_url, e, traceback.format_exc()))
|
|
|
return None
|
|
|
|
|
|
|
|
@@ -134,14 +123,16 @@ def request_get(request_url, headers, params=None):
|
|
|
:return: res_data json格式
|
|
|
"""
|
|
|
try:
|
|
|
- response = requests.get(url=request_url, headers=headers, params=params)
|
|
|
+ response = requests.get(
|
|
|
+ url=request_url, headers=headers, params=params)
|
|
|
if response.status_code == 200:
|
|
|
res_data = json.loads(response.text)
|
|
|
return res_data
|
|
|
else:
|
|
|
return None
|
|
|
except Exception as e:
|
|
|
- log_.error('url: {}, exception: {}, traceback: {}'.format(request_url, e, traceback.format_exc()))
|
|
|
+ log_.error('url: {}, exception: {}, traceback: {}'.format(
|
|
|
+ request_url, e, traceback.format_exc()))
|
|
|
return None
|
|
|
|
|
|
|
|
@@ -173,9 +164,6 @@ def asr_validity_discrimination(text):
|
|
|
return True
|
|
|
|
|
|
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
if __name__ == '__main__':
|
|
|
text = """现场和电视机前的观众朋友,大家晚上好。
|
|
|
这里是非常说明的访谈现场,
|
|
@@ -913,4 +901,3 @@ Haha哈哈那个。
|
|
|
他还是。"""
|
|
|
res = asr_validity_discrimination(text=text)
|
|
|
print(res)
|
|
|
-
|