-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add basic operations for UAI Train job control,
including: create job, list jobs, get job info, stop job, and delete job. All five AI architectures are supported.
- Loading branch information
宋翔
authored and
宋翔
committed
Nov 14, 2017
1 parent
3ba8b49
commit 9d4e7a7
Showing
30 changed files
with
1,362 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Copyright 2017 The UAI-SDK Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
|
||
import re | ||
|
||
UFS_MOUNT_POINT_FORMAT = r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\/ufs-\w+'
UFS_PATH_FORMAT = r'(\w+\/)+'


def concat_ufs_path(path, mount_point):
    """Validate a UFS mount point and a relative path, then join them.

    Args:
        path: Relative directory path made only of word-character segments,
            each followed by a slash, e.g. ``"train/data/"``.
        mount_point: UFS mount point that starts with ``x.x.x.x:/ufs-xxx``
            (checked as a prefix against ``UFS_MOUNT_POINT_FORMAT``).

    Returns:
        The string ``mount_point + '/' + path``.

    Raises:
        RuntimeError: If ``mount_point`` or ``path`` is not in the expected
            format.
    """
    if re.match(UFS_MOUNT_POINT_FORMAT, mount_point) is None:
        raise RuntimeError("UFS mount point should be in format x.x.x.x:/ufs-xxx")

    # UFS_PATH_FORMAT carries no anchors, so a bare match() accepts anything
    # that merely *starts* with "xxx/" (e.g. "a/b" or "a/!!").  Anchor the end
    # so the whole path has to look like "xxx/xxx/", as the message promises.
    # \Z (rather than $) also rejects a trailing newline.
    if re.match(r'(?:' + UFS_PATH_FORMAT + r')\Z', path) is None:
        raise RuntimeError("UFS path should match xxx/xxx/")

    return mount_point + '/' + path
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
# Copyright 2017 The UAI-SDK Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
|
||
from uaitrain.api.base_op import BaseUAITrainAPIOp | ||
|
||
class CreateUAITrainJobOp(BaseUAITrainAPIOp):
    """Build and validate the parameters of a CreateUAITrainJob request.

    Compatible with the UAI Train CreateUAITrainJob API func.

    Input:
        pub_key         string(required)  Public key of the user
        priv_key        string(required)  Private key of the user
        project_id      int(optional)     Project ID of the job
        region          string(optional)  Which Region to run the job
        zone            string(optional)  Which Zone in the Region to run the job
        job_name        string(required)  Job name of the job
        work_id         int(required)     The id of the train node; details can be
                                          obtained from GetUAITrainAvailableResourceOp.
                                              1860001, include 1 GPU
                                              1860003, include 4 GPU
                                              etc.
        code_uhub_path  string(required)  Which image in the uhub to run the job
        data_ufile_path string(required)  The ufile path of input data
        out_ufile_path  string(required)  The ufile path of output data
        docker_cmd      string(required)  The cmd used to run the job
        max_exec_time   int(required)     The max exec time of the job. If the job
                                          doesn't finish in time, the system will
                                          stop the job.
        business_group  string(optional)  Which business group to run the job
        job_memo        string(optional)  The memo of the job
    Output:
        RetCode     int(required)         Op return code: 0: success, others: error code
        TrainJObID  string(required)      The id of the train job
        Message     string(not required)  Message: error description
    """

    ACTION_NAME = "CreateUAITrainJob"

    # (cmd_params key, constructor argument name, required type), listed in
    # the order in which _check_args validates them.
    _REQUIRED_PARAMS = (
        ("TrainJobName", "job_name", str),
        ("TrainWorkId", "work_id", int),
        ("CodeUhubPath", "code_uhub_path", str),
        ("DataUfilePath", "data_ufile_path", str),
        ("OutputUfilePath", "out_ufile_path", str),
        ("DockerCmd", "docker_cmd", str),
        ("MaxExecuteTime", "max_exec_time", int),
    )

    def __init__(self, pub_key, priv_key, job_name, work_id, code_uhub_path, data_ufile_path, out_ufile_path,
                 docker_cmd, max_exec_time, business_group="", job_memo="", project_id="",
                 region="", zone=""):
        super(CreateUAITrainJobOp, self).__init__(self.ACTION_NAME,
                                                  pub_key,
                                                  priv_key,
                                                  project_id,
                                                  region,
                                                  zone)
        # NOTE(review): cmd_params is presumably created by the base class
        # constructor -- confirm in uaitrain.api.base_op.
        self.cmd_params["TrainJobName"] = job_name
        self.cmd_params["TrainWorkId"] = work_id
        self.cmd_params["CodeUhubPath"] = code_uhub_path
        self.cmd_params["DataUfilePath"] = data_ufile_path
        self.cmd_params["OutputUfilePath"] = out_ufile_path
        self.cmd_params["DockerCmd"] = docker_cmd
        # The predicted start time is always set to 0 here.
        self.cmd_params["PredictStartTime"] = 0
        self.cmd_params["MaxExecuteTime"] = max_exec_time

        # The user's key pair is also stored as job parameters.
        self.cmd_params["TrainPublicKey"] = pub_key
        self.cmd_params["TrainPrivateKey"] = priv_key

        self.cmd_params["TrainJobMemo"] = job_memo
        self.cmd_params["BusinessGroup"] = business_group

    def _check_args(self):
        """Validate the required job parameters after the base-class checks.

        Raises:
            RuntimeError: If a required parameter is an empty string or is
                not of the required type.
        """
        super(CreateUAITrainJobOp, self)._check_args()
        for key, arg_name, expected in self._REQUIRED_PARAMS:
            value = self.cmd_params[key]
            # bool is a subclass of int; keep rejecting it for the int
            # fields, as the original exact-type comparison did.
            type_ok = isinstance(value, expected) and not isinstance(value, bool)
            if not type_ok or value == "":
                raise RuntimeError(
                    "{0} shoud be <{1}> and is not nil.".format(arg_name, expected.__name__))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Copyright 2017 The UAI-SDK Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
|
||
from uaitrain.api.base_op import BaseUAITrainAPIOp | ||
|
||
class GetUAITrainAvailableResourceOp(BaseUAITrainAPIOp):
    """Build the parameters of a GetUAITrainAvailableResource request.

    Compatible with the UAI Train GetUAITrainAvailableResource API func.

    Input:
        pub_key     string(required)  Public key of the user
        priv_key    string(required)  Private key of the user
        project_id  int(optional)     Project ID of the job
        region      string(optional)  Which Region to run the job
        zone        string(optional)  Which Zone in the Region to run the job
        node_type   string(optional)  The type of node, default is 'Work'.
                                          'Work': train node
                                          'PS': param node
    Output:
        RetCode     int(required)         Op return code: 0: success, others: error code
        TotalCount  string(required)      The count of result
        Message     string(not required)  Message: error description
        DataSet     []                    The detailed information of resource
    """

    ACTION_NAME = "GetUAITrainAvailableResource"

    def __init__(self, pub_key, priv_key, node_type='Work', project_id="", region="", zone=""):
        super(GetUAITrainAvailableResourceOp, self).__init__(self.ACTION_NAME,
                                                             pub_key,
                                                             priv_key,
                                                             project_id,
                                                             region,
                                                             zone)
        self.cmd_params["NodeType"] = node_type

    def _check_args(self):
        """Run the base-class checks; node_type itself is not validated here."""
        super(GetUAITrainAvailableResourceOp, self)._check_args()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# Copyright 2017 The UAI-SDK Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
|
||
from uaitrain.api.base_op import BaseUAITrainAPIOp | ||
|
||
class GetUAITrainBillInfoOp(BaseUAITrainAPIOp):
    """Build and validate the parameters of a GetUAITrainBillInfo request.

    Compatible with the UAI Train GetUAITrainBillInfo API func.

    Input:
        pub_key     string(required)  Public key of the user
        priv_key    string(required)  Private key of the user
        project_id  int(optional)     Project ID of the job
        region      string(optional)  Which Region to run the job
        zone        string(optional)  Which Zone in the Region to run the job
        beg_time    int(required)     The start time of the bill
        end_time    int(required)     The end time of the bill; must not be
                                      earlier than beg_time
        offset      int(optional)     The offset of the list
        limit       int(optional)     The max num of returned list; returns the
                                      whole bill list if it isn't set
    Output:
        RetCode           int(required)         Op return code: 0: success, others: error code
        TotalCount        string(required)      The count of result
        TotalExecuteTime  int(required)         Total exec time of all train jobs
        TotalPrice        int(required)         Total price of all train jobs
        Message           string(not required)  Message: error description
        DataSet           []                    The detailed bill information of train jobs
    """

    ACTION_NAME = "GetUAITrainBillInfo"

    def __init__(self, pub_key, priv_key, beg_time, end_time, offset="", limit="", project_id="", region="", zone=""):
        super(GetUAITrainBillInfoOp, self).__init__(self.ACTION_NAME,
                                                    pub_key,
                                                    priv_key,
                                                    project_id,
                                                    region,
                                                    zone)
        self.cmd_params["BeginTime"] = beg_time
        self.cmd_params["EndTime"] = end_time
        self.cmd_params["Offset"] = offset
        self.cmd_params["Limit"] = limit

    def _check_args(self):
        """Validate the billing time range after the base-class checks.

        Raises:
            RuntimeError: If beg_time or end_time is not an int, or if
                beg_time is later than end_time.
        """
        super(GetUAITrainBillInfoOp, self)._check_args()
        begin = self.cmd_params["BeginTime"]
        end = self.cmd_params["EndTime"]

        # bool is a subclass of int; keep rejecting it, as the original
        # exact-type comparison did.  A non-int (including "") fails here.
        if not isinstance(begin, int) or isinstance(begin, bool):
            raise RuntimeError("beg_time shoud be <int> and is not nil.")
        if not isinstance(end, int) or isinstance(end, bool):
            raise RuntimeError("end_time shoud be <int> and is not nil.")

        # Equal begin and end times are accepted; only an inverted range fails.
        if begin > end:
            raise RuntimeError("end_time should be greater than beg_time. end_time: {0}, beg_time: {1}".
                               format(end, begin))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Copyright 2017 The UAI-SDK Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
|
||
from uaitrain.api.base_op import BaseUAITrainAPIOp | ||
|
||
class GetUAITrainJobListOp(BaseUAITrainAPIOp):
    """Build the parameters of a GetUAITrainJobList request.

    Compatible with the UAI Train GetUAITrainJobList API func.

    Input:
        pub_key     string(required)  Public key of the user
        priv_key    string(required)  Private key of the user
        project_id  int(optional)     Project ID of the job
        region      string(optional)  Which Region to run the job
        zone        string(optional)  Which Zone in the Region to run the job
        job_id      string(optional)  Which train job to get info
        offset      int(optional)     The offset of the list
        limit       int(optional)     The max num of returned list; returns the
                                      whole job list if it isn't set
    Output:
        RetCode     int(required)         Op return code: 0: success, others: error code
        TotalCount  string(required)      The count of result
        Message     string(not required)  Message: error description
        DataSet     []                    The detailed information of train job
    """

    ACTION_NAME = "GetUAITrainJobList"

    def __init__(self, pub_key, priv_key, job_id="", offset="", limit="", project_id="", region="", zone=""):
        super(GetUAITrainJobListOp, self).__init__(self.ACTION_NAME,
                                                   pub_key,
                                                   priv_key,
                                                   project_id,
                                                   region,
                                                   zone)
        self.cmd_params["TrainJobId"] = job_id
        self.cmd_params["Offset"] = offset
        self.cmd_params["Limit"] = limit

    def _check_args(self):
        """Run the base-class checks; the list filters are not validated here."""
        super(GetUAITrainJobListOp, self)._check_args()
Oops, something went wrong.