From 9d4e7a72546bb7192e1d24e7988f559a109657e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=8B=E7=BF=94?= Date: Tue, 14 Nov 2017 16:48:06 +0800 Subject: [PATCH] Add basic operations for UAI Train job control, Including: Create job List job Get job info Stop job Delete job All five AI arch all supported --- README.md | 6 +- uai/utils/utils.py | 1 + uai/utils/utils_ufs.py | 31 +++ uaitrain/api/base_op.py | 11 +- uaitrain/api/create_train_job.py | 94 +++++++ uaitrain/api/get_train_available_resource.py | 49 ++++ uaitrain/api/get_train_job_bill_info.py | 64 +++++ uaitrain/api/get_train_job_list.py | 52 ++++ uaitrain/api/get_train_job_running_info.py | 52 ++++ uaitrain/api/get_train_tensorboard_url.py | 51 ++++ uaitrain/api/modify_train_job_memo.py | 56 +++++ uaitrain/api/modify_train_job_name.py | 56 +++++ uaitrain/api/remove_train_job.py | 51 ++++ uaitrain/api/stop_train_job.py | 51 ++++ .../operation/create_train_job/__init__.py | 0 .../create_train_job/base_create_op.py | 229 ++++++++++++++++++ .../operation/delete_train_job/__init__.py | 0 .../delete_train_job/base_delete_op.py | 69 ++++++ uaitrain/operation/info_train_job/__init__.py | 0 .../operation/info_train_job/info_train_op.py | 77 ++++++ uaitrain/operation/list_train_job/__init__.py | 0 .../list_train_job/base_list_job_op.py | 104 ++++++++ uaitrain/operation/stop_train_job/__init__.py | 0 .../operation/stop_train_job/base_stop_op.py | 69 ++++++ uaitrain_tool/base_tool.py | 53 ++++ uaitrain_tool/caffe/caffe_tool.py | 22 +- uaitrain_tool/keras/keras_tool.py | 22 +- uaitrain_tool/mxnet/mxnet_tool.py | 22 +- uaitrain_tool/pytorch/pytorch_tool.py | 92 ++++--- uaitrain_tool/tf/tf_tool.py | 23 +- 30 files changed, 1362 insertions(+), 45 deletions(-) create mode 100644 uai/utils/utils_ufs.py create mode 100644 uaitrain/api/create_train_job.py create mode 100644 uaitrain/api/get_train_available_resource.py create mode 100644 uaitrain/api/get_train_job_bill_info.py create mode 100644 
uaitrain/api/get_train_job_list.py create mode 100644 uaitrain/api/get_train_job_running_info.py create mode 100644 uaitrain/api/get_train_tensorboard_url.py create mode 100644 uaitrain/api/modify_train_job_memo.py create mode 100644 uaitrain/api/modify_train_job_name.py create mode 100644 uaitrain/api/remove_train_job.py create mode 100644 uaitrain/api/stop_train_job.py create mode 100644 uaitrain/operation/create_train_job/__init__.py create mode 100644 uaitrain/operation/create_train_job/base_create_op.py create mode 100644 uaitrain/operation/delete_train_job/__init__.py create mode 100644 uaitrain/operation/delete_train_job/base_delete_op.py create mode 100644 uaitrain/operation/info_train_job/__init__.py create mode 100644 uaitrain/operation/info_train_job/info_train_op.py create mode 100644 uaitrain/operation/list_train_job/__init__.py create mode 100644 uaitrain/operation/list_train_job/base_list_job_op.py create mode 100644 uaitrain/operation/stop_train_job/__init__.py create mode 100644 uaitrain/operation/stop_train_job/base_stop_op.py create mode 100644 uaitrain_tool/base_tool.py diff --git a/README.md b/README.md index 39c1403..b6cfbc1 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,13 @@ - Tensorflow (0.11.0 tested) - Tensorflow(1.1.0 tested) - Tensorflow(1.2.0 tested) +- Tensorflow (1.3.0 tested) +- Tensorflow (1.4.0 tested) - MXNet(0.9.5 tested) +- MXNet(0.11.0 tested) - Keras(1.2.0 tested) - Caffe(1.0.0 tested) +- PyTorch(0.2.0 tested) ## How to install 1. 
Install your deep learning python package, such as Tensorflow, MXNet, Keras, Caffe (tested version preferred) @@ -40,4 +44,4 @@ ### UAI Service Docs https://docs.ucloud.cn/ai/uai-service/use ### UAI Train Docs -https://docs.ucloud.cn/ai/uai-train/use \ No newline at end of file +https://docs.ucloud.cn/ai/uai-train/use diff --git a/uai/utils/utils.py b/uai/utils/utils.py index d4a0dd9..6376ba1 100644 --- a/uai/utils/utils.py +++ b/uai/utils/utils.py @@ -3,6 +3,7 @@ import tarfile import json +GATEWAY_DEFAULT='Default' def _verfy_ac(private_key, params): items = params.items() diff --git a/uai/utils/utils_ufs.py b/uai/utils/utils_ufs.py new file mode 100644 index 0000000..ce15a92 --- /dev/null +++ b/uai/utils/utils_ufs.py @@ -0,0 +1,31 @@ +# Copyright 2017 The UAI-SDK Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import re

# Expected shape of a UFS mount point: "<ipv4-like address>:/ufs-<name>".
UFS_MOUNT_POINT_FORMAT = r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\/ufs-\w+'
# Expected shape of a UFS path: one or more "<word>/" segments.
UFS_PATH_FORMAT = r'(\w+\/)+'

# Compiled once at import time instead of on every call.
_UFS_MOUNT_POINT_RE = re.compile(UFS_MOUNT_POINT_FORMAT)
_UFS_PATH_RE = re.compile(UFS_PATH_FORMAT)


def concat_ufs_path(path, mount_point):
    """Join a UFS mount point and a relative path into one UFS location.

    Args:
        path: relative path; must start with one or more ``word/`` segments
            (the pattern is only anchored at the start of the string).
        mount_point: mount point starting with ``x.x.x.x:/ufs-xxx``.

    Returns:
        ``mount_point + '/' + path``, with any trailing ``/`` on the mount
        point removed first so the result never contains ``//`` at the join.

    Raises:
        RuntimeError: if either argument does not match its expected format.
    """
    if _UFS_MOUNT_POINT_RE.match(mount_point) is None:
        raise RuntimeError("UFS mount point should be in format x.x.x.x:/ufs-xxx")

    if _UFS_PATH_RE.match(path) is None:
        raise RuntimeError("UFS path should match xxx/xxx/")

    # The mount-point pattern is not anchored at the end, so a value such as
    # "1.2.3.4:/ufs-abc/" is accepted; strip the slash to avoid "//".
    return mount_point.rstrip('/') + '/' + path
class CreateUAITrainJobOp(BaseUAITrainAPIOp):
    """Compatible with the UAI Train CreateUAITrainJob API func.

    Input:
        pub_key          string(required)  Public key of the user
        priv_key         string(required)  Private key of the user
        project_id       int(optional)     Project ID of the job
        region           string(optional)  Which Region to run the job
        zone             string(optional)  Which Zone in the Region to run the job
        job_name         string(required)  Job name of the job
        work_id          int(required)     The id of the train node; details are
                                           available from GetUAITrainAvailableResourceOp.
                                               1860001, include 1 GPU
                                               1860003, include 4 GPU
                                               etc.
        code_uhub_path   string(required)  Which image in the uhub to run the job
        data_ufile_path  string(required)  The ufile path of the input data
        out_ufile_path   string(required)  The ufile path of the output data
        docker_cmd       string(required)  The cmd used to run the job
        max_exec_time    int(required)     The max exec time of the job; if the job
                                           does not finish in time the system stops it
        business_group   string(optional)  Which business group to run the job
        job_memo         string(optional)  The memo of the job

    Output:
        RetCode          int(required)         Op return code: 0: success, others: error code
        TrainJObID       string(required)      The id of the train job
        Message          string(not required)  Message: error description
    """

    ACTION_NAME = "CreateUAITrainJob"

    # (request parameter, constructor argument, expected type), listed in the
    # order in which _check_args validates them.
    _REQUIRED_PARAMS = (
        ("TrainJobName", "job_name", str),
        ("TrainWorkId", "work_id", int),
        ("CodeUhubPath", "code_uhub_path", str),
        ("DataUfilePath", "data_ufile_path", str),
        ("OutputUfilePath", "out_ufile_path", str),
        ("DockerCmd", "docker_cmd", str),
        ("MaxExecuteTime", "max_exec_time", int),
    )

    def __init__(self, pub_key, priv_key, job_name, work_id, code_uhub_path, data_ufile_path, out_ufile_path,
                 docker_cmd, max_exec_time, business_group="", job_memo="", project_id="",
                 region="", zone=""):
        super(CreateUAITrainJobOp, self).__init__(self.ACTION_NAME,
                                                  pub_key,
                                                  priv_key,
                                                  project_id,
                                                  region,
                                                  zone)
        self.cmd_params["TrainJobName"] = job_name
        self.cmd_params["TrainWorkId"] = work_id
        self.cmd_params["CodeUhubPath"] = code_uhub_path
        self.cmd_params["DataUfilePath"] = data_ufile_path
        self.cmd_params["OutputUfilePath"] = out_ufile_path
        self.cmd_params["DockerCmd"] = docker_cmd
        self.cmd_params["PredictStartTime"] = 0
        self.cmd_params["MaxExecuteTime"] = max_exec_time

        # NOTE(review): the key pair is also sent as ordinary request
        # parameters, so anything that prints or logs cmd_params will expose
        # the private key -- confirm this is intended.
        self.cmd_params["TrainPublicKey"] = pub_key
        self.cmd_params["TrainPrivateKey"] = priv_key

        self.cmd_params["TrainJobMemo"] = job_memo
        self.cmd_params["BusinessGroup"] = business_group

    def _check_args(self):
        """Validate the required request parameters.

        Raises:
            RuntimeError: if a required string is empty or not a str, or a
                required integer is not an int (bool is rejected).
        """
        super(CreateUAITrainJobOp, self)._check_args()

        for param, arg_name, expected in self._REQUIRED_PARAMS:
            value = self.cmd_params[param]
            if expected is str:
                if not isinstance(value, str) or value == "":
                    raise RuntimeError("{0} should be a non-empty str.".format(arg_name))
            elif not isinstance(value, int) or isinstance(value, bool):
                raise RuntimeError("{0} should be an int.".format(arg_name))
class GetUAITrainAvailableResourceOp(BaseUAITrainAPIOp):
    """Compatible with the UAI Train GetUAITrainAvailableResource API func.

    Input:
        pub_key     string(required)  Public key of the user
        priv_key    string(required)  Private key of the user
        project_id  int(optional)     Project ID of the job
        region      string(optional)  Which Region to run the job
        zone        string(optional)  Which Zone in the Region to run the job
        node_type   string(optional)  The type of node, default is 'Work'.
                                          'Work': train node
                                          'PS': param node

    Output:
        RetCode     int(required)         Op return code: 0: success, others: error code
        TotalCount  string(required)      The count of result
        Message     string(not required)  Message: error description
        DataSet     []                    The detailed information of resource
    """

    ACTION_NAME = "GetUAITrainAvailableResource"

    def __init__(self, pub_key, priv_key, node_type='Work', project_id="", region="", zone=""):
        super(GetUAITrainAvailableResourceOp, self).__init__(self.ACTION_NAME,
                                                             pub_key,
                                                             priv_key,
                                                             project_id,
                                                             region,
                                                             zone)
        self.cmd_params["NodeType"] = node_type

    def _check_args(self):
        """Run the base-class checks; this op adds no checks of its own."""
        super(GetUAITrainAvailableResourceOp, self)._check_args()
class GetUAITrainBillInfoOp(BaseUAITrainAPIOp):
    """Compatible with the UAI Train GetUAITrainBillInfo API func.

    Input:
        pub_key     string(required)  Public key of the user
        priv_key    string(required)  Private key of the user
        project_id  int(optional)     Project ID of the job
        region      string(optional)  Which Region to run the job
        zone        string(optional)  Which Zone in the Region to run the job
        beg_time    int(required)     The start time of the bill
        end_time    int(required)     The end time of the bill
        offset      int(optional)     The offset of list
        limit       int(optional)     The max num of returned list; all bills
                                      are returned if it is not set

    NOTE(review): beg_time/end_time are presumably Unix timestamps -- confirm
    against the API documentation.

    Output:
        RetCode           int(required)         Op return code: 0: success, others: error code
        TotalCount        string(required)      The count of result
        TotalExecuteTime  int(required)         Total exec time of all train jobs
        TotalPrice        int(required)         Total price of all train jobs
        Message           string(not required)  Message: error description
        DataSet           []                    The detailed bill information of train jobs
    """

    ACTION_NAME = "GetUAITrainBillInfo"

    def __init__(self, pub_key, priv_key, beg_time, end_time, offset="", limit="", project_id="", region="", zone=""):
        super(GetUAITrainBillInfoOp, self).__init__(self.ACTION_NAME,
                                                    pub_key,
                                                    priv_key,
                                                    project_id,
                                                    region,
                                                    zone)
        self.cmd_params["BeginTime"] = beg_time
        self.cmd_params["EndTime"] = end_time
        self.cmd_params["Offset"] = offset
        self.cmd_params["Limit"] = limit

    def _check_args(self):
        """Validate the billing window.

        Raises:
            RuntimeError: if beg_time or end_time is not an int (bool is
                rejected), or if beg_time is later than end_time.
        """
        super(GetUAITrainBillInfoOp, self)._check_args()

        beg_time = self.cmd_params["BeginTime"]
        end_time = self.cmd_params["EndTime"]

        if not isinstance(beg_time, int) or isinstance(beg_time, bool):
            raise RuntimeError("beg_time should be an int.")
        if not isinstance(end_time, int) or isinstance(end_time, bool):
            raise RuntimeError("end_time should be an int.")

        if beg_time > end_time:
            raise RuntimeError("end_time should be greater than beg_time. end_time: {0}, beg_time: {1}".
                               format(end_time, beg_time))
class GetUAITrainJobListOp(BaseUAITrainAPIOp):
    """Compatible with the UAI Train GetUAITrainJobList API func.

    Input:
        pub_key     string(required)  Public key of the user
        priv_key    string(required)  Private key of the user
        project_id  int(optional)     Project ID of the job
        region      string(optional)  Which Region to run the job
        zone        string(optional)  Which Zone in the Region to run the job
        job_id      string(optional)  Which train job to get info
        offset      int(optional)     The offset of list
        limit       int(optional)     The max num of returned list; all jobs
                                      are returned if it is not set

    Output:
        RetCode     int(required)         Op return code: 0: success, others: error code
        TotalCount  string(required)      The count of result
        Message     string(not required)  Message: error description
        DataSet     []                    The detailed information of train jobs
    """

    ACTION_NAME = "GetUAITrainJobList"

    def __init__(self, pub_key, priv_key, job_id="", offset="", limit="", project_id="", region="", zone=""):
        super(GetUAITrainJobListOp, self).__init__(self.ACTION_NAME,
                                                   pub_key,
                                                   priv_key,
                                                   project_id,
                                                   region,
                                                   zone)
        self.cmd_params["TrainJobId"] = job_id
        self.cmd_params["Offset"] = offset
        self.cmd_params["Limit"] = limit

    def _check_args(self):
        """Run the base-class checks; every argument of this op is optional."""
        super(GetUAITrainJobListOp, self)._check_args()
class GetUAITrainRunningInfoOp(BaseUAITrainAPIOp):
    """Compatible with the UAI Train GetUAITrainRunningInfo API func.

    Input:
        pub_key     string(required)  Public key of the user
        priv_key    string(required)  Private key of the user
        project_id  int(optional)     Project ID of the job
        region      string(optional)  Which Region to run the job
        zone        string(optional)  Which Zone in the Region to run the job
        job_id      string(required)  Job id of the job

    Output:
        RetCode     int(required)         Op return code: 0: success, others: error code
        Action      string(required)      Action name
        Message     string(not required)  Message: error description
        ExecTime    int(not required)     The time that the train job has run
        TotalPrice  int(not required)     The price that the train job has spent
    """

    ACTION_NAME = "GetUAITrainRunningInfo"

    def __init__(self, pub_key, priv_key, job_id, project_id="", region="", zone=""):
        super(GetUAITrainRunningInfoOp, self).__init__(self.ACTION_NAME,
                                                       pub_key,
                                                       priv_key,
                                                       project_id,
                                                       region,
                                                       zone)
        self.cmd_params["TrainJobId"] = job_id

    def _check_args(self):
        """Validate job_id.

        Raises:
            RuntimeError: if job_id is empty or not a str.
        """
        super(GetUAITrainRunningInfoOp, self)._check_args()

        job_id = self.cmd_params["TrainJobId"]
        if not isinstance(job_id, str) or job_id == "":
            raise RuntimeError("job_id should be a non-empty str.")
class GetUAITrainTensorboardUrlOp(BaseUAITrainAPIOp):
    """Compatible with the UAI Train GetUAITrainTensorboardUrl API func.

    Input:
        pub_key     string(required)  Public key of the user
        priv_key    string(required)  Private key of the user
        project_id  int(optional)     Project ID of the job
        region      string(optional)  Which Region to run the job
        zone        string(optional)  Which Zone in the Region to run the job
        job_id      string(required)  Job id of the job

    Output:
        RetCode     int(required)         Op return code: 0: success, others: error code
        Action      string(required)      Action name
        Message     string(not required)  Message: error description
    """

    ACTION_NAME = "GetUAITrainTensorboardUrl"

    def __init__(self, pub_key, priv_key, job_id, project_id="", region="", zone=""):
        super(GetUAITrainTensorboardUrlOp, self).__init__(self.ACTION_NAME,
                                                          pub_key,
                                                          priv_key,
                                                          project_id,
                                                          region,
                                                          zone)
        self.cmd_params["TrainJobId"] = job_id

    def _check_args(self):
        """Validate job_id.

        Raises:
            RuntimeError: if job_id is empty or not a str.
        """
        super(GetUAITrainTensorboardUrlOp, self)._check_args()

        job_id = self.cmd_params["TrainJobId"]
        if not isinstance(job_id, str) or job_id == "":
            raise RuntimeError("job_id should be a non-empty str.")


class ModifyUAITrainJobMemoOp(BaseUAITrainAPIOp):
    """Compatible with the UAI Train ModifyUAITrainJobMemo API func.

    Input:
        pub_key     string(required)  Public key of the user
        priv_key    string(required)  Private key of the user
        project_id  int(optional)     Project ID of the job
        region      string(optional)  Which Region to run the job
        zone        string(optional)  Which Zone in the Region to run the job
        job_id      string(required)  The id of the train job
        job_memo    string(required)  The new memo of the train job

    Output:
        RetCode     int(required)         Op return code: 0: success, others: error code
        Action      string(required)      Action name
        Message     string(not required)  Message: error description
    """

    ACTION_NAME = "ModifyUAITrainJobMemo"

    def __init__(self, pub_key, priv_key, job_id, job_memo, project_id="", region="", zone=""):
        super(ModifyUAITrainJobMemoOp, self).__init__(self.ACTION_NAME,
                                                      pub_key,
                                                      priv_key,
                                                      project_id,
                                                      region,
                                                      zone)
        self.cmd_params["TrainJobId"] = job_id
        self.cmd_params["TrainJobMemo"] = job_memo

    def _check_args(self):
        """Validate job_id and job_memo.

        Raises:
            RuntimeError: if either value is empty or not a str.
        """
        super(ModifyUAITrainJobMemoOp, self)._check_args()

        job_id = self.cmd_params["TrainJobId"]
        if not isinstance(job_id, str) or job_id == "":
            raise RuntimeError("job_id should be a non-empty str.")

        job_memo = self.cmd_params["TrainJobMemo"]
        if not isinstance(job_memo, str) or job_memo == "":
            raise RuntimeError("job_memo should be a non-empty str.")
class ModifyUAITrainJobNameOp(BaseUAITrainAPIOp):
    """Compatible with the UAI Train ModifyUAITrainJobName API func.

    Input:
        pub_key     string(required)  Public key of the user
        priv_key    string(required)  Private key of the user
        project_id  int(optional)     Project ID of the job
        region      string(optional)  Which Region to run the job
        zone        string(optional)  Which Zone in the Region to run the job
        job_id      string(required)  Job id of the job
        job_name    string(required)  Job name of the job

    Output:
        RetCode     int(required)         Op return code: 0: success, others: error code
        Action      string(required)      Action name
        Message     string(not required)  Message: error description
    """

    ACTION_NAME = "ModifyUAITrainJobName"

    def __init__(self, pub_key, priv_key, job_id, job_name, project_id="", region="", zone=""):
        super(ModifyUAITrainJobNameOp, self).__init__(self.ACTION_NAME,
                                                      pub_key,
                                                      priv_key,
                                                      project_id,
                                                      region,
                                                      zone)
        self.cmd_params["TrainJobId"] = job_id
        self.cmd_params["TrainJobName"] = job_name

    def _check_args(self):
        """Validate job_id and job_name.

        Raises:
            RuntimeError: if either value is empty or not a str.
        """
        super(ModifyUAITrainJobNameOp, self)._check_args()

        job_id = self.cmd_params["TrainJobId"]
        if not isinstance(job_id, str) or job_id == "":
            raise RuntimeError("job_id should be a non-empty str.")

        job_name = self.cmd_params["TrainJobName"]
        if not isinstance(job_name, str) or job_name == "":
            raise RuntimeError("job_name should be a non-empty str.")
class RemoveUAITrainJobOp(BaseUAITrainAPIOp):
    """Compatible with the UAI Train RemoveUAITrainJob API func.

    Input:
        pub_key     string(required)  Public key of the user
        priv_key    string(required)  Private key of the user
        project_id  int(optional)     Project ID of the job
        region      string(optional)  Which Region to run the job
        zone        string(optional)  Which Zone in the Region to run the job
        job_id      string(required)  Job id of the job

    Output:
        RetCode     int(required)         Op return code: 0: success, others: error code
        Action      string(required)      Action name
        Message     string(not required)  Message: error description
    """

    ACTION_NAME = "RemoveUAITrainJob"

    def __init__(self, pub_key, priv_key, job_id, project_id="", region="", zone=""):
        super(RemoveUAITrainJobOp, self).__init__(self.ACTION_NAME,
                                                  pub_key,
                                                  priv_key,
                                                  project_id,
                                                  region,
                                                  zone)
        self.cmd_params["TrainJobId"] = job_id

    def _check_args(self):
        """Validate job_id.

        Raises:
            RuntimeError: if job_id is empty or not a str.
        """
        super(RemoveUAITrainJobOp, self)._check_args()

        job_id = self.cmd_params["TrainJobId"]
        if not isinstance(job_id, str) or job_id == "":
            raise RuntimeError("job_id should be a non-empty str.")
class StopUAITrainJobOp(BaseUAITrainAPIOp):
    """Compatible with the UAI Train StopUAITrainJob API func.

    Input:
        pub_key     string(required)  Public key of the user
        priv_key    string(required)  Private key of the user
        project_id  int(optional)     Project ID of the job
        region      string(optional)  Which Region to stop the job
        zone        string(optional)  Which Zone in the Region to stop the job
        job_id      string(required)  Which train job to stop

    Output:
        RetCode     int(required)         Op return code: 0: success, others: error code
        Action      string(required)      Action name
        Message     string(not required)  Message: error description
    """

    ACTION_NAME = "StopUAITrainJob"

    def __init__(self, pub_key, priv_key, job_id, project_id="", region="", zone=""):
        super(StopUAITrainJobOp, self).__init__(self.ACTION_NAME,
                                                pub_key,
                                                priv_key,
                                                project_id,
                                                region,
                                                zone)
        self.cmd_params["TrainJobId"] = job_id

    def _check_args(self):
        """Validate job_id.

        Raises:
            RuntimeError: if job_id is empty or not a str.
        """
        super(StopUAITrainJobOp, self)._check_args()

        job_id = self.cmd_params["TrainJobId"]
        if not isinstance(job_id, str) or job_id == "":
            raise RuntimeError("job_id should be a non-empty str.")
0000000..e69de29
diff --git a/uaitrain/operation/create_train_job/base_create_op.py b/uaitrain/operation/create_train_job/base_create_op.py
new file mode 100644
index 0000000..c458238
--- /dev/null
+++ b/uaitrain/operation/create_train_job/base_create_op.py
@@ -0,0 +1,229 @@
+# Copyright 2017 The UAI-SDK Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import sys
+import os
+import argparse
+
+from uai.utils.utils import GATEWAY_DEFAULT
+from uai.utils.logger import uai_logger
+from uaitrain.operation.base_op import BaseUAITrainOp
+from uaitrain.api.create_train_job import CreateUAITrainJobOp
+from uaitrain.api.get_train_available_resource import GetUAITrainAvailableResourceOp
+
+class BaseUAITrainCreateTrainJobOp(BaseUAITrainOp):
+    """The 'create' sub-command: declares its CLI options and submits a train job."""
+
+    # Supported --node_type values mapped to the P40 accelerator count they request.
+    _P40_AMOUNT_BY_NODE_TYPE = {"1-P40": 1, "2-P40": 2, "4-P40": 4}
+
+    def __init__(self, parser):
+        super(BaseUAITrainCreateTrainJobOp, self).__init__(parser)
+
+    def _add_create_info_args(self, create_parser):
+        """Register --job_name, --job_memo and --business_group."""
+        info_parser = create_parser.add_argument_group(
+            'Job Info Params', 'Job Infos')
+        info_parser.add_argument(
+            '--job_name',
+            type=str,
+            required=True,
+            help='The train job name')
+        info_parser.add_argument(
+            '--job_memo',
+            type=str,
+            required=False,
+            default=GATEWAY_DEFAULT,
+            help='The train job memo')
+        info_parser.add_argument(
+            '--business_group',
+            type=str,
+            required=False,
+            default=GATEWAY_DEFAULT,
+            help='The train business group train job belong to')
+
+    def _add_create_config_args(self, create_parser):
+        """Register --node_type."""
+        node_parser = create_parser.add_argument_group(
+            'Job Executor Params', 'Job Executor information')
+        node_parser.add_argument(
+            '--node_type',
+            type=str,
+            required=True,
+            help='The training node used: e.g., 1-P40')
+
+    def _add_create_job_args(self, create_parser):
+        """Register --code_uhub_path, --docker_cmd and --max_exec_time."""
+        run_parser = create_parser.add_argument_group(
+            'Job Runninng Params', 'Job Execution infos')
+        run_parser.add_argument(
+            '--code_uhub_path',
+            type=str,
+            required=True,
+            help='The uhub docker path of training code')
+        run_parser.add_argument(
+            '--docker_cmd',
+            type=str,
+            required=True,
+            help='The running python cmd inside docker')
+        run_parser.add_argument(
+            '--max_exec_time',
+            type=int,
+            required=True,
+            help='The maximun running time in hours, should larger than 6 hours')
+
+    def _add_create_ufile_args(self, create_parser):
+        """Register the optional ufile data/output path options."""
+        ufile_parser = create_parser.add_argument_group(
+            'Ufile Params', 'Data/Output Stored in Ufile')
+        ufile_parser.add_argument(
+            '--data_ufile_path',
+            type=str,
+            required=False,
+            help='The ufile path store the data')
+        ufile_parser.add_argument(
+            '--output_ufile_path',
+            type=str,
+            required=False,
+            help='The ufile path store the output')
+
+    def _add_create_ufs_args(self, create_parser):
+        """Register the optional ufs path / mount point options."""
+        ufs_parser = create_parser.add_argument_group(
+            'UFS Params', 'Data/Output Stored in UFS')
+        ufs_parser.add_argument(
+            '--data_ufs_path',
+            type=str,
+            required=False,
+            help='The ufs path storing the data')
+        ufs_parser.add_argument(
+            '--data_ufs_mount_point',
+            type=str,
+            required=False,
+            help='The ufs mount point for the data')
+        ufs_parser.add_argument(
+            '--output_ufs_path',
+            type=str,
+            required=False,
+            help='The ufs path storing the output')
+        ufs_parser.add_argument(
+            '--output_ufs_mount_point',
+            type=str,
+            required=False,
+            help='The ufs mount point for the output')
+
+    def _add_args(self):
+        """Create the 'create' sub-parser and attach every option group to it."""
+        parser = self.parser.add_parser('create', help='Create UAI Train Job')
+        self.create_parser = parser
+        self._add_account_args(parser)
+        self._add_create_info_args(parser)
+        self._add_create_config_args(parser)
+        self._add_create_job_args(parser)
+        self._add_create_ufile_args(parser)
+        self._add_create_ufs_args(parser)
+
+    def _resolve_path(self, args, kind):
+        """Return the ufile path, else the combined ufs path, for kind 'data' or 'output'."""
+        # argparse stores omitted optional args as None, so the keys always exist;
+        # the values (not key presence) decide which storage backend was chosen.
+        ufile_path = args.get(kind + '_ufile_path')
+        if ufile_path:
+            return ufile_path
+        ufs_path = args.get(kind + '_ufs_path')
+        if not ufs_path:
+            raise RuntimeError("Need either {0}_ufile_path or {0}_ufs_path".format(kind))
+        ufs_mount = args.get(kind + '_ufs_mount_point')
+        if not ufs_mount:
+            raise RuntimeError("Need {0}_ufs_mount_point".format(kind))
+        # NOTE(review): assumes concat_ufs_path is defined in uai/utils/utils_ufs.py - confirm.
+        from uai.utils.utils_ufs import concat_ufs_path
+        return concat_ufs_path(ufs_path, ufs_mount)
+
+    def _parse_args(self, args):
+        """Copy the parsed options onto self; RuntimeError if a data/output path is missing."""
+        super(BaseUAITrainCreateTrainJobOp, self)._parse_args(args)
+        # info
+        self.job_name = args['job_name']
+        self.job_memo = args.get('job_memo', "")
+        self.business_group = args.get('business_group', "")
+        # config
+        self.node_type = args['node_type']
+        # job
+        self.code_uhub_path = args['code_uhub_path']
+        self.docker_cmd = args['docker_cmd']
+        self.max_exec_time = args['max_exec_time']
+        # data / output
+        self.data_path = self._resolve_path(args, 'data')
+        self.output_path = self._resolve_path(args, 'output')
+        return True
+
+    def _check_res(self):
+        """Return the NodeId matching self.node_type, or -1 when it is not offered.
+
+        Raises RuntimeError when the available-resource API call fails.
+        """
+        get_train_res_op = GetUAITrainAvailableResourceOp(self.pub_key,
+                                                          self.pri_key)
+        succ, result = get_train_res_op.call_api()
+        if succ is False:
+            raise RuntimeError("Error get Training Resouce Type")
+
+        data_set = result['DataSet']
+        amount = self._P40_AMOUNT_BY_NODE_TYPE.get(self.node_type)
+        if amount is not None:
+            for data in data_set:
+                if data['AcceleratorVersion'] == 'p40' and data['AcceleratorAmount'] == amount:
+                    return data['NodeId']
+
+        print("Required Type {0} not exist".format(self.node_type))
+        print("Now only support {0}".format(data_set))
+        return -1
+
+    def cmd_run(self, args):
+        """Create the train job; True on success, False when a step reports failure."""
+        if self._parse_args(args) == False:
+            return False
+
+        node_id = self._check_res()
+        if node_id < 0:
+            return False
+
+        create_op = CreateUAITrainJobOp(
+            pub_key=self.pub_key,
+            priv_key=self.pri_key,
+            job_name=self.job_name,
+            work_id=node_id,
+            code_uhub_path=self.code_uhub_path,
+            data_ufile_path=self.data_path,
+            out_ufile_path=self.output_path,
+            docker_cmd=self.docker_cmd,
+            max_exec_time=self.max_exec_time,
+            business_group=self.business_group,
+            job_memo=self.job_memo,
+            project_id=self.project_id,
+            region=self.region,
+            zone=self.zone)
+
+        succ, resp = create_op.call_api()
+        if succ is False:
+            print("Error call create train job")
+            return False
+
+        # Other ops spell this key 'TrainJobId'; fall back to the legacy 'TrainJObID'.
+        job_id = resp['TrainJobId'] if 'TrainJobId' in resp else resp['TrainJObID']
+        print('Your Job ID is: {0}'.format(job_id))
+        return True
+
diff --git a/uaitrain/operation/delete_train_job/__init__.py b/uaitrain/operation/delete_train_job/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/uaitrain/operation/delete_train_job/base_delete_op.py b/uaitrain/operation/delete_train_job/base_delete_op.py
new file mode 100644
index 0000000..7080b8d
--- /dev/null
+++ b/uaitrain/operation/delete_train_job/base_delete_op.py
@@ -0,0 +1,69 @@
+# Copyright 2017 The UAI-SDK Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import sys
+import os
+import argparse
+import json
+import subprocess
+
+from uai.utils.logger import uai_logger
+from uaitrain.operation.base_op import BaseUAITrainOp
+from uaitrain.api.remove_train_job import RemoveUAITrainJobOp
+
+class BaseUAITrainDeleteTrainJobOp(BaseUAITrainOp):
+    """The 'delete' sub-command: removes one UAI Train job identified by --job_id."""
+
+    def __init__(self, parser):
+        super(BaseUAITrainDeleteTrainJobOp, self).__init__(parser)
+
+    def _add_delete_info_args(self, delete_parser):
+        info_parser = delete_parser.add_argument_group(
+            'Job Info Params', 'Job Infos')
+        info_parser.add_argument(
+            '--job_id',
+            type=str,
+            required=True,
+            help='The job_id of the train job to delete')
+
+    def _add_args(self):
+        parser = self.parser.add_parser('delete', help='Delete UAI Train Job')
+        self.delete_parser = parser
+        self._add_account_args(parser)
+        self._add_delete_info_args(parser)
+
+    def _parse_args(self, args):
+        super(BaseUAITrainDeleteTrainJobOp, self)._parse_args(args)
+        self.job_id = args['job_id']
+        return True
+
+    def cmd_run(self, args):
+        """Call the remove API for self.job_id; True on success, False on failure."""
+        if self._parse_args(args) == False:
+            return False
+
+        remove_op = RemoveUAITrainJobOp(
+            pub_key=self.pub_key,
+            priv_key=self.pri_key,
+            job_id=self.job_id,
+            project_id=self.project_id,
+            region=self.region,
+            zone=self.zone)
+        succ, resp = remove_op.call_api()
+        if succ is False:
+            print("Error delete job {0}, check your job_id".format(self.job_id))
+            return False
+        print("Success delete job {0}".format(self.job_id))
+        return True
diff --git a/uaitrain/operation/info_train_job/__init__.py
b/uaitrain/operation/info_train_job/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/uaitrain/operation/info_train_job/info_train_op.py b/uaitrain/operation/info_train_job/info_train_op.py
new file mode 100644
index 0000000..a54e467
--- /dev/null
+++ b/uaitrain/operation/info_train_job/info_train_op.py
@@ -0,0 +1,77 @@
+# Copyright 2017 The UAI-SDK Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import sys
+import os
+import argparse
+import json
+import subprocess
+
+from uai.utils.logger import uai_logger
+from uaitrain.operation.base_op import BaseUAITrainOp
+from uaitrain.api.get_train_job_running_info import GetUAITrainRunningInfoOp
+
+class BaseUAITrainRunningJobInfoOp(BaseUAITrainOp):
+    """The 'info' sub-command: prints exec time and cost of the job given by --job_id."""
+
+    def __init__(self, parser):
+        super(BaseUAITrainRunningJobInfoOp, self).__init__(parser)
+
+    def _add_job_info_args(self, info_parser):
+        job_info_parser = info_parser.add_argument_group(
+            'Job Info Params', 'Job Infos')
+        job_info_parser.add_argument(
+            '--job_id',
+            type=str,
+            required=True,
+            help='The job_id of the train job to show info for')
+
+    def _add_args(self):
+        parser = self.parser.add_parser('info', help='Show UAI Train Job Info')
+        self.info_parser = parser
+        self._add_account_args(parser)
+        self._add_job_info_args(parser)
+
+    def _parse_args(self, args):
+        super(BaseUAITrainRunningJobInfoOp, self)._parse_args(args)
+        self.job_id = args['job_id']
+        return True
+
+    def _format_info(self, job_id, resp):
+        """Print job id, ExecTime and TotalPrice / 100 (presumably 1/100 units - confirm)."""
+        print('JOB_ID: {0}; ExecTime: {1} secs; Total Cost: {2}'.format(
+            job_id,
+            resp['ExecTime'],
+            float(resp['TotalPrice']) / 100))
+
+    def cmd_run(self, args):
+        """Fetch and print the running info; True on success, False on failure."""
+        if self._parse_args(args) == False:
+            return False
+
+        info_op = GetUAITrainRunningInfoOp(
+            pub_key=self.pub_key,
+            priv_key=self.pri_key,
+            job_id=self.job_id,
+            project_id=self.project_id,
+            region=self.region,
+            zone=self.zone)
+
+        succ, resp = info_op.call_api()
+        if succ is False:
+            print("Error get job info of {0}, check your job_id".format(self.job_id))
+            return False
+        self._format_info(self.job_id, resp)
+        return True
diff --git a/uaitrain/operation/list_train_job/__init__.py b/uaitrain/operation/list_train_job/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/uaitrain/operation/list_train_job/base_list_job_op.py b/uaitrain/operation/list_train_job/base_list_job_op.py
new file mode 100644
index 0000000..9660898
--- /dev/null
+++ b/uaitrain/operation/list_train_job/base_list_job_op.py
@@ -0,0 +1,104 @@
+# Copyright 2017 The UAI-SDK Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import sys
+import os
+import argparse
+import datetime
+
+from uai.utils.utils import GATEWAY_DEFAULT
+from uai.utils.logger import uai_logger
+from uaitrain.operation.base_op import BaseUAITrainOp
+from uaitrain.api.get_train_job_list import GetUAITrainJobListOp
+
+class BaseUAITrainListTrainJobOp(BaseUAITrainOp):
+    """The 'list' sub-command: prints one summary line per returned train job."""
+
+    def __init__(self, parser):
+        super(BaseUAITrainListTrainJobOp, self).__init__(parser)
+
+    def _add_list_info_args(self, list_parser):
+        info_parser = list_parser.add_argument_group(
+            'Job Info Params', 'Job Infos')
+        info_parser.add_argument(
+            '--job_id',
+            type=str,
+            required=False,
+            default='',
+            help='Show the basic info of the given job_id')
+        info_parser.add_argument(
+            '--limit',
+            type=int,
+            required=False,
+            default=10,
+            help='Number of jobs show in this list')
+
+    def _add_args(self):
+        parser = self.parser.add_parser('list', help='List UAI Train Job')
+        self.list_parser = parser
+        self._add_account_args(parser)
+        self._add_list_info_args(parser)
+
+    def _parse_args(self, args):
+        super(BaseUAITrainListTrainJobOp, self)._parse_args(args)
+        self.job_id = args['job_id']
+        self.limit = args['limit']
+        # NOTE(review): the listing always starts at offset 1 - confirm the API is 1-based.
+        self.offset = 1
+        return True
+
+    def _format_jobinfo(self, job):
+        """Print one job record; the three timestamps are rendered via fromtimestamp()."""
+        create_time = job['CreateTime']
+        start_time = job['StartTime']
+        end_time = job['EndTime']
+        job_name = job['TrainJobName']
+        job_id = job['TrainJobId']
+        business_group = job['BusinessGroup']
+        status = job['Status']
+
+        print('JOB_NAME: {0}; JOB_ID: {1}; BUSINESS_ID: {2}; STATUS: {3}; CREATE_TIME: {4}; START_TIME: {5}; END_TIME: {6}'.format(
+            job_name,
+            job_id,
+            business_group,
+            status,
+            datetime.datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M:%S'),
+            datetime.datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S'),
+            datetime.datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S')))
+
+    def cmd_run(self, args):
+        """Query the job list and print every entry; True on success, False on failure."""
+        if self._parse_args(args) == False:
+            return False
+
+        list_op = GetUAITrainJobListOp(
+            pub_key=self.pub_key,
+            priv_key=self.pri_key,
+            job_id=self.job_id,
+            offset=self.offset,
+            limit=self.limit,
+            project_id=self.project_id,
+            region=self.region,
+            zone=self.zone)
+
+        succ, resp = list_op.call_api()
+        if succ is False:
+            uai_logger.error("Error call list train jobs")
+            return False
+
+        for job in resp['DataSet']:
+            self._format_jobinfo(job)
+        return True
+
diff --git a/uaitrain/operation/stop_train_job/__init__.py b/uaitrain/operation/stop_train_job/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/uaitrain/operation/stop_train_job/base_stop_op.py b/uaitrain/operation/stop_train_job/base_stop_op.py
new file mode 100644
index 0000000..acf87fa
--- /dev/null
+++ b/uaitrain/operation/stop_train_job/base_stop_op.py
@@ -0,0 +1,69 @@
+# Copyright 2017 The UAI-SDK Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import sys
+import os
+import argparse
+import json
+import subprocess
+
+from uai.utils.logger import uai_logger
+from uaitrain.operation.base_op import BaseUAITrainOp
+from uaitrain.api.stop_train_job import StopUAITrainJobOp
+
+class BaseUAITrainStopTrainJobOp(BaseUAITrainOp):
+    """The 'stop' sub-command: stops one UAI Train job identified by --job_id."""
+
+    def __init__(self, parser):
+        super(BaseUAITrainStopTrainJobOp, self).__init__(parser)
+
+    def _add_stop_info_args(self, stop_parser):
+        info_parser = stop_parser.add_argument_group(
+            'Job Info Params', 'Job Infos')
+        info_parser.add_argument(
+            '--job_id',
+            type=str,
+            required=True,
+            help='The job_id of the train job to stop')
+
+    def _add_args(self):
+        parser = self.parser.add_parser('stop', help='Stop UAI Train Job')
+        self.stop_parser = parser
+        self._add_account_args(parser)
+        self._add_stop_info_args(parser)
+
+    def _parse_args(self, args):
+        super(BaseUAITrainStopTrainJobOp, self)._parse_args(args)
+        self.job_id = args['job_id']
+        return True
+
+    def cmd_run(self, args):
+        """Call the stop API for self.job_id; True on success, False on failure."""
+        if self._parse_args(args) == False:
+            return False
+
+        stop_op = StopUAITrainJobOp(
+            pub_key=self.pub_key,
+            priv_key=self.pri_key,
+            job_id=self.job_id,
+            project_id=self.project_id,
+            region=self.region,
+            zone=self.zone)
+        succ, resp = stop_op.call_api()
+        if succ is False:
+            print("Error call stop train job {0}".format(self.job_id))
+            return False
+        print("Success stop job {0}".format(self.job_id))
+        return True
diff --git a/uaitrain_tool/base_tool.py b/uaitrain_tool/base_tool.py
new file mode 100644
index 0000000..33bd243
--- /dev/null
+++ b/uaitrain_tool/base_tool.py
@@ -0,0 +1,53 @@
+# Copyright 2017 The UAI-SDK Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import sys
+import os
+import argparse
+import time
+
+from uaitrain.operation.create_train_job.base_create_op import BaseUAITrainCreateTrainJobOp
+from uaitrain.operation.stop_train_job.base_stop_op import BaseUAITrainStopTrainJobOp
+from uaitrain.operation.delete_train_job.base_delete_op import BaseUAITrainDeleteTrainJobOp
+from uaitrain.operation.list_train_job.base_list_job_op import BaseUAITrainListTrainJobOp
+from uaitrain.operation.info_train_job.info_train_op import BaseUAITrainRunningJobInfoOp
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='UAI Train Base Tool',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    subparsers = parser.add_subparsers(dest='commands', help='commands')
+
+    create_op = BaseUAITrainCreateTrainJobOp(subparsers)
+    stop_op = BaseUAITrainStopTrainJobOp(subparsers)
+    delete_op = BaseUAITrainDeleteTrainJobOp(subparsers)
+    list_op = BaseUAITrainListTrainJobOp(subparsers)
+    info_op = BaseUAITrainRunningJobInfoOp(subparsers)
+    cmd_args = vars(parser.parse_args())
+    # Dispatch on the sub-command name argparse stored under 'commands'.
+    if cmd_args['commands'] == 'create':
+        create_op.cmd_run(cmd_args)
+    elif cmd_args['commands'] == 'stop':
+        stop_op.cmd_run(cmd_args)
+    elif cmd_args['commands'] == 'delete':
+        delete_op.cmd_run(cmd_args)
+    elif cmd_args['commands'] == 'list':
+        list_op.cmd_run(cmd_args)
+    elif cmd_args['commands'] == 'info':
+        info_op.cmd_run(cmd_args)
+    else:
+        print("UAI Train Base Tool Only Support General operations, please use python
base_tool.py -h to check") + diff --git a/uaitrain_tool/caffe/caffe_tool.py b/uaitrain_tool/caffe/caffe_tool.py index 533c7df..6f1d800 100644 --- a/uaitrain_tool/caffe/caffe_tool.py +++ b/uaitrain_tool/caffe/caffe_tool.py @@ -18,6 +18,11 @@ import argparse from uaitrain.operation.pack_docker_image.caffe_pack_op import CaffeUAITrainDockerImagePackOp +from uaitrain.operation.create_train_job.base_create_op import BaseUAITrainCreateTrainJobOp +from uaitrain.operation.stop_train_job.base_stop_op import BaseUAITrainStopTrainJobOp +from uaitrain.operation.delete_train_job.base_delete_op import BaseUAITrainDeleteTrainJobOp +from uaitrain.operation.list_train_job.base_list_job_op import BaseUAITrainListTrainJobOp +from uaitrain.operation.info_train_job.info_train_op import BaseUAITrainRunningJobInfoOp if __name__ == '__main__': parser = argparse.ArgumentParser( @@ -27,10 +32,25 @@ subparsers = parser.add_subparsers(dest='commands', help='commands') pack_op = CaffeUAITrainDockerImagePackOp(subparsers) + create_op = BaseUAITrainCreateTrainJobOp(subparsers) + stop_op = BaseUAITrainStopTrainJobOp(subparsers) + delete_op = BaseUAITrainDeleteTrainJobOp(subparsers) + list_op = BaseUAITrainListTrainJobOp(subparsers) + info_op = BaseUAITrainRunningJobInfoOp(subparsers) cmd_args = vars(parser.parse_args()) if cmd_args['commands'] == 'pack': pack_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'create': + create_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'stop': + stop_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'delete': + delete_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'list': + list_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'info': + info_op.cmd_run(cmd_args) else: - print("UAI Train Deploy Tool Only Support Packing Docker Images Now") + print("Unknown CMD, please use python caffe_tool.py -h to check") diff --git a/uaitrain_tool/keras/keras_tool.py b/uaitrain_tool/keras/keras_tool.py index fefc9c3..56d05fa 100644 ---
a/uaitrain_tool/keras/keras_tool.py +++ b/uaitrain_tool/keras/keras_tool.py @@ -18,6 +18,11 @@ import argparse from uaitrain.operation.pack_docker_image.keras_pack_op import KerasUAITrainDockerImagePackOp +from uaitrain.operation.create_train_job.base_create_op import BaseUAITrainCreateTrainJobOp +from uaitrain.operation.stop_train_job.base_stop_op import BaseUAITrainStopTrainJobOp +from uaitrain.operation.delete_train_job.base_delete_op import BaseUAITrainDeleteTrainJobOp +from uaitrain.operation.list_train_job.base_list_job_op import BaseUAITrainListTrainJobOp +from uaitrain.operation.info_train_job.info_train_op import BaseUAITrainRunningJobInfoOp if __name__ == '__main__': parser = argparse.ArgumentParser( @@ -27,10 +32,25 @@ subparsers = parser.add_subparsers(dest='commands', help='commands') pack_op = KerasUAITrainDockerImagePackOp(subparsers) + create_op = BaseUAITrainCreateTrainJobOp(subparsers) + stop_op = BaseUAITrainStopTrainJobOp(subparsers) + delete_op = BaseUAITrainDeleteTrainJobOp(subparsers) + list_op = BaseUAITrainListTrainJobOp(subparsers) + info_op = BaseUAITrainRunningJobInfoOp(subparsers) cmd_args = vars(parser.parse_args()) if cmd_args['commands'] == 'pack': pack_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'create': + create_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'stop': + stop_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'delete': + delete_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'list': + list_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'info': + info_op.cmd_run(cmd_args) else: - print("UAI Train Deploy Tool Only Support Packing Docker Images Now") + print("Unknown CMD, please use python keras_tool.py -h to check") diff --git a/uaitrain_tool/mxnet/mxnet_tool.py b/uaitrain_tool/mxnet/mxnet_tool.py index 05fc652..cfa29e5 100644 --- a/uaitrain_tool/mxnet/mxnet_tool.py +++ b/uaitrain_tool/mxnet/mxnet_tool.py @@ -18,6 +18,11 @@ import argparse from uaitrain.operation.pack_docker_image.mxnet_pack_op
import MXNetUAITrainDockerImagePackOp +from uaitrain.operation.create_train_job.base_create_op import BaseUAITrainCreateTrainJobOp +from uaitrain.operation.stop_train_job.base_stop_op import BaseUAITrainStopTrainJobOp +from uaitrain.operation.delete_train_job.base_delete_op import BaseUAITrainDeleteTrainJobOp +from uaitrain.operation.list_train_job.base_list_job_op import BaseUAITrainListTrainJobOp +from uaitrain.operation.info_train_job.info_train_op import BaseUAITrainRunningJobInfoOp if __name__ == '__main__': parser = argparse.ArgumentParser( @@ -27,10 +32,25 @@ subparsers = parser.add_subparsers(dest='commands', help='commands') pack_op = MXNetUAITrainDockerImagePackOp(subparsers) + create_op = BaseUAITrainCreateTrainJobOp(subparsers) + stop_op = BaseUAITrainStopTrainJobOp(subparsers) + delete_op = BaseUAITrainDeleteTrainJobOp(subparsers) + list_op = BaseUAITrainListTrainJobOp(subparsers) + info_op = BaseUAITrainRunningJobInfoOp(subparsers) cmd_args = vars(parser.parse_args()) if cmd_args['commands'] == 'pack': pack_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'create': + create_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'stop': + stop_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'delete': + delete_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'list': + list_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'info': + info_op.cmd_run(cmd_args) else: - print("UAI Train Deploy Tool Only Support Packing Docker Images Now") + print("Unknown CMD, please use python mxnet_tool.py -h to check") diff --git a/uaitrain_tool/pytorch/pytorch_tool.py b/uaitrain_tool/pytorch/pytorch_tool.py index da7bd14..15b2c0b 100644 --- a/uaitrain_tool/pytorch/pytorch_tool.py +++ b/uaitrain_tool/pytorch/pytorch_tool.py @@ -1,36 +1,56 @@ -# Copyright 2017 The UAI-SDK Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import sys -import os -import argparse -import time - -from uaitrain.operation.pack_docker_image.pytorch_pack_op import PytorchUAITrainDockerImagePackOp - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='AI PyTorch Arch Deployer', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - subparsers = parser.add_subparsers(dest='commands', help='commands') - - pack_op = PytorchUAITrainDockerImagePackOp(subparsers) - cmd_args = vars(parser.parse_args()) - - if cmd_args['commands'] == 'pack': - pack_op.cmd_run(cmd_args) - else: - print("UAI Train Deploy Tool Only Support Packing Docker Images Now") \ No newline at end of file +# Copyright 2017 The UAI-SDK Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+import sys
+import os
+import argparse
+import time
+
+from uaitrain.operation.pack_docker_image.pytorch_pack_op import PytorchUAITrainDockerImagePackOp
+from uaitrain.operation.create_train_job.base_create_op import BaseUAITrainCreateTrainJobOp
+from uaitrain.operation.stop_train_job.base_stop_op import BaseUAITrainStopTrainJobOp
+from uaitrain.operation.delete_train_job.base_delete_op import BaseUAITrainDeleteTrainJobOp
+from uaitrain.operation.list_train_job.base_list_job_op import BaseUAITrainListTrainJobOp
+from uaitrain.operation.info_train_job.info_train_op import BaseUAITrainRunningJobInfoOp
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='AI PyTorch Arch Deployer',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    subparsers = parser.add_subparsers(dest='commands', help='commands')
+
+    pack_op = PytorchUAITrainDockerImagePackOp(subparsers)
+    create_op = BaseUAITrainCreateTrainJobOp(subparsers)
+    stop_op = BaseUAITrainStopTrainJobOp(subparsers)
+    delete_op = BaseUAITrainDeleteTrainJobOp(subparsers)
+    list_op = BaseUAITrainListTrainJobOp(subparsers)
+    info_op = BaseUAITrainRunningJobInfoOp(subparsers)
+    cmd_args = vars(parser.parse_args())
+    # Dispatch on the sub-command name argparse stored under 'commands'.
+    if cmd_args['commands'] == 'pack':
+        pack_op.cmd_run(cmd_args)
+    elif cmd_args['commands'] == 'create':
+        create_op.cmd_run(cmd_args)
+    elif cmd_args['commands'] == 'stop':
+        stop_op.cmd_run(cmd_args)
+    elif cmd_args['commands'] == 'delete':
+        delete_op.cmd_run(cmd_args)
+    elif cmd_args['commands'] == 'list':
+        list_op.cmd_run(cmd_args)
+    elif cmd_args['commands'] == 'info':
+        info_op.cmd_run(cmd_args)
+    else:
+        print("Unknown CMD, please use python pytorch_tool.py -h to check")
\ No newline at end of file
diff --git a/uaitrain_tool/tf/tf_tool.py b/uaitrain_tool/tf/tf_tool.py
index 6920de4..a1f17e2 100644
--- a/uaitrain_tool/tf/tf_tool.py
+++ b/uaitrain_tool/tf/tf_tool.py
@@ -19,6 +19,11 @@ import
time from uaitrain.operation.pack_docker_image.tf_pack_op import TensorFlowUAITrainDockerImagePackOp +from uaitrain.operation.create_train_job.base_create_op import BaseUAITrainCreateTrainJobOp +from uaitrain.operation.stop_train_job.base_stop_op import BaseUAITrainStopTrainJobOp +from uaitrain.operation.delete_train_job.base_delete_op import BaseUAITrainDeleteTrainJobOp +from uaitrain.operation.list_train_job.base_list_job_op import BaseUAITrainListTrainJobOp +from uaitrain.operation.info_train_job.info_train_op import BaseUAITrainRunningJobInfoOp if __name__ == '__main__': parser = argparse.ArgumentParser( @@ -28,10 +33,24 @@ subparsers = parser.add_subparsers(dest='commands', help='commands') pack_op = TensorFlowUAITrainDockerImagePackOp(subparsers) + create_op = BaseUAITrainCreateTrainJobOp(subparsers) + stop_op = BaseUAITrainStopTrainJobOp(subparsers) + delete_op = BaseUAITrainDeleteTrainJobOp(subparsers) + list_op = BaseUAITrainListTrainJobOp(subparsers) + info_op = BaseUAITrainRunningJobInfoOp(subparsers) cmd_args = vars(parser.parse_args()) if cmd_args['commands'] == 'pack': pack_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'create': + create_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'stop': + stop_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'delete': + delete_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'list': + list_op.cmd_run(cmd_args) + elif cmd_args['commands'] == 'info': + info_op.cmd_run(cmd_args) else: - print("UAI Train Deploy Tool Only Support Packing Docker Images Now") - + print("Unknown CMD, please use python tf_tool.py -h to check")