Add Apple Silicon GPU acceleration to DL examples
NripeshN committed Aug 1, 2023
1 parent b03375b commit 164998c
Showing 6 changed files with 91 additions and 37 deletions.
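
All six scripts receive the same edit: the --no_cuda flag is renamed to --no_gpu (run_glue.py uses --no_GPU), and device selection falls back from CUDA to Apple-silicon MPS and finally to the CPU. A minimal, self-contained sketch of that selection pattern follows; the select_device helper name is illustrative and not part of the commit, and the hasattr guard only assumes torch.backends.mps exists from PyTorch 1.12 onwards.

import argparse

import torch


def select_device(no_gpu: bool = False) -> torch.device:
    """Prefer CUDA, then Apple-silicon MPS, otherwise fall back to the CPU."""
    if torch.cuda.is_available() and not no_gpu:
        return torch.device("cuda")
    # torch.backends.mps is only present in PyTorch >= 1.12, hence the hasattr guard
    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available() and not no_gpu:
        return torch.device("mps")
    return torch.device("cpu")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--no_gpu", action="store_true",
                        help="Whether not to use a GPU when available")
    args = parser.parse_args()
    print(f"Using device: {select_device(args.no_gpu)}")
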
17 changes: 12 additions & 5 deletions PyTorch/LanguageModeling/BERT/extract_features.py
@@ -210,17 +210,24 @@ def main():
                        type=int,
                        default=-1,
                        help = "local_rank for distributed training on gpus")
-    parser.add_argument("--no_cuda",
+    parser.add_argument("--no_gpu",
                        action='store_true',
-                        help="Whether not to use CUDA when available")
+                        help="Whether not to use GPU when available")

    args = parser.parse_args()

    if args.local_rank == -1 or args.no_cuda:
-        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
-        n_gpu = torch.cuda.device_count()
+        if torch.cuda.is_available() and not args.no_gpu:
+            device = torch.device("cuda" if torch.cuda.is_available() and not args.no_gpu else "cpu")
+            n_gpu = torch.cuda.device_count()
+        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available() and not args.no_gpu:
+            device = torch.device('mps' if torch.backends.mps.is_available() and not args.no_gpu else 'cpu')  # noqa
+            n_gpu = 1
    else:
-        device = torch.device("cuda", args.local_rank)
+        if torch.cuda.is_available():
+            device = torch.device('cuda')
+        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+            device = torch.device('mps')  # noqa
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
18 changes: 13 additions & 5 deletions PyTorch/LanguageModeling/BERT/inference.py
@@ -450,9 +450,9 @@ def main():
parser.add_argument("--max_answer_length", default=30, type=int,
help="The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another.")
parser.add_argument("--no_cuda",
parser.add_argument("--no_gpu",
action='store_true',
help="Whether not to use CUDA when available")
help="Whether not to use GPU when available")
parser.add_argument("--do_lower_case",
action='store_true',
help="Whether to lower case the input text. True for uncased models, False for cased models.")
@@ -482,10 +482,18 @@ def main():
        torch.cuda.manual_seed(args.seed)

    if args.local_rank == -1 or args.no_cuda:
-        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
+        if torch.cuda.is_available() and not args.no_gpu:
+            device = torch.device('cuda')
+        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available() and not args.no_gpu:
+            device = torch.device('mps')  # noqa
+        else:
+            device = torch.device('cpu')
    else:
-        torch.cuda.set_device(args.local_rank)
-        device = torch.device("cuda", args.local_rank)
+        if torch.cuda.is_available():
+            torch.cuda.set_device(args.local_rank)
+            device = torch.device('cuda', args.local_rank)
+        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+            device = torch.device('mps')  # noqa

    tokenizer = BertTokenizer(args.vocab_file, do_lower_case=args.do_lower_case, max_len=512)  # for bert large

23 changes: 16 additions & 7 deletions PyTorch/LanguageModeling/BERT/run_glue.py
@@ -439,9 +439,9 @@ def main():
                        type=float,
                        help="Proportion of training to perform linear learning rate warmup for. "
                             "E.g., 0.1 = 10%% of training.")
-    parser.add_argument("--no_cuda",
+    parser.add_argument("--no_GPU",
                        action='store_true',
-                        help="Whether not to use CUDA when available")
+                        help="Whether not to use GPU when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
@@ -495,12 +495,21 @@ def main():
"mrpc": 2,
}

if args.local_rank == -1 or args.no_cuda:
device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
n_gpu = torch.cuda.device_count()
if args.local_rank == -1 or args.no_GPU:
if torch.cuda.is_available() and not args.no_GPU:
device = torch.device('cuda')
n_gpu = torch.cuda.device_count()
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available() and not args.no_GPU: #noqa
device = torch.device('mps')
n_gpu = 1
else:
device = torch.device('cpu')
else:
torch.cuda.set_device(args.local_rank)
device = torch.device("cuda", args.local_rank)
if torch.cuda.is_available():
torch.cuda.set_device(args.local_rank)
device = torch.device("cuda", args.local_rank)
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(): #noqa
device = torch.device('mps')
n_gpu = 1
# Initializes the distributed backend which will take care of sychronizing nodes/GPUs
torch.distributed.init_process_group(backend='nccl')
21 changes: 15 additions & 6 deletions PyTorch/LanguageModeling/BERT/run_pretraining_inference.py
@@ -140,10 +140,10 @@ def main():
                        default=-1,
                        type=int,
                        help="Total number of eval steps to perform, otherwise use full dataset")
-    parser.add_argument("--no_cuda",
+    parser.add_argument("--no_gpu",
                        default=False,
                        action='store_true',
-                        help="Whether not to use CUDA when available")
+                        help="Whether not to use GPU when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
@@ -166,12 +166,21 @@ def main():
    if 'LOCAL_RANK' in os.environ:
        args.local_rank = int(os.environ['LOCAL_RANK'])

-    if args.local_rank == -1 or args.no_cuda:
-        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
+    if args.local_rank == -1 or args.no_gpu:
+        if torch.cuda.is_available() and not args.no_gpu:
+            device = torch.device('cuda')
+        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available() and not args.no_gpu:
+            device = torch.device('mps')
+        else:
+            device = torch.device('cpu')
+
    else:
-        torch.cuda.set_device(args.local_rank)
-        device = torch.device("cuda", args.local_rank)
+        if torch.cuda.is_available():
+            torch.cuda.set_device(args.local_rank)
+            device = torch.device("cuda", args.local_rank)
+        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+            device = torch.device('mps')

    # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
    torch.distributed.init_process_group(backend='nccl', init_method='env://')

24 changes: 17 additions & 7 deletions PyTorch/LanguageModeling/BERT/run_squad.py
@@ -842,9 +842,9 @@ def main():
parser.add_argument("--verbose_logging", action='store_true',
help="If true, all of the warnings related to data processing will be printed. "
"A number of warnings are expected for a normal SQuAD evaluation.")
parser.add_argument("--no_cuda",
parser.add_argument("--no_gpu",
action='store_true',
help="Whether not to use CUDA when available")
help="Whether not to use GPU when available")
parser.add_argument('--seed',
type=int,
default=42,
@@ -907,12 +907,22 @@ def main():
    if args.use_env and 'LOCAL_RANK' in os.environ:
        args.local_rank = int(os.environ['LOCAL_RANK'])

-    if args.local_rank == -1 or args.no_cuda:
-        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
-        n_gpu = torch.cuda.device_count()
+    if args.local_rank == -1 or args.no_gpu:
+        if torch.cuda.is_available() and not args.no_gpu:
+            device = torch.device('cuda')
+            n_gpu = torch.cuda.device_count()
+        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available() and not args.no_gpu:
+            device = torch.device('mps')
+            n_gpu = 1
+        else:
+            device = torch.device('cpu')
+
    else:
-        torch.cuda.set_device(args.local_rank)
-        device = torch.device("cuda", args.local_rank)
+        if torch.cuda.is_available():
+            torch.cuda.set_device(args.local_rank)
+            device = torch.device("cuda", args.local_rank)
+        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+            device = torch.device('mps')
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        n_gpu = 1
25 changes: 18 additions & 7 deletions PyTorch/LanguageModeling/BERT/run_swag.py
@@ -303,9 +303,9 @@ def main():
                        type=float,
                        help="Proportion of training to perform linear learning rate warmup for. "
                             "E.g., 0.1 = 10%% of training.")
-    parser.add_argument("--no_cuda",
+    parser.add_argument("--no_gpu",
                        action='store_true',
-                        help="Whether not to use CUDA when available")
+                        help="Whether not to use GPU when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
@@ -329,12 +329,23 @@ def main():

    args = parser.parse_args()

-    if args.local_rank == -1 or args.no_cuda:
-        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
-        n_gpu = torch.cuda.device_count()
+    if args.local_rank == -1 or args.no_gpu:
+        if torch.cuda.is_available() and not args.no_gpu:
+            device = torch.device('cuda')
+            n_gpu = torch.cuda.device_count()
+        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available() and not args.no_gpu:
+            device = torch.device('mps')
+            n_gpu = 1
+        else:
+            device = torch.device('cpu')
+
    else:
-        torch.cuda.set_device(args.local_rank)
-        device = torch.device("cuda", args.local_rank)
+        if torch.cuda.is_available():
+            torch.cuda.set_device(args.local_rank)
+            device = torch.device("cuda", args.local_rank)
+        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+            # MPS exposes a single device, so no per-rank device selection is needed here
+            device = torch.device("mps")
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
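
One caveat on the distributed branches above: every local_rank != -1 path still calls torch.distributed.init_process_group(backend='nccl'), and the NCCL backend requires NVIDIA GPUs, so those paths cannot initialize on Apple silicon even once the device is set to MPS. The sketch below shows one possible fallback to the Gloo backend; it is an assumption rather than part of this commit, and it presumes the script is launched with torchrun so the env:// init method can read the rank and world-size environment variables.

import torch
import torch.distributed as dist

# Assumed fallback, not part of the commit: pick a backend the current machine
# can actually run. NCCL needs CUDA devices; Gloo works on CPU-only and MPS hosts.
backend = "nccl" if torch.cuda.is_available() else "gloo"
dist.init_process_group(backend=backend, init_method="env://")
print(f"rank {dist.get_rank()} / world size {dist.get_world_size()} using {backend}")
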
