Skip to content

Commit

Permalink
Merge branch 'huggingface:main' into dev/jwieczorekhabana/autocast
Browse files Browse the repository at this point in the history
  • Loading branch information
jwieczorekhabana authored Nov 8, 2023
2 parents fdb4d92 + abbd38d commit 1f164dd
Show file tree
Hide file tree
Showing 47 changed files with 825 additions and 635 deletions.
1 change: 0 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -110,5 +110,4 @@ clean:

test_installs:
python -m pip install .[tests]
python -m pip install git+https://github.com/huggingface/transformers.git
python -m pip install git+https://github.com/huggingface/accelerate.git
11 changes: 6 additions & 5 deletions examples/audio-classification/run_audio_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.33.0")
check_optimum_habana_min_version("1.7.5")
check_min_version("4.34.0")
check_optimum_habana_min_version("1.8.1")

require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt")

Expand Down Expand Up @@ -167,15 +167,15 @@ class ModelArguments:
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
},
)
trust_remote_code: bool = field(
default=False,
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
Expand All @@ -201,7 +201,8 @@ def main():

if model_args.use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in Transformers v4.34.", FutureWarning
"The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead.",
FutureWarning,
)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
Expand Down
13 changes: 7 additions & 6 deletions examples/contrastive-image-text/run_bridgetower.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.33.0")
check_optimum_habana_min_version("1.7.5")
check_min_version("4.34.0")
check_optimum_habana_min_version("1.8.1")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")

Expand Down Expand Up @@ -102,15 +102,15 @@ class ModelArguments:
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
},
)
trust_remote_code: bool = field(
default=False,
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
Expand Down Expand Up @@ -263,7 +263,8 @@ def main():

if model_args.use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in Transformers v4.34.", FutureWarning
"The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead.",
FutureWarning,
)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
Expand Down Expand Up @@ -381,7 +382,7 @@ def main():
)
else:
raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)

Expand Down
13 changes: 7 additions & 6 deletions examples/contrastive-image-text/run_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.33.0")
check_optimum_habana_min_version("1.7.5")
check_min_version("4.34.0")
check_optimum_habana_min_version("1.8.1")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")

Expand Down Expand Up @@ -107,15 +107,15 @@ class ModelArguments:
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
},
)
trust_remote_code: bool = field(
default=False,
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
Expand Down Expand Up @@ -265,7 +265,8 @@ def main():

if model_args.use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in Transformers v4.34.", FutureWarning
"The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead.",
FutureWarning,
)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
Expand Down Expand Up @@ -380,7 +381,7 @@ def main():
)
else:
raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)

Expand Down
18 changes: 12 additions & 6 deletions examples/image-classification/run_image_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from torchvision.transforms import (
CenterCrop,
Compose,
Lambda,
Normalize,
RandomHorizontalFlip,
RandomResizedCrop,
Expand Down Expand Up @@ -63,8 +64,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.33.0")
check_optimum_habana_min_version("1.7.5")
check_min_version("4.34.0")
check_optimum_habana_min_version("1.8.1")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")

Expand Down Expand Up @@ -163,15 +164,15 @@ class ModelArguments:
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
},
)
trust_remote_code: bool = field(
default=False,
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
Expand Down Expand Up @@ -203,7 +204,8 @@ def main():

if model_args.use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in Transformers v4.34.", FutureWarning
"The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead.",
FutureWarning,
)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
Expand Down Expand Up @@ -344,7 +346,11 @@ def compute_metrics(p):
size = image_processor.size["shortest_edge"]
else:
size = (image_processor.size["height"], image_processor.size["width"])
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
normalize = (
Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
if hasattr(image_processor, "image_mean") and hasattr(image_processor, "image_std")
else Lambda(lambda x: x)
)
_train_transforms = Compose(
[
RandomResizedCrop(size),
Expand Down
28 changes: 14 additions & 14 deletions examples/language-modeling/run_clm.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.33.0")
check_optimum_habana_min_version("1.7.5")
check_min_version("4.34.0")
check_optimum_habana_min_version("1.8.1")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

Expand Down Expand Up @@ -131,15 +131,15 @@ class ModelArguments:
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
},
)
trust_remote_code: bool = field(
default=False,
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
Expand Down Expand Up @@ -167,8 +167,8 @@ class ModelArguments:
default=False,
metadata={
"help": (
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded."
"set True will benefit LLM loading time and RAM consumption."
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
"Setting it to True will benefit LLM loading time and RAM consumption."
)
},
)
Expand Down Expand Up @@ -274,7 +274,8 @@ def main():

if model_args.use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in Transformers v4.34.", FutureWarning
"The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead.",
FutureWarning,
)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
Expand Down Expand Up @@ -453,7 +454,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
else:
raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)

Expand Down Expand Up @@ -526,17 +527,16 @@ def tokenize_function(examples):

if data_args.block_size is None:
block_size = tokenizer.model_max_length
if block_size > 1024:
if block_size > config.max_position_embeddings:
logger.warning(
"The chosen tokenizer supports a `model_max_length` that is longer than the default `block_size` value"
" of 1024. If you would like to use a longer `block_size` up to `tokenizer.model_max_length` you can"
" override this default with `--block_size xxx`."
f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
f"Using block_size={min(1024, config.max_position_embeddings)} instead. You can change that default value by passing --block_size xxx."
)
block_size = 1024
block_size = min(1024, config.max_position_embeddings)
else:
if data_args.block_size > tokenizer.model_max_length:
logger.warning(
f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
)
block_size = min(data_args.block_size, tokenizer.model_max_length)
Expand Down
9 changes: 4 additions & 5 deletions examples/language-modeling/run_lora_clm.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
check_optimum_habana_min_version("1.7.5")
check_optimum_habana_min_version("1.8.1")


@dataclass
Expand Down Expand Up @@ -103,8 +103,7 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
},
)
trust_remote_code: bool = field(
Expand Down Expand Up @@ -300,8 +299,8 @@ def main():
# Log on each process the small summary
b16 = training_args.fp16 or training_args.bf16
logger.warning(
f"Process rank: {training_args.local_rank}, device: {training_args.device}"
+ f"\ndistributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {b16}"
f"Process rank: {training_args.local_rank}, device: {training_args.device}, "
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {b16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
Expand Down
19 changes: 10 additions & 9 deletions examples/language-modeling/run_mlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.33.0")
check_optimum_habana_min_version("1.7.5")
check_min_version("4.34.0")
check_optimum_habana_min_version("1.8.1")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

Expand Down Expand Up @@ -128,15 +128,15 @@ class ModelArguments:
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
},
)
trust_remote_code: bool = field(
default=False,
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
Expand All @@ -145,8 +145,8 @@ class ModelArguments:
default=False,
metadata={
"help": (
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded."
"set True will benefit LLM loading time and RAM consumption."
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
"Setting it to True will benefit LLM loading time and RAM consumption."
)
},
)
Expand Down Expand Up @@ -265,7 +265,8 @@ def main():

if model_args.use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in Transformers v4.34.", FutureWarning
"The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead.",
FutureWarning,
)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
Expand Down Expand Up @@ -435,7 +436,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
else:
raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)

Expand Down Expand Up @@ -480,7 +481,7 @@ def main():
else:
if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
Expand Down
2 changes: 1 addition & 1 deletion examples/protein-folding/run_esmfold.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def check_optimum_habana_min_version(*a, **b):


# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
check_optimum_habana_min_version("1.7.5")
check_optimum_habana_min_version("1.8.1")


def convert_outputs_to_pdb(outputs):
Expand Down
Loading

0 comments on commit 1f164dd

Please sign in to comment.