While running `model, tokenizer = load_model(model_name, bnb_config)`, I am getting the error below.
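For context, the helpers referenced in the call presumably look something like the sketch below. This is a reconstruction pieced together from the traceback and the common QLoRA fine-tuning recipe, so the exact bodies (in particular `create_bnb_config` and the `pad_token` line) are assumptions, not code confirmed in this report.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

def create_bnb_config():
    # Assumed: the standard 4-bit NF4 config used in most QLoRA tutorials.
    return BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

def load_model(model_name, bnb_config):
    n_gpus = torch.cuda.device_count()
    max_memory = f"{40960}MB"

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",  # dispatch the model efficiently on the available resources
        max_memory={i: max_memory for i in range(n_gpus)},
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)

    # Needed for the LLaMA tokenizer (assumed continuation of the cell)
    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer
```

The full traceback: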
AttributeError Traceback (most recent call last)
Cell In[33], line 4
2 model_name = "meta-llama/Llama-2-7b-hf"
3 bnb_config = create_bnb_config()
----> 4 model, tokenizer = load_model(model_name, bnb_config)
Cell In[4], line 5, in load_model(model_name, bnb_config)
2 n_gpus = torch.cuda.device_count()
3 max_memory = f'{40960}MB'
----> 5 model = AutoModelForCausalLM.from_pretrained(
6 model_name,
7 quantization_config=bnb_config,
8 device_map="auto", # dispatch efficiently the model on the available ressources
9 max_memory = {i: max_memory for i in range(n_gpus)},
10 )
11 tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
13 # Needed for LLaMA tokenizer
File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:566, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
564 elif type(config) in cls._model_mapping.keys():
565 model_class = _get_model_class(config, cls._model_mapping)
--> 566 return model_class.from_pretrained(
567 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
568 )
569 raise ValueError(
570 f"Unrecognized configuration class {config.class} for this kind of AutoModel: {cls.name}.\n"
571 f"Model type should be one of {', '.join(c.name for c in cls._model_mapping.keys())}."
572 )
File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:3480, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
3471 if dtype_orig is not None:
3472 torch.set_default_dtype(dtype_orig)
3473 (
3474 model,
3475 missing_keys,
3476 unexpected_keys,
3477 mismatched_keys,
3478 offload_index,
3479 error_msgs,
-> 3480 ) = cls._load_pretrained_model(
3481 model,
3482 state_dict,
3483 loaded_state_dict_keys, # XXX: rename?
3484 resolved_archive_file,
3485 pretrained_model_name_or_path,
3486 ignore_mismatched_sizes=ignore_mismatched_sizes,
3487 sharded_metadata=sharded_metadata,
3488 _fast_init=_fast_init,
3489 low_cpu_mem_usage=low_cpu_mem_usage,
3490 device_map=device_map,
3491 offload_folder=offload_folder,
3492 offload_state_dict=offload_state_dict,
3493 dtype=torch_dtype,
3494 is_quantized=(getattr(model, "quantization_method", None) == QuantizationMethod.BITS_AND_BYTES),
3495 keep_in_fp32_modules=keep_in_fp32_modules,
3496 )
3498 model.is_loaded_in_4bit = load_in_4bit
3499 model.is_loaded_in_8bit = load_in_8bit
File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:3870, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, is_quantized, keep_in_fp32_modules)
3868 if low_cpu_mem_usage:
3869 if not is_fsdp_enabled() or is_fsdp_enabled_and_dist_rank_0():
-> 3870 new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
3871 model_to_load,
3872 state_dict,
3873 loaded_keys,
3874 start_prefix,
3875 expected_keys,
3876 device_map=device_map,
3877 offload_folder=offload_folder,
3878 offload_index=offload_index,
3879 state_dict_folder=state_dict_folder,
3880 state_dict_index=state_dict_index,
3881 dtype=dtype,
3882 is_quantized=is_quantized,
3883 is_safetensors=is_safetensors,
3884 keep_in_fp32_modules=keep_in_fp32_modules,
3885 )
3886 error_msgs += new_error_msgs
3887 else:
File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:751, in _load_state_dict_into_meta_model(model, state_dict, loaded_state_dict_keys, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, is_quantized, is_safetensors, keep_in_fp32_modules)
748 fp16_statistics = None
750 if "SCB" not in param_name:
--> 751 set_module_quantized_tensor_to_device(
752 model, param_name, param_device, value=param, fp16_statistics=fp16_statistics
753 )
755 return error_msgs, offload_index, state_dict_index
File /opt/conda/lib/python3.10/site-packages/transformers/integrations/bitsandbytes.py:98, in set_module_quantized_tensor_to_device(module, tensor_name, device, value, fp16_statistics)
96 new_value = bnb.nn.Int8Params(new_value, requires_grad=False, **kwargs).to(device)
97 elif is_4bit:
---> 98 new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(device)
100 module._parameters[tensor_name] = new_value
101 if fp16_statistics is not None:
File /opt/conda/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:179, in Params4bit.to(self, *args, **kwargs)
176 device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
178 if (device is not None and device.type == "cuda" and self.data.device.type == "cpu"):
--> 179 return self.cuda(device)
180 else:
181 s = self.quant_state
File /opt/conda/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:157, in Params4bit.cuda(self, device)
155 def cuda(self, device):
156 w = self.data.contiguous().half().cuda(device)
--> 157 w_4bit, quant_state = bnb.functional.quantize_4bit(w, blocksize=self.blocksize, compress_statistics=self.compress_statistics, quant_type=self.quant_type)
158 self.data = w_4bit
159 self.quant_state = quant_state
File /opt/conda/lib/python3.10/site-packages/bitsandbytes/functional.py:832, in quantize_4bit(A, absmax, out, blocksize, compress_statistics, quant_type)
830 lib.cquantize_blockwise_fp16_fp4(get_ptr(None), get_ptr(A), get_ptr(absmax), get_ptr(out), ct.c_int32(blocksize), ct.c_int(n))
831 else:
--> 832 lib.cquantize_blockwise_fp16_nf4(get_ptr(None), get_ptr(A), get_ptr(absmax), get_ptr(out), ct.c_int32(blocksize), ct.c_int(n))
833 elif A.dtype == torch.bfloat16:
834 if quant_type == 'fp4':
AttributeError: 'NoneType' object has no attribute 'cquantize_blockwise_fp16_nf4'
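For reference, this particular AttributeError indicates that the bitsandbytes native library handle is still `None` when `quantize_4bit` is first called, i.e. bitsandbytes failed to load its compiled CUDA library at import time. That is usually an environment problem (no visible GPU, or a CUDA/PyTorch/bitsandbytes version mismatch) rather than a bug in `load_model`. A minimal sanity check, assuming a recent bitsandbytes release:

```python
# Environment sanity check (a sketch, not from the original report).
# If bitsandbytes cannot load its CUDA library at import time (e.g. no GPU
# visible, or a CUDA version mismatch), its internal `lib` handle stays None
# and the first 4-bit call raises exactly this AttributeError.
import torch

print(torch.__version__)
print(torch.version.cuda)          # CUDA version PyTorch was built against
print(torch.cuda.is_available())   # must be True for 4-bit loading

import bitsandbytes as bnb         # watch for setup warnings on this import
print(bnb.__version__)
```

Recent bitsandbytes releases also ship a self-diagnostic, `python -m bitsandbytes`, which reports which native library was picked up and why.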