diff --git a/backend/python/autogptq/autogptq.py b/backend/python/autogptq/autogptq.py
index db44f5073692..ffb37569bbcd 100755
--- a/backend/python/autogptq/autogptq.py
+++ b/backend/python/autogptq/autogptq.py
@@ -33,7 +33,7 @@ def LoadModel(self, request, context):
         model = AutoGPTQForCausalLM.from_quantized(request.Model,
                 model_basename=request.ModelBaseName,
                 use_safetensors=True,
-                trust_remote_code=True,
+                trust_remote_code=request.TrustRemoteCode,
                 device=device,
                 use_triton=request.UseTriton,
                 quantize_config=None)
diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index 1b1770574155..fe0b815a2226 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -69,9 +69,9 @@ def LoadModel(self, request, context):
         model_name = request.Model
         try:
             if request.Type == "AutoModelForCausalLM":
-                self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+                self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
             else:
-                self.model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
+                self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
             self.tokenizer = AutoTokenizer.from_pretrained(model_name)
             self.CUDA = False
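For reviewers, a minimal sketch of the behavior the new `request.TrustRemoteCode` flag controls, using the standard `transformers` API. The `load` helper and the `gpt2` model name are illustrative and not part of this diff:

```python
# Sketch of what trust_remote_code gates in transformers (illustrative,
# not the backend's actual wiring).
from transformers import AutoModelForCausalLM

def load(model_name: str, trust_remote_code: bool):
    # With trust_remote_code=False, transformers refuses to execute any
    # modeling code shipped inside the model repository; repos that depend
    # on custom code raise a ValueError instead of running it silently.
    return AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=trust_remote_code,
    )

# gpt2 needs no custom code, so it loads fine with the flag off.
model = load("gpt2", trust_remote_code=False)
```

Since proto3 booleans default to `False`, wiring the flag through the request means remote code execution is off unless the caller explicitly opts in per model, rather than being hardcoded on as before.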