diff --git a/.github/workflows/kind-cluster/determine_models.py b/.github/workflows/kind-cluster/determine_models.py
index 402365441..5ace3ba63 100644
--- a/.github/workflows/kind-cluster/determine_models.py
+++ b/.github/workflows/kind-cluster/determine_models.py
@@ -117,10 +117,10 @@ def check_modified_models(pr_branch):
 
 def main():
     pr_branch = os.environ.get("PR_BRANCH", "main") # If not specified default to 'main'
-    force_run_all = os.environ.get("FORCE_RUN_ALL", False) # If not specified default to False
+    force_run_all = os.environ.get("FORCE_RUN_ALL", "false") # If not specified default to False
 
     affected_models = []
-    if force_run_all:
+    if force_run_all != "false":
         affected_models = [model['name'] for model in YAML_PR['models']]
     else:
         # Logic to determine affected models
diff --git a/presets/inference/text-generation/inference_api.py b/presets/inference/text-generation/inference_api.py
index bf739844d..f6c604a54 100644
--- a/presets/inference/text-generation/inference_api.py
+++ b/presets/inference/text-generation/inference_api.py
@@ -125,11 +125,11 @@ def health_check():
 class GenerateKwargs(BaseModel):
     max_length: int = 200 # Length of input prompt+max_new_tokens
     min_length: int = 0
-    do_sample: bool = False
+    do_sample: bool = True
     early_stopping: bool = False
     num_beams: int = 1
     temperature: float = 1.0
-    top_k: int = 50
+    top_k: int = 10
     top_p: float = 1
     typical_p: float = 1
     repetition_penalty: float = 1
diff --git a/presets/inference/text-generation/tests/test_inference_api.py b/presets/inference/text-generation/tests/test_inference_api.py
index d6506b08b..c15b0f38f 100644
--- a/presets/inference/text-generation/tests/test_inference_api.py
+++ b/presets/inference/text-generation/tests/test_inference_api.py
@@ -156,9 +156,9 @@ def test_default_generation_params(configured_app):
     _, kwargs = mock_pipeline.call_args
     assert kwargs['max_length'] == 200
     assert kwargs['min_length'] == 0
-    assert kwargs['do_sample'] is False
+    assert kwargs['do_sample'] is True
     assert kwargs['temperature'] == 1.0
-    assert kwargs['top_k'] == 50
+    assert kwargs['top_k'] == 10
     assert kwargs['top_p'] == 1
     assert kwargs['typical_p'] == 1
     assert kwargs['repetition_penalty'] == 1
diff --git a/presets/models/supported_models.yaml b/presets/models/supported_models.yaml
index 0f68002c5..0441a945a 100644
--- a/presets/models/supported_models.yaml
+++ b/presets/models/supported_models.yaml
@@ -34,23 +34,30 @@ models:
     type: text-generation
     version: https://huggingface.co/tiiuae/falcon-7b/commit/898df1396f35e447d5fe44e0a3ccaaaa69f30d36
     runtime: tfs
-    tag: 0.0.3
+    tag: 0.0.4
   - name: falcon-7b-instruct
     type: text-generation
     version: https://huggingface.co/tiiuae/falcon-7b-instruct/commit/cf4b3c42ce2fdfe24f753f0f0d179202fea59c99
     runtime: tfs
-    tag: 0.0.3
+    tag: 0.0.4
+    # Tag history:
+    # 0.0.4 - Adjust default model params (#310)
+    # 0.0.3 - Update Default Params (#294)
+    # 0.0.2 - Inference API Cleanup (#233)
+    # 0.0.1 - Initial Release
   - name: falcon-40b
     type: text-generation
     version: https://huggingface.co/tiiuae/falcon-40b/commit/4a70170c215b36a3cce4b4253f6d0612bb7d4146
     runtime: tfs
-    tag: 0.0.3
+    tag: 0.0.5
   - name: falcon-40b-instruct
     type: text-generation
     version: https://huggingface.co/tiiuae/falcon-40b-instruct/commit/ecb78d97ac356d098e79f0db222c9ce7c5d9ee5f
     runtime: tfs
-    tag: 0.0.3
-    # Tag history:
+    tag: 0.0.5
+    # Tag history for 40b models:
+    # 0.0.5 - Adjust default model params (#310)
+    # 0.0.4 - Skipped due to incomplete upload issue
     # 0.0.3 - Update Default Params (#294)
     # 0.0.2 - Inference API Cleanup (#233)
     # 0.0.1 - Initial Release
@@ -60,13 +67,14 @@ models:
     type: text-generation
     version: https://huggingface.co/mistralai/Mistral-7B-v0.1/commit/26bca36bde8333b5d7f72e9ed20ccda6a618af24
     runtime: tfs
-    tag: 0.0.3
+    tag: 0.0.4
   - name: mistral-7b-instruct
     type: text-generation
     version: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/commit/b70aa86578567ba3301b21c8a27bea4e8f6d6d61
     runtime: tfs
-    tag: 0.0.3
+    tag: 0.0.4
     # Tag history:
+    # 0.0.4 - Adjust default model params (#310)
     # 0.0.3 - Update Default Params (#294)
     # 0.0.2 - Inference API Cleanup (#233)
     # 0.0.1 - Initial Release
@@ -76,7 +84,8 @@ models:
     type: text-generation
     version: https://huggingface.co/microsoft/phi-2/commit/b10c3eba545ad279e7208ee3a5d644566f001670
     runtime: tfs
-    tag: 0.0.2
+    tag: 0.0.3
     # Tag history:
+    # 0.0.3 - Adjust default model params (#310)
     # 0.0.2 - Update Default Params (#294)
     # 0.0.1 - Initial Release
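
Note on the FORCE_RUN_ALL change in determine_models.py: os.environ only ever returns strings, so the old default of a boolean False combined with a truthiness check would treat the string "false" as true whenever the variable was set. The patch therefore defaults to the string "false" and compares against it. A minimal standalone sketch of the pitfall (illustration only, not part of the patch; the variable names old_flag and new_flag are made up for the example):

import os

# Simulate the workflow setting the variable to "false".
os.environ["FORCE_RUN_ALL"] = "false"

# Old behaviour: any non-empty string is truthy, so this wrongly enables run-all.
old_flag = os.environ.get("FORCE_RUN_ALL", False)
print(bool(old_flag))         # True

# New behaviour: explicit string comparison, run-all only when set to anything
# other than "false".
new_flag = os.environ.get("FORCE_RUN_ALL", "false")
print(new_flag != "false")    # False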