-
Notifications
You must be signed in to change notification settings - Fork 97
/
llm_config.yaml
103 lines (90 loc) · 5.9 KB
/
llm_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# WikiChat uses ChainLite (https://github.com/stanford-oval/chainlite), a wrapper around
# LangChain (https://github.com/langchain-ai/langchain) and LiteLLM (https://github.com/BerriAI/litellm) that enables easy and unified API access to hundreds of LLMs.
# This configuration file defines the setup for how ChainLite calls various LLM APIs, and how it logs LLM inputs/outputs.
# To configure it:
# Configure LLM endpoints under the `llm_endpoints` section, specifying the API base URL, version (if needed), API key (if needed),
# and the mapping of model names to their respective deployment identifiers. The name on the left-hand side of each mapping is "engine", the shorthand
# you can use in your code when calling llm_generation_chain(engine=...).
# The name on the right-hand side is the "model", the specific name that LiteLLM expects: https://docs.litellm.ai/docs/providers
# Note that "engine" names should be unique within this file, but "model" names do not have to be unique.
# Follow the examples provided for Azure, OpenAI, Groq, Together, Mistral, and local models as needed, and remove unused llm endpoints.
# Prompt directories; every path is relative to the location of this file.
prompt_dirs:
  - "./"  # Current directory
  - "./pipelines/prompts"
# NOTE(review): presumably toggles LiteLLM's verbose debug output — confirm against the ChainLite docs.
litellm_set_verbose: false
# Settings for logging LLM prompts. `log_file` and `prompts_to_skip` must be nested under
# `prompt_logging`; at column 0 they would be parsed as unrelated top-level keys and
# `prompt_logging` itself would be null.
prompt_logging:
  log_file: "./data/prompt_logs.jsonl"  # Path to the log file for prompt logs, relative to the location of this file
  prompts_to_skip:  # List of prompts to exclude from logging, relative to the location of this file
    - "benchmark/prompts/user_with_passage.prompt"
    - "benchmark/prompts/user_with_topic.prompt"
# Configuration for different LLM endpoints.
# Each list item describes one endpoint. The keys used by the examples below are:
#   api_base      - base URL of the API (omitted for some providers)
#   api_version   - API version (only the Azure example sets it)
#   api_key       - the NAME of the environment variable that holds the API key, not the key itself
#   prompt_format - set to `distilled` by the fine-tuned / distilled examples
#   engine_map    - "engine" name (left) -> LiteLLM "model" name (right)
#
# NOTE(review): engine names should be unique within this file, but these examples define three
# engines twice: `gpt-4o-mini` (Azure and openai.com), `llama-3-70b-instruct` (Groq and together.ai)
# and `mixtral-8-7b-instruct` (together.ai and Mistral). Remove the endpoints you do not use, or
# rename one side of each pair, before relying on those engine names.
llm_endpoints:
  # Example of OpenAI models via Azure
  # See more models at https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models.
  # For Azure OpenAI, the value on the right-hand side should be the "Deployment name" of your model
  - api_base: https://[resource].openai.azure.com  # Replace [resource] with your Azure OpenAI resource name
    api_version: "2024-02-15-preview"  # API version for Azure OpenAI
    api_key: "AZURE_OPENAI_API_KEY"  # This is the name of the environment variable that contains your Azure OpenAI API key
    engine_map:
      gpt-35-turbo-instruct: azure_text/gpt-35-turbo-instruct
      gpt-4o-mini: azure/gpt-4o-mini

  # Example of OpenAI models via openai.com
  # See more models at https://platform.openai.com/docs/models/. Note that OpenAI models don't need an "openai/" prefix in engine_map
  - api_base: https://api.openai.com/v1
    api_key: OPENAI_API_KEY  # This is the name of the environment variable that contains your OpenAI API key
    engine_map:
      gpt-35-turbo: gpt-3.5-turbo-0125
      gpt-4o: gpt-4o-2024-08-06
      gpt-4o-mini: gpt-4o-mini
      gpt-4: gpt-4-turbo-2024-04-09

  # Example of fine-tuned OpenAI models via openai.com
  # Visit https://platform.openai.com/finetune/ to learn more about fine-tuned OpenAI models
  - api_base: https://api.openai.com/v1
    api_key: OPENAI_API_KEY
    prompt_format: distilled
    engine_map:
      gpt-35-turbo-finetuned: ft:gpt-3.5-turbo-1106:[model_id]  # Replace [model_id] with your fine-tuned model id

  # Example of open models via groq.com
  # Groq has limited model availability, but very fast inference on custom hardware
  # See more models at https://console.groq.com/docs/models
  - api_key: GROQ_API_KEY  # This is the name of the environment variable that contains your groq.com API key
    engine_map:
      llama-3-70b-instruct: groq/llama3-70b-8192

  # Example of Claude models by Anthropic
  # See more models at https://docs.anthropic.com/en/docs/about-claude/models
  - api_base: https://api.anthropic.com/v1/messages
    api_key: ANTHROPIC_API_KEY  # This is the name of the environment variable that contains your Anthropic API key
    engine_map:
      claude-sonnet-35: claude-3-5-sonnet-20240620

  # Example of open models via together.ai
  # See more models at https://docs.together.ai/docs/inference-models
  # TODO: non-instruct models don't work well because of LiteLLM's formatting issues; this endpoint
  # does not work with free accounts because of the 1 QPS limit
  - api_key: TOGETHER_API_KEY  # This is the name of the environment variable that contains your together.ai API key
    engine_map:
      llama-2-70b: together_ai/togethercomputer/llama-2-70b
      llama-3-70b-instruct: together_ai/meta-llama/Llama-3-70b-chat-hf
      mixtral-8-7b: together_ai/mistralai/Mixtral-8x7B-v0.1
      mixtral-8-7b-instruct: together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1
      mistral-7b: together_ai/mistralai/Mistral-7B-v0.1

  # Example of models by Mistral
  # See more models at https://docs.mistral.ai/platform/endpoints/
  - api_base: https://api.mistral.ai/v1
    api_key: MISTRAL_API_KEY
    engine_map:
      mistral-large: mistral/mistral-large-latest
      mistral-medium: mistral/mistral-medium-latest
      mistral-small: mistral/mistral-small-latest
      mistral-7b-instruct: mistral/open-mistral-7b
      mixtral-8-7b-instruct: mistral/open-mixtral-8x7b

  # Local **distilled** models served via HuggingFace's text-generation-inference (https://github.com/huggingface/text-generation-inference/)
  # This endpoint expects the model to have been fine-tuned with a specific data format generated from this repository.
  # Use the next endpoint below instead if you are using general open models (with or without instructions and few-shot examples).
  - api_base: http://127.0.0.1:5002
    prompt_format: distilled
    engine_map:
      local_distilled: huggingface/local  # The name after huggingface/ does not matter and is unused

  # Local models served via HuggingFace's text-generation-inference (https://github.com/huggingface/text-generation-inference/)
  # Use this endpoint if you want to host an open model like LLaMA, LLaMA-instruct, or open Mistral models etc.
  - api_base: http://127.0.0.1:5002
    engine_map:
      local: huggingface/local  # The name after huggingface/ does not matter and is unused