From 9fd800065b4fd0eec9cde0e1cc994d8a4d71a520 Mon Sep 17 00:00:00 2001 From: Michal Moskal Date: Tue, 29 Oct 2024 21:35:18 +0000 Subject: [PATCH] reworking how to pass config files --- .gitignore | 1 - .vscode/settings.json | 15 ++++ Cargo.lock | 128 +++++++++++++++++++++++++++ README.md | 36 ++++---- llgtrt/Cargo.toml | 1 + llgtrt/chat_templates/llama31.j2 | 92 ++++++++++++++++++++ llgtrt/src/async_exec.rs | 15 +++- llgtrt/src/chat.rs | 27 +----- llgtrt/src/config.rs | 112 ++++++++++++++++-------- llgtrt/src/config_info.json | 57 ++++++++++++ llgtrt/src/constraint_mgr.rs | 33 +++---- llgtrt/src/jsonutil.rs | 101 ++++++++++++++++++++++ llgtrt/src/lib.rs | 3 +- llgtrt/src/logging.rs | 4 +- llgtrt/src/main.rs | 4 +- llgtrt/src/startup.rs | 143 ++++++++++++++++++------------- llgtrt/src/tokenizer.rs | 82 ++++++++---------- llguidance | 2 +- scripts/collect-comments.py | 99 +++++++++++++++++++++ scripts/regen.sh | 4 + 20 files changed, 745 insertions(+), 214 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 llgtrt/chat_templates/llama31.j2 create mode 100644 llgtrt/src/config_info.json create mode 100644 llgtrt/src/jsonutil.rs create mode 100644 scripts/collect-comments.py create mode 100755 scripts/regen.sh diff --git a/.gitignore b/.gitignore index 94c01dc..d242cfb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ build tmp -.vscode/settings.json target model.cache diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..5c1ff3b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,15 @@ +{ + "C_Cpp.autoAddFileAssociations": false, + "files.readonlyInclude": { + "**/config_info.json": true + }, + "cSpell.words": [ + "ckpt", + "fmha", + "llgtrt", + "mpirun", + "npuichigo", + "openai", + "trtllm" + ] +} \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 0d0fdde..da5f374 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -287,6 +287,15 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "borrow-or-share" version = "0.2.2" @@ -487,6 +496,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cpufeatures" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.4.2" @@ -521,6 +539,16 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "darling" version = "0.14.4" @@ -602,6 +630,16 @@ dependencies = [ "toml", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "dirs" version = "5.0.1" @@ -805,6 +843,16 @@ dependencies = [ "pin-utils", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -1226,6 +1274,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "json5" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1" +dependencies = [ + "pest", + "pest_derive", + "serde", +] + [[package]] name = "jsonschema" version = "0.24.0" @@ -1307,6 +1366,7 @@ dependencies = [ "clap", "flexi_logger", "futures-core", + "json5", "llguidance_parser", "log", "minijinja", @@ -1723,6 +1783,51 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "pest" +version = "2.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d214365f632b123a47fd913301e14c946c61d1c183ee245fa76eb752e59a02dd" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb55586734301717aea2ac313f50b2eb8f60d2fc3dc01d190eefa2e625f60c4e" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.77", +] + +[[package]] +name = "pest_meta" +version = "2.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75da2a70cf4d9cb76833c990ac9cd3923c9a8905a8929789ce347c84564d03d" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + [[package]] name = "pin-project" version = "1.1.5" @@ -2218,6 +2323,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2595,6 +2711,18 @@ dependencies = [ "serde", ] +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = "unicode-bidi" version = "0.3.15" diff --git a/README.md b/README.md index 78a3ebc..634d887 100644 --- a/README.md +++ b/README.md @@ -4,15 +4,15 @@ This project demonstrates how to use [llguidance library](https://github.com/microsoft/llguidance) for constrained output with [NVIDIA TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM), -implementing a server with -[OpenAI REST 
API](https://platform.openai.com/docs/api-reference/introduction). +implementing a REST server compatible with +[OpenAI APIs](https://platform.openai.com/docs/api-reference/introduction). The server supports regular completions and chat endpoints -with JSON with schema enforcement ("Structured Output" in OpenAI docs), +with JSON schema enforcement ("Structured Output"), as well as full context-free grammars using [Guidance library](https://github.com/guidance-ai/guidance). This server is similar in spirit to [TensorRT-LLM OpenAI server example](./TensorRT-LLM/examples/apps/openai_server.py), -but python-free and with support for constrained output. +but is Python-free (implemented in Rust) and supports constrained output. Similarly to the example above, it **does not** use the NVIDIA Triton Inference Server. ## Requirements @@ -89,17 +89,6 @@ HF Transformers `config.json` as well as the `.safetensors` files and `tokenizer.json`). If you're running on more than one GPU, modify the `--tp_size` argument. -### Create config files - -By default, llgtrt will use chat template from `tokenizer_config.json`. -If present, it will also read `tokenizer_config_llgtrt.json` from the same directory -and apply any keys from it to `tokenizer_config.json`. -Afterwards, if `chat_template.j2` file is found, it will be used as the chat template. - -You can also modify TensortRT-LLM's runtime configuration with `runtime.json` file -and `llguidance_parser` configuration with `llguidance.json`. -This is optional, see below. - ### Running the Engine ```bash @@ -110,8 +99,25 @@ The command will print out the actual `docker run` invocation on first line if you want to invoke it directly later. `PORT` defaults to 3000. +### Update configuration + You can pass additional arguments after the engine path. Try running `./docker/run.sh /path/to/hf-models/model-engine --help` for more info. +Most of the options are specified in configuration files, +but you can select which configuration files are used via command-line arguments. + +By default, llgtrt will use the chat template from `tokenizer_config.json`. + +If present, it will also read `tokenizer_config_llgtrt.json` from the same directory +and apply any keys from it to `tokenizer_config.json`. +Afterwards, if a `chat_template.j2` file is found, it will be used as the chat template. + +You can also modify TensorRT-LLM's runtime configuration with the `runtime.json` file +and `llguidance_parser` configuration with `llguidance.json`. +This is optional, see below. + + + The `--help` has up-to-date info on the `runtime.json` file - the options can be specified either in these files (replace `-` with `_`) or on the command line.
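To make the layered configuration concrete, here is a hypothetical `llgtrt.json5` placed next to the engine. The key names follow the `LlgTrtConfig`/`TrtLlmRuntimeConfig` fields introduced in this patch, but the values themselves are purely illustrative:

```json5
// llgtrt.json5 -- a sketch; every key is optional and falls back to the
// built-in defaults (inspect them with `--save-config -`)
{
  runtime: {
    max_batch_size: 64,                    // default is 128
    kv_cache_free_gpu_mem_fraction: 0.85,  // default is 0.9
  },
  llguidance: {
    log_level: 2,  // stderr verbosity; default is 1
  },
}
```

Several `--config` files can be passed; later files override earlier ones key-by-key (see the merge sketch at the end of this patch).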
diff --git a/llgtrt/Cargo.toml b/llgtrt/Cargo.toml index 3b7ea37..1db729f 100644 --- a/llgtrt/Cargo.toml +++ b/llgtrt/Cargo.toml @@ -23,3 +23,4 @@ rayon = "1.10.0" futures-core = "0.3.30" minijinja = { version = "2.3.1", features = ["preserve_order", "loop_controls", "loader"] } chrono = "0.4.38" +json5 = "0.4.1" diff --git a/llgtrt/chat_templates/llama31.j2 b/llgtrt/chat_templates/llama31.j2 new file mode 100644 index 0000000..0829896 --- /dev/null +++ b/llgtrt/chat_templates/llama31.j2 @@ -0,0 +1,92 @@ +{#- This is adapted from huggingface tokenizer_config.json/chat_template but updated to match #} +{#- https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/prompt_format.md #} +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "21 September 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if not tools is none %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Answer the user's question by making use of the following functions if needed.\n" }} + {{- "If none of the functions can be used, please say so.\n" }} + {{- "Here is a list of functions in JSON format:\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '<|python_tag|>{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython"
%} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} + diff --git a/llgtrt/src/async_exec.rs b/llgtrt/src/async_exec.rs index edbdf92..4fb57f3 100644 --- a/llgtrt/src/async_exec.rs +++ b/llgtrt/src/async_exec.rs @@ -16,7 +16,12 @@ use trtllm_rs::{ TlcLogitsEntry, }; -use crate::{chat::ChatBuilder, config::Config, routes::openai::FinishReason, tokenizer::setup_tokenizer}; +use crate::{ + chat::ChatBuilder, + config::{CliConfig, LlgTrtConfig}, + routes::openai::FinishReason, + tokenizer::setup_tokenizer, +}; pub struct StepResults { pub response: ResponseChunk, @@ -330,7 +335,11 @@ impl AsyncExecutor { self.executor.cancel_request(req_id) } - pub fn new(cli_config: &Config, mut executor_init: ExecutorInit) -> Result<(Self, TokEnv, ChatBuilder)> { + pub fn new( + cli_config: &CliConfig, + config: &LlgTrtConfig, + mut executor_init: ExecutorInit, + ) -> Result<(Self, TokEnv, ChatBuilder)> { executor_init.logits_callback = Some(logits_processor); let max_batch_size = executor_init.trt_params.max_batch_size as usize; log::info!("new executor: max_batch_size={max_batch_size}"); @@ -340,7 +349,7 @@ impl AsyncExecutor { executor.check_mpi(); // only setup tokenizer on rank 0 - let (tok_env, chat_builder) = setup_tokenizer(cli_config)?; + let (tok_env, chat_builder) = setup_tokenizer(cli_config, config)?; let trie = tok_env.tok_trie(); let n_vocab = trie.vocab_size(); diff --git a/llgtrt/src/chat.rs b/llgtrt/src/chat.rs index 7c90278..32fa4d6 100644 --- a/llgtrt/src/chat.rs +++ b/llgtrt/src/chat.rs @@ -1,4 +1,5 @@ use crate::{ + jsonutil, routes::openai::{ChatCompletionMessageContentPart, ChatCompletionMessageParams, Tool}, tokenizer::TokenizerConfig, }; @@ -6,15 +7,6 @@ use anyhow::anyhow; use minijinja::{value::Kwargs, Environment, Error, ErrorKind, Value}; use serde::{Deserialize, Serialize}; -const DEFAULT_TEMPLATE: &str = r#"{{- bos_token }} -{%- for message in messages %} - {{- '<|' + message['role'] + |>\n' }} - {{- message['content'] + eos_token }} -{%- endfor %} -{%- if add_generation_prompt %} - {{- '<|assistant|>\n' }} -{%- endif %}"#; - pub struct ChatBuilder { default_context: TemplateContext, env: Environment<'static>, @@ -48,19 +40,6 @@ fn date_string() -> String { chrono::Utc::now().format("%e %B %Y").to_string() } -fn remove_null(v: &mut serde_json::Value) { - if let Some(map) = v.as_object_mut() { - for (_, v) in map.iter_mut() { - remove_null(v); - } - map.retain(|_, v| !v.is_null()); - } - // remove empty arrays - if let Some(arr) = v.as_array_mut() { - arr.iter_mut().for_each(remove_null); - } -} - fn tojson(value: Value, args: Kwargs) -> Result<Value, Error> { let indent = match args.get::<usize>("indent") { Ok(val) => val, @@ -113,7 +92,7 @@ impl ChatBuilder { let template = config .chat_template .clone() - .unwrap_or_else(|| DEFAULT_TEMPLATE.to_string()); + .expect("chat_template should be set in TokenizerConfig"); log::info!("chat template:\n{}", template); env.add_template_owned("chat", template) .map_err(|e| anyhow!("error parsing chat_template: {}", e))?; @@ -147,7 +126,7 @@ impl ChatBuilder { context.tools = Some(params.tools.clone()); } let mut context = serde_json::to_value(&context)?; - remove_null(&mut context); + jsonutil::remove_null(&mut context); let r = self
.env .get_template("chat") diff --git a/llgtrt/src/config.rs b/llgtrt/src/config.rs index b1ee8a3..075e93f 100644 --- a/llgtrt/src/config.rs +++ b/llgtrt/src/config.rs @@ -1,45 +1,76 @@ -use clap::{Args, Parser}; +use clap::Parser; use serde::{Deserialize, Serialize}; -const TRT_CONFIG: &str = "TensorRT-LLM runtime config (runtime.json)"; +use crate::{constraint_mgr::LlgConfig, tokenizer::TokenizerConfig}; -#[derive(Args, Debug, Serialize, Deserialize)] +const CONFIG_INFO: &str = include_str!("config_info.json"); +pub fn config_info() -> serde_json::Value { + serde_json::from_str(CONFIG_INFO).unwrap() +} + +#[derive(Debug, Serialize, Deserialize)] pub struct TrtLlmRuntimeConfig { - /// When set to true, the scheduler is more conservative, so that a started request is never evicted; defaults to false (which improves throughput) - #[clap(long, help_heading = TRT_CONFIG)] - pub guaranteed_no_evict: Option<bool>, + /// Make the scheduler more conservative, so that a started request is never evicted. + /// Defaults to false (which improves throughput) + pub guaranteed_no_evict: bool, + + /// Maximum number of concurrent requests + pub max_batch_size: usize, + + /// Maximum number of tokens in batch + pub max_num_tokens: usize, - /// Maximum number of concurrent requests; defaults to 128 - #[clap(long, help_heading = TRT_CONFIG)] - pub max_batch_size: Option<usize>, + /// Maximum number of requests in queue (when batch already full) + pub max_queue_size: usize, - /// Maximum number of tokens in batch; defaults to 8192 - #[clap(long, help_heading = TRT_CONFIG)] - pub max_num_tokens: Option<usize>, + /// Chunk prefill/generation into pieces + /// Defaults to true (unlike trtllm) + pub enable_chunked_context: bool, - /// Maximum number of requests in queue (when batch already full); defaults to 0 - #[clap(long, help_heading = TRT_CONFIG)] - pub max_queue_size: Option<usize>, + /// Prefix-caching (LRU-reuse blocks between requests) + /// Defaults to true (unlike trtllm) + pub enable_kv_cache_reuse: bool, + + /// Fraction of free GPU memory to use for KV cache + pub kv_cache_free_gpu_mem_fraction: f32, + + /// Host memory to use for KV cache + pub kv_cache_host_memory_megabytes: usize, +} - /// Chunk prefill/generation into pieces; defaults to true (unlike trtllm) - #[clap(long, help_heading = TRT_CONFIG)] - pub enable_chunked_context: Option<bool>, +impl Default for TrtLlmRuntimeConfig { + fn default() -> Self { + Self { + guaranteed_no_evict: false, + max_batch_size: 128, + max_num_tokens: 8192, + max_queue_size: 0, + enable_chunked_context: true, + enable_kv_cache_reuse: true, + kv_cache_free_gpu_mem_fraction: 0.9, + kv_cache_host_memory_megabytes: 0, + } + } +} - /// Prefix-caching (LRU-reuse blocks between requests); defaults to true (unlike trtllm) - #[clap(long, help_heading = TRT_CONFIG)] - pub enable_kv_cache_reuse: Option<bool>, +#[derive(Debug, Serialize, Deserialize, Default)] +pub struct LlgTrtConfig { + /// TensorRT-LLM runtime parameters + /// Defaults should be reasonable, otherwise see + /// https://nvidia.github.io/TensorRT-LLM/performance/perf-best-practices.html + pub runtime: TrtLlmRuntimeConfig, - /// Fraction of free GPU memory to use for KV cache; defaults to 0.9 - #[clap(long, help_heading = TRT_CONFIG)] - pub kv_cache_free_gpu_mem_fraction: Option<f32>, + /// Tokenizer configuration (defaults to tokenizer_config.json contents) + /// Typically no changes are needed here, except for chat_template + /// which is best overridden with --chat-template filename.j2 option.
+ pub tokenizer: TokenizerConfig, - /// Host memory to use for KV cache; defaults to 0 - #[clap(long, help_heading = TRT_CONFIG)] - pub kv_cache_host_memory_megabytes: Option<usize>, + /// Configuration for the LLGuidance constraint library + pub llguidance: LlgConfig, } #[derive(Parser, Debug, Serialize, Deserialize)] -pub struct Config { +pub struct CliConfig { /// Host to bind to #[arg(long, short = 'H', default_value_t = String::from("0.0.0.0"))] pub host: String, @@ -56,13 +87,23 @@ #[arg(long, short = 'T')] pub tokenizer: Option<String>, - /// Path to JSON file TensorRT-LLM runtime config; defaults to runtime.json in engine dir - #[arg(long, short = 'R')] - pub runtime_config: Option<String>, + /// Path to JSON5 configuration file; multiple files are JSON-merged in order; defaults to: + /// <engine_dir>/llgtrt.json5 if it exists + #[arg(long, short = 'C')] + pub config: Vec<String>, + + /// Path to chat template file; defaults to <engine_dir>/chat_template.j2 if it exists + /// Overrides values in all configs. + #[arg(long)] + pub chat_template: Option<String>, - /// Path to JSON file with llguidance library config; defaults to llguidance.json in engine dir - #[arg(long, short = 'L')] - pub llguidance_config: Option<String>, + /// When present, save the merged configuration to this file and exit; use '-' for stdout + #[arg(long)] + pub save_config: Option<String>, + + /// Similar to --save-config, but includes chat template and tokenizer config + #[arg(long)] + pub save_complete_config: Option<String>, /// Debug output #[arg(long, short = 'd')] pub debug: bool, @@ -72,9 +113,6 @@ #[arg(long, short = 'q')] pub quiet: bool, - #[clap(flatten)] - pub runtime_config_inline: TrtLlmRuntimeConfig, - /// Api Key to access the server #[arg(long)] #[serde(skip_serializing_if = "Option::is_none")] diff --git a/llgtrt/src/config_info.json b/llgtrt/src/config_info.json new file mode 100644 index 0000000..028b2c6 --- /dev/null +++ b/llgtrt/src/config_info.json @@ -0,0 +1,57 @@ +{ + "##info##": "Use scripts/regen.sh to re-generate this file", + "runtime": { + "#": "TensorRT-LLM runtime parameters\nDefaults should be reasonable, otherwise see\nhttps://nvidia.github.io/TensorRT-LLM/performance/perf-best-practices.html", + "guaranteed_no_evict": { + "#": "Make the scheduler more conservative, so that a started request is never evicted.\nDefaults to false (which improves throughput)" + }, + "max_batch_size": { + "#": "Maximum number of concurrent requests" + }, + "max_num_tokens": { + "#": "Maximum number of tokens in batch" + }, + "max_queue_size": { + "#": "Maximum number of requests in queue (when batch already full)" + }, + "enable_chunked_context": { + "#": "Chunk prefill/generation into pieces\nDefaults to true (unlike trtllm)" + }, + "enable_kv_cache_reuse": { + "#": "Prefix-caching (LRU-reuse blocks between requests)\nDefaults to true (unlike trtllm)" + }, + "kv_cache_free_gpu_mem_fraction": { + "#": "Fraction of free GPU memory to use for KV cache" + }, + "kv_cache_host_memory_megabytes": { + "#": "Host memory to use for KV cache" + } + }, + "tokenizer": { + "#": "Tokenizer configuration (defaults to tokenizer_config.json contents)\nTypically no changes are needed here, except for chat_template\nwhich is best overridden with --chat-template filename.j2 option."
+ }, + "llguidance": { + "#": "Configuration for the LLGuidance constraint library", + "limits": { + "#": "Override any of the parser limits.", + "max_items_in_row": { + "#": "For non-ambiguous grammars, this is the maximum \"branching factor\" of the grammar.\nFor ambiguous grammars, this might get hit much quicker.\nDefault: 200" + }, + "initial_lexer_fuel": { + "#": "How much \"fuel\" are we willing to spend to build initial lexer regex AST nodes.\nDefault: 1_000_000 (~20ms)" + }, + "step_lexer_fuel": { + "#": "Maximum lexer fuel for computation of the whole token mask.\nDefault: 500_000 (~10ms)" + }, + "max_lexer_states": { + "#": "Maximum number of lexer states.\nDefault: 10_000" + }, + "max_grammar_size": { + "#": "Maximum size of the grammar (symbols in productions)\nDefault: 500_000 (a few megabytes of JSON)" + } + }, + "log_level": { + "#": "Log level which goes to stderr. In-memory logs per-sequence are managed by ConstraintInit.log_level." + } + } +} \ No newline at end of file diff --git a/llgtrt/src/constraint_mgr.rs b/llgtrt/src/constraint_mgr.rs index 63aa7df..63d04f0 100644 --- a/llgtrt/src/constraint_mgr.rs +++ b/llgtrt/src/constraint_mgr.rs @@ -13,7 +13,16 @@ pub struct LlgConfig { pub limits: ParserLimits, /// Log level which goes to stderr. In-memory logs per-sequence are managed by ConstraintInit.log_level. - pub log_level: Option<u32>, + pub log_level: u32, +} + +impl Default for LlgConfig { + fn default() -> Self { + Self { + limits: ParserLimits::default(), + log_level: 1, + } + } } pub struct ConstraintInit { @@ -31,23 +40,7 @@ pub struct ConstraintMgr { } impl ConstraintMgr { - pub fn new( - tok_env: TokEnv, - chat_tok_env: TokEnv, - mut config: serde_json::Value, - ) -> Result<ConstraintMgr> { - let defl_limits = serde_json::to_value(ParserLimits::default()).unwrap(); - if let Some(obj) = config["limits"].as_object_mut() { - for (k, v) in defl_limits.as_object().unwrap() { - if !obj.contains_key(k) { - obj.insert(k.clone(), v.clone()); - } - } - } else { - config["limits"] = defl_limits; - } - let config: LlgConfig = serde_json::from_value(config)?; - + pub fn new(tok_env: TokEnv, chat_tok_env: TokEnv, config: &LlgConfig) -> Result<ConstraintMgr> { Ok(ConstraintMgr { tok_env, chat_tok_env, backtrack: false, // unlikely ..Default::default() }, - parser_limits: config.limits, - log_stderr_level: config.log_level.unwrap_or(1), + parser_limits: config.limits.clone(), + log_stderr_level: config.log_level, }) } diff --git a/llgtrt/src/jsonutil.rs b/llgtrt/src/jsonutil.rs new file mode 100644 index 0000000..1369cd5 --- /dev/null +++ b/llgtrt/src/jsonutil.rs @@ -0,0 +1,101 @@ +use serde_json::Value; + +pub fn remove_null(v: &mut serde_json::Value) { + if let Some(map) = v.as_object_mut() { + for (_, v) in map.iter_mut() { + remove_null(v); + } + map.retain(|_, v| !v.is_null()); + } + // remove empty arrays + if let Some(arr) = v.as_array_mut() { + arr.iter_mut().for_each(remove_null); + } +} + +pub fn json_merge(a: &mut Value, b: &Value) { + match (a, b) { + (Value::Object(a), Value::Object(b)) => { + for (k, v) in b.iter() { + json_merge(a.entry(k.clone()).or_insert(Value::Null), v); + } + } + (a, b) => *a = b.clone(), + } +} + +fn write_indent(indent: usize, dst: &mut String) { + for _ in 0..indent { + dst.push(' '); + } +} + +fn write_comment(indent: usize, dst: &mut String, comment: Option<&str>) { + if let Some(comment) = comment { + for line in comment.lines() { + write_indent(indent, dst); + dst.push_str("/// "); + dst.push_str(line); + dst.push('\n'); + } + }
+} + +const INDENT_LEVEL: usize = 2; + +fn same_default(v: &Value, default: &Value) -> bool { + match default { + Value::Object(_) | Value::Array(_) => false, + _ => v == default, + } +} + +fn json5_write(indent: usize, dst: &mut String, v: &Value, default: &Value, info: &Value) { + match v { + Value::Object(map) => { + if map.is_empty() { + dst.push_str("{}"); + return; + } + dst.push_str("{"); + for (k, v) in map.iter() { + dst.push_str("\n"); + write_comment(indent + INDENT_LEVEL, dst, info[k]["#"].as_str()); + write_indent(indent + INDENT_LEVEL, dst); + if same_default(v, &default[k]) { + dst.push_str("//"); + } + dst.push_str(&serde_json::to_string_pretty(k).unwrap()); + dst.push_str(": "); + json5_write(indent + INDENT_LEVEL, dst, v, &default[k], &info[k]); + dst.push_str(",\n"); + } + write_indent(indent, dst); + dst.push('}'); + } + Value::Array(arr) => { + if arr.is_empty() { + dst.push_str("[]"); + return; + } + dst.push_str("[\n"); + for v in arr.iter() { + write_indent(indent + INDENT_LEVEL, dst); + json5_write(indent + INDENT_LEVEL, dst, v, &Value::Null, info); + dst.push_str(",\n"); + } + write_indent(indent, dst); + dst.push(']'); + } + Value::String(_) | Value::Number(_) | Value::Bool(_) | Value::Null => { + dst.push_str(&serde_json::to_string_pretty(v).unwrap()); + } + } +} + +pub fn json5_to_string(v: &Value, default: &Value, info: &Value) -> String { + let mut dst = String::new(); + write_comment(0, &mut dst, info["#"].as_str()); + json5_write(0, &mut dst, v, default, info); + dst +} diff --git a/llgtrt/src/lib.rs b/llgtrt/src/lib.rs index f1d67e6..016c126 100644 --- a/llgtrt/src/lib.rs +++ b/llgtrt/src/lib.rs @@ -7,4 +7,5 @@ pub mod startup; pub mod state; mod async_exec; pub mod logging; -mod constraint_mgr; \ No newline at end of file +mod constraint_mgr; +pub mod jsonutil; \ No newline at end of file diff --git a/llgtrt/src/logging.rs b/llgtrt/src/logging.rs index 55b2dd7..f5aa880 100644 --- a/llgtrt/src/logging.rs +++ b/llgtrt/src/logging.rs @@ -82,13 +82,13 @@ pub fn init_log(mode: LogMode) -> Result<()> { let logger = match mode { LogMode::Normal => Logger::try_with_env_or_str("info,tokenizers=error")? .format(truncated_format) - .log_to_stdout(), + .log_to_stderr(), LogMode::Test => { Logger::try_with_env_or_str("debug,tokenizers=error")?.write_mode(WriteMode::SupportCapture) } LogMode::Daemon => Logger::try_with_env_or_str("info,tokenizers=error")? 
.format(daemon_format) - .log_to_stdout(), + .log_to_stderr(), }; logger.start()?; diff --git a/llgtrt/src/main.rs b/llgtrt/src/main.rs index bc5a59c..03bf7f8 100644 --- a/llgtrt/src/main.rs +++ b/llgtrt/src/main.rs @@ -2,12 +2,12 @@ use std::env; use clap::Parser; -use llgtrt::config::Config; +use llgtrt::config::CliConfig; use llgtrt::startup; #[tokio::main] async fn main() -> anyhow::Result<()> { - let config = Config::parse(); + let config = CliConfig::parse(); if config.debug { env::set_var("RUST_LOG", "debug,tokenizers=error"); diff --git a/llgtrt/src/startup.rs b/llgtrt/src/startup.rs index 39e9709..789536f 100644 --- a/llgtrt/src/startup.rs +++ b/llgtrt/src/startup.rs @@ -1,6 +1,6 @@ -use std::fmt::Debug; use std::sync::Arc; +use anyhow::anyhow; use axum::body::Body; use axum::http::{Request, StatusCode}; use axum::middleware::{self, Next}; @@ -10,10 +10,11 @@ use axum::Router; use trtllm_rs::{ClientReqId, ExecutorInit, RequestInit, RequestParams}; use crate::async_exec::AsyncExecutor; -use crate::config::{Config, TrtLlmRuntimeConfig}; +use crate::config::{config_info, CliConfig, LlgTrtConfig}; use crate::constraint_mgr::ConstraintMgr; -use crate::routes; +use crate::jsonutil::json5_to_string; use crate::state::AppState; +use crate::{jsonutil, routes}; async fn auth_middleware( req: Request<Body>, next: Next, @@ -34,66 +35,97 @@ } } -fn load_config_file<T>( - name0: &Option<String>, - default_name: String, - cli_args: serde_json::Value, -) -> anyhow::Result<T> -where - T: serde::de::DeserializeOwned + Debug, -{ - let name = name0.as_ref().unwrap_or(&default_name); - let mut json: serde_json::Value = if name0.is_some() || std::fs::exists(name).unwrap_or(false) { - log::info!("Loading config from {}", name); - let s = std::fs::read_to_string(name)?; - serde_json::from_str(&s)? - } else { - log::info!("Config file {} not found, using defaults", name); - serde_json::json!({}) +pub async fn run_server(mut cli_config: CliConfig) -> anyhow::Result<()> { + let mut exec_config = ExecutorInit { + engine_path: cli_config.engine.clone(), + logits_callback: None, + trt_params: Default::default(), }; - for (k, v) in cli_args.as_object().unwrap() { - if v.is_null() { - continue; + let defl_config_path = format!("{}/llgtrt.json5", cli_config.engine); + if cli_config.config.is_empty() { + if std::fs::exists(&defl_config_path).unwrap_or(false) { + log::info!("Using default config file {}", defl_config_path); + cli_config.config.push(defl_config_path); + } else { + log::info!( + "No config files specified and default config file {} not found", + defl_config_path ); } - json.as_object_mut().unwrap().insert(k.clone(), v.clone()); } - let r = serde_json::from_value(json) - .map_err(|e| anyhow::anyhow!("Error parsing config file {}: {}", name, e))?; - log::info!("Loaded config: {:?}", r); - Ok(r) -} + let mut config = LlgTrtConfig::default(); -pub async fn run_server(cli_config: Config) -> anyhow::Result<()> { - let mut exec_config = ExecutorInit { - engine_path: cli_config.engine.clone(), - logits_callback: None, - trt_params: Default::default(), - }; + if cli_config.save_config.is_some() { + log::info!("Skipping tokenizer config load"); + } else { + let tokenizer_folder = cli_config.tokenizer.as_ref().unwrap_or(&cli_config.engine); + let tokenizer_config = format!("{}/tokenizer_config.json", tokenizer_folder); + log::info!("Loading tokenizer config from {:?}", tokenizer_config); + config.tokenizer = serde_json::from_reader(std::fs::File::open(tokenizer_config)?)
+ .map_err(|e| anyhow!("error loading tokenizer_config.json: {}", e))?; + } - let runtime_config: TrtLlmRuntimeConfig = load_config_file( - &cli_config.runtime_config, - format!("{}/runtime.json", cli_config.engine), - serde_json::to_value(&cli_config.runtime_config_inline)?, - )?; + let mut config = serde_json::to_value(&config)?; + + for file_name in &cli_config.config { + log::info!("Loading JSON5 config from {:?}", file_name); + let file_content = std::fs::read_to_string(&file_name) + .map_err(|e| anyhow!("Error reading config file {}: {}", file_name, e))?; + let patch = json5::from_str::(&file_content) + .map_err(|e| anyhow!("Error in JSON5 in {}: {}", file_name, e))?; + jsonutil::json_merge(&mut config, &patch); + } + + let mut config: LlgTrtConfig = + serde_json::from_value(config).map_err(|e| anyhow!("Error interpreting config: {}", e))?; + + if cli_config.save_config.is_some() { + log::info!("Skipping separate chat template load"); + } else { + let chat_template = cli_config + .chat_template + .clone() + .unwrap_or_else(|| format!("{}/chat_template.j2", cli_config.engine)); + log::info!("Checking for separate chat template in {:?}", chat_template); + if std::fs::exists(&chat_template)? { + config.tokenizer.chat_template = Some(std::fs::read_to_string(chat_template)?); + } + } + if let Some(filename) = cli_config + .save_config + .as_ref() + .or(cli_config.save_complete_config.as_ref()) + { + let r = json5_to_string( + &serde_json::to_value(&config)?, + &serde_json::to_value(&LlgTrtConfig::default())?, + &config_info(), + ); + if filename == "-" { + log::info!("Printing merged config to stdout"); + println!("{}", r); + } else { + log::info!("Saving merged config to {}", filename); + std::fs::write(filename, r)?; + } + return Ok(()); + } + + let runtime_config = &config.runtime; let p = &mut exec_config.trt_params; macro_rules! 
set_field { ($fld:ident) => { - if let Some(v) = runtime_config.$fld { - p.$fld = v - .try_into() - .expect(concat!("Invalid value for ", stringify!($fld))); - } + p.$fld = runtime_config + .$fld + .try_into() + .expect(concat!("Invalid value for ", stringify!($fld))); }; } - // we default these to true - p.enable_chunked_context = true; - p.enable_kv_cache_reuse = true; - set_field!(enable_chunked_context); set_field!(enable_kv_cache_reuse); set_field!(max_batch_size); @@ -101,24 +133,15 @@ set_field!(max_queue_size); set_field!(guaranteed_no_evict); set_field!(kv_cache_free_gpu_mem_fraction); - - if let Some(v) = runtime_config.kv_cache_host_memory_megabytes { - p.kv_cache_host_memory_bytes = v * 1024 * 1024; - } + p.kv_cache_host_memory_bytes = runtime_config.kv_cache_host_memory_megabytes * 1024 * 1024; log::info!("Initializing executor with config: {:?}", exec_config); - let (executor, tok_env, chat_builder) = AsyncExecutor::new(&cli_config, exec_config)?; + let (executor, tok_env, chat_builder) = AsyncExecutor::new(&cli_config, &config, exec_config)?; // we only get here on rank 0 - let llg_config: serde_json::Value = load_config_file( - &cli_config.llguidance_config, - format!("{}/llguidance.json", cli_config.engine), - serde_json::json!({}), - )?; - - let constraint_mgr = ConstraintMgr::new(tok_env.clone(), tok_env.clone(), llg_config)?; + let constraint_mgr = ConstraintMgr::new(tok_env.clone(), tok_env.clone(), &config.llguidance)?; AsyncExecutor::set_global(executor); diff --git a/llgtrt/src/tokenizer.rs b/llgtrt/src/tokenizer.rs index fbe79c3..a8fd72f 100644 --- a/llgtrt/src/tokenizer.rs +++ b/llgtrt/src/tokenizer.rs @@ -1,15 +1,23 @@ -use crate::{chat::ChatBuilder, config::Config}; -use anyhow::{anyhow, ensure}; +use crate::{ + chat::ChatBuilder, + config::{CliConfig, LlgTrtConfig}, +}; +use anyhow::ensure; use serde::{Deserialize, Serialize}; -use serde_json::Value; -use std::{collections::HashMap, sync::Arc}; +use std::sync::Arc; use toktrie::{TokEnv, TokEnvWithTrie}; +const DEFAULT_TEMPLATE: &str = r#"{{- bos_token }} +{%- for message in messages %} + {{- '<|' + message['role'] + '|>\n' }} + {{- message['content'] + eos_token }} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|assistant|>\n' }} +{%- endif %}"#; + #[derive(Debug, Serialize, Deserialize)] pub struct TokenizerConfig { - #[serde(default)] - pub added_tokens_decoder: HashMap<String, TokenProperties>, - pub chat_template: Option<String>, #[serde(default)] @@ -24,50 +32,27 @@ pub mask_token: Option<String>, } -#[derive(Debug, Serialize, Deserialize)] -pub struct TokenProperties { - pub content: String, - #[serde(default)] - pub lstrip: bool, - #[serde(default)] - pub normalized: bool, - #[serde(default)] - pub rstrip: bool, - #[serde(default)] - pub single_word: bool, - #[serde(default)] - pub special: bool, -} - -pub fn setup_tokenizer(cli_config: &Config) -> anyhow::Result<(TokEnv, ChatBuilder)> { - let tokenizer_folder = cli_config.tokenizer.as_ref().unwrap_or(&cli_config.engine); - let tokenizer_config = format!("{}/tokenizer_config.json", tokenizer_folder); - log::info!("Loading tokenizer config from {:?}", tokenizer_config); - let mut tok_cfg: TokenizerConfig = - serde_json::from_reader(std::fs::File::open(tokenizer_config)?)
- .map_err(|e| anyhow!("error loading tokenizer_config.json: {}", e))?; - - let tokenizer_config_llg = format!("{}/tokenizer_config_llgtrt.json", tokenizer_folder); - log::info!("Checking for overrides in {:?}", tokenizer_config_llg); - if std::fs::exists(&tokenizer_config_llg)? { - let mut json = serde_json::to_value(&tok_cfg)?; - let overrides: Value = serde_json::from_reader(std::fs::File::open(tokenizer_config_llg)?) - .map_err(|e| anyhow!("JSON error in tokenizer_config_llgtrt.json: {}", e))?; - for (k, v) in overrides.as_object().expect("overrides must be an object") { - if v.is_null() { - continue; - } - json.as_object_mut().unwrap().insert(k.clone(), v.clone()); +impl Default for TokenizerConfig { + fn default() -> Self { + Self { + chat_template: Some(DEFAULT_TEMPLATE.to_string()), + clean_up_tokenization_spaces: false, + eos_token: "".to_string(), + bos_token: None, + unk_token: None, + sep_token: None, + pad_token: None, + cls_token: None, + mask_token: None, } - tok_cfg = serde_json::from_value(json) - .map_err(|e| anyhow!("error applying tokenizer_config_llgtrt.json: {}", e))?; } +} - let chat_template = format!("{}/chat_template.j2", tokenizer_folder); - log::info!("Checking for separate chat template in {:?}", chat_template); - if std::fs::exists(&chat_template)? { - tok_cfg.chat_template = Some(std::fs::read_to_string(chat_template)?); - } +pub fn setup_tokenizer( + cli_config: &CliConfig, + config: &LlgTrtConfig, +) -> anyhow::Result<(TokEnv, ChatBuilder)> { + let tokenizer_folder = cli_config.tokenizer.as_ref().unwrap_or(&cli_config.engine); let tokenizer = format!("{}/tokenizer.json", tokenizer_folder); log::info!("Loading tokenizer from {:?}", tokenizer); @@ -76,6 +61,7 @@ pub fn setup_tokenizer(cli_config: &Config) -> anyhow::Result<(TokEnv, ChatBuild let trie = tok_env.tok_trie(); let mut info = trie.info().clone(); + let tok_cfg = &config.tokenizer; let toks = tok_env.tokenize_special(&tok_cfg.eos_token); ensure!( toks.len() == 1, diff --git a/llguidance b/llguidance index 771cbf0..4b3645c 160000 --- a/llguidance +++ b/llguidance @@ -1 +1 @@ -Subproject commit 771cbf046f0cd0262ec4e878d584df9383d9466f +Subproject commit 4b3645c71bead19f24a22d4534a06b2bd39aa418 diff --git a/scripts/collect-comments.py b/scripts/collect-comments.py new file mode 100644 index 0000000..06fc793 --- /dev/null +++ b/scripts/collect-comments.py @@ -0,0 +1,99 @@ +import re +import os +import json +import sys + +# Regular expressions for capturing struct definitions and their fields with comments +struct_regex = re.compile(r"pub struct (\w+) \{(.*?)^\s*\}", re.DOTALL | re.MULTILINE) +field_regex = re.compile(r"((?:\s*///\s*.*?\n)+)\s*(pub\s+)?(\w+):\s+([\w:<>]+),", re.DOTALL) + +def extract_structs_from_rust_file(file_content): + structs = {} + + # Iterate over each struct in the file + for struct_match in struct_regex.finditer(file_content): + struct_name = struct_match.group(1) + struct_body = struct_match.group(2) + + fields = {} + + # Iterate over each field in the struct body + for field_match in field_regex.finditer(struct_body): + raw_comment = field_match.group(1).strip() + field_name = field_match.group(3).strip() + field_type = field_match.group(4).strip() + + # Join multiple lines of `///` comments with newline + comment = "\n".join([line.strip()[3:].strip() for line in raw_comment.splitlines()]) + + fields[field_name] = {"#": comment, "type": field_type} + + structs[struct_name] = fields + return structs + +def resolve_struct_recursive(struct_name, structs_metadata): + if 
struct_name not in structs_metadata: + return {} + + resolved_fields = {} + fields = structs_metadata[struct_name] + + for field_name, field_metadata in fields.items(): + comment = field_metadata["#"] + field_type = field_metadata["type"] + + # Check if field type matches another struct, if so, recurse + if field_type in structs_metadata: + resolved_fields[field_name] = { + "#": comment, + **resolve_struct_recursive(field_type, structs_metadata) + } + else: + resolved_fields[field_name] = {"#": comment} + + return resolved_fields + +def process_rust_files(file_list): + structs_metadata = {} + for file_path in file_list: + if os.path.exists(file_path) and file_path.endswith(".rs"): + with open(file_path, 'r', encoding='utf-8') as f: + file_content = f.read() + structs_in_file = extract_structs_from_rust_file(file_content) + structs_metadata.update(structs_in_file) + else: + print(f"Warning: {file_path} not found or not a Rust file.") + return structs_metadata + +def main(): + # Get the list of Rust files from command-line arguments + if len(sys.argv) < 3: + print("Usage: python script.py <root_struct_name> <rust_file1> [<rust_file2> ...]") + sys.exit(1) + + root_struct_name = sys.argv[1] + rust_files = sys.argv[2:] + + # Process the Rust files and extract struct metadata + metadata = process_rust_files(rust_files) + + if root_struct_name not in metadata: + print(f"Error: Struct '{root_struct_name}' not found in the provided files.") + sys.exit(1) + + # Start the recursive resolution from the root struct + resolved_metadata = resolve_struct_recursive(root_struct_name, metadata) + resolved_metadata = { + "##info##": "Use scripts/regen.sh to re-generate this file", + **resolved_metadata + } + + # Output the metadata as JSON + output_file = "llgtrt/src/config_info.json" + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(resolved_metadata, f, indent=1) + + print(f"Metadata written to {output_file}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/regen.sh b/scripts/regen.sh new file mode 100755 index 0000000..531dfb3 --- /dev/null +++ b/scripts/regen.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +cd "$(dirname "$0")/.." +python scripts/collect-comments.py LlgTrtConfig llgtrt/src/*.rs llguidance/parser/src/*.rs
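A note on the merge semantics used by `--config`: later files win key-by-key, nested objects merge recursively, and scalars are overwritten wholesale. The standalone sketch below mirrors `jsonutil::json_merge` from this patch; the config contents are made up for illustration:

```rust
// Mirrors jsonutil::json_merge above; the two config snippets are illustrative.
use serde_json::Value;

fn json_merge(a: &mut Value, b: &Value) {
    match (a, b) {
        // objects merge key-by-key, recursing into shared keys
        (Value::Object(a), Value::Object(b)) => {
            for (k, v) in b {
                json_merge(a.entry(k.clone()).or_insert(Value::Null), v);
            }
        }
        // anything else (scalars, arrays) is replaced wholesale
        (a, b) => *a = b.clone(),
    }
}

fn main() {
    // JSON5 allows unquoted keys, comments and trailing commas
    let base: Value =
        json5::from_str("{ runtime: { max_batch_size: 128, enable_chunked_context: true } }")
            .unwrap();
    let over: Value = json5::from_str("{ runtime: { max_batch_size: 64 } }").unwrap();
    let mut cfg = base;
    json_merge(&mut cfg, &over);
    assert_eq!(cfg["runtime"]["max_batch_size"], 64); // overridden by the later file
    assert_eq!(cfg["runtime"]["enable_chunked_context"], true); // preserved from the earlier file
}
```

In particular, passing `-C base.json5 -C override.json5` applies `override.json5` last, so it takes precedence over both `base.json5` and the built-in defaults.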