From 0196f14b9049cfcedc669bc54cd1e0167d1a5205 Mon Sep 17 00:00:00 2001 From: Pedro Valente Date: Fri, 5 Apr 2024 13:29:17 +0100 Subject: [PATCH 1/2] fix compilation issues --- crates/llama_cpp/src/model/mod.rs | 18 +++++++++--------- crates/llama_cpp_sys/build.rs | 1 + 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/crates/llama_cpp/src/model/mod.rs b/crates/llama_cpp/src/model/mod.rs index e1375c8..4c89e67 100644 --- a/crates/llama_cpp/src/model/mod.rs +++ b/crates/llama_cpp/src/model/mod.rs @@ -18,9 +18,9 @@ use llama_cpp_sys::{ ggml_row_size, llama_context, llama_context_params, llama_decode, llama_free_model, llama_get_embeddings_ith, llama_get_embeddings_seq, llama_kv_cache_clear, llama_load_model_from_file, llama_model, llama_model_meta_val_str, llama_n_ctx_train, - llama_n_embd, llama_n_vocab, llama_new_context_with_model, llama_token_bos, llama_token_eos, - llama_token_eot, llama_token_get_text, llama_token_middle, llama_token_nl, llama_token_prefix, - llama_token_suffix, llama_token_to_piece, llama_tokenize, + llama_n_embd, llama_n_vocab, llama_new_context_with_model, llama_token, llama_token_bos, + llama_token_eos, llama_token_eot, llama_token_get_text, llama_token_middle, llama_token_nl, + llama_token_prefix, llama_token_suffix, llama_token_to_piece, llama_tokenize, }; pub use params::*; @@ -301,9 +301,9 @@ impl LlamaModel { // `out_buf` is a `Vec`, and `Token` is `#[repr(transparent)]` over an `i32`. llama_tokenize( **model_lock, - content.as_ptr() as *const i8, + content.as_ptr() as *const c_char, content.len() as i32, - out_buf.as_mut_ptr() as *mut i32, + out_buf.as_mut_ptr() as *mut llama_token, out_buf.capacity() as i32, add_bos, special, @@ -376,7 +376,7 @@ impl LlamaModel { llama_token_to_piece( **model_lock, token.0, - buffer.as_mut_ptr() as *mut i8, + buffer.as_mut_ptr() as *mut c_char, std::os::raw::c_int::from(initial_size), ) }; @@ -390,7 +390,7 @@ impl LlamaModel { llama_token_to_piece( **model_lock, token.0, - buffer.as_mut_ptr() as *mut i8, + buffer.as_mut_ptr() as *mut c_char, std::os::raw::c_int::from(buffer.len() as i32), ) }; @@ -431,10 +431,10 @@ impl LlamaModel { // SAFETY: Casting `*mut u8` to `*mut i8` is safe because `u8` and // `i8` have the same size and alignment. The length of token_buf is // accurate for this reason. - llama_cpp_sys::llama_token_to_piece( + llama_token_to_piece( **model_lock, t.0, - token_buf.as_mut_ptr() as *mut i8, + token_buf.as_mut_ptr() as *mut c_char, token_buf.len() as i32, ) }; diff --git a/crates/llama_cpp_sys/build.rs b/crates/llama_cpp_sys/build.rs index 5b6b3b8..3fd60a6 100644 --- a/crates/llama_cpp_sys/build.rs +++ b/crates/llama_cpp_sys/build.rs @@ -429,6 +429,7 @@ fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'stati let mut nvcc = featless_cxx; nvcc.cuda(true) + .std("c++14") .flag("--forward-unknown-to-host-compiler") .flag("-arch=all") .define("K_QUANTS_PER_ITERATION", Some("2")) From 3a47e7f5c9b7684c91db75cf7424b49914a7fb83 Mon Sep 17 00:00:00 2001 From: Pedro Valente Date: Fri, 5 Apr 2024 13:53:05 +0100 Subject: [PATCH 2/2] fix compilation issues 2 --- crates/llama_cpp_sys/build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/llama_cpp_sys/build.rs b/crates/llama_cpp_sys/build.rs index 3fd60a6..c1e0744 100644 --- a/crates/llama_cpp_sys/build.rs +++ b/crates/llama_cpp_sys/build.rs @@ -429,7 +429,7 @@ fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'stati let mut nvcc = featless_cxx; nvcc.cuda(true) - .std("c++14") + .std("c++17") .flag("--forward-unknown-to-host-compiler") .flag("-arch=all") .define("K_QUANTS_PER_ITERATION", Some("2"))