Commit

Merge pull request #71 from edgenai/fix/compilation-issues
Fix/compilation issues
 - should fix issues in arm64 Linux builds and Linux builds using CUDA
pedro-devv authored Apr 5, 2024
2 parents 27e5648 + 3a47e7f commit 2640bba
Showing 2 changed files with 10 additions and 9 deletions.
18 changes: 9 additions & 9 deletions crates/llama_cpp/src/model/mod.rs
@@ -18,9 +18,9 @@ use llama_cpp_sys::{
     ggml_row_size, llama_context, llama_context_params, llama_decode, llama_free_model,
     llama_get_embeddings_ith, llama_get_embeddings_seq, llama_kv_cache_clear,
     llama_load_model_from_file, llama_model, llama_model_meta_val_str, llama_n_ctx_train,
-    llama_n_embd, llama_n_vocab, llama_new_context_with_model, llama_token_bos, llama_token_eos,
-    llama_token_eot, llama_token_get_text, llama_token_middle, llama_token_nl, llama_token_prefix,
-    llama_token_suffix, llama_token_to_piece, llama_tokenize,
+    llama_n_embd, llama_n_vocab, llama_new_context_with_model, llama_token, llama_token_bos,
+    llama_token_eos, llama_token_eot, llama_token_get_text, llama_token_middle, llama_token_nl,
+    llama_token_prefix, llama_token_suffix, llama_token_to_piece, llama_tokenize,
 };
 pub use params::*;
 
@@ -301,9 +301,9 @@ impl LlamaModel {
             // `out_buf` is a `Vec<Token>`, and `Token` is `#[repr(transparent)]` over an `i32`.
             llama_tokenize(
                 **model_lock,
-                content.as_ptr() as *const i8,
+                content.as_ptr() as *const c_char,
                 content.len() as i32,
-                out_buf.as_mut_ptr() as *mut i32,
+                out_buf.as_mut_ptr() as *mut llama_token,
                 out_buf.capacity() as i32,
                 add_bos,
                 special,
@@ -376,7 +376,7 @@ impl LlamaModel {
             llama_token_to_piece(
                 **model_lock,
                 token.0,
-                buffer.as_mut_ptr() as *mut i8,
+                buffer.as_mut_ptr() as *mut c_char,
                 std::os::raw::c_int::from(initial_size),
             )
         };
@@ -390,7 +390,7 @@ impl LlamaModel {
             llama_token_to_piece(
                 **model_lock,
                 token.0,
-                buffer.as_mut_ptr() as *mut i8,
+                buffer.as_mut_ptr() as *mut c_char,
                 std::os::raw::c_int::from(buffer.len() as i32),
             )
         };
@@ -431,10 +431,10 @@ impl LlamaModel {
                 // SAFETY: Casting `*mut u8` to `*mut i8` is safe because `u8` and
                 // `i8` have the same size and alignment. The length of token_buf is
                 // accurate for this reason.
-                llama_cpp_sys::llama_token_to_piece(
+                llama_token_to_piece(
                     **model_lock,
                     t.0,
-                    token_buf.as_mut_ptr() as *mut i8,
+                    token_buf.as_mut_ptr() as *mut c_char,
                     token_buf.len() as i32,
                 )
             };
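For the arm64 part of the fix, the key detail is that `std::os::raw::c_char` is `i8` on x86_64 Linux but `u8` on aarch64 Linux, so casts hard-coded to `*const i8` / `*mut i8` no longer match FFI signatures declared in terms of `c_char`; the same reasoning applies to casting the token buffer to `*mut llama_token` instead of assuming `*mut i32`. Below is a minimal standalone sketch of that portability point — `takes_c_string` is a hypothetical stand-in for a bindgen-style signature, not code from the crate.

    use std::os::raw::c_char;

    // Hypothetical stand-in for a bindgen-generated signature: the string
    // parameter is declared as `*const c_char`, not `*const i8`.
    fn takes_c_string(ptr: *const c_char, len: i32) -> i32 {
        let _ = ptr;
        len
    }

    fn main() {
        let content = "hello";

        // Portable: compiles on every target, because `c_char` is `i8` on
        // x86_64 Linux but `u8` on aarch64 Linux.
        let n = takes_c_string(content.as_ptr() as *const c_char, content.len() as i32);

        // The pre-fix cast, `content.as_ptr() as *const i8`, only type-checks
        // against such a signature where `c_char` happens to be `i8`, which is
        // why the aarch64 Linux build broke.
        println!("passed {n} bytes");
    }
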
1 change: 1 addition & 0 deletions crates/llama_cpp_sys/build.rs
@@ -429,6 +429,7 @@ fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'static
 
     let mut nvcc = featless_cxx;
     nvcc.cuda(true)
+        .std("c++17")
         .flag("--forward-unknown-to-host-compiler")
         .flag("-arch=all")
         .define("K_QUANTS_PER_ITERATION", Some("2"))
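The build.rs change is a single chained call: `cc::Build::std("c++17")` sets the language-standard flag for the nvcc invocation (e.g. `-std=c++17` for GNU-style drivers), which addresses the Linux CUDA build failures mentioned in the commit message. Below is a minimal sketch of how such a builder chain reads in isolation; the source path and output name are placeholders, not the crate's actual values.

    // Minimal build.rs sketch showing where the new `.std("c++17")` call sits
    // in a `cc::Build` chain configured for nvcc. The file path and library
    // name are placeholders for illustration only.
    fn main() {
        cc::Build::new()
            .cuda(true)
            .std("c++17") // the one-line fix: build the CUDA sources as C++17
            .flag("--forward-unknown-to-host-compiler")
            .flag("-arch=all")
            .define("K_QUANTS_PER_ITERATION", Some("2"))
            .file("thirdparty/ggml-cuda.cu") // placeholder path
            .compile("ggml-cuda"); // placeholder output name
    }
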
