Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wip: macos compilation #2

Merged
merged 4 commits into from
Feb 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/cargo_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ jobs:

steps:
- uses: actions/checkout@v3
# macOS runners do not ship GNU binutils, so the build script's symbol
# renaming needs LLVM's `llvm-nm` / `llvm-objcopy`; install them and expose
# the Homebrew LLVM bin directory to later steps.
- name: Install LLVM (for llvm-objcopy)
  if: matrix.platform == 'macos-latest'
  run: |
    brew update
    brew install llvm@17
    echo "$(brew --prefix llvm@17)/bin" >> "$GITHUB_PATH"
- name: Update submodules
run: git submodule update --init --recursive
- name: Build
Expand Down
170 changes: 123 additions & 47 deletions crates/whisper_cpp_sys/build.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
use std::{env, fs};
use std::path::PathBuf;
#[cfg(feature = "compat")]
use std::process::Command;
use std::{env, fs};

// TODO add feature compatibility checks

Expand All @@ -19,7 +17,9 @@ fn main() {
config
.define("BUILD_SHARED_LIBS", "OFF")
.define("WHISPER_BUILD_EXAMPLES", "OFF")
.define("WHISPER_BUILD_TESTS", "OFF");
.define("WHISPER_BUILD_TESTS", "OFF")
.define("WHISPER_NO_ACCELERATE", "ON") // TODO accelerate is used by default, but is causing issues atm, check why
.define("WHISPER_METAL", "OFF"); // TODO this is on by default on Apple devices, and is causing issues, see why

#[cfg(not(feature = "avx"))]
{
Expand Down Expand Up @@ -74,16 +74,24 @@ fn main() {

#[cfg(feature = "compat")]
{
compat::redefine_symbols(out_path);
}
}

#[cfg(feature = "compat")]
mod compat {
use std::path::Path;
use std::process::Command;

pub fn redefine_symbols(out_path: impl AsRef<Path>) {
// TODO this whole section is a bit hacky, could probably clean it up a bit, particularly the retrieval of symbols from the library files
// TODO do this for cuda if necessary

let (whisper_lib_name, nm_name, objcopy_name) =
if cfg!(target_os = "linux") || cfg!(target_os = "macos") {
("libwhisper.a", "nm", "objcopy")
} else {
("whisper.lib", "llvm-nm", "llvm-objcopy")
};
let whisper_lib_name = lib_name();
let (nm_name, objcopy_name) = tool_names();
println!("Modifying {whisper_lib_name}, symbols acquired via \"{nm_name}\" and modified via \"{objcopy_name}\"");

let lib_path = out_path.join("lib").join("static");
let lib_path = out_path.as_ref().join("lib").join("static");

// Modifying symbols exposed by the ggml library

Expand Down Expand Up @@ -142,45 +150,113 @@ fn main() {
);
}
}
}

#[cfg(feature = "compat")]
struct Filter<'a> {
prefix: &'a str,
sym_type: char,
}
/// Picks the file name of the static *Whisper.cpp* library for the current target family.
///
/// Windows targets use `whisper.lib`; every other target uses `libwhisper.a`. When the
/// target family is neither Windows nor Unix, a Cargo warning is printed before falling
/// back to the Unix name.
fn lib_name() -> &'static str {
    let is_windows = cfg!(target_family = "windows");
    let is_unix = cfg!(target_family = "unix");

    if is_windows {
        return "whisper.lib";
    }

    if !is_unix {
        // Neither known family matched: warn, then use the Unix naming scheme.
        println!("cargo:warning=Unknown target family, defaulting to Unix lib names");
    }

    "libwhisper.a"
}

/// Helper function to turn **`nm`**'s output into an iterator of [`str`] symbols.
///
/// This function expects **`nm`** to be called using the **`-p`** and **`-P`** flags.
#[cfg(feature = "compat")]
fn get_symbols<'a, const N: usize>(
nm_output: &'a str,
filters: [Filter<'a>; N],
) -> impl Iterator<Item=&'a str> + 'a {
nm_output
.lines()
.map(|symbol| {
// Strip irrelevant information

let mut stripped = symbol;
while stripped.split(' ').count() > 2 {
let idx = unsafe { stripped.rfind(' ').unwrap_unchecked() };
stripped = &stripped[..idx]
/// Resolves which [nm][nm]-like and [objcopy][objcopy]-like tools should be invoked.
///
/// For each tool, the `NM_PATH` / `OBJCOPY_PATH` value captured by `option_env!` wins if
/// present; otherwise the candidate names for the current target family are probed with
/// `find_tool` and the first working one is used.
///
/// Returns `(nm, objcopy)`.
///
/// # Panics
///
/// Panics when no override is set and none of the candidate tools can be found.
///
/// [nm]: https://www.man7.org/linux/man-pages/man1/nm.1.html
/// [objcopy]: https://www.man7.org/linux/man-pages/man1/objcopy.1.html
fn tool_names() -> (&'static str, &'static str) {
    let on_unix = cfg!(target_family = "unix");

    // Candidate executables, in order of preference.
    let nm_candidates: &[&'static str] = if on_unix {
        &["nm", "llvm-nm"]
    } else {
        &["llvm-nm"]
    };
    let objcopy_candidates: &[&'static str] = if on_unix {
        &["objcopy", "llvm-objcopy"]
    } else {
        &["llvm-objcopy"]
    };

    // `nm` is resolved first so that a missing `nm` is reported before `objcopy` is probed.
    let nm = match option_env!("NM_PATH") {
        Some(overridden) => overridden,
        None => {
            println!("Looking for \"nm\" or an equivalent tool");
            find_tool(nm_candidates).expect(
                "No suitable tool equivalent to \"nm\" has been found in \
                 PATH, if one is already installed, either add it to PATH or set NM_PATH to its full path",
            )
        }
    };

    let objcopy = match option_env!("OBJCOPY_PATH") {
        Some(overridden) => overridden,
        None => {
            println!("Looking for \"objcopy\" or an equivalent tool");
            find_tool(objcopy_candidates).expect(
                "No suitable tool equivalent to \"objcopy\" has \
                 been found in PATH, if one is already installed, either add it to PATH or set OBJCOPY_PATH to its full path",
            )
        }
    };

    (nm, objcopy)
}

/// Returns the first tool found in the system, given a list of tool names, returning the first one found and
/// printing its version.
///
/// Returns [`Option::None`] if no tool is found.
fn find_tool<'a>(names: &[&'a str]) -> Option<&'a str> {
for name in names {
if let Ok(output) = Command::new(name).arg("--version").output() {
if output.status.success() {
let out_str = String::from_utf8_lossy(&output.stdout);
println!("Valid \"tool\" found:\n{out_str}");
return Some(name);
}
}
stripped
})
.filter(move |symbol| {
// Filter matching symbols

if symbol.split(' ').count() == 2 {
for filter in &filters {
if symbol.ends_with(filter.sym_type) && symbol.starts_with(filter.prefix) {
return true;
}

None
}

/// A filter for a symbol in a library.
///
/// `get_symbols` keeps an `nm` output entry when it matches at least one filter, i.e. the
/// entry starts with `prefix` and ends with `sym_type`.
struct Filter<'a> {
/// Leading text the entry must start with (checked with `starts_with`).
prefix: &'a str,
/// Trailing character the entry must end with (checked with `ends_with`); presumably the
/// symbol type letter printed by `nm` - confirm against the call sites.
sym_type: char,
}

/// Turns **`nm`**'s output into an iterator of [`str`] symbols.
///
/// This function expects **`nm`** to be called using the **`-p`** and **`-P`** flags.
fn get_symbols<'a, const N: usize>(
nm_output: &'a str,
filters: [Filter<'a>; N],
) -> impl Iterator<Item = &'a str> + 'a {
nm_output
.lines()
.map(|symbol| {
// Strip irrelevant information

let mut stripped = symbol;
while stripped.split(' ').count() > 2 {
let idx = unsafe { stripped.rfind(' ').unwrap_unchecked() };
stripped = &stripped[..idx]
}
stripped
})
.filter(move |symbol| {
// Filter matching symbols

if symbol.split(' ').count() == 2 {
for filter in &filters {
if symbol.ends_with(filter.sym_type) && symbol.starts_with(filter.prefix) {
return true;
}
}
}
}
false
})
.map(|symbol| &symbol[..symbol.len() - 2]) // Strip the type, so only the symbol remains
false
})
.map(|symbol| &symbol[..symbol.len() - 2]) // Strip the type, so only the symbol remains
}
}
2 changes: 1 addition & 1 deletion crates/whisper_cpp_sys/thirdparty/whisper.cpp
Submodule whisper.cpp updated 78 files
+0 −34 .devops/main-cuda.Dockerfile
+0 −19 .devops/main.Dockerfile
+11 −19 .github/workflows/build.yml
+0 −57 .github/workflows/docker.yml
+0 −1 .gitignore
+3 −9 CMakeLists.txt
+2 −5 Makefile
+31 −15 Package.swift
+4 −39 README.md
+0 −26 bindings/go/Makefile
+1 −1 bindings/ios
+1 −1 bindings/javascript/package.json
+1 −1 bindings/javascript/whisper.js
+0 −3 examples/CMakeLists.txt
+0 −1 examples/helpers.js
+3 −12 examples/main/README.md
+2 −2 examples/main/main.cpp
+0 −12 examples/server/CMakeLists.txt
+0 −68 examples/server/README.md
+0 −9,262 examples/server/httplib.h
+0 −24,596 examples/server/json.hpp
+0 −811 examples/server/server.cpp
+18 −7 examples/talk-llama/CMakeLists.txt
+0 −9 examples/wchess/CMakeLists.txt
+0 −45 examples/wchess/README.md
+0 −19 examples/wchess/libwchess/CMakeLists.txt
+0 −803 examples/wchess/libwchess/Chessboard.cpp
+0 −33 examples/wchess/libwchess/Chessboard.h
+0 −193 examples/wchess/libwchess/WChess.cpp
+0 −63 examples/wchess/libwchess/WChess.h
+0 −117 examples/wchess/libwchess/test-chessboard.cpp
+0 −8 examples/wchess/wchess.cmd/CMakeLists.txt
+0 −247 examples/wchess/wchess.cmd/wchess.cmd.cpp
+0 −51 examples/wchess/wchess.wasm/CMakeLists.txt
+0 −54 examples/wchess/wchess.wasm/chessboardjs-1.0.0/css/chessboard-1.0.0.css
+0 −2 examples/wchess/wchess.wasm/chessboardjs-1.0.0/css/chessboard-1.0.0.min.css
+ examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bB.png
+ examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bK.png
+ examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bN.png
+ examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bP.png
+ examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bQ.png
+ examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bR.png
+ examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wB.png
+ examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wK.png
+ examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wN.png
+ examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wP.png
+ examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wQ.png
+ examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wR.png
+0 −1,817 examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0.js
+0 −2 examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0.min.js
+0 −32 examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/CHANGELOG.md
+0 −20 examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/LICENSE.md
+0 −82 examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/README.md
+0 −29 examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/package.json
+0 −499 examples/wchess/wchess.wasm/index-tmpl.html
+0 −2 examples/wchess/wchess.wasm/jquery-3.7.1.min.js
+0 −141 examples/wchess/wchess.wasm/wchess.wasm.cpp
+0 −1 examples/whisper.objc/whisper.objc/ViewController.m
+15 −15 examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift
+1 −3 extra/bench.py
+8 −43 ggml-alloc.c
+1 −8 ggml-alloc.h
+21 −46 ggml-backend-impl.h
+156 −563 ggml-backend.c
+17 −62 ggml-backend.h
+503 −1,960 ggml-cuda.cu
+1 −9 ggml-cuda.h
+1 −1 ggml-impl.h
+0 −6 ggml-metal.h
+192 −1,033 ggml-metal.m
+386 −2,365 ggml-metal.metal
+7 −5 ggml-opencl.cpp
+3 −3 ggml-quants.c
+215 −724 ggml.c
+12 −79 ggml.h
+5 −5 models/download-ggml-model.sh
+68 −146 whisper.cpp
+1 −6 whisper.h
75 changes: 50 additions & 25 deletions crates/whisper_cpp_tests/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,46 +16,71 @@ mod tests {

#[tokio::test]
async fn it_works() -> Result<(), TestError> {
let model_path_str = std::env::var("WHISPER_TEST_MODEL").unwrap_or_else(|_| {
eprintln!(
"WHISPER_TEST_MODEL environment variable not set. \
Please set this to the path to a GGUF model."
);
let model_paths = {
let mut dir = std::env::var("WHISPER_TEST_MODEL_DIR").unwrap_or_else(|_| {
eprintln!(
"WHISPER_TEST_MODEL environment variable not set. \
Please set this to the path to a Whisper GGUF model file for the test to run."
);

std::process::exit(1)
});
std::process::exit(0)
});

if !dir.ends_with('/') {
dir.push('/');
}

let dir = std::path::Path::new(&dir);
let mut models = tokio::fs::read_dir(dir).await.unwrap();
let mut rv = vec![];

while let Some(model) = models.next_entry().await.unwrap() {
let path = model.path();

if path.is_file() {
let path = path.to_str().unwrap();
if path.ends_with(".bin") {
rv.push(path.to_string());
}
}
}

rv
};

let sample_path_str = std::env::var("WHISPER_TEST_SAMPLE").unwrap_or_else(|_| {
eprintln!(
"WHISPER_TEST_SAMPLE environment variable not set. \
Please set this to the path to a sample wav file."
Please set this to the path to a sample wav file for the test to run."
);

std::process::exit(1)
std::process::exit(0)
});

let model = WhisperModel::new_from_file(model_path_str, false)?;
for model_path_str in model_paths {
let model = WhisperModel::new_from_file(model_path_str, false)?;

let mut session = model.new_session().await?;
let mut session = model.new_session().await?;

let params = WhisperParams::new(WhisperSampling::default_greedy());
let params = WhisperParams::new(WhisperSampling::default_greedy());

let mut file = std::fs::File::open(sample_path_str)?;
let (header, data) = wav::read(&mut file)?;
let sixteens = data.as_sixteen().unwrap();
let samples: Vec<_> = sixteens[..sixteens.len() / header.channel_count as usize]
.iter()
.map(|v| *v as f32 / 32768.)
.collect();
let mut file = std::fs::File::open(&sample_path_str)?;
let (header, data) = wav::read(&mut file)?;
let sixteens = data.as_sixteen().unwrap();
let samples: Vec<_> = sixteens[..sixteens.len() / header.channel_count as usize]
.iter()
.map(|v| *v as f32 / 32768.)
.collect();

session.full(params, &samples).await?;
session.full(params, &samples).await?;

let mut result = "".to_string();
for i in 0..session.segment_count() {
result += &*session.segment_text(i)?;
}
let mut result = "".to_string();
for i in 0..session.segment_count() {
result += &*session.segment_text(i)?;
}

println!("{result}");
println!("{result}");
}

Ok(())
}
Expand Down
Loading