Skip to content

Commit

Permalink
GPU upgrade
Browse files Browse the repository at this point in the history
  • Loading branch information
JohnnyFFM committed Dec 1, 2018
1 parent b2c23cd commit 2eeb527
Show file tree
Hide file tree
Showing 11 changed files with 1,745 additions and 78 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Generated by Cargo
# will have compiled files and executables
/target/
/.vs/
/bin/
/obj/
/packages/

# These are backup files generated by rustfmt
**/*.rs.bk
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "engraver"
version = "2.0.5"
version = "2.2.0"
license = "GPL-3.0"
authors = ["PoC Consortium <[email protected]>"]
description = """
Expand Down
117 changes: 117 additions & 0 deletions src/cpu_hasher.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
use libc::{c_void, size_t, uint64_t};
use std::sync::mpsc::Sender;

// Native nonce generators, linked in through the C ABI.
//
// All five entry points share one signature; `hash_cpu` picks one of them
// based on the SIMD extension name it is given. The implementations are not
// part of this file, so the meaning of each argument beyond its name and type
// cannot be confirmed from here.
//
// `usize`/`u64` are used directly: `libc::size_t` and `libc::uint64_t` are
// plain aliases of those types, and the `uint64_t` alias is deprecated.
extern "C" {
    /// Fallback generator, used when no SIMD extension name matches.
    pub fn noncegen(
        cache: *mut c_void,
        cache_size: usize,
        chunk_offset: usize,
        numeric_id: u64,
        local_startnonce: u64,
        local_nonces: u64,
    );
    /// Generator selected for the `"SSE2"` extension.
    pub fn noncegen_sse(
        cache: *mut c_void,
        cache_size: usize,
        chunk_offset: usize,
        numeric_id: u64,
        local_startnonce: u64,
        local_nonces: u64,
    );
    /// Generator selected for the `"AVX"` extension.
    pub fn noncegen_avx(
        cache: *mut c_void,
        cache_size: usize,
        chunk_offset: usize,
        numeric_id: u64,
        local_startnonce: u64,
        local_nonces: u64,
    );
    /// Generator selected for the `"AVX2"` extension.
    pub fn noncegen_avx2(
        cache: *mut c_void,
        cache_size: usize,
        chunk_offset: usize,
        numeric_id: u64,
        local_startnonce: u64,
        local_nonces: u64,
    );
    /// Generator selected for the `"AVX512F"` extension.
    pub fn noncegen_avx512(
        cache: *mut c_void,
        cache_size: usize,
        chunk_offset: usize,
        numeric_id: u64,
        local_startnonce: u64,
        local_nonces: u64,
    );
}
/// Wrapper around a raw `*mut c_void` that is marked `Send`.
///
/// Raw pointers are not `Send` on their own; wrapping the pointer lets a
/// `CpuTask` (which stores one of these) be moved into a closure that runs on
/// another thread. The type tracks neither ownership nor lifetime of the
/// pointee.
pub struct SafeCVoid {
/// The wrapped raw pointer.
pub ptr: *mut c_void,
}
// NOTE(review): nothing in this file proves that sending the pointer across
// threads is sound. Presumably each task is handed a region no other thread
// touches while it runs — confirm against the code that builds the tasks.
unsafe impl Send for SafeCVoid {}

/// Arguments for one CPU hashing job.
///
/// `hash_cpu` forwards the fields, in declaration order, to one of the native
/// `noncegen*` routines. Units and valid ranges are defined by those routines
/// and are not visible in this file.
pub struct CpuTask {
/// Destination buffer handed to the native routine as `cache`.
pub cache: SafeCVoid,
/// Forwarded as `cache_size` (presumably the buffer capacity — TODO confirm units).
pub cache_size: size_t,
/// Forwarded as `chunk_offset` (presumably this job's offset inside the buffer — TODO confirm).
pub chunk_offset: size_t,
/// Forwarded as the numeric account id argument.
pub numeric_id: uint64_t,
/// Forwarded as `local_startnonce`.
pub local_startnonce: uint64_t,
/// Forwarded as `local_nonces`; `hash_cpu` also reports this value back to
/// the scheduler once the native call has returned.
pub local_nonces: uint64_t,
}

pub fn hash_cpu(
tx: Sender<(u8, u8, u64)>,
hasher_task: CpuTask,
simd_ext: String,
) -> impl FnOnce() {
move || {
unsafe {
match &*simd_ext {
"AVX512F" => noncegen_avx512(
hasher_task.cache.ptr,
hasher_task.cache_size,
hasher_task.chunk_offset,
hasher_task.numeric_id,
hasher_task.local_startnonce,
hasher_task.local_nonces,
),
"AVX2" => noncegen_avx2(
hasher_task.cache.ptr,
hasher_task.cache_size,
hasher_task.chunk_offset,
hasher_task.numeric_id,
hasher_task.local_startnonce,
hasher_task.local_nonces,
),
"AVX" => noncegen_avx(
hasher_task.cache.ptr,
hasher_task.cache_size,
hasher_task.chunk_offset,
hasher_task.numeric_id,
hasher_task.local_startnonce,
hasher_task.local_nonces,
),
"SSE2" => noncegen_sse(
hasher_task.cache.ptr,
hasher_task.cache_size,
hasher_task.chunk_offset,
hasher_task.numeric_id,
hasher_task.local_startnonce,
hasher_task.local_nonces,
),
_ => noncegen(
hasher_task.cache.ptr,
hasher_task.cache_size,
hasher_task.chunk_offset,
hasher_task.numeric_id,
hasher_task.local_startnonce,
hasher_task.local_nonces,
),
}
}
// report hashing done
tx.send((0u8, 1u8, 0))
.expect("CPU task can't communicate with scheduler thread.");
// report data in hostmem
tx.send((0u8, 0u8, hasher_task.local_nonces))
.expect("CPU task can't communicate with scheduler thread.");
}
}
78 changes: 78 additions & 0 deletions src/gpu_hasher.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
use chan::Receiver;
use ocl::{gpu_hash, gpu_hash_and_transfer_to_host, gpu_transfer_to_host, GpuContext};
use std::sync::mpsc::Sender;
use std::sync::{Arc, Mutex};

/// Wrapper around a raw `*mut u8` that is marked both `Send` and `Sync`.
///
/// Raw pointers implement neither trait on their own; wrapping the pointer
/// lets a `GpuTask` (which stores one of these) travel through channels and
/// be used from the GPU hasher thread. The type tracks neither ownership nor
/// lifetime of the pointee.
pub struct SafePointer {
/// The wrapped raw pointer.
pub ptr: *mut u8,
}
// NOTE(review): nothing in this file proves that sending or sharing the
// pointer across threads is sound. Presumably the scheduler guarantees that
// the pointee outlives the task and is not accessed concurrently — confirm
// against the code that builds the tasks.
unsafe impl Send for SafePointer {}
unsafe impl Sync for SafePointer {}

/// Arguments for one GPU hashing job.
///
/// Instances are received by `create_gpu_hasher_thread` and passed by
/// reference to the `ocl` helpers (`gpu_hash`, `gpu_transfer_to_host`,
/// `gpu_hash_and_transfer_to_host`). How those helpers interpret the fields is
/// not visible in this file.
pub struct GpuTask {
/// Host-side buffer pointer (presumably the destination of the transfer — TODO confirm).
pub cache: SafePointer,
/// Size associated with `cache` (units not visible here).
pub cache_size: u64,
/// Offset associated with this job (presumably inside `cache` — TODO confirm).
pub chunk_offset: u64,
/// Numeric account id the job is computed for.
pub numeric_id: u64,
/// First nonce of this job.
pub local_startnonce: u64,
/// Number of nonces in this job. The hasher thread treats `0` as a marker:
/// it does no hashing for such a task and, if an earlier task is still
/// outstanding, only transfers that one to the host.
pub local_nonces: u64,
}

/// Builds the worker closure for one GPU.
///
/// The closure consumes `rx_hasher_task` until the channel ends or a `None`
/// (termination request) arrives. Hashing of the current task is overlapped
/// with transferring the previous one, alternating between two buffers
/// (`buffer_id` 0 and 1):
///
/// * no task outstanding, `local_nonces != 0`: hash only, remember the task,
///   send `(gpu_id, 1, 0)`;
/// * no task outstanding, `local_nonces == 0`: ignored;
/// * task outstanding, `local_nonces == 0`: transfer the outstanding task,
///   reset to buffer 0, send `(gpu_id, 0, previous.local_nonces)`;
/// * task outstanding, `local_nonces != 0`: hash the new task while
///   transferring the outstanding one, send
///   `(gpu_id, 0, previous.local_nonces)` followed by `(gpu_id, 1, 0)`.
///
/// The outstanding task is kept in an `Option` rather than in a dummy
/// `GpuTask` guarded by a `first_run` flag, so there is never a placeholder
/// task carrying a pointer to a throw-away value.
///
/// # Panics
///
/// The closure panics if a status message cannot be sent on `tx`.
pub fn create_gpu_hasher_thread(
    gpu_id: u8,
    gpu_context: Arc<Mutex<GpuContext>>,
    tx: Sender<(u8, u8, u64)>,
    rx_hasher_task: Receiver<Option<GpuTask>>,
) -> impl FnOnce() {
    move || {
        const SCHEDULER_GONE: &str = "GPU task can't communicate with scheduler thread.";

        // Task that has been hashed on the GPU but not yet copied to the host.
        let mut pending: Option<GpuTask> = None;
        let mut buffer_id = 0u8;

        for message in rx_hasher_task {
            // check if new task or termination
            let task = match message {
                Some(task) => task,
                None => break,
            };
            let is_flush = task.local_nonces == 0;

            match (pending.take(), is_flush) {
                // nothing outstanding and nothing to hash
                (None, true) => {}
                // first run - just hash
                (None, false) => {
                    gpu_hash(&gpu_context, &task);
                    buffer_id = 1 - buffer_id;
                    pending = Some(task);
                    tx.send((gpu_id, 1u8, 0)).expect(SCHEDULER_GONE);
                }
                // last run - just transfer
                (Some(previous), true) => {
                    gpu_transfer_to_host(&gpu_context, buffer_id, &previous);
                    buffer_id = 0;
                    tx.send((gpu_id, 0u8, previous.local_nonces))
                        .expect(SCHEDULER_GONE);
                }
                // normal run - hash and transfer async
                (Some(previous), false) => {
                    gpu_hash_and_transfer_to_host(&gpu_context, buffer_id, &task, &previous);
                    buffer_id = 1 - buffer_id;
                    tx.send((gpu_id, 0u8, previous.local_nonces))
                        .expect(SCHEDULER_GONE);
                    pending = Some(task);
                    tx.send((gpu_id, 1u8, 0)).expect(SCHEDULER_GONE);
                }
            }
        }
    }
}
81 changes: 67 additions & 14 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@ extern crate pbr;
extern crate stopwatch;
extern crate sys_info;

mod hasher;
mod cpu_hasher;
#[cfg(feature = "opencl")]
mod gpu_hasher;
#[cfg(feature = "opencl")]
mod ocl;
mod plotter;
mod scheduler;
mod utils;
mod writer;

Expand All @@ -19,13 +24,10 @@ use clap::AppSettings::{ArgRequiredElseHelp, DeriveDisplayOrder, VersionlessSubc
use clap::ArgGroup;
use clap::{App, Arg};
use plotter::{Plotter, PlotterTask};
use std::cmp::min;
use utils::set_low_prio;

fn main() {
#[cfg(not(feature = "opencl"))]
let _opencl = false;
#[cfg(feature = "opencl")]
let opencl = true;
let arg = App::new("Engraver")
.version(crate_version!())
.author(crate_authors!())
Expand Down Expand Up @@ -60,6 +62,12 @@ fn main() {
.long("quiet")
.help("Runs engraver in non-verbose mode")
.global(true),
).arg(
Arg::with_name("benchmark")
.short("b")
.long("bench")
.help("Runs engraver in xPU benchmark mode")
.global(true),
)
/*
.subcommand(
Expand All @@ -74,23 +82,23 @@ fn main() {
.value_name("numeric_ID")
.help("your numeric Burst ID")
.takes_value(true)
.required(true),
.required_unless("ocl-devices"),
).arg(
Arg::with_name("start nonce")
.short("s")
.long("sn")
.value_name("start_nonce")
.help("where you want to start plotting")
.takes_value(true)
.required(true),
.required_unless("ocl-devices"),
).arg(
Arg::with_name("nonces")
.short("n")
.long("n")
.value_name("nonces")
.help("how many nonces you want to plot")
.takes_value(true)
.required(true),
.required_unless("ocl-devices"),
).arg(
Arg::with_name("path")
.short("p")
Expand Down Expand Up @@ -120,14 +128,13 @@ fn main() {
.short("g")
.long("gpu")
.value_name("platform_id:device_id")
.help("*GPU(s) you want to use for plotting")
.help("GPU(s) you want to use for plotting (optional)")
.multiple(true)
.takes_value(true),
]).groups(&[#[cfg(feature = "opencl")]
ArgGroup::with_name("processing")
.args(&["cpu", "gpu"])
.multiple(true)
.required(true)])
.multiple(true)])
/*
.arg(
Arg::with_name("ssd buffer")
Expand Down Expand Up @@ -168,14 +175,35 @@ fn main() {
)*/;

#[cfg(feature = "opencl")]
let arg = arg
.arg(
Arg::with_name("ocl-devices")
.short("o")
.long("opencl")
.help("Display OpenCL platforms and devices")
.global(true),
).arg(
Arg::with_name("zero-copy")
.short("z")
.long("zcb")
.help("Enables zero copy buffers for shared mem (integrated) gpus")
.global(true),
);
let matches = &arg.get_matches();

if matches.is_present("low priority") {
set_low_prio();
}

if matches.is_present("ocl-devices") {
#[cfg(feature = "opencl")]
ocl::platform_info();
return;
}

// plotting
/*
/* subcommand
if let Some(matches) = matches.subcommand_matches("plot") {
*/
let numeric_id = value_t!(matches, "numeric id", u64).unwrap_or_else(|e| e.exit());
Expand All @@ -189,8 +217,30 @@ fn main() {
.unwrap()
});
let mem = value_t!(matches, "memory", String).unwrap_or_else(|_| "0B".to_owned());
let cpu_threads =
value_t!(matches, "cpu", u8).unwrap_or_else(|_| sys_info::cpu_num().unwrap() as u8);
let cpu_threads = value_t!(matches, "cpu", u8).unwrap_or(0u8);

let gpus = if matches.occurrences_of("gpu") > 0 {
let gpu = values_t!(matches, "gpu", String);
Some(gpu.unwrap())
} else {
None
};

// work out number of cpu threads to use
let cores = sys_info::cpu_num().unwrap() as u8;
let cpu_threads = if cpu_threads == 0 {
cores
} else {
min(cores, cpu_threads)
};

// special case: don't use the cpu if only a gpu is defined
#[cfg(feature = "opencl")]
let cpu_threads = if matches.occurrences_of("gpu") > 0 && matches.occurrences_of("cpu") == 0 {
0u8
} else {
cpu_threads
};

let p = Plotter::new();
p.run(PlotterTask {
Expand All @@ -200,8 +250,11 @@ fn main() {
output_path,
mem,
cpu_threads,
gpus,
direct_io: !matches.is_present("disable direct i/o"),
async_io: !matches.is_present("disable async i/o"),
quiet: matches.is_present("non-verbosity"),
benchmark: matches.is_present("benchmark"),
zcb: matches.is_present("zero-copy"),
});
}
Loading

0 comments on commit 2eeb527

Please sign in to comment.