From bce38714da455296c59bd05d5691e6c5b4d3b5bd Mon Sep 17 00:00:00 2001 From: Norbert Mauger Date: Sun, 1 Jan 2023 10:22:22 +0100 Subject: [PATCH 01/21] add cuda to ya-runtime-vm 0.3.0 --- qemu/Dockerfile | 5 ++++ runtime/Cargo.toml | 1 + runtime/src/detect_pci.rs | 48 +++++++++++++++++++++++++++++++++++++++ runtime/src/gpu.rs | 29 +++++++++++++++++++++++ runtime/src/lib.rs | 2 ++ runtime/src/main.rs | 12 +++++++++- runtime/src/vmrt.rs | 15 ++++++++++-- 7 files changed, 109 insertions(+), 3 deletions(-) mode change 100644 => 100755 qemu/Dockerfile mode change 100644 => 100755 runtime/Cargo.toml create mode 100755 runtime/src/detect_pci.rs create mode 100755 runtime/src/gpu.rs mode change 100644 => 100755 runtime/src/lib.rs mode change 100644 => 100755 runtime/src/main.rs mode change 100644 => 100755 runtime/src/vmrt.rs diff --git a/qemu/Dockerfile b/qemu/Dockerfile old mode 100644 new mode 100755 index ed13ac65..e300fac0 --- a/qemu/Dockerfile +++ b/qemu/Dockerfile @@ -36,6 +36,11 @@ RUN echo CONFIG_VIRTIO_SCSI=y >> build/config-host.mak RUN echo CONFIG_VIRTIO_BLK=y >> build/config-host.mak RUN echo CONFIG_VIRTIO_9P=y >> build/config-host.mak RUN echo CONFIG_FSDEV_9P=y >> build/config-host.mak +RUN echo CONFIG_VFIO=y >> build/config-host.mak +RUN echo CONFIG_VFIO_PCI=y >> build/config-host.mak +RUN echo CONFIG_EDID=y >> build/config-host.mak +RUN echo CONFIG_VGA_PCI=y >> build/config-host.mak +RUN echo CONFIG_PCIE_PORT=y >> build/config-host.mak RUN cd build && make x86_64-softmmu/all V=1 CFLAGS+="-Os -flto" -j4 LIBS+="-flto -lblkid -luuid -lpixman-1 -lutil" #RUN cd build && make x86_64-softmmu/all V=1 LIBS+="-flto -lblkid -luuid -lpixman-1 -lutil" CONFIG_PARALLEL=y CONFIG_VIRTIO_SERIAL=y diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml old mode 100644 new mode 100755 index 1f86419e..02b5b8a6 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -33,6 +33,7 @@ futures = "0.3" log = "0.4.8" rand = "0.8" raw-cpuid = "10.2.0" +pci-ids = "0.2.4" serde = { version = "^1.0", features = ["derive"] } serde_json = "1.0" strip-ansi-escapes = "0.1.0" diff --git a/runtime/src/detect_pci.rs b/runtime/src/detect_pci.rs new file mode 100755 index 00000000..cb0fb6d5 --- /dev/null +++ b/runtime/src/detect_pci.rs @@ -0,0 +1,48 @@ +use std::process::Command; + +pub fn detect_pci(pci_id: String, vendor_id: String) -> String { + + let mut device_found = false; + let mut device_name = "None"; + + let cmd = Command::new("lspci") + .arg("-vnn") + .output() + .expect("failed to execute process"); + + let stdout = String::from_utf8_lossy(&cmd.stdout); + let lines = stdout.split("\n"); + + for line in lines { + if line.starts_with(&pci_id) { + let index_vid = line[..].find(&vendor_id); + if !index_vid.is_none() + { + device_found = true; + let index_start_vid = index_vid.unwrap(); + let index_start_pid = index_start_vid + line[index_start_vid..].find(":").unwrap()+1; + let index_end_pid = index_start_pid + line[index_start_pid..].find("]").unwrap(); + let s_pid = &line[index_start_pid..index_end_pid]; + let vid = u16::from_str_radix(&vendor_id, 16).unwrap(); + let pid = u16::from_str_radix(s_pid, 16).unwrap(); + let device = pci_ids::Device::from_vid_pid(vid, pid).unwrap(); + device_name = pci_ids::Device::name(device); + } + else { + break; + } + } + else if device_found { + if !line.find("Kernel driver in use").is_none() { + let line_driver: Vec<&str> = line.split(":").collect(); + let driver = line_driver[1].trim_start(); + if driver != "vfio-pci" { + device_name = "None"; + } + break; + } + } + } + + device_name.to_string() +} \ No newline at end of file diff --git a/runtime/src/gpu.rs b/runtime/src/gpu.rs new file mode 100755 index 00000000..875a0ae1 --- /dev/null +++ b/runtime/src/gpu.rs @@ -0,0 +1,29 @@ +use crate::detect_pci; + +pub struct GpuInfo { + pub name: String, +} + +impl GpuInfo { + pub fn try_new() -> anyhow::Result { + + let mut gpu_name = String::from("None"); + let nvidia_vendor_id = String::from("10de"); + + match std::env::var("GPU_PCI") { + Ok(val) => { + if val != "no" { + let gpu_pci_id = String::from(val); + gpu_name = detect_pci::detect_pci(gpu_pci_id, nvidia_vendor_id); + } + }, + Err(_e) => {} + } + + Ok(GpuInfo { + name: gpu_name.to_string(), + }) + + } +} + diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs old mode 100644 new mode 100755 index 0369ea8a..62290f8d --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -1,3 +1,5 @@ +pub mod detect_pci; +pub mod gpu; pub mod cpu; pub mod deploy; pub mod guest_agent_comm; diff --git a/runtime/src/main.rs b/runtime/src/main.rs old mode 100644 new mode 100755 index 7bd02633..c60b01c4 --- a/runtime/src/main.rs +++ b/runtime/src/main.rs @@ -26,6 +26,7 @@ use ya_runtime_sdk::{ }; use ya_runtime_vm::{ cpu::CpuInfo, + gpu::GpuInfo, deploy::Deployment, guest_agent_comm::{RedirectFdType, RemoteCommandResult}, vmrt::{runtime_dir, start_vmrt, RuntimeData}, @@ -314,19 +315,28 @@ async fn stop(runtime_data: Arc>) -> Result<(), server::Error } fn offer() -> anyhow::Result> { + let gpu = GpuInfo::try_new()?; let cpu = CpuInfo::try_new()?; let model = format!( "Stepping {} Family {} Model {}", cpu.model.stepping, cpu.model.family, cpu.model.model ); + let mut capabilities = vec!["inet", "vpn", "manifest-support", "start-entrypoint"]; + let cuda_cap; + + if gpu.name != "None" { + cuda_cap = format!("cuda, {}", gpu.name); + capabilities.push(&cuda_cap); + } + Ok(Some(serde_json::json!({ "properties": { "golem.inf.cpu.vendor": cpu.model.vendor, "golem.inf.cpu.brand": cpu.model.brand, "golem.inf.cpu.model": model, "golem.inf.cpu.capabilities": cpu.capabilities, - "golem.runtime.capabilities": ["inet", "vpn", "manifest-support", "start-entrypoint"] + "golem.runtime.capabilities": capabilities }, "constraints": "" }))) diff --git a/runtime/src/vmrt.rs b/runtime/src/vmrt.rs old mode 100644 new mode 100755 index da387deb..3d5d5c8f --- a/runtime/src/vmrt.rs +++ b/runtime/src/vmrt.rs @@ -74,8 +74,6 @@ pub async fn start_vmrt( "-m", format!("{}M", deployment.mem_mib).as_str(), "-nographic", - "-vga", - "none", "-kernel", FILE_VMLINUZ, "-initrd", @@ -108,6 +106,19 @@ pub async fn start_vmrt( "-no-reboot", ]); + match std::env::var("GPU_PCI") { + Ok(val) => { + if val != "no" { + cmd.arg("-device"); + cmd.arg(format!("vfio-pci,host={}", val).as_str()); + } + }, + Err(_e) => { + cmd.arg("-vga"); + cmd.arg("none"); + } + } + let (vpn, inet) = // backward-compatibility mode if vpn_remote.is_none() && inet_remote.is_none() { From fb0a270796ee9371252d64851cd671d23fbccaae Mon Sep 17 00:00:00 2001 From: Norbert Mauger Date: Sat, 28 Jan 2023 07:53:25 +0100 Subject: [PATCH 02/21] add capabilities cuda, gpu --- runtime/src/main.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/runtime/src/main.rs b/runtime/src/main.rs index c60b01c4..134cc789 100755 --- a/runtime/src/main.rs +++ b/runtime/src/main.rs @@ -328,6 +328,8 @@ fn offer() -> anyhow::Result> { if gpu.name != "None" { cuda_cap = format!("cuda, {}", gpu.name); capabilities.push(&cuda_cap); + capabilities.push(&"cuda"); + capabilities.push(&"gpu"); } Ok(Some(serde_json::json!({ From 9bb577e5cb4e8f72036d3c70ec85a40da84525f7 Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Fri, 16 Jun 2023 11:20:46 +0200 Subject: [PATCH 03/21] Detection of PCI devices removed (self-test img will handle it) --- Cargo.lock | 74 --------------------------------------- runtime/Cargo.toml | 1 - runtime/src/detect_pci.rs | 45 ------------------------ runtime/src/gpu.rs | 26 -------------- runtime/src/lib.rs | 2 -- 5 files changed, 148 deletions(-) delete mode 100755 runtime/src/detect_pci.rs delete mode 100755 runtime/src/gpu.rs diff --git a/Cargo.lock b/Cargo.lock index 38e96927..a4cb02a9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1406,12 +1406,6 @@ version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" version = "0.4.4" @@ -1446,16 +1440,6 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43794a0ace135be66a25d3ae77d41b91615fb68ae937f904090203e81f755b65" -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1551,19 +1535,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" -[[package]] -name = "pci-ids" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d88ae3281b415d856e9c2ddbcdd5961e71c1a3e90138512c04d720241853a6af" -dependencies = [ - "nom", - "phf", - "phf_codegen", - "proc-macro2", - "quote", -] - [[package]] name = "percent-encoding" version = "2.2.0" @@ -1589,44 +1560,6 @@ dependencies = [ "indexmap", ] -[[package]] -name = "phf" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56ac890c5e3ca598bbdeaa99964edb5b0258a583a9eb6ef4e89fc85d9224770" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" -dependencies = [ - "phf_shared", - "rand 0.8.4", -] - -[[package]] -name = "phf_shared" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" -dependencies = [ - "siphasher", -] - [[package]] name = "pin-project" version = "1.0.8" @@ -2195,12 +2128,6 @@ dependencies = [ "libc", ] -[[package]] -name = "siphasher" -version = "0.3.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" - [[package]] name = "slab" version = "0.4.5" @@ -2993,7 +2920,6 @@ dependencies = [ "env_logger 0.10.0", "futures", "log", - "pci-ids", "pnet", "rand 0.8.4", "raw-cpuid", diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index e108f7e1..612f23d9 100755 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -34,7 +34,6 @@ futures = "0.3" log = "0.4.8" rand = "0.8" raw-cpuid = "10.7" -pci-ids = "0.2.4" serde = { version = "^1.0", features = ["derive"] } serde_json = "1.0" strip-ansi-escapes = "0.1.0" diff --git a/runtime/src/detect_pci.rs b/runtime/src/detect_pci.rs deleted file mode 100755 index 978ec3a3..00000000 --- a/runtime/src/detect_pci.rs +++ /dev/null @@ -1,45 +0,0 @@ -use std::process::Command; - -pub fn detect_pci(pci_id: String, vendor_id: String) -> String { - let mut device_found = false; - let mut device_name = "None"; - - let cmd = Command::new("lspci") - .arg("-vnn") - .output() - .expect("failed to execute process"); - - let stdout = String::from_utf8_lossy(&cmd.stdout); - let lines = stdout.split("\n"); - - for line in lines { - if line.starts_with(&pci_id) { - let index_vid = line[..].find(&vendor_id); - if !index_vid.is_none() { - device_found = true; - let index_start_vid = index_vid.unwrap(); - let index_start_pid = - index_start_vid + line[index_start_vid..].find(":").unwrap() + 1; - let index_end_pid = index_start_pid + line[index_start_pid..].find("]").unwrap(); - let s_pid = &line[index_start_pid..index_end_pid]; - let vid = u16::from_str_radix(&vendor_id, 16).unwrap(); - let pid = u16::from_str_radix(s_pid, 16).unwrap(); - let device = pci_ids::Device::from_vid_pid(vid, pid).unwrap(); - device_name = pci_ids::Device::name(device); - } else { - break; - } - } else if device_found { - if !line.find("Kernel driver in use").is_none() { - let line_driver: Vec<&str> = line.split(":").collect(); - let driver = line_driver[1].trim_start(); - if driver != "vfio-pci" { - device_name = "None"; - } - break; - } - } - } - - device_name.to_string() -} diff --git a/runtime/src/gpu.rs b/runtime/src/gpu.rs deleted file mode 100755 index c83210b4..00000000 --- a/runtime/src/gpu.rs +++ /dev/null @@ -1,26 +0,0 @@ -use crate::detect_pci; - -pub struct GpuInfo { - pub name: String, -} - -impl GpuInfo { - pub fn try_new() -> anyhow::Result { - let mut gpu_name = String::from("None"); - let nvidia_vendor_id = String::from("10de"); - - match std::env::var("GPU_PCI") { - Ok(val) => { - if val != "no" { - let gpu_pci_id = String::from(val); - gpu_name = detect_pci::detect_pci(gpu_pci_id, nvidia_vendor_id); - } - } - Err(_e) => {} - } - - Ok(GpuInfo { - name: gpu_name.to_string(), - }) - } -} diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 193fdd44..8bcecf0a 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -1,7 +1,5 @@ pub mod cpu; pub mod deploy; -pub mod detect_pci; -pub mod gpu; pub mod guest_agent_comm; mod response_parser; mod self_test; From 57723ad5ec251895d0175ea4ae3387daa5e2badd Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Mon, 19 Jun 2023 15:28:25 +0200 Subject: [PATCH 04/21] Optional --pci-device param --- runtime/src/lib.rs | 48 +++++++++++++++++++++++++++------------- runtime/src/self_test.rs | 7 +++--- runtime/src/vmrt.rs | 18 ++++++--------- 3 files changed, 44 insertions(+), 29 deletions(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 8bcecf0a..8aca32a6 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -68,6 +68,9 @@ pub struct Cli { /// INET endpoint address #[structopt(long)] inet_endpoint: Option, + #[structopt(long)] + /// PCI device identifier + pci_device: Option, } #[derive(ya_runtime_sdk::RuntimeDef, Default)] @@ -101,6 +104,7 @@ impl ya_runtime_sdk::Runtime for Runtime { let vpn_endpoint = ctx.cli.runtime.vpn_endpoint.clone(); let inet_endpoint = ctx.cli.runtime.inet_endpoint.clone(); + let pci_device_id = ctx.cli.runtime.pci_device.clone(); log::info!("VPN endpoint: {vpn_endpoint:?}"); log::info!("INET endpoint: {inet_endpoint:?}"); @@ -123,7 +127,9 @@ impl ya_runtime_sdk::Runtime for Runtime { async move { { let mut data = data.lock().await; - + if let Some(pci_device_id) = pci_device_id { + data.pci_device_id.replace(pci_device_id); + } if let Some(vpn_endpoint) = vpn_endpoint { let endpoint = ContainerEndpoint::try_from(vpn_endpoint).map_err(Error::from)?; @@ -156,17 +162,24 @@ impl ya_runtime_sdk::Runtime for Runtime { &mut self, command: server::RunProcess, mode: RuntimeMode, - _: &mut Context, + ctx: &mut Context, ) -> ProcessIdResponse<'a> { if let RuntimeMode::Command = mode { return async move { Err(anyhow::anyhow!("CLI `run` is not supported")) } .map_err(Into::into) .boxed_local(); } - - run_command(self.data.clone(), command) - .map_err(Into::into) - .boxed_local() + let pci_device_id = ctx.cli.runtime.pci_device.clone(); + let data = self.data.clone(); + async move { + let mut runtime_data = data.lock().await; + if let Some(pci_device_id) = pci_device_id { + runtime_data.pci_device_id.replace(pci_device_id); + } + run_command(data.clone(), command).await + } + .map_err(Into::into) + .boxed_local() } fn kill_command<'a>( @@ -179,20 +192,25 @@ impl ya_runtime_sdk::Runtime for Runtime { .boxed_local() } - fn offer<'a>(&mut self, _: &mut Context) -> OutputResponse<'a> { - self_test::run_self_test(|self_test_result| { - self_test::verify_status(self_test_result) - .and_then(|self_test_result| Ok(serde_json::from_str(&self_test_result)?)) - .and_then(offer) - .map(|offer| serde_json::Value::to_string(&offer)) - }) + fn offer<'a>(&mut self, ctx: &mut Context) -> OutputResponse<'a> { + let pci_device_id = ctx.cli.runtime.pci_device.clone(); + self_test::run_self_test( + |self_test_result| { + self_test::verify_status(self_test_result) + .and_then(|self_test_result| Ok(serde_json::from_str(&self_test_result)?)) + .and_then(offer) + .map(|offer| serde_json::Value::to_string(&offer)) + }, + pci_device_id, + ) // Dead code. ya_runtime_api::server::run_async requires killing the process to stop app .map(|_| Ok(None)) .boxed_local() } - fn test<'a>(&mut self, _ctx: &mut Context) -> EmptyResponse<'a> { - self_test::test().boxed_local() + fn test<'a>(&mut self, ctx: &mut Context) -> EmptyResponse<'a> { + let pci_device_id = ctx.cli.runtime.pci_device.clone(); + self_test::test(pci_device_id).boxed_local() } fn join_network<'a>( diff --git a/runtime/src/self_test.rs b/runtime/src/self_test.rs index 78cc5cc4..fa00d0b2 100644 --- a/runtime/src/self_test.rs +++ b/runtime/src/self_test.rs @@ -13,8 +13,8 @@ use crate::Runtime; const FILE_TEST_IMAGE: &str = "self-test.gvmi"; -pub(crate) async fn test() -> Result<(), Error> { - run_self_test(verify_status).await; +pub(crate) async fn test(pci_device_id: Option) -> Result<(), Error> { + run_self_test(verify_status, pci_device_id).await; // Dead code. ya_runtime_api::server::run_async requires killing a process to stop Ok(()) } @@ -40,7 +40,7 @@ pub(crate) fn verify_status(status: anyhow::Result) -> anyhow::Re } } -pub(crate) async fn run_self_test(handle_result: HANDLER) +pub(crate) async fn run_self_test(handle_result: HANDLER, pci_device_id: Option) where HANDLER: Fn(anyhow::Result) -> anyhow::Result, { @@ -52,6 +52,7 @@ where let runtime_data = RuntimeData { deployment: Some(deployment), + pci_device_id, ..Default::default() }; let runtime = Runtime { diff --git a/runtime/src/vmrt.rs b/runtime/src/vmrt.rs index 8ba53493..354b4f42 100755 --- a/runtime/src/vmrt.rs +++ b/runtime/src/vmrt.rs @@ -29,6 +29,7 @@ pub struct RuntimeData { pub inet: Option, pub deployment: Option, pub ga: Option>>, + pub pci_device_id: Option, } impl RuntimeData { @@ -106,17 +107,12 @@ pub async fn start_vmrt( "-no-reboot", ]); - match std::env::var("GPU_PCI") { - Ok(val) => { - if val != "no" { - cmd.arg("-device"); - cmd.arg(format!("vfio-pci,host={}", val).as_str()); - } - } - Err(_e) => { - cmd.arg("-vga"); - cmd.arg("none"); - } + if let Some(pci_device_id) = &data.pci_device_id { + cmd.arg("-device"); + cmd.arg(format!("vfio-pci,host={}", pci_device_id).as_str()); + } else { + cmd.arg("-vga"); + cmd.arg("none"); } let (vpn, inet) = From b40155455c327c969383fc3c09949c88e277a6a2 Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Thu, 22 Jun 2023 06:07:07 +0200 Subject: [PATCH 05/21] VFIO settigns removed from qemu --- qemu/Dockerfile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/qemu/Dockerfile b/qemu/Dockerfile index e300fac0..ed13ac65 100755 --- a/qemu/Dockerfile +++ b/qemu/Dockerfile @@ -36,11 +36,6 @@ RUN echo CONFIG_VIRTIO_SCSI=y >> build/config-host.mak RUN echo CONFIG_VIRTIO_BLK=y >> build/config-host.mak RUN echo CONFIG_VIRTIO_9P=y >> build/config-host.mak RUN echo CONFIG_FSDEV_9P=y >> build/config-host.mak -RUN echo CONFIG_VFIO=y >> build/config-host.mak -RUN echo CONFIG_VFIO_PCI=y >> build/config-host.mak -RUN echo CONFIG_EDID=y >> build/config-host.mak -RUN echo CONFIG_VGA_PCI=y >> build/config-host.mak -RUN echo CONFIG_PCIE_PORT=y >> build/config-host.mak RUN cd build && make x86_64-softmmu/all V=1 CFLAGS+="-Os -flto" -j4 LIBS+="-flto -lblkid -luuid -lpixman-1 -lutil" #RUN cd build && make x86_64-softmmu/all V=1 LIBS+="-flto -lblkid -luuid -lpixman-1 -lutil" CONFIG_PARALLEL=y CONFIG_VIRTIO_SERIAL=y From 2dc249c58d713d6fb16d6a2aec64e55b96dfc3d6 Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Thu, 22 Jun 2023 06:12:04 +0200 Subject: [PATCH 06/21] Support of env property PCI_DEVICE for --pci-device param --- runtime/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 8aca32a6..1a4ee45f 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -68,7 +68,7 @@ pub struct Cli { /// INET endpoint address #[structopt(long)] inet_endpoint: Option, - #[structopt(long)] + #[structopt(long, env = "PCI_DEVICE")] /// PCI device identifier pci_device: Option, } From f9ee4b036e3787c2525c23958a111aeeeccd3af7 Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Sun, 2 Jul 2023 03:02:07 +0200 Subject: [PATCH 07/21] Self test eagerly parsing stdout as JSON. Self-test process timeout. --- runtime/src/lib.rs | 17 +++++++-- runtime/src/self_test.rs | 78 ++++++++++++++++++++++------------------ 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 1a4ee45f..f4cb5512 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -13,6 +13,7 @@ use std::convert::TryFrom; use std::env; use std::path::{Component, Path, PathBuf}; use std::sync::Arc; +use std::time::Duration; use structopt::StructOpt; use tokio::{ fs, @@ -68,9 +69,18 @@ pub struct Cli { /// INET endpoint address #[structopt(long)] inet_endpoint: Option, - #[structopt(long, env = "PCI_DEVICE")] /// PCI device identifier + #[structopt(long, env = "PCI_DEVICE")] pci_device: Option, + /// Test timeout (in seconds) + #[structopt(long, env = "TEST_TIMEOUT", default_value = "10")] + test_timeout: u64, +} + +impl Cli { + fn test_timeout(&self) -> Duration { + Duration::from_secs(self.test_timeout) + } } #[derive(ya_runtime_sdk::RuntimeDef, Default)] @@ -194,6 +204,7 @@ impl ya_runtime_sdk::Runtime for Runtime { fn offer<'a>(&mut self, ctx: &mut Context) -> OutputResponse<'a> { let pci_device_id = ctx.cli.runtime.pci_device.clone(); + let test_timeout = ctx.cli.runtime.test_timeout(); self_test::run_self_test( |self_test_result| { self_test::verify_status(self_test_result) @@ -202,6 +213,7 @@ impl ya_runtime_sdk::Runtime for Runtime { .map(|offer| serde_json::Value::to_string(&offer)) }, pci_device_id, + test_timeout, ) // Dead code. ya_runtime_api::server::run_async requires killing the process to stop app .map(|_| Ok(None)) @@ -210,7 +222,8 @@ impl ya_runtime_sdk::Runtime for Runtime { fn test<'a>(&mut self, ctx: &mut Context) -> EmptyResponse<'a> { let pci_device_id = ctx.cli.runtime.pci_device.clone(); - self_test::test(pci_device_id).boxed_local() + let test_timeout = ctx.cli.runtime.test_timeout(); + self_test::test(pci_device_id, test_timeout).boxed_local() } fn join_network<'a>( diff --git a/runtime/src/self_test.rs b/runtime/src/self_test.rs index fa00d0b2..331c0f10 100644 --- a/runtime/src/self_test.rs +++ b/runtime/src/self_test.rs @@ -1,7 +1,9 @@ use anyhow::bail; use futures::lock::Mutex; +use serde_json::Value; use std::path::Path; use std::sync::{mpsc, Arc}; +use std::time::Duration; use tokio::fs; use ya_runtime_sdk::runtime_api::server::RuntimeHandler; use ya_runtime_sdk::{runtime_api::server, server::Server, Context, ErrorExt, EventEmitter}; @@ -13,36 +15,23 @@ use crate::Runtime; const FILE_TEST_IMAGE: &str = "self-test.gvmi"; -pub(crate) async fn test(pci_device_id: Option) -> Result<(), Error> { - run_self_test(verify_status, pci_device_id).await; +pub(crate) async fn test(pci_device_id: Option, timeout: Duration) -> Result<(), Error> { + run_self_test(verify_status, pci_device_id, timeout).await; // Dead code. ya_runtime_api::server::run_async requires killing a process to stop Ok(()) } -pub(crate) fn verify_status(status: anyhow::Result) -> anyhow::Result { - let Ok(status) = status else { - bail!("Failed to get self test status: {err}"); - }; - if status.return_code == 0 { - return Ok(String::from_utf8(status.stdout)?); - } - match String::from_utf8(status.stderr) { - Ok(stderr) => anyhow::bail!( - "Process failed, code: {}, stderr: {stderr}", - status.return_code - ), - Err(err) => { - anyhow::bail!( - "Process failed, code: {}. Failed to parse err output: {err}", - status.return_code - ) - } - } +pub(crate) fn verify_status(status: anyhow::Result) -> anyhow::Result { + let status = status?; + Ok(serde_json::to_string(&status)?) } -pub(crate) async fn run_self_test(handle_result: HANDLER, pci_device_id: Option) -where - HANDLER: Fn(anyhow::Result) -> anyhow::Result, +pub(crate) async fn run_self_test( + handle_result: HANDLER, + pci_device_id: Option, + timeout: Duration, +) where + HANDLER: Fn(anyhow::Result) -> anyhow::Result, { let work_dir = std::env::temp_dir(); @@ -89,7 +78,7 @@ where let (final_status_sender, final_status_receiver) = tokio::sync::oneshot::channel(); tokio::spawn(async move { - let status = collect_process_status(&mut status_receiver, pid).await; + let status = collect_process_response(&mut status_receiver, pid, timeout).await; final_status_sender.send(status) }); let process_result = final_status_receiver @@ -159,15 +148,21 @@ impl RuntimeHandler for ProcessOutputHandler { } } -async fn collect_process_status( +/// Collects process `stdout` and tries to parse it into `serde_json::Value`. +/// # Arguments +/// * `status_receiver` of `ProcessStatus` +/// * `pid` +/// * `timeout` used to wait for `ProcessStatus` +async fn collect_process_response( status_receiver: &mut mpsc::Receiver, pid: u64, -) -> anyhow::Result { + timeout: Duration, +) -> anyhow::Result { log::debug!("Start listening on process: {pid}"); let mut stdout = Vec::new(); let mut stderr = Vec::new(); let mut return_code = 0; - while let Ok(status) = status_receiver.recv() { + while let Ok(status) = status_receiver.recv_timeout(timeout) { if status.pid != pid { continue; } @@ -175,14 +170,27 @@ async fn collect_process_status( stderr.append(&mut status.stderr.clone()); return_code = status.return_code; if !status.running { + // Process stopped break; + } else if status.return_code != 0 { + // Process failed. Waiting for final message or timeout. + continue; + } else if let Ok(response) = serde_json::from_slice(&stdout) { + // Succeed parsing response. + return Ok(response); + } + } + if return_code != 0 { + bail!(String::from_utf8_lossy(&stderr).to_string()) + } + match serde_json::from_slice(&stdout) { + Ok(response) => Ok(response), + Err(err) => { + if !stderr.is_empty() { + bail!(String::from_utf8_lossy(&stderr).to_string()) + } else { + bail!(err) + } } } - Ok(ProcessStatus { - pid, - running: false, - return_code, - stdout, - stderr, - }) } From 2d862f0318834c9b2184558ede87fe0e14011703 Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Mon, 3 Jul 2023 02:14:36 +0200 Subject: [PATCH 08/21] Self test test_cpu_cores and test_mem_gib cmdline params --- runtime/src/lib.rs | 16 ++++++++++++++-- runtime/src/self_test.rs | 27 ++++++++++++++++++++------- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index f4cb5512..3a0e4ddc 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -72,9 +72,15 @@ pub struct Cli { /// PCI device identifier #[structopt(long, env = "PCI_DEVICE")] pci_device: Option, - /// Test timeout (in seconds) + /// Test process timeout (in sec) #[structopt(long, env = "TEST_TIMEOUT", default_value = "10")] test_timeout: u64, + /// Number of logical CPU cores for test process + #[structopt(long, env = "TEST_CPU_CORES", default_value = "1")] + test_cpu_cores: usize, + /// Amount of RAM for test process [GiB] + #[structopt(long, env = "TEST_MEM_GIB", default_value = "0.125")] + test_mem_gib: f64, } impl Cli { @@ -205,6 +211,8 @@ impl ya_runtime_sdk::Runtime for Runtime { fn offer<'a>(&mut self, ctx: &mut Context) -> OutputResponse<'a> { let pci_device_id = ctx.cli.runtime.pci_device.clone(); let test_timeout = ctx.cli.runtime.test_timeout(); + let cpu_cores = ctx.cli.runtime.test_cpu_cores; + let mem_gib = ctx.cli.runtime.test_mem_gib; self_test::run_self_test( |self_test_result| { self_test::verify_status(self_test_result) @@ -214,6 +222,8 @@ impl ya_runtime_sdk::Runtime for Runtime { }, pci_device_id, test_timeout, + cpu_cores, + mem_gib, ) // Dead code. ya_runtime_api::server::run_async requires killing the process to stop app .map(|_| Ok(None)) @@ -223,7 +233,9 @@ impl ya_runtime_sdk::Runtime for Runtime { fn test<'a>(&mut self, ctx: &mut Context) -> EmptyResponse<'a> { let pci_device_id = ctx.cli.runtime.pci_device.clone(); let test_timeout = ctx.cli.runtime.test_timeout(); - self_test::test(pci_device_id, test_timeout).boxed_local() + let cpu_cores = ctx.cli.runtime.test_cpu_cores; + let mem_gib = ctx.cli.runtime.test_mem_gib; + self_test::test(pci_device_id, test_timeout, cpu_cores, mem_gib).boxed_local() } fn join_network<'a>( diff --git a/runtime/src/self_test.rs b/runtime/src/self_test.rs index 331c0f10..c7bd0a9e 100644 --- a/runtime/src/self_test.rs +++ b/runtime/src/self_test.rs @@ -15,8 +15,13 @@ use crate::Runtime; const FILE_TEST_IMAGE: &str = "self-test.gvmi"; -pub(crate) async fn test(pci_device_id: Option, timeout: Duration) -> Result<(), Error> { - run_self_test(verify_status, pci_device_id, timeout).await; +pub(crate) async fn test( + pci_device_id: Option, + timeout: Duration, + cpu_cores: usize, + mem_gib: f64, +) -> Result<(), Error> { + run_self_test(verify_status, pci_device_id, timeout, cpu_cores, mem_gib).await; // Dead code. ya_runtime_api::server::run_async requires killing a process to stop Ok(()) } @@ -30,12 +35,14 @@ pub(crate) async fn run_self_test( handle_result: HANDLER, pci_device_id: Option, timeout: Duration, + cpu_cores: usize, + mem_gib: f64, ) where HANDLER: Fn(anyhow::Result) -> anyhow::Result, { let work_dir = std::env::temp_dir(); - let deployment = self_test_deployment(&work_dir) + let deployment = self_test_deployment(&work_dir, cpu_cores, mem_gib) .await .expect("Prepares self test img deployment"); @@ -106,7 +113,11 @@ pub(crate) async fn run_self_test( .await; } -async fn self_test_deployment(work_dir: &Path) -> anyhow::Result { +async fn self_test_deployment( + work_dir: &Path, + cpu_cores: usize, + mem_gib: f64, +) -> anyhow::Result { let package_path = runtime_dir() .expect("Runtime directory not found") .join(FILE_TEST_IMAGE) @@ -114,12 +125,14 @@ async fn self_test_deployment(work_dir: &Path) -> anyhow::Result { .expect("Test image not found"); log::info!("Task package: {}", package_path.display()); + let mem_mib = (mem_gib * 1024.) as usize; let package_file = fs::File::open(package_path.clone()) .await .or_err("Error reading package file")?; - let deployment = Deployment::try_from_input(package_file, 1, 125, package_path.clone()) - .await - .or_err("Error reading package metadata")?; + let deployment = + Deployment::try_from_input(package_file, cpu_cores, mem_mib, package_path.clone()) + .await + .or_err("Error reading package metadata")?; for vol in &deployment.volumes { let vol_dir = work_dir.join(&vol.name); log::debug!("Creating volume dir: {vol_dir:?} for path {}", vol.path); From 4a111c0b97a4f134fba42eb93c20b35f24dfe1b1 Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Wed, 5 Jul 2023 03:14:58 +0200 Subject: [PATCH 09/21] Monitoring self-test output dir instead of stdout --- Cargo.lock | 178 ++++++++++++++++++++++++++++++++- runtime/Cargo.toml | 1 + runtime/src/deploy.rs | 1 - runtime/src/self_test.rs | 205 +++++++++++++++++++++------------------ 4 files changed, 285 insertions(+), 100 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a4cb02a9..886fcb4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -507,6 +507,25 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +dependencies = [ + "cfg-if", +] + [[package]] name = "crypto-common" version = "0.1.5" @@ -816,6 +835,15 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fsevent-sys" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2" +dependencies = [ + "libc", +] + [[package]] name = "fuchsia-cprng" version = "0.1.1" @@ -1193,6 +1221,26 @@ dependencies = [ "regex", ] +[[package]] +name = "inotify" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff" +dependencies = [ + "bitflags", + "inotify-sys", + "libc", +] + +[[package]] +name = "inotify-sys" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" +dependencies = [ + "libc", +] + [[package]] name = "instant" version = "0.1.12" @@ -1297,6 +1345,26 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67c21572b4949434e4fc1e1978b99c5f77064153c59d998bf13ecd96fb5ecba7" +[[package]] +name = "kqueue" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c8fc60ba15bf51257aa9807a48a61013db043fcf3a78cb0d916e8e396dcad98" +dependencies = [ + "kqueue-sys", + "libc", +] + +[[package]] +name = "kqueue-sys" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8367585489f01bc55dd27404dcf56b95e6da061a256a666ab23be9ba96a2e587" +dependencies = [ + "bitflags", + "libc", +] + [[package]] name = "language-tags" version = "0.3.2" @@ -1440,6 +1508,24 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43794a0ace135be66a25d3ae77d41b91615fb68ae937f904090203e81f755b65" +[[package]] +name = "notify" +version = "6.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5738a2795d57ea20abec2d6d76c6081186709c0024187cd5977265eda6598b51" +dependencies = [ + "bitflags", + "crossbeam-channel", + "filetime", + "fsevent-sys", + "inotify", + "kqueue", + "libc", + "mio", + "walkdir", + "windows-sys 0.45.0", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1989,6 +2075,15 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.1.0" @@ -2610,6 +2705,16 @@ dependencies = [ "quote", ] +[[package]] +name = "walkdir" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.0" @@ -2747,13 +2852,37 @@ dependencies = [ "windows_x86_64_msvc 0.36.1", ] +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + [[package]] name = "windows-sys" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.0", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", ] [[package]] @@ -2762,15 +2891,21 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" dependencies = [ - "windows_aarch64_gnullvm", + "windows_aarch64_gnullvm 0.48.0", "windows_aarch64_msvc 0.48.0", "windows_i686_gnu 0.48.0", "windows_i686_msvc 0.48.0", "windows_x86_64_gnu 0.48.0", - "windows_x86_64_gnullvm", + "windows_x86_64_gnullvm 0.48.0", "windows_x86_64_msvc 0.48.0", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.0" @@ -2783,6 +2918,12 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.0" @@ -2795,6 +2936,12 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.0" @@ -2807,6 +2954,12 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.0" @@ -2819,12 +2972,24 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.0" @@ -2837,6 +3002,12 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.0" @@ -2920,6 +3091,7 @@ dependencies = [ "env_logger 0.10.0", "futures", "log", + "notify", "pnet", "rand 0.8.4", "raw-cpuid", diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index 612f23d9..88275f39 100755 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -32,6 +32,7 @@ bollard-stubs = "1.40.2" crc = "1.8" futures = "0.3" log = "0.4.8" +notify = { version = "6.0", features = ["crossbeam-channel"] } rand = "0.8" raw-cpuid = "10.7" serde = { version = "^1.0", features = ["derive"] } diff --git a/runtime/src/deploy.rs b/runtime/src/deploy.rs index 0211f8e1..81b6851b 100644 --- a/runtime/src/deploy.rs +++ b/runtime/src/deploy.rs @@ -52,7 +52,6 @@ impl Deployment { input.take(json_len as u64).read_to_string(&mut buf).await?; buf }; - if crc32::checksum_ieee(json.as_bytes()) != crc { return Err(anyhow::anyhow!("Invalid ContainerConfig crc32 sum")); } diff --git a/runtime/src/self_test.rs b/runtime/src/self_test.rs index c7bd0a9e..ae354e81 100644 --- a/runtime/src/self_test.rs +++ b/runtime/src/self_test.rs @@ -1,19 +1,25 @@ use anyhow::bail; use futures::lock::Mutex; +use notify::event::{DataChange, ModifyKind}; +use notify::{Event, EventKind, INotifyWatcher, RecursiveMode, Watcher}; use serde_json::Value; -use std::path::Path; -use std::sync::{mpsc, Arc}; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::sync::Arc; use std::time::Duration; use tokio::fs; -use ya_runtime_sdk::runtime_api::server::RuntimeHandler; -use ya_runtime_sdk::{runtime_api::server, server::Server, Context, ErrorExt, EventEmitter}; -use ya_runtime_sdk::{Error, ProcessStatus, RuntimeStatus}; +use tokio::sync::Notify; +use uuid::Uuid; +use ya_runtime_sdk::runtime_api::deploy::ContainerVolume; +use ya_runtime_sdk::RunProcess; +use ya_runtime_sdk::{runtime_api::server, server::Server, Context, Error, ErrorExt, EventEmitter}; use crate::deploy::Deployment; use crate::vmrt::{runtime_dir, RuntimeData}; use crate::Runtime; const FILE_TEST_IMAGE: &str = "self-test.gvmi"; +const FILE_TEST_EXECUTABLE: &str = "ya-self-test"; pub(crate) async fn test( pci_device_id: Option, @@ -46,63 +52,62 @@ pub(crate) async fn run_self_test( .await .expect("Prepares self test img deployment"); - let runtime_data = RuntimeData { - deployment: Some(deployment), - pci_device_id, - ..Default::default() - }; - let runtime = Runtime { - data: Arc::new(Mutex::new(runtime_data)), - }; + let output_volume = + get_self_test_only_volume(&deployment).expect("Self test image has an output volume"); + let output_file_name = format!("out_{}.json", Uuid::new_v4()); + let output_file_vm = PathBuf::from_str(&output_volume.path) + .expect("Can create self test volume path") + .join(&output_file_name); + let output_dir = work_dir.join(output_volume.name); + let output_file = output_dir.join(&output_file_name); + + let runtime = self_test_runtime(deployment, pci_device_id); server::run_async(|e| async { let ctx = Context::try_new().expect("Creates runtime context"); log::info!("Starting runtime"); - let (status_sender, mut status_receiver) = mpsc::channel(); - let emitter = EventEmitter::spawn(ProcessOutputHandler { - handler: Box::new(e), - status_sender, - }); + let emitter = EventEmitter::spawn(e); let start_response = crate::start(work_dir.clone(), runtime.data.clone(), emitter.clone()) .await .expect("Starts runtime"); log::info!("Runtime start response {:?}", start_response); - let run_process: ya_runtime_sdk::RunProcess = server::RunProcess { - bin: "/ya-self-test".into(), - args: vec!["ya-self-test".into()], - work_dir: "/".into(), + let run_process: RunProcess = server::RunProcess { + bin: format!("/{FILE_TEST_EXECUTABLE}"), + args: vec![ + FILE_TEST_EXECUTABLE.into(), + output_file_vm.to_string_lossy().into(), + ], ..Default::default() }; log::info!("Runtime: {:?}", runtime.data); log::info!("Self test process: {run_process:?}"); + run_command( + runtime.data.clone(), + run_process, + &output_dir, + &output_file, + timeout, + ) + .await + .expect("Can run self-test command"); - let pid: u64 = crate::run_command(runtime.data.clone(), run_process) - .await - .expect("Runs command"); - - let (final_status_sender, final_status_receiver) = tokio::sync::oneshot::channel(); - tokio::spawn(async move { - let status = collect_process_response(&mut status_receiver, pid, timeout).await; - final_status_sender.send(status) - }); - let process_result = final_status_receiver + log::info!("Stopping runtime"); + crate::stop(runtime.data.clone()) .await - .expect("Receives process status"); + .expect("Stops runtime"); - log::info!("Process finished"); - let result = handle_result(process_result).expect("Handles test result"); + log::info!("Handling result"); + let out_result = read_json(&output_file); + std::fs::remove_dir_all(output_dir).expect("Removes self-test output dir"); + let result = handle_result(out_result).expect("Handles test result"); if !result.is_empty() { + // the server refuses to stop by itself; print output to stdout println!("{result}"); } - log::info!("Stopping runtime"); - crate::stop(runtime.data.clone()) - .await - .expect("Stops runtime"); - tokio::spawn(async move { // the server refuses to stop by itself; force quit std::process::exit(0); @@ -113,6 +118,18 @@ pub(crate) async fn run_self_test( .await; } +fn self_test_runtime(deployment: Deployment, pci_device_id: Option) -> Runtime { + let runtime_data = RuntimeData { + deployment: Some(deployment), + pci_device_id, + ..Default::default() + }; + Runtime { + data: Arc::new(Mutex::new(runtime_data)), + } +} + +/// Builds self test deployment based on `FILE_TEST_IMAGE` from path returned by `runtime_dir()` async fn self_test_deployment( work_dir: &Path, cpu_cores: usize, @@ -143,67 +160,63 @@ async fn self_test_deployment( Ok(deployment) } -struct ProcessOutputHandler { - status_sender: mpsc::Sender, - handler: Box, +/// Returns path to self test image only volume. +/// Fails if `self_test_deployment` has no volumes or more than one. +fn get_self_test_only_volume(self_test_deployment: &Deployment) -> anyhow::Result { + if self_test_deployment.volumes.len() != 1 { + bail!("Self test image has to have one volume"); + } + Ok(self_test_deployment.volumes.first().unwrap().clone()) } -impl RuntimeHandler for ProcessOutputHandler { - fn on_process_status<'a>(&self, status: ProcessStatus) -> futures::future::BoxFuture<'a, ()> { - if let Err(err) = self.status_sender.send(status.clone()) { - log::warn!("Failed to send process status {err}"); - } - self.handler.on_process_status(status) - } +/// Runs command, monitors `output_dir` looking for `output_file`. +/// Fails if `output_file` not created before `timeout`. +async fn run_command( + runtime_data: Arc>, + run_process: RunProcess, + output_dir: &Path, + output_file: &Path, + timeout: Duration, +) -> anyhow::Result<()> { + let output_notification = Arc::new(Notify::new()); + // Keep `_watcher` . Watcher shutdowns when dropped. + let _watcher = spawn_output_watcher(output_notification.clone(), output_dir, output_file)?; - fn on_runtime_status<'a>(&self, status: RuntimeStatus) -> futures::future::BoxFuture<'a, ()> { - self.handler.on_runtime_status(status) - } + if let Err(err) = crate::run_command(runtime_data, run_process).await { + bail!("Code: {}, msg: {}", err.code, err.message); + }; + + if let Err(err) = tokio::time::timeout(timeout, output_notification.notified()).await { + log::error!("File {output_file:?} not created before timeout of {timeout:?}s. Err: {err}."); + }; + Ok(()) } -/// Collects process `stdout` and tries to parse it into `serde_json::Value`. -/// # Arguments -/// * `status_receiver` of `ProcessStatus` -/// * `pid` -/// * `timeout` used to wait for `ProcessStatus` -async fn collect_process_response( - status_receiver: &mut mpsc::Receiver, - pid: u64, - timeout: Duration, -) -> anyhow::Result { - log::debug!("Start listening on process: {pid}"); - let mut stdout = Vec::new(); - let mut stderr = Vec::new(); - let mut return_code = 0; - while let Ok(status) = status_receiver.recv_timeout(timeout) { - if status.pid != pid { - continue; - } - stdout.append(&mut status.stdout.clone()); - stderr.append(&mut status.stderr.clone()); - return_code = status.return_code; - if !status.running { - // Process stopped - break; - } else if status.return_code != 0 { - // Process failed. Waiting for final message or timeout. - continue; - } else if let Ok(response) = serde_json::from_slice(&stdout) { - // Succeed parsing response. - return Ok(response); +fn spawn_output_watcher( + output_notification: Arc, + output_dir: &Path, + output_file: &Path, +) -> anyhow::Result { + let output_file = output_file.into(); + let mut watcher = notify::recommended_watcher(move |res| match res { + Ok(Event { + kind: EventKind::Modify(ModifyKind::Data(DataChange::Any)), + paths, + .. + }) if paths.contains(&output_file) => output_notification.notify_waiters(), + Ok(event) => { + log::debug!("Output file watch event: {:?}", event); } - } - if return_code != 0 { - bail!(String::from_utf8_lossy(&stderr).to_string()) - } - match serde_json::from_slice(&stdout) { - Ok(response) => Ok(response), - Err(err) => { - if !stderr.is_empty() { - bail!(String::from_utf8_lossy(&stderr).to_string()) - } else { - bail!(err) - } + Err(error) => { + log::error!("Output file watch error: {:?}", error); } - } + })?; + + watcher.watch(output_dir, RecursiveMode::Recursive)?; + Ok(watcher) +} + +fn read_json(output_file: &Path) -> anyhow::Result { + let output_file = std::fs::File::open(output_file)?; + Ok(serde_json::from_reader(&output_file)?) } From c92f688cb319cd6fcf85d9181ceffdf688f19a9d Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Wed, 5 Jul 2023 09:03:23 +0200 Subject: [PATCH 10/21] Rust toolchain 1.70.0 and rustfmt config. MacOS CI bugfix. --- .github/workflows/build.yml | 5 +-- .github/workflows/release.yml | 8 +++-- runtime/Cargo.toml | 4 ++- runtime/src/self_test.rs | 4 +-- rust-toolchain.toml | 4 +++ rustfmt.toml | 61 +++++++++++++++++++++++++++++++++++ 6 files changed, 78 insertions(+), 8 deletions(-) create mode 100644 rust-toolchain.toml create mode 100644 rustfmt.toml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c6292c36..f5e01b90 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,6 +16,7 @@ jobs: env: RUSTFLAGS: "-D warnings -C opt-level=z -C target-cpu=x86-64 -C debuginfo=1" X86_64_PC_WINDOWS_MSVC_OPENSSL_DIR: c:/vcpkg/installed/x64-windows + rust_stable: 1.70.0 runs-on: ${{ matrix.os }} strategy: matrix: @@ -26,10 +27,10 @@ jobs: - name: Checkout uses: actions/checkout@v1 - - name: Install Last Stable Rust + - name: Install Rust ${{ env.rust_stable }} uses: actions-rs/toolchain@v1 with: - toolchain: stable + toolchain: ${{ env.rust_stable }} - name: Cache cargo registry uses: actions/cache@v1 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ddb8097f..22fb5555 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -6,8 +6,9 @@ on: - pre-rel-* env: - self-test-img_tag: pre-rel-v0.1.0 + self-test-img_tag: pre-rel-v0.1.0_RC3_file_output self-test-img_repository: golemfactory/ya-self-test-img + rust_stable: 1.70.0 jobs: create-release: @@ -112,9 +113,10 @@ jobs: console.log(release.data.upload_url); return release.data.upload_url - - uses: actions-rs/toolchain@v1 + - name: Install Rust ${{ env.rust_stable }} + uses: actions-rs/toolchain@v1 with: - toolchain: stable + toolchain: ${{ env.rust_stable }} target: x86_64-unknown-linux-musl override: true - name: Build diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index 88275f39..94288766 100755 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -32,7 +32,9 @@ bollard-stubs = "1.40.2" crc = "1.8" futures = "0.3" log = "0.4.8" -notify = { version = "6.0", features = ["crossbeam-channel"] } +# "crossbeam-channel" and "macos_fsevent" are default features. +# Remove `macos_fsevent` if `macos` build will get dropped. +notify = { version = "6.0", features = ["crossbeam-channel", "macos_fsevent"] } rand = "0.8" raw-cpuid = "10.7" serde = { version = "^1.0", features = ["derive"] } diff --git a/runtime/src/self_test.rs b/runtime/src/self_test.rs index ae354e81..12e06bc0 100644 --- a/runtime/src/self_test.rs +++ b/runtime/src/self_test.rs @@ -1,7 +1,7 @@ use anyhow::bail; use futures::lock::Mutex; use notify::event::{DataChange, ModifyKind}; -use notify::{Event, EventKind, INotifyWatcher, RecursiveMode, Watcher}; +use notify::{Event, EventKind, RecursiveMode, Watcher, RecommendedWatcher}; use serde_json::Value; use std::path::{Path, PathBuf}; use std::str::FromStr; @@ -196,7 +196,7 @@ fn spawn_output_watcher( output_notification: Arc, output_dir: &Path, output_file: &Path, -) -> anyhow::Result { +) -> anyhow::Result { let output_file = output_file.into(); let mut watcher = notify::recommended_watcher(move |res| match res { Ok(Event { diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 00000000..2989cbb2 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,4 @@ +[toolchain] +channel = "1.70.0" +components = ["rustfmt", "clippy"] +targets = ["x86_64-unknown-linux-musl"] diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 00000000..e278f857 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,61 @@ +max_width = 100 +hard_tabs = false +tab_spaces = 4 +newline_style = "Auto" +use_small_heuristics = "Default" +#indent_style = "Block" +#wrap_comments = false +#comment_width = 80 +#normalize_comments = false +#license_template_path = "" +#format_strings = false +#format_macro_matchers = false +#format_macro_bodies = true +#empty_item_single_line = true +#struct_lit_single_line = true +#fn_single_line = false +#where_single_line = false +#imports_indent = "Block" +#imports_layout = "Mixed" +#merge_imports = false +reorder_imports = true +reorder_modules = true +#reorder_impl_items = false +#type_punctuation_density = "Wide" +#space_before_colon = false +#space_after_colon = true +#spaces_around_ranges = false +#binop_separator = "Front" +remove_nested_parens = true +#combine_control_expr = true +#struct_field_align_threshold = 0 +#match_arm_blocks = true +#force_multiline_blocks = false +#fn_args_density = "Tall" +#brace_style = "SameLineWhere" +#control_brace_style = "AlwaysSameLine" +#trailing_semicolon = true +#trailing_comma = "Vertical" +#match_block_trailing_comma = false +#blank_lines_upper_bound = 1 +#blank_lines_lower_bound = 0 +#edition = "Edition2015" +merge_derives = true +use_try_shorthand = false +use_field_init_shorthand = false +force_explicit_abi = true +#condense_wildcard_suffixes = false +#color = "Auto" +#required_version = "0.99.1" +#unstable_features = true +#disable_all_formatting = false +#skip_children = false +#hide_parse_errors = false +#error_on_line_overflow = false +#error_on_unformatted = false +#report_todo = "Never" +#report_fixme = "Never" +#ignore = [] +#emit_mode = "Files" +#make_backup = false +#inline_attribute_width=50 # unstable From b90291181070bf708b16f3f36fefa2eca7d89ab6 Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Wed, 5 Jul 2023 11:46:11 +0200 Subject: [PATCH 11/21] Logging self test stdout/err. Formatting. --- runtime/src/self_test.rs | 86 ++++++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 20 deletions(-) diff --git a/runtime/src/self_test.rs b/runtime/src/self_test.rs index 12e06bc0..7fea489a 100644 --- a/runtime/src/self_test.rs +++ b/runtime/src/self_test.rs @@ -1,7 +1,7 @@ use anyhow::bail; use futures::lock::Mutex; use notify::event::{DataChange, ModifyKind}; -use notify::{Event, EventKind, RecursiveMode, Watcher, RecommendedWatcher}; +use notify::{Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher}; use serde_json::Value; use std::path::{Path, PathBuf}; use std::str::FromStr; @@ -11,8 +11,9 @@ use tokio::fs; use tokio::sync::Notify; use uuid::Uuid; use ya_runtime_sdk::runtime_api::deploy::ContainerVolume; -use ya_runtime_sdk::RunProcess; +use ya_runtime_sdk::runtime_api::server::RuntimeHandler; use ya_runtime_sdk::{runtime_api::server, server::Server, Context, Error, ErrorExt, EventEmitter}; +use ya_runtime_sdk::{ProcessStatus, RunProcess, RuntimeStatus}; use crate::deploy::Deployment; use crate::vmrt::{runtime_dir, RuntimeData}; @@ -63,32 +64,22 @@ pub(crate) async fn run_self_test( let runtime = self_test_runtime(deployment, pci_device_id); - server::run_async(|e| async { + server::run_async(|emitter| async { let ctx = Context::try_new().expect("Creates runtime context"); log::info!("Starting runtime"); - let emitter = EventEmitter::spawn(e); - let start_response = crate::start(work_dir.clone(), runtime.data.clone(), emitter.clone()) + let start_response = start_runtime(emitter, work_dir.clone(), runtime.data.clone()) .await .expect("Starts runtime"); log::info!("Runtime start response {:?}", start_response); - let run_process: RunProcess = server::RunProcess { - bin: format!("/{FILE_TEST_EXECUTABLE}"), - args: vec![ - FILE_TEST_EXECUTABLE.into(), - output_file_vm.to_string_lossy().into(), - ], - ..Default::default() - }; - log::info!("Runtime: {:?}", runtime.data); - log::info!("Self test process: {run_process:?}"); - run_command( + log::info!("Running self test command"); + run_self_test_command( runtime.data.clone(), - run_process, &output_dir, &output_file, + &output_file_vm, timeout, ) .await @@ -101,13 +92,15 @@ pub(crate) async fn run_self_test( log::info!("Handling result"); let out_result = read_json(&output_file); - std::fs::remove_dir_all(output_dir).expect("Removes self-test output dir"); let result = handle_result(out_result).expect("Handles test result"); if !result.is_empty() { // the server refuses to stop by itself; print output to stdout println!("{result}"); } + log::debug!("Deleting output files"); + std::fs::remove_dir_all(output_dir).expect("Removes self-test output dir"); + tokio::spawn(async move { // the server refuses to stop by itself; force quit std::process::exit(0); @@ -169,15 +162,36 @@ fn get_self_test_only_volume(self_test_deployment: &Deployment) -> anyhow::Resul Ok(self_test_deployment.volumes.first().unwrap().clone()) } +/// Starts runtime with runtime handler wrapped to log process stdout and stdderr +async fn start_runtime( + handler: HANDLER, + work_dir: PathBuf, + runtime_data: Arc>, +) -> anyhow::Result> { + let emitter = ProcessOutputLogger::new(handler); + let emitter = EventEmitter::spawn(emitter); + crate::start(work_dir.clone(), runtime_data, emitter.clone()).await +} + /// Runs command, monitors `output_dir` looking for `output_file`. /// Fails if `output_file` not created before `timeout`. -async fn run_command( +async fn run_self_test_command( runtime_data: Arc>, - run_process: RunProcess, output_dir: &Path, output_file: &Path, + output_file_vm: &Path, timeout: Duration, ) -> anyhow::Result<()> { + let run_process: RunProcess = server::RunProcess { + bin: format!("/{FILE_TEST_EXECUTABLE}"), + args: vec![ + FILE_TEST_EXECUTABLE.into(), + output_file_vm.to_string_lossy().into(), + ], + ..Default::default() + }; + log::info!("Self test process: {run_process:?}"); + let output_notification = Arc::new(Notify::new()); // Keep `_watcher` . Watcher shutdowns when dropped. let _watcher = spawn_output_watcher(output_notification.clone(), output_dir, output_file)?; @@ -220,3 +234,35 @@ fn read_json(output_file: &Path) -> anyhow::Result { let output_file = std::fs::File::open(output_file)?; Ok(serde_json::from_reader(&output_file)?) } +struct ProcessOutputLogger { + handler: Box, +} + +impl ProcessOutputLogger { + fn new(handler: HANDLER) -> Self { + let handler = Box::new(handler); + Self { handler } + } +} + +impl RuntimeHandler for ProcessOutputLogger { + fn on_process_status<'a>(&self, status: ProcessStatus) -> futures::future::BoxFuture<'a, ()> { + if !status.stdout.is_empty() { + log::debug!( + "PID: {}, stdout: {}", + status.pid, + String::from_utf8_lossy(&status.stdout) + ); + } else if !status.stderr.is_empty() { + log::debug!( + "PID: {}, stderr: {}", + status.pid, + String::from_utf8_lossy(&status.stderr) + ); + } + self.handler.on_process_status(status) + } + fn on_runtime_status<'a>(&self, status: RuntimeStatus) -> futures::future::BoxFuture<'a, ()> { + self.handler.on_runtime_status(status) + } +} From c20d3fe7f19b2d9ac73a32bf5863c87577c63339 Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Thu, 6 Jul 2023 16:38:34 +0200 Subject: [PATCH 12/21] Monitoring of output file bugfix. Notification on Closing write access. --- runtime/src/self_test.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/runtime/src/self_test.rs b/runtime/src/self_test.rs index 7fea489a..2429c7c7 100644 --- a/runtime/src/self_test.rs +++ b/runtime/src/self_test.rs @@ -1,6 +1,6 @@ use anyhow::bail; use futures::lock::Mutex; -use notify::event::{DataChange, ModifyKind}; +use notify::event::{AccessKind, AccessMode}; use notify::{Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher}; use serde_json::Value; use std::path::{Path, PathBuf}; @@ -214,12 +214,12 @@ fn spawn_output_watcher( let output_file = output_file.into(); let mut watcher = notify::recommended_watcher(move |res| match res { Ok(Event { - kind: EventKind::Modify(ModifyKind::Data(DataChange::Any)), + kind: EventKind::Access(AccessKind::Close(AccessMode::Write)), paths, .. }) if paths.contains(&output_file) => output_notification.notify_waiters(), Ok(event) => { - log::debug!("Output file watch event: {:?}", event); + log::trace!("Output file watch event: {:?}", event); } Err(error) => { log::error!("Output file watch error: {:?}", error); From 32ff18b983c092aa58c687d3982028b973f3cb14 Mon Sep 17 00:00:00 2001 From: Kamil Koczurek Date: Sun, 9 Jul 2023 10:50:29 +0200 Subject: [PATCH 13/21] correct gpu path --- runtime/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 1a4ee45f..a76039a3 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -364,8 +364,8 @@ fn offer(self_test_result: serde_json::Value) -> anyhow::Result Date: Sun, 9 Jul 2023 10:50:51 +0200 Subject: [PATCH 14/21] validate self_test_results contents --- runtime/src/lib.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index a76039a3..aa48fd2d 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -371,10 +371,12 @@ fn offer(self_test_result: serde_json::Value) -> anyhow::Result bool { - // NYI - false + let Some(root) = self_test_result.as_object() else { + return false; + }; + + root.get("gpu").is_some() } async fn join_network( From c3ef7d0215c59a0c22aeaf89a3a7dc4e243d9639 Mon Sep 17 00:00:00 2001 From: pwalski <4924911+pwalski@users.noreply.github.com> Date: Mon, 10 Jul 2023 10:33:51 +0200 Subject: [PATCH 15/21] Offer template without empty self test result (#3) * Offer template without empty self test result * No trailing whitespace in offer-template --------- Co-authored-by: Przemyslaw Walski --- runtime/src/lib.rs | 26 +++++++++++++++++++------- runtime/src/self_test.rs | 2 +- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index b8362612..f2a2d940 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -379,21 +379,33 @@ fn offer(self_test_result: serde_json::Value) -> anyhow::Result bool { diff --git a/runtime/src/self_test.rs b/runtime/src/self_test.rs index 2429c7c7..0a442a00 100644 --- a/runtime/src/self_test.rs +++ b/runtime/src/self_test.rs @@ -95,7 +95,7 @@ pub(crate) async fn run_self_test( let result = handle_result(out_result).expect("Handles test result"); if !result.is_empty() { // the server refuses to stop by itself; print output to stdout - println!("{result}"); + print!("{result}"); } log::debug!("Deleting output files"); From e6e890d49d1b1c93d18bf75b95dd97317e10c8f2 Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Wed, 12 Jul 2023 12:35:59 +0200 Subject: [PATCH 16/21] Run command deadlock bugfix. --- runtime/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index f2a2d940..c66444f5 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -188,8 +188,8 @@ impl ya_runtime_sdk::Runtime for Runtime { let pci_device_id = ctx.cli.runtime.pci_device.clone(); let data = self.data.clone(); async move { - let mut runtime_data = data.lock().await; if let Some(pci_device_id) = pci_device_id { + let mut runtime_data = data.lock().await; runtime_data.pci_device_id.replace(pci_device_id); } run_command(data.clone(), command).await From b2a13830716f86d1d81323caafcdd69de251ca50 Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Wed, 19 Jul 2023 23:24:14 +0200 Subject: [PATCH 17/21] Env variables name update --- runtime/src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index c66444f5..54381c0b 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -70,16 +70,16 @@ pub struct Cli { #[structopt(long)] inet_endpoint: Option, /// PCI device identifier - #[structopt(long, env = "PCI_DEVICE")] + #[structopt(long, env = "YA_RUNTIME_VM_PCI_DEVICE")] pci_device: Option, /// Test process timeout (in sec) - #[structopt(long, env = "TEST_TIMEOUT", default_value = "10")] + #[structopt(long, env = "YA_RUNTIME_VM_TEST_TIMEOUT", default_value = "10")] test_timeout: u64, /// Number of logical CPU cores for test process - #[structopt(long, env = "TEST_CPU_CORES", default_value = "1")] + #[structopt(long, env = "YA_RUNTIME_VM_TEST_CPU_CORES", default_value = "1")] test_cpu_cores: usize, /// Amount of RAM for test process [GiB] - #[structopt(long, env = "TEST_MEM_GIB", default_value = "0.125")] + #[structopt(long, env = "YA_RUNTIME_VM_TEST_MEM_GIB", default_value = "0.125")] test_mem_gib: f64, } From bc55e7d7e4825a47be40eeae34a668019d9a4fde Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Thu, 20 Jul 2023 11:44:00 +0200 Subject: [PATCH 18/21] self-test-img_tag version update: v0.1.4 --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 22fb5555..14f8ff72 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -6,7 +6,7 @@ on: - pre-rel-* env: - self-test-img_tag: pre-rel-v0.1.0_RC3_file_output + self-test-img_tag: v0.1.4 self-test-img_repository: golemfactory/ya-self-test-img rust_stable: 1.70.0 From bc706169f1cb2c08ab4606414c9119297b985454 Mon Sep 17 00:00:00 2001 From: pwalski <4924911+pwalski@users.noreply.github.com> Date: Thu, 20 Jul 2023 13:15:59 +0200 Subject: [PATCH 19/21] Apply suggestions from code review Co-authored-by: nieznanysprawiciel --- runtime/src/lib.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 54381c0b..a52ae25d 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -409,11 +409,10 @@ fn offer(self_test_result: serde_json::Value) -> anyhow::Result bool { - let Some(root) = self_test_result.as_object() else { - return false; - }; - - root.get("gpu").is_some() + self_test_result + .as_object() + .and_then(|root| root.get("gpu")) + .is_some() } async fn join_network( From 97c2bcbef71635b772d2bf35de4cbdcd0ed31f77 Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Thu, 20 Jul 2023 13:41:24 +0200 Subject: [PATCH 20/21] Test default mem limit 0.5 GiB --- runtime/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index a52ae25d..4fb257fc 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -79,7 +79,7 @@ pub struct Cli { #[structopt(long, env = "YA_RUNTIME_VM_TEST_CPU_CORES", default_value = "1")] test_cpu_cores: usize, /// Amount of RAM for test process [GiB] - #[structopt(long, env = "YA_RUNTIME_VM_TEST_MEM_GIB", default_value = "0.125")] + #[structopt(long, env = "YA_RUNTIME_VM_TEST_MEM_GIB", default_value = "0.5")] test_mem_gib: f64, } From f31c6a4d67b01acfc6fd0790bd2945f2c74ac4eb Mon Sep 17 00:00:00 2001 From: Przemyslaw Walski Date: Thu, 20 Jul 2023 15:40:09 +0200 Subject: [PATCH 21/21] Test config struct --- runtime/src/lib.rs | 34 +++++++++++++++++----------------- runtime/src/self_test.rs | 20 +++++++++----------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 4fb257fc..158c508e 100755 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -72,6 +72,18 @@ pub struct Cli { /// PCI device identifier #[structopt(long, env = "YA_RUNTIME_VM_PCI_DEVICE")] pci_device: Option, + #[structopt(flatten)] + test_config: TestConfig, +} + +#[derive(ya_runtime_sdk::RuntimeDef, Default)] +#[cli(Cli)] +pub struct Runtime { + data: Arc>, +} + +#[derive(StructOpt, Clone, Default)] +struct TestConfig { /// Test process timeout (in sec) #[structopt(long, env = "YA_RUNTIME_VM_TEST_TIMEOUT", default_value = "10")] test_timeout: u64, @@ -83,18 +95,12 @@ pub struct Cli { test_mem_gib: f64, } -impl Cli { +impl TestConfig { fn test_timeout(&self) -> Duration { Duration::from_secs(self.test_timeout) } } -#[derive(ya_runtime_sdk::RuntimeDef, Default)] -#[cli(Cli)] -pub struct Runtime { - data: Arc>, -} - impl ya_runtime_sdk::Runtime for Runtime { fn deploy<'a>(&mut self, ctx: &mut Context) -> OutputResponse<'a> { let workdir = ctx.cli.workdir.clone().expect("Workdir not provided"); @@ -210,9 +216,7 @@ impl ya_runtime_sdk::Runtime for Runtime { fn offer<'a>(&mut self, ctx: &mut Context) -> OutputResponse<'a> { let pci_device_id = ctx.cli.runtime.pci_device.clone(); - let test_timeout = ctx.cli.runtime.test_timeout(); - let cpu_cores = ctx.cli.runtime.test_cpu_cores; - let mem_gib = ctx.cli.runtime.test_mem_gib; + let test_config = ctx.cli.runtime.test_config.clone(); self_test::run_self_test( |self_test_result| { self_test::verify_status(self_test_result) @@ -221,9 +225,7 @@ impl ya_runtime_sdk::Runtime for Runtime { .map(|offer| serde_json::Value::to_string(&offer)) }, pci_device_id, - test_timeout, - cpu_cores, - mem_gib, + test_config, ) // Dead code. ya_runtime_api::server::run_async requires killing the process to stop app .map(|_| Ok(None)) @@ -232,10 +234,8 @@ impl ya_runtime_sdk::Runtime for Runtime { fn test<'a>(&mut self, ctx: &mut Context) -> EmptyResponse<'a> { let pci_device_id = ctx.cli.runtime.pci_device.clone(); - let test_timeout = ctx.cli.runtime.test_timeout(); - let cpu_cores = ctx.cli.runtime.test_cpu_cores; - let mem_gib = ctx.cli.runtime.test_mem_gib; - self_test::test(pci_device_id, test_timeout, cpu_cores, mem_gib).boxed_local() + let test_config = ctx.cli.runtime.test_config.clone(); + self_test::test(pci_device_id, test_config).boxed_local() } fn join_network<'a>( diff --git a/runtime/src/self_test.rs b/runtime/src/self_test.rs index 0a442a00..1b32a593 100644 --- a/runtime/src/self_test.rs +++ b/runtime/src/self_test.rs @@ -17,18 +17,16 @@ use ya_runtime_sdk::{ProcessStatus, RunProcess, RuntimeStatus}; use crate::deploy::Deployment; use crate::vmrt::{runtime_dir, RuntimeData}; -use crate::Runtime; +use crate::{Runtime, TestConfig}; const FILE_TEST_IMAGE: &str = "self-test.gvmi"; const FILE_TEST_EXECUTABLE: &str = "ya-self-test"; pub(crate) async fn test( pci_device_id: Option, - timeout: Duration, - cpu_cores: usize, - mem_gib: f64, + test_config: TestConfig, ) -> Result<(), Error> { - run_self_test(verify_status, pci_device_id, timeout, cpu_cores, mem_gib).await; + run_self_test(verify_status, pci_device_id, test_config).await; // Dead code. ya_runtime_api::server::run_async requires killing a process to stop Ok(()) } @@ -41,15 +39,13 @@ pub(crate) fn verify_status(status: anyhow::Result) -> anyhow::Result( handle_result: HANDLER, pci_device_id: Option, - timeout: Duration, - cpu_cores: usize, - mem_gib: f64, + test_config: TestConfig, ) where HANDLER: Fn(anyhow::Result) -> anyhow::Result, { let work_dir = std::env::temp_dir(); - let deployment = self_test_deployment(&work_dir, cpu_cores, mem_gib) + let deployment = self_test_deployment(&work_dir, &test_config) .await .expect("Prepares self test img deployment"); @@ -75,6 +71,7 @@ pub(crate) async fn run_self_test( log::info!("Runtime: {:?}", runtime.data); log::info!("Running self test command"); + let timeout = test_config.test_timeout(); run_self_test_command( runtime.data.clone(), &output_dir, @@ -125,8 +122,7 @@ fn self_test_runtime(deployment: Deployment, pci_device_id: Option) -> R /// Builds self test deployment based on `FILE_TEST_IMAGE` from path returned by `runtime_dir()` async fn self_test_deployment( work_dir: &Path, - cpu_cores: usize, - mem_gib: f64, + test_config: &TestConfig, ) -> anyhow::Result { let package_path = runtime_dir() .expect("Runtime directory not found") @@ -134,6 +130,8 @@ async fn self_test_deployment( .canonicalize() .expect("Test image not found"); + let cpu_cores = test_config.test_cpu_cores; + let mem_gib = test_config.test_mem_gib; log::info!("Task package: {}", package_path.display()); let mem_mib = (mem_gib * 1024.) as usize; let package_file = fs::File::open(package_path.clone())