From a9091f1e4cf5539ce70c45bd0d2243fe84ffd9ee Mon Sep 17 00:00:00 2001 From: Patrick Meade Date: Tue, 22 Oct 2024 19:04:45 -0500 Subject: [PATCH] Add plans for new DiskArchiver component --- .github/workflows/ci.yml | 2 +- .gitignore | 3 + Cargo.lock | 44 ++-- bin/workbench | 10 + doc/jadeite-disk-archiver.txt | 456 ++++++++++++++++++++++++++++++++++ src/bin/workbench.rs | 4 - 6 files changed, 492 insertions(+), 27 deletions(-) create mode 100644 doc/jadeite-disk-archiver.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 21508ec..5c741ee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: - run: cargo fmt --all -- --check - run: cargo generate-lockfile --locked - run: cargo check --locked - - run: cargo clippy --locked + - run: cargo clippy --locked -- --deny warnings - run: cargo verify-project --locked test: diff --git a/.gitignore b/.gitignore index 105c104..e5c2ec8 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ # don't version rustfmt backup files **/*.rs.bk + +# don't version my notes +/NOTES.md diff --git a/Cargo.lock b/Cargo.lock index 8d85c63..a811fdd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -152,15 +152,15 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.7.2" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" +checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" [[package]] name = "cc" -version = "1.1.30" +version = "1.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16803a61b81d9eabb7eae2588776c4c1e584b738ede45fdbb4c972cec1e9945" +checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" dependencies = [ "shlex", ] @@ -548,9 +548,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.160" +version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0b21006cd1874ae9e650973c565615676dc4a274c965bb0a73796dac838ce4f" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" [[package]] name = "log" @@ -664,9 +664,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.88" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" dependencies = [ "unicode-ident", ] @@ -850,18 +850,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.210" +version = "1.0.213" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.213" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" dependencies = [ "proc-macro2", "quote", @@ -870,9 +870,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.129" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dbcf9b78a125ee667ae19388837dd12294b858d101fdd393cb9d5501ef09eb2" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ "itoa", "memchr", @@ -946,9 +946,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "syn" -version = "2.0.79" +version = "2.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021" dependencies = [ "proc-macro2", "quote", @@ -984,18 +984,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.64" +version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.64" +version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" dependencies = [ "proc-macro2", "quote", @@ -1019,9 +1019,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.40.0" +version = "1.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" +checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" dependencies = [ "backtrace", "bytes", diff --git a/bin/workbench b/bin/workbench index 2a08f55..7a4275a 100755 --- a/bin/workbench +++ b/bin/workbench @@ -3,4 +3,14 @@ export RUST_LOG=${RUST_LOG:="trace"} +if ! dpkg -l | grep -q musl-tools; then + echo "musl-tools is not installed. Installing..." + sudo apt update + sudo apt install musl-tools --yes +fi + +rustup target add x86_64-unknown-linux-musl + +cargo build -Zbuild-std --target=x86_64-unknown-linux-musl + cargo run --all-features --bin workbench --frozen --target=x86_64-unknown-linux-musl diff --git a/doc/jadeite-disk-archiver.txt b/doc/jadeite-disk-archiver.txt new file mode 100644 index 0000000..4eef34b --- /dev/null +++ b/doc/jadeite-disk-archiver.txt @@ -0,0 +1,456 @@ +# jadeite-disk-archiver.txt +# +# This document contains a kind of pseudo-code schematic for the +# operation of the DiskArchiver component of jadeite 2.39.0. This +# provides a blueprint for the construction of the replacement +# component in the datamove repository. + + + +edu.wisc.icecube.jade.engine.Jade + main(args) + + context + + process = context.get(args[0]) + process.run() + + + +edu.wisc.icecube.jade.process.archiver.disk.JadeDiskArchiver + run() + - ensure host + - create thread pool + + worker + - schedule worker in thread pool + - start Spark (for REST API /status) + + getStatus() + + + +edu.wisc.icecube.jade.process.archiver.disk.DiskArchiverWorkerImpl + run() // inherited from edu.wisc.icecube.jade.process.WorkerImpl + runSafely() + - determine host + - parse disk archives + closeOnSemaphore() + for-each(disk-archive-paths) + if(closeSemaphore.exists()) { + closeCopyPath(disk-archive-path) + getDiskLabelUuid + writeArchivalDiskMetadata(disk-archive-path, labelUuid) + DB.loadDiskByUuid(labelUuid) + create ArchivalDiskMetadata + write ArchivalDiskMetadata to disk/labelUuid + DB.closeByUuid(labelUuid) + getDiskByUuid(labelUuid) + sendEmailStreamingDiskEnded(disk-archive-path, disk) + + build up data-object + + load template + parseFreeMarkerTemplate(template, data-object) + sendEmail(recipients, subject, text) + purgeDiskCache = true + } + executeScan() + listFiles(inboxDir).filter(startsWith(KEY_PREFIX)) + processScan() + for-each(files) + getFilePairUuid + DB.findByUuid + retVal.push(row) + archiveFilePairsToDisk(filePairs, host, diskArchives) + for-each(filePairs) + archiveFilePairToDisk(filePair, host, diskArchives) + for-each(diskArchives) + for-each(numCopies) + archiveFilePairToDiskCopy(filePair, host, diskArchive, copyId) + findOrCreateArchivalCopy(host, diskArchive, copyId) + findArchivalCopy(host, diskArchive, copyId) + if(no-archival-copy) { + createArchivalCopy(host, diskArchive, copyId) + findArchivalCopy(host, diskArchive, copyId) + } + addFilePairToCopyPath(filePair, destDisk) + addFilePairToDisk(diskLabel, filePair) + moveArchiveFileToDiskCache() + if(purgeDiskCache) { + purgeDiskCache() + listFiles(cacheDir) + for-each(files) + getFilePairUuid + DB.findByUuid + for-each(disk-archives) + canBePurgedFromDiskCache(file, diskArchive) + diskArchive.getNumCopies() + file.getUuid() + getDiskClosedCopyCount(uuid, diskArchive) + for-each(file.disks) + isBad -> continue + isClosed==false -> continue + no-archive -> continue + archive!=diskArchive -> continue + copyCount++ + closedCopyCount >= numCopies + if(purgeOk) { + file.delete() + } + } + + + +Configuration Variables (JadeDiskArchiver) + @Value("${disk.archiver.dir.cache}") + private File diskArchiverDirCache; + + @Value("${disk.archiver.dir.inbox}") + private File diskArchiverDirInbox; + + @Value("${disk.archiver.status.port}") + private int diskArchiverStatusPort; + + @Value("${disk.archiver.thread.count}") + private int diskArchiverThreadCount; + + @Value("${disk.archiver.thread.delay.initial}") + private long diskArchiverThreadDelayInitial; + + @Value("${disk.archiver.thread.delay.repeat}") + private long diskArchiverThreadDelayRepeat; + + @Value("${disk.archiver.thread.name.format}") + private String diskArchiverThreadNameFormat; + + @Value("${jade.hostname}") + private String hostname; + + @Value("${disk.archiver.dir.problem.files}") + private File problemFilesDirectory; + + + +Configuration Variables (DiskArchiverWorkerImpl) + @Value("${disk.archiver.dir.cache}") + private String diskArchiverDirCache; + + @Value("${disk.archiver.dir.inbox}") + private String diskArchiverDirInbox; + + @Value("${disk.archiver.dir.problem.files}") + private String diskArchiverDirProblemFiles; + + @Value("${disk.archiver.metadata.maximum.size}") + private long diskArchiverMetadataMaximumSize; + + @Value("${disk.archiver.timeout.ls}") + private long diskArchiverTimeoutLs; + + @Value("${disk.archiver.timeout.lsblk}") + private long diskArchiverTimeoutLsblk; + + @Value("${disk.archiver.timeout.mountpoint}") + private long diskArchiverTimeoutMountpoint; + + @Value("${jade.hostname}") + private String hostname; + + @Value("${command.ls}") private String LS_COMMAND; + @Value("${command.lsblk}") private String LSBLK_COMMAND; + @Value("${command.mountpoint}") private String MOUNTPOINT_COMMAND; + + + +public class ArchivalDiskMetadata +{ + public long capacity; + public int copyId; + public long dateCreated; + public long dateUpdated; + public String diskArchiveUuid; + public ArchivalDiskFile[] files; + public long id; + public String label; + public String uuid; +} + + + +public class ArchivalDiskFile +{ + public String archiveChecksum; + public String archiveFile; + public long archiveSize; + public String binaryFile; + public long binarySize; + public long dataStreamId; + public String dataStreamUuid; + public String dataWarehousePath; + public long dateCreated; + public long dateFetched; + public long dateProcessed; + public long dateUpdated; + public long dateVerified; + public DIFPlus DIF_Plus; + public int diskCount; + public String fetchChecksum; + public String fetchedByHost; + public String fingerprint; +// public long id; + public String metadataFile; + public String originChecksum; + public long originModificationDate; + public String semaphoreFile; + public String uuid; + public String xmlMetadata; +} + + + +[Tue Oct 22 21:45:13 jade@jade02 log]$jade status disk-archiver --live +{ + "cacheAge": 800035, + "inboxAge": 38, + "problemFileCount": 0, + "status": "OK", + "inboxCount": 0, + "archivalDisks": { + "/mnt/slot12": { + "status": "Not Mounted", + "id": 0 + }, + "/mnt/slot7": { + "status": "Available", + "id": 0, + "available": true + }, + "/mnt/slot8": { + "status": "Not Mounted", + "id": 0 + }, + "/mnt/slot10": { + "status": "Not Mounted", + "id": 0 + }, + "/mnt/slot5": { + "status": "Available", + "id": 0, + "available": true + }, + "/mnt/slot11": { + "status": "Not Mounted", + "id": 0 + }, + "/mnt/slot6": { + "status": "Available", + "id": 0, + "available": true + }, + "/mnt/slot3": { + "status": "In-Use", + "id": 1685, + "closed": false, + "copyId": 2, + "onHold": false, + "uuid": "8e49c095-7702-4f22-92c5-4b4d5d2bb76f", + "archive": "IceCube Disk Archive", + "label": "IceCube_2_2024_0108" + }, + "/mnt/slot4": { + "status": "In-Use", + "id": 1683, + "closed": false, + "copyId": 1, + "onHold": false, + "uuid": "29affab2-2469-4d70-a1c8-4b2e67294437", + "archive": "IceCube Disk Archive", + "label": "IceCube_1_2024_0102" + }, + "/mnt/slot1": { + "status": "Finished", + "id": 1659, + "closed": true, + "copyId": 1, + "onHold": false, + "uuid": "8464d018-60d5-4fbb-bd00-30a15f0c32ed", + "archive": "IceCube Disk Archive", + "label": "IceCube_1_2024_0091" + }, + "/mnt/slot2": { + "status": "Available", + "id": 0, + "available": true + }, + "/mnt/slot9": { + "status": "Not Mounted", + "id": 0 + } + } +} + + + +[Tue Oct 22 21:45:24 jade@jade02 log]$jade status disk-archiver --json +{ + "workers": [ + { + "archivalDisks": { + "/mnt/slot12": { + "status": "Not Mounted", + "id": 0 + }, + "/mnt/slot7": { + "status": "Available", + "id": 0, + "available": true + }, + "/mnt/slot8": { + "status": "Not Mounted", + "id": 0 + }, + "/mnt/slot10": { + "status": "Not Mounted", + "id": 0 + }, + "/mnt/slot5": { + "status": "Available", + "id": 0, + "available": true + }, + "/mnt/slot11": { + "status": "Not Mounted", + "id": 0 + }, + "/mnt/slot6": { + "status": "Available", + "id": 0, + "available": true + }, + "/mnt/slot3": { + "status": "In-Use", + "id": 1685, + "closed": false, + "copyId": 2, + "onHold": false, + "uuid": "8e49c095-7702-4f22-92c5-4b4d5d2bb76f", + "archive": "IceCube Disk Archive", + "label": "IceCube_2_2024_0108" + }, + "/mnt/slot4": { + "status": "In-Use", + "id": 1683, + "closed": false, + "copyId": 1, + "onHold": false, + "uuid": "29affab2-2469-4d70-a1c8-4b2e67294437", + "archive": "IceCube Disk Archive", + "label": "IceCube_1_2024_0102" + }, + "/mnt/slot1": { + "status": "Finished", + "id": 1659, + "closed": true, + "copyId": 1, + "onHold": false, + "uuid": "8464d018-60d5-4fbb-bd00-30a15f0c32ed", + "archive": "IceCube Disk Archive", + "label": "IceCube_1_2024_0091" + }, + "/mnt/slot2": { + "status": "Available", + "id": 0, + "available": true + }, + "/mnt/slot9": { + "status": "Not Mounted", + "id": 0 + } + }, + "inboxCount": 5 + } + ], + "cacheAge": 800130, + "inboxAge": 33, + "problemFileCount": 0, + "status": "OK" +} + + + +public class JadeProcessStatus +{ + public static final String STATUS_CRITICAL = "CRITICAL"; + public static final String STATUS_FULL_STOP = "FULL_STOP"; + public static final String STATUS_OK = "OK"; + public static final String STATUS_UNKNOWN = "UNKNOWN"; + + public long cacheAge; + public long inboxAge; + public String message; + public int problemFileCount; + public String status; +} + +public class JadeDiskArchiverStatus extends JadeProcessStatus +{ + public DiskArchiverWorkerStatus[] workers; +} + +public class WorkerStatus +{ + public int inboxCount; +} + +public class DiskArchiverWorkerStatus extends WorkerStatus +{ + public Map archivalDisks; +} + +public class DiskStatus +{ + public String status; + public long id; + public Boolean closed; + public Integer copyId; + public Boolean onHold; + public String uuid; + public String archive; + public Boolean available; + public String label; +} + + + +public class LiveDiskArchiverStatus +{ + public LiveDiskArchiverStatus(JadeDiskArchiverStatus jdas) + { + if(jdas == null) { return; } + cacheAge = jdas.cacheAge; + inboxAge = jdas.inboxAge; + message = jdas.message; + problemFileCount = jdas.problemFileCount; + status = jdas.status; + + if(jdas.workers == null) { return; } + if(jdas.workers.length < 1) { return; } + inboxCount = jdas.workers[0].inboxCount; + archivalDisks = jdas.workers[0].archivalDisks; + } + + // JadeProcessStatus + public long cacheAge; + public long inboxAge; + public String message; + public int problemFileCount; + public String status; + + // JadeDiskArchiverStatus.workers + public int inboxCount; + public Map archivalDisks; +} + + + +Rust ideas: +- Use crate `nix` to avoid a shell out to `mountpoint` +- Use crate `sha2` to avoid a shell out to `sha512sum` +- Use crate `lettre` to send simple e-mails +- Use crate `diesel` or `sea-orm` for JADE DB (MySQL) interactions diff --git a/src/bin/workbench.rs b/src/bin/workbench.rs index e528d45..757478a 100644 --- a/src/bin/workbench.rs +++ b/src/bin/workbench.rs @@ -1,5 +1,4 @@ // workbench.rs -//---------------------------------------------------------------------------------------------------------------------- use log::trace; use wipac_datamove::ensure_minimum_usize; @@ -9,6 +8,3 @@ fn main() { ensure_minimum_usize(); trace!("Hello, datamove!"); } - -//---------------------------------------------------------------------------------------------------------------------- -// workbench.rs