From c54ed430ce95de45c88b419f8df9abab22adffc3 Mon Sep 17 00:00:00 2001
From: Jake Lane
Date: Tue, 7 Nov 2023 04:11:02 +1100
Subject: [PATCH] Write cache large blobs in chunks to avoid write file limits in Node (#9355)

---
 packages/core/cache/src/FSCache.js       | 44 +++++++++++++++++++++---
 packages/core/cache/src/LMDBCache.js     | 43 ++++++++++++++++++++---
 packages/core/cache/src/constants.js     |  4 +++
 packages/dev/bundle-stats-cli/src/cli.js |  4 +--
 packages/dev/query/package.json          |  1 +
 packages/dev/query/src/cli.js            |  7 ++--
 packages/dev/query/src/index.js          | 22 +++++++-----
 7 files changed, 103 insertions(+), 22 deletions(-)
 create mode 100644 packages/core/cache/src/constants.js

diff --git a/packages/core/cache/src/FSCache.js b/packages/core/cache/src/FSCache.js
index ff7355a0096..e963a02ff60 100644
--- a/packages/core/cache/src/FSCache.js
+++ b/packages/core/cache/src/FSCache.js
@@ -13,6 +13,8 @@ import {serialize, deserialize, registerSerializableClass} from '@parcel/core';
 // flowlint-next-line untyped-import:off
 import packageJson from '../package.json';
 
+import {WRITE_LIMIT_CHUNK} from './constants';
+
 const pipeline: (Readable, Writable) => Promise<void> = promisify(
   stream.pipeline,
 );
@@ -81,16 +83,50 @@
     }
   }
 
+  #getFilePath(key: string, index: number): string {
+    return path.join(this.dir, `${key}-${index}`);
+  }
+
   hasLargeBlob(key: string): Promise<boolean> {
-    return this.fs.exists(this._getCachePath(`${key}-large`));
+    return this.fs.exists(this.#getFilePath(key, 0));
   }
 
-  getLargeBlob(key: string): Promise<Buffer> {
-    return this.fs.readFile(this._getCachePath(`${key}-large`));
+  async getLargeBlob(key: string): Promise<Buffer> {
+    const buffers: Promise<Buffer>[] = [];
+    for (let i = 0; await this.fs.exists(this.#getFilePath(key, i)); i += 1) {
+      const file: Promise<Buffer> = this.fs.readFile(this.#getFilePath(key, i));
+
+      buffers.push(file);
+    }
+
+    return Buffer.concat(await Promise.all(buffers));
   }
 
   async setLargeBlob(key: string, contents: Buffer | string): Promise<void> {
-    await this.fs.writeFile(this._getCachePath(`${key}-large`), contents);
+    const chunks = Math.ceil(contents.length / WRITE_LIMIT_CHUNK);
+
+    if (chunks === 1) {
+      // If there's one chunk, don't slice the content
+      await this.fs.writeFile(this.#getFilePath(key, 0), contents);
+      return;
+    }
+
+    const writePromises: Promise<void>[] = [];
+    for (let i = 0; i < chunks; i += 1) {
+      writePromises.push(
+        this.fs.writeFile(
+          this.#getFilePath(key, i),
+          typeof contents === 'string'
+            ? contents.slice(i * WRITE_LIMIT_CHUNK, (i + 1) * WRITE_LIMIT_CHUNK)
+            : contents.subarray(
+                i * WRITE_LIMIT_CHUNK,
+                (i + 1) * WRITE_LIMIT_CHUNK,
+              ),
+        ),
+      );
+    }
+
+    await Promise.all(writePromises);
   }
 
   async get<T>(key: string): Promise<?T> {
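The FSCache changes above replace the single `${key}-large` file with a series of `${key}-0`, `${key}-1`, ... files, each at most WRITE_LIMIT_CHUNK bytes. A minimal standalone sketch of that round trip, not part of the patch, using an in-memory Map in place of the cache filesystem and a tiny chunk size so the behaviour is easy to observe:

// Illustrative only: mirrors the slicing in setLargeBlob and the
// concatenation in getLargeBlob, with a 4-byte chunk standing in for
// WRITE_LIMIT_CHUNK (2 GB in the patch).
const CHUNK = 4;
const files = new Map(); // `${key}-${index}` -> Buffer

function setLargeBlob(key, contents) {
  const chunks = Math.ceil(contents.length / CHUNK);
  for (let i = 0; i < chunks; i += 1) {
    files.set(`${key}-${i}`, contents.subarray(i * CHUNK, (i + 1) * CHUNK));
  }
}

function getLargeBlob(key) {
  const buffers = [];
  for (let i = 0; files.has(`${key}-${i}`); i += 1) {
    buffers.push(files.get(`${key}-${i}`));
  }
  return Buffer.concat(buffers);
}

setLargeBlob('graph', Buffer.from('0123456789'));
// files now holds graph-0 ('0123'), graph-1 ('4567'), graph-2 ('89')
console.log(getLargeBlob('graph').toString()); // '0123456789'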
diff --git a/packages/core/cache/src/LMDBCache.js b/packages/core/cache/src/LMDBCache.js
index 0a83b0b1f56..eab9dd24bb3 100644
--- a/packages/core/cache/src/LMDBCache.js
+++ b/packages/core/cache/src/LMDBCache.js
@@ -12,6 +12,7 @@ import {NodeFS} from '@parcel/fs';
 import packageJson from '../package.json';
 // $FlowFixMe
 import lmdb from 'lmdb';
+import {WRITE_LIMIT_CHUNK} from './constants';
 
 const pipeline: (Readable, Writable) => Promise<void> = promisify(
   stream.pipeline,
 );
@@ -91,16 +92,50 @@ export class LMDBCache implements Cache {
     return Promise.resolve(this.store.get(key));
   }
 
+  #getFilePath(key: string, index: number): string {
+    return path.join(this.dir, `${key}-${index}`);
+  }
+
   hasLargeBlob(key: string): Promise<boolean> {
-    return this.fs.exists(path.join(this.dir, key));
+    return this.fs.exists(this.#getFilePath(key, 0));
   }
 
-  getLargeBlob(key: string): Promise<Buffer> {
-    return this.fs.readFile(path.join(this.dir, key));
+  async getLargeBlob(key: string): Promise<Buffer> {
+    const buffers: Promise<Buffer>[] = [];
+    for (let i = 0; await this.fs.exists(this.#getFilePath(key, i)); i += 1) {
+      const file: Promise<Buffer> = this.fs.readFile(this.#getFilePath(key, i));
+
+      buffers.push(file);
+    }
+
+    return Buffer.concat(await Promise.all(buffers));
   }
 
   async setLargeBlob(key: string, contents: Buffer | string): Promise<void> {
-    await this.fs.writeFile(path.join(this.dir, key), contents);
+    const chunks = Math.ceil(contents.length / WRITE_LIMIT_CHUNK);
+
+    if (chunks === 1) {
+      // If there's one chunk, don't slice the content
+      await this.fs.writeFile(this.#getFilePath(key, 0), contents);
+      return;
+    }
+
+    const writePromises: Promise<void>[] = [];
+    for (let i = 0; i < chunks; i += 1) {
+      writePromises.push(
+        this.fs.writeFile(
+          this.#getFilePath(key, i),
+          typeof contents === 'string'
+            ? contents.slice(i * WRITE_LIMIT_CHUNK, (i + 1) * WRITE_LIMIT_CHUNK)
+            : contents.subarray(
+                i * WRITE_LIMIT_CHUNK,
+                (i + 1) * WRITE_LIMIT_CHUNK,
+              ),
+        ),
+      );
+    }
+
+    await Promise.all(writePromises);
   }
 
   refresh(): void {
diff --git a/packages/core/cache/src/constants.js b/packages/core/cache/src/constants.js
new file mode 100644
index 00000000000..0b6e9277384
--- /dev/null
+++ b/packages/core/cache/src/constants.js
@@ -0,0 +1,4 @@
+// @flow strict-local
+
+// Node has a file size limit of 2 GB
+export const WRITE_LIMIT_CHUNK = 2 * 1024 ** 3;
diff --git a/packages/dev/bundle-stats-cli/src/cli.js b/packages/dev/bundle-stats-cli/src/cli.js
index c4deed1549c..fd60930f27b 100644
--- a/packages/dev/bundle-stats-cli/src/cli.js
+++ b/packages/dev/bundle-stats-cli/src/cli.js
@@ -18,9 +18,9 @@ import {getBundleStats} from '@parcel/reporter-bundle-stats/src/BundleStatsRepor
 import {PackagedBundle as PackagedBundleClass} from '@parcel/core/src/public/Bundle';
 import type {commander$Command} from 'commander';
 
-function run({cacheDir, outDir}) {
+async function run({cacheDir, outDir}) {
   // 1. load bundle graph and info via parcel~query
-  let {bundleGraph, bundleInfo} = loadGraphs(cacheDir);
+  let {bundleGraph, bundleInfo} = await loadGraphs(cacheDir);
 
   if (bundleGraph == null) {
     console.error('Bundle Graph could not be found');
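The new constants.js pins WRITE_LIMIT_CHUNK at 2 GB, the per-write limit in Node that the patch title and the constants.js comment refer to, and setLargeBlob uses Math.ceil(contents.length / WRITE_LIMIT_CHUNK) to decide how many chunk files to emit. A small sketch of that arithmetic and the resulting on-disk names, illustrative only and not part of the patch:

const WRITE_LIMIT_CHUNK = 2 * 1024 ** 3; // 2 GB, as in constants.js

// How many chunk files a blob of `byteLength` bytes produces,
// and what they are called on disk. The key name is made up.
function chunkNames(key, byteLength) {
  const count = Math.ceil(byteLength / WRITE_LIMIT_CHUNK);
  return Array.from({length: count}, (_, i) => `${key}-${i}`);
}

console.log(chunkNames('bundle_graph', 1024));              // ['bundle_graph-0']
console.log(chunkNames('bundle_graph', 3 * 1024 ** 3));     // ['bundle_graph-0', 'bundle_graph-1']
console.log(chunkNames('bundle_graph', 5 * 1024 ** 3 + 1)); // three chunk files, '-0' through '-2'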
diff --git a/packages/dev/query/package.json b/packages/dev/query/package.json
index b4a8e2aab64..2c9f9d22d73 100644
--- a/packages/dev/query/package.json
+++ b/packages/dev/query/package.json
@@ -7,6 +7,7 @@
   "dependencies": {
     "@parcel/core": "2.10.2",
     "@parcel/graph": "3.0.2",
+    "@parcel/cache": "2.10.2",
     "nullthrows": "^1.1.1",
     "table": "^6.8.1",
     "v8-compile-cache": "^2.0.0"
diff --git a/packages/dev/query/src/cli.js b/packages/dev/query/src/cli.js
index 13e7b546ff2..32665480f09 100644
--- a/packages/dev/query/src/cli.js
+++ b/packages/dev/query/src/cli.js
@@ -20,7 +20,7 @@ import {Priority} from '@parcel/core/src/types';
 
 import {loadGraphs} from './index.js';
 
-export function run(input: string[]) {
+export async function run(input: string[]) {
   let args = input;
   let cacheDir = path.join(process.cwd(), '.parcel-cache');
   if (args[0] === '--cache') {
@@ -37,8 +37,9 @@
   }
 
   console.log('Loading graphs...');
-  let {assetGraph, bundleGraph, bundleInfo, requestTracker} =
-    loadGraphs(cacheDir);
+  let {assetGraph, bundleGraph, bundleInfo, requestTracker} = await loadGraphs(
+    cacheDir,
+  );
 
   if (requestTracker == null) {
     console.error('Request Graph could not be found');
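With loadGraphs now returning a Promise, every caller (both CLIs touched above) has to await it before using the graphs. A hypothetical consumer, not part of the patch, assuming it sits next to packages/dev/query/src/index.js and imports it the same way cli.js does:

// Hypothetical caller of the now-async loadGraphs.
import path from 'path';
import {loadGraphs} from './index.js';

async function main() {
  const cacheDir = path.join(process.cwd(), '.parcel-cache');
  const {bundleGraph, requestTracker} = await loadGraphs(cacheDir);
  if (bundleGraph == null || requestTracker == null) {
    throw new Error(`Could not load graphs from ${cacheDir}`);
  }
  // ...query bundleGraph / requestTracker here...
}

main().catch(err => {
  console.error(err);
  process.exit(1);
});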
diff --git a/packages/dev/query/src/index.js b/packages/dev/query/src/index.js
index 78b00417c17..1746fd010db 100644
--- a/packages/dev/query/src/index.js
+++ b/packages/dev/query/src/index.js
@@ -8,6 +8,7 @@ import path from 'path';
 import v8 from 'v8';
 import nullthrows from 'nullthrows';
 import invariant from 'assert';
+import {LMDBCache} from '@parcel/cache/src/LMDBCache';
 
 const {
   AssetGraph,
@@ -19,12 +20,12 @@ const {
   },
 } = require('./deep-imports.js');
 
-export function loadGraphs(cacheDir: string): {|
+export async function loadGraphs(cacheDir: string): Promise<{|
   assetGraph: ?AssetGraph,
   bundleGraph: ?BundleGraph,
   requestTracker: ?RequestTracker,
   bundleInfo: ?Map<ContentKey, PackagedBundleInfo>,
-|} {
+|}> {
   function filesBySizeAndModifiedTime() {
     let files = fs.readdirSync(cacheDir).map(f => {
       let stat = fs.statSync(path.join(cacheDir, f));
@@ -38,11 +39,14 @@
   }
 
   let requestTracker;
+  const cache = new LMDBCache(cacheDir);
   for (let f of filesBySizeAndModifiedTime()) {
-    // if (bundleGraph && assetGraph && requestTracker) break;
-    if (path.extname(f) !== '') continue;
+    // Empty filename or not the first chunk
+    if (path.extname(f) !== '' && !f.endsWith('-0')) continue;
     try {
-      let obj = v8.deserialize(fs.readFileSync(f));
+      let obj = v8.deserialize(
+        await cache.getLargeBlob(path.basename(f).slice(0, -'-0'.length)),
+      );
       /* if (obj.assetGraph != null && obj.assetGraph.value.hash != null) {
         assetGraph = AssetGraph.deserialize(obj.assetGraph.value);
       } else if (obj.bundleGraph != null) {
@@ -90,7 +94,7 @@
       );
       if (bundleGraphRequestNode != null) {
         bundleGraph = BundleGraph.deserialize(
-          (loadLargeBlobRequestRequestSync(cacheDir, bundleGraphRequestNode))
+          (await loadLargeBlobRequestRequest(cache, bundleGraphRequestNode))
            .bundleGraph.value,
         );
       let assetGraphRequest = getSubRequests(
@@ -99,7 +103,7 @@
       ).find(n => n.type === 'request' && n.value.type === 'asset_graph_request');
       if (assetGraphRequest != null) {
         assetGraph = AssetGraph.deserialize(
-          loadLargeBlobRequestRequestSync(cacheDir, assetGraphRequest).assetGraph
+          (await loadLargeBlobRequestRequest(cache, assetGraphRequest)).assetGraph
             .value,
         );
       }
@@ -120,9 +124,9 @@
   return {assetGraph, bundleGraph, requestTracker, bundleInfo};
 }
 
-function loadLargeBlobRequestRequestSync(cacheDir, node) {
+async function loadLargeBlobRequestRequest(cache, node) {
   invariant(node.type === 'request');
   return v8.deserialize(
-    fs.readFileSync(path.join(cacheDir, nullthrows(node.value.resultCacheKey))),
+    await cache.getLargeBlob(nullthrows(node.value.resultCacheKey)),
   );
 }
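The loop in loadGraphs keys off first-chunk files only: a basename ending in `-0` is stripped back to the cache key, after which cache.getLargeBlob (and loadLargeBlobRequestRequest on top of it) reassembles every chunk for that key. A small sketch of just the key-recovery step, illustrative only; the helper name is hypothetical:

import path from 'path';

// Mirrors `path.basename(f).slice(0, -'-0'.length)` in loadGraphs, but
// returns null for files that are not a first chunk.
function keyFromFirstChunk(filePath) {
  const base = path.basename(filePath);
  return base.endsWith('-0') ? base.slice(0, -'-0'.length) : null;
}

console.log(keyFromFirstChunk('/cache/abc123-0'));     // 'abc123'
console.log(keyFromFirstChunk('/cache/abc123-1'));     // null (not the first chunk)
console.log(keyFromFirstChunk('/cache/snapshot.txt')); // null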