From be2ebd2ea2b5d4266ed3d25a6628884201e1ac91 Mon Sep 17 00:00:00 2001
From: Jake Lane
Date: Wed, 1 Nov 2023 14:13:40 +1100
Subject: [PATCH 1/3] Write large blobs in chunks to avoid write file limits in Node

---
 packages/core/cache/src/FSCache.js          | 35 ++++++++++++++++++---
 packages/core/cache/src/IDBCache.browser.js |  2 +-
 packages/core/cache/src/LMDBCache.js        | 34 +++++++++++++++++---
 packages/core/cache/src/constants.js        |  4 +++
 packages/core/cache/src/types.js            |  2 +-
 packages/dev/query/package.json             |  1 +
 packages/dev/query/src/cli.js               |  7 +++--
 packages/dev/query/src/index.js             | 24 ++++++++------
 8 files changed, 84 insertions(+), 25 deletions(-)
 create mode 100644 packages/core/cache/src/constants.js

diff --git a/packages/core/cache/src/FSCache.js b/packages/core/cache/src/FSCache.js
index ff7355a0096..48577ae803c 100644
--- a/packages/core/cache/src/FSCache.js
+++ b/packages/core/cache/src/FSCache.js
@@ -13,6 +13,8 @@ import {serialize, deserialize, registerSerializableClass} from '@parcel/core';
 // flowlint-next-line untyped-import:off
 import packageJson from '../package.json';
 
+import {WRITE_LIMIT_CHUNK} from './constants';
+
 const pipeline: (Readable, Writable) => Promise<void> = promisify(
   stream.pipeline,
 );
@@ -81,16 +83,39 @@ export class FSCache implements Cache {
     }
   }
 
+  #getFilePath(key: string, index: number): string {
+    return path.join(this.dir, `${key}-${index}`);
+  }
+
   hasLargeBlob(key: string): Promise<boolean> {
-    return this.fs.exists(this._getCachePath(`${key}-large`));
+    return this.fs.exists(this.#getFilePath(key, 0));
   }
 
-  getLargeBlob(key: string): Promise<Buffer> {
-    return this.fs.readFile(this._getCachePath(`${key}-large`));
+  async getLargeBlob(key: string): Promise<Buffer> {
+    const buffers: Promise<Buffer>[] = [];
+    for (let i = 0; await this.fs.exists(this.#getFilePath(key, i)); i += 1) {
+      const file: Promise<Buffer> = this.fs.readFile(this.#getFilePath(key, i));
+
+      buffers.push(file);
+    }
+
+    return Buffer.concat(await Promise.all(buffers));
   }
 
-  async setLargeBlob(key: string, contents: Buffer | string): Promise<void> {
-    await this.fs.writeFile(this._getCachePath(`${key}-large`), contents);
+  async setLargeBlob(key: string, contents: Buffer): Promise<void> {
+    const chunks = Math.ceil(contents.length / WRITE_LIMIT_CHUNK);
+
+    const writePromises: Promise<void>[] = [];
+    for (let i = 0; i < chunks; i += 1) {
+      writePromises.push(
+        this.fs.writeFile(
+          this.#getFilePath(key, i),
+          contents.subarray(i * WRITE_LIMIT_CHUNK, (i + 1) * WRITE_LIMIT_CHUNK),
+        ),
+      );
+    }
+
+    await Promise.all(writePromises);
   }
 
   async get<T>(key: string): Promise<?T> {
diff --git a/packages/core/cache/src/IDBCache.browser.js b/packages/core/cache/src/IDBCache.browser.js
index 15fbf65adbb..6de017c5906 100644
--- a/packages/core/cache/src/IDBCache.browser.js
+++ b/packages/core/cache/src/IDBCache.browser.js
@@ -114,7 +114,7 @@ export class IDBCache implements Cache {
     return this.getBlob(key);
   }
 
-  setLargeBlob(key: string, contents: Buffer | string): Promise<void> {
+  setLargeBlob(key: string, contents: Buffer): Promise<void> {
     return this.setBlob(key, contents);
   }
 
diff --git a/packages/core/cache/src/LMDBCache.js b/packages/core/cache/src/LMDBCache.js
index 0a83b0b1f56..bfe6428dae9 100644
--- a/packages/core/cache/src/LMDBCache.js
+++ b/packages/core/cache/src/LMDBCache.js
@@ -12,6 +12,7 @@ import {NodeFS} from '@parcel/fs';
 import packageJson from '../package.json';
 // $FlowFixMe
 import lmdb from 'lmdb';
+import {WRITE_LIMIT_CHUNK} from './constants';
 
 const pipeline: (Readable, Writable) => Promise<void> = promisify(
   stream.pipeline,
 );
@@ -91,16 +92,39 @@ export class LMDBCache implements Cache {
     return Promise.resolve(this.store.get(key));
   }
 
+  #getFilePath(key: string, index: number): string {
+    return path.join(this.dir, `${key}-${index}`);
+  }
+
   hasLargeBlob(key: string): Promise<boolean> {
-    return this.fs.exists(path.join(this.dir, key));
+    return this.fs.exists(this.#getFilePath(key, 0));
   }
 
-  getLargeBlob(key: string): Promise<Buffer> {
-    return this.fs.readFile(path.join(this.dir, key));
+  async getLargeBlob(key: string): Promise<Buffer> {
+    const buffers: Promise<Buffer>[] = [];
+    for (let i = 0; await this.fs.exists(this.#getFilePath(key, i)); i += 1) {
+      const file: Promise<Buffer> = this.fs.readFile(this.#getFilePath(key, i));
+
+      buffers.push(file);
+    }
+
+    return Buffer.concat(await Promise.all(buffers));
   }
 
-  async setLargeBlob(key: string, contents: Buffer | string): Promise<void> {
-    await this.fs.writeFile(path.join(this.dir, key), contents);
+  async setLargeBlob(key: string, contents: Buffer): Promise<void> {
+    const chunks = Math.ceil(contents.length / WRITE_LIMIT_CHUNK);
+
+    const writePromises: Promise<void>[] = [];
+    for (let i = 0; i < chunks; i += 1) {
+      writePromises.push(
+        this.fs.writeFile(
+          this.#getFilePath(key, i),
+          contents.subarray(i * WRITE_LIMIT_CHUNK, (i + 1) * WRITE_LIMIT_CHUNK),
+        ),
+      );
+    }
+
+    await Promise.all(writePromises);
   }
 
   refresh(): void {
diff --git a/packages/core/cache/src/constants.js b/packages/core/cache/src/constants.js
new file mode 100644
index 00000000000..0b6e9277384
--- /dev/null
+++ b/packages/core/cache/src/constants.js
@@ -0,0 +1,4 @@
+// @flow strict-local
+
+// Node can read or write at most 2 GB in a single file operation
+export const WRITE_LIMIT_CHUNK = 2 * 1024 ** 3;
diff --git a/packages/core/cache/src/types.js b/packages/core/cache/src/types.js
index 163b3d57644..55063964bbd 100644
--- a/packages/core/cache/src/types.js
+++ b/packages/core/cache/src/types.js
@@ -12,7 +12,7 @@ export interface Cache {
   setBlob(key: string, contents: Buffer | string): Promise<void>;
   hasLargeBlob(key: string): Promise<boolean>;
   getLargeBlob(key: string): Promise<Buffer>;
-  setLargeBlob(key: string, contents: Buffer | string): Promise<void>;
+  setLargeBlob(key: string, contents: Buffer): Promise<void>;
   getBuffer(key: string): Promise<?Buffer>;
   /**
    * In a multi-threaded environment, where there are potentially multiple Cache
diff --git a/packages/dev/query/package.json b/packages/dev/query/package.json
index b4a8e2aab64..2c9f9d22d73 100644
--- a/packages/dev/query/package.json
+++ b/packages/dev/query/package.json
@@ -7,6 +7,7 @@
   "dependencies": {
     "@parcel/core": "2.10.2",
     "@parcel/graph": "3.0.2",
+    "@parcel/cache": "2.10.2",
     "nullthrows": "^1.1.1",
     "table": "^6.8.1",
     "v8-compile-cache": "^2.0.0"
diff --git a/packages/dev/query/src/cli.js b/packages/dev/query/src/cli.js
index 13e7b546ff2..32665480f09 100644
--- a/packages/dev/query/src/cli.js
+++ b/packages/dev/query/src/cli.js
@@ -20,7 +20,7 @@ import {Priority} from '@parcel/core/src/types';
 
 import {loadGraphs} from './index.js';
 
-export function run(input: string[]) {
+export async function run(input: string[]) {
   let args = input;
   let cacheDir = path.join(process.cwd(), '.parcel-cache');
   if (args[0] === '--cache') {
@@ -37,8 +37,9 @@ export function run(input: string[]) {
   }
 
   console.log('Loading graphs...');
-  let {assetGraph, bundleGraph, bundleInfo, requestTracker} =
-    loadGraphs(cacheDir);
+  let {assetGraph, bundleGraph, bundleInfo, requestTracker} = await loadGraphs(
+    cacheDir,
+  );
 
   if (requestTracker == null) {
     console.error('Request Graph could not be found');
diff --git a/packages/dev/query/src/index.js b/packages/dev/query/src/index.js
index 78b00417c17..75630136639 100644
--- a/packages/dev/query/src/index.js
+++ b/packages/dev/query/src/index.js
@@ -8,6 +8,7 @@ import path from 'path';
 import v8 from 'v8';
 import nullthrows from 'nullthrows';
 import invariant from 'assert';
+import {LMDBCache} from '@parcel/cache/src/LMDBCache';
 
 const {
   AssetGraph,
@@ -19,12 +20,12 @@ const {
   },
 } = require('./deep-imports.js');
 
-export function loadGraphs(cacheDir: string): {|
+export async function loadGraphs(cacheDir: string): Promise<{|
   assetGraph: ?AssetGraph,
   bundleGraph: ?BundleGraph,
   requestTracker: ?RequestTracker,
   bundleInfo: ?Map<ContentKey, PackagedBundleInfo>,
-|} {
+|}> {
   function filesBySizeAndModifiedTime() {
     let files = fs.readdirSync(cacheDir).map(f => {
       let stat = fs.statSync(path.join(cacheDir, f));
@@ -38,11 +39,14 @@ export function loadGraphs(cacheDir: string): {|
   }
 
   let requestTracker;
+  const cache = new LMDBCache(cacheDir);
   for (let f of filesBySizeAndModifiedTime()) {
-    // if (bundleGraph && assetGraph && requestTracker) break;
-    if (path.extname(f) !== '') continue;
+    // Empty filename or not the first chunk
+    if (path.extname(f) !== '' && !f.endsWith('-0')) continue;
     try {
-      let obj = v8.deserialize(fs.readFileSync(f));
+      let obj = v8.deserialize(
+        await cache.getLargeBlob(path.basename(f).slice(0, -'-0'.length)),
+      );
       /* if (obj.assetGraph != null && obj.assetGraph.value.hash != null) {
         assetGraph = AssetGraph.deserialize(obj.assetGraph.value);
       } else if (obj.bundleGraph != null) {
@@ -90,7 +94,7 @@ export function loadGraphs(cacheDir: string): {|
     );
     if (bundleGraphRequestNode != null) {
       bundleGraph = BundleGraph.deserialize(
-        loadLargeBlobRequestRequestSync(cacheDir, bundleGraphRequestNode)
+        (await loadLargeBlobRequestRequestSync(cache, bundleGraphRequestNode))
           .bundleGraph.value,
       );
 
@@ -99,8 +103,8 @@ export function loadGraphs(cacheDir: string): {|
       ).find(n => n.type === 'request' && n.value.type === 'asset_graph_request');
       if (assetGraphRequest != null) {
         assetGraph = AssetGraph.deserialize(
-          loadLargeBlobRequestRequestSync(cacheDir, assetGraphRequest).assetGraph
-            .value,
+          (await loadLargeBlobRequestRequestSync(cache, assetGraphRequest))
+            .assetGraph.value,
         );
       }
     }
@@ -120,9 +124,9 @@ export function loadGraphs(cacheDir: string): {|
   return {assetGraph, bundleGraph, requestTracker, bundleInfo};
 }
 
-function loadLargeBlobRequestRequestSync(cacheDir, node) {
+async function loadLargeBlobRequestRequestSync(cache, node) {
   invariant(node.type === 'request');
   return v8.deserialize(
-    fs.readFileSync(path.join(cacheDir, nullthrows(node.value.resultCacheKey))),
+    await cache.getLargeBlob(nullthrows(node.value.resultCacheKey)),
   );
 }
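Note on the scheme in this patch: a large blob is fanned out across numbered files (key-0, key-1, ...) because Node refuses single fs reads and writes much over 2 GB. Below is a minimal standalone sketch of the same round trip outside the cache classes. The constant and the file naming mirror the patch; chunkPath, setLargeBlob, and getLargeBlob are illustrative stand-ins, and the ENOENT-based stop condition replaces the patch's fs.exists loop:

// chunked-blob-sketch.js: standalone model of the scheme in PATCH 1/3.
// fs.writeFile/readFile can move at most ~2 GiB per call, so a blob is
// split across `${key}-0`, `${key}-1`, ... and re-joined on read.
const fs = require('fs/promises');
const path = require('path');

const WRITE_LIMIT_CHUNK = 2 * 1024 ** 3;

const chunkPath = (dir, key, i) => path.join(dir, `${key}-${i}`);

async function setLargeBlob(dir, key, contents /*: Buffer */) {
  const chunks = Math.ceil(contents.length / WRITE_LIMIT_CHUNK);
  const writes = [];
  for (let i = 0; i < chunks; i += 1) {
    // subarray is a zero-copy view of the chunk's byte range
    writes.push(
      fs.writeFile(
        chunkPath(dir, key, i),
        contents.subarray(i * WRITE_LIMIT_CHUNK, (i + 1) * WRITE_LIMIT_CHUNK),
      ),
    );
  }
  await Promise.all(writes);
}

async function getLargeBlob(dir, key) {
  const buffers = [];
  for (let i = 0; ; i += 1) {
    try {
      buffers.push(await fs.readFile(chunkPath(dir, key, i)));
    } catch (err) {
      // The first missing index marks the end of the blob
      if (err.code === 'ENOENT' && i > 0) break;
      throw err;
    }
  }
  return Buffer.concat(buffers);
}

Because chunk indices are contiguous from zero, checking for the key-0 file is enough to answer hasLargeBlob, which is also why the query tool's loader in this patch keys off cache filenames ending in -0.
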
From 3a797447136812e9785ad7524adfda6078b884ac Mon Sep 17 00:00:00 2001
From: Jake Lane
Date: Thu, 2 Nov 2023 14:59:26 +1100
Subject: [PATCH 2/3] Bring back string as allowed large blob type

---
 packages/core/cache/src/FSCache.js          | 9 +++++++--
 packages/core/cache/src/IDBCache.browser.js | 2 +-
 packages/core/cache/src/LMDBCache.js        | 9 +++++++--
 packages/core/cache/src/types.js            | 2 +-
 packages/dev/bundle-stats-cli/src/cli.js    | 4 ++--
 packages/dev/query/src/index.js             | 8 ++++----
 6 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/packages/core/cache/src/FSCache.js b/packages/core/cache/src/FSCache.js
index 48577ae803c..cc4b85e33e6 100644
--- a/packages/core/cache/src/FSCache.js
+++ b/packages/core/cache/src/FSCache.js
@@ -102,7 +102,7 @@ export class FSCache implements Cache {
     return Buffer.concat(await Promise.all(buffers));
   }
 
-  async setLargeBlob(key: string, contents: Buffer): Promise<void> {
+  async setLargeBlob(key: string, contents: Buffer | string): Promise<void> {
     const chunks = Math.ceil(contents.length / WRITE_LIMIT_CHUNK);
 
     const writePromises: Promise<void>[] = [];
@@ -110,7 +110,12 @@ export class FSCache implements Cache {
       writePromises.push(
        this.fs.writeFile(
          this.#getFilePath(key, i),
-          contents.subarray(i * WRITE_LIMIT_CHUNK, (i + 1) * WRITE_LIMIT_CHUNK),
+          typeof contents === 'string'
+            ? contents.slice(i * WRITE_LIMIT_CHUNK, (i + 1) * WRITE_LIMIT_CHUNK)
+            : contents.subarray(
+                i * WRITE_LIMIT_CHUNK,
+                (i + 1) * WRITE_LIMIT_CHUNK,
+              ),
         ),
       );
     }
diff --git a/packages/core/cache/src/IDBCache.browser.js b/packages/core/cache/src/IDBCache.browser.js
index 6de017c5906..15fbf65adbb 100644
--- a/packages/core/cache/src/IDBCache.browser.js
+++ b/packages/core/cache/src/IDBCache.browser.js
@@ -114,7 +114,7 @@ export class IDBCache implements Cache {
     return this.getBlob(key);
   }
 
-  setLargeBlob(key: string, contents: Buffer): Promise<void> {
+  setLargeBlob(key: string, contents: Buffer | string): Promise<void> {
     return this.setBlob(key, contents);
   }
 
diff --git a/packages/core/cache/src/LMDBCache.js b/packages/core/cache/src/LMDBCache.js
index bfe6428dae9..01cf6f74351 100644
--- a/packages/core/cache/src/LMDBCache.js
+++ b/packages/core/cache/src/LMDBCache.js
@@ -111,7 +111,7 @@ export class LMDBCache implements Cache {
     return Buffer.concat(await Promise.all(buffers));
   }
 
-  async setLargeBlob(key: string, contents: Buffer): Promise<void> {
+  async setLargeBlob(key: string, contents: Buffer | string): Promise<void> {
     const chunks = Math.ceil(contents.length / WRITE_LIMIT_CHUNK);
 
     const writePromises: Promise<void>[] = [];
@@ -119,7 +119,12 @@ export class LMDBCache implements Cache {
       writePromises.push(
        this.fs.writeFile(
          this.#getFilePath(key, i),
-          contents.subarray(i * WRITE_LIMIT_CHUNK, (i + 1) * WRITE_LIMIT_CHUNK),
+          typeof contents === 'string'
+            ? contents.slice(i * WRITE_LIMIT_CHUNK, (i + 1) * WRITE_LIMIT_CHUNK)
+            : contents.subarray(
+                i * WRITE_LIMIT_CHUNK,
+                (i + 1) * WRITE_LIMIT_CHUNK,
+              ),
         ),
       );
     }
diff --git a/packages/core/cache/src/types.js b/packages/core/cache/src/types.js
index 55063964bbd..163b3d57644 100644
--- a/packages/core/cache/src/types.js
+++ b/packages/core/cache/src/types.js
@@ -12,7 +12,7 @@ export interface Cache {
   setBlob(key: string, contents: Buffer | string): Promise<void>;
   hasLargeBlob(key: string): Promise<boolean>;
   getLargeBlob(key: string): Promise<Buffer>;
-  setLargeBlob(key: string, contents: Buffer): Promise<void>;
+  setLargeBlob(key: string, contents: Buffer | string): Promise<void>;
   getBuffer(key: string): Promise<?Buffer>;
   /**
    * In a multi-threaded environment, where there are potentially multiple Cache
diff --git a/packages/dev/bundle-stats-cli/src/cli.js b/packages/dev/bundle-stats-cli/src/cli.js
index c4deed1549c..fd60930f27b 100644
--- a/packages/dev/bundle-stats-cli/src/cli.js
+++ b/packages/dev/bundle-stats-cli/src/cli.js
@@ -18,9 +18,9 @@ import {getBundleStats} from '@parcel/reporter-bundle-stats/src/BundleStatsRepor
 import {PackagedBundle as PackagedBundleClass} from '@parcel/core/src/public/Bundle';
 import type {commander$Command} from 'commander';
 
-function run({cacheDir, outDir}) {
+async function run({cacheDir, outDir}) {
   // 1. load bundle graph and info via parcel~query
-  let {bundleGraph, bundleInfo} = loadGraphs(cacheDir);
+  let {bundleGraph, bundleInfo} = await loadGraphs(cacheDir);
 
   if (bundleGraph == null) {
     console.error('Bundle Graph could not be found');
diff --git a/packages/dev/query/src/index.js b/packages/dev/query/src/index.js
index 75630136639..1746fd010db 100644
--- a/packages/dev/query/src/index.js
+++ b/packages/dev/query/src/index.js
@@ -94,7 +94,7 @@ export async function loadGraphs(cacheDir: string): Promise<{|
     );
     if (bundleGraphRequestNode != null) {
       bundleGraph = BundleGraph.deserialize(
-        (await loadLargeBlobRequestRequestSync(cache, bundleGraphRequestNode))
+        (await loadLargeBlobRequestRequest(cache, bundleGraphRequestNode))
           .bundleGraph.value,
       );
 
@@ -103,8 +103,8 @@ export async function loadGraphs(cacheDir: string): Promise<{|
       ).find(n => n.type === 'request' && n.value.type === 'asset_graph_request');
       if (assetGraphRequest != null) {
         assetGraph = AssetGraph.deserialize(
-          (await loadLargeBlobRequestRequestSync(cache, assetGraphRequest))
-            .assetGraph.value,
+          (await loadLargeBlobRequestRequest(cache, assetGraphRequest)).assetGraph
+            .value,
         );
       }
     }
@@ -124,7 +124,7 @@ export async function loadGraphs(cacheDir: string): Promise<{|
   return {assetGraph, bundleGraph, requestTracker, bundleInfo};
 }
 
-async function loadLargeBlobRequestRequestSync(cache, node) {
+async function loadLargeBlobRequestRequest(cache, node) {
   invariant(node.type === 'request');
   return v8.deserialize(
     await cache.getLargeBlob(nullthrows(node.value.resultCacheKey)),
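One caveat with the string branch restored in this patch (an observation, not something the patch addresses): String.prototype.slice counts UTF-16 code units while Buffer.subarray counts bytes, so a string chunk of WRITE_LIMIT_CHUNK units can still encode to more than 2 GB of UTF-8 by the time fs.writeFile converts it. A quick illustration of the mismatch:

// 'é' occupies one UTF-16 code unit but two UTF-8 bytes
const s = 'é'.repeat(4);
console.log(s.length);                     // 4 (code units)
console.log(Buffer.byteLength(s, 'utf8')); // 8 (bytes on disk)
console.log(s.slice(0, 2));                // 'éé' (2 units, 4 bytes)
console.log(Buffer.from(s, 'utf8').subarray(0, 2).toString()); // 'é' (2 bytes)

The blobs written through this path in practice are v8-serialised Buffers, so the string branch appears to be a convenience that is safe as long as callers keep string payloads ASCII.
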
From c5efb5329c26018fdfc8034c5b47ba52d8d578b4 Mon Sep 17 00:00:00 2001
From: Jake Lane
Date: Fri, 3 Nov 2023 09:25:36 +1100
Subject: [PATCH 3/3] Optimise single chunk behaviour

---
 packages/core/cache/src/FSCache.js   | 6 ++++++
 packages/core/cache/src/LMDBCache.js | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/packages/core/cache/src/FSCache.js b/packages/core/cache/src/FSCache.js
index cc4b85e33e6..e963a02ff60 100644
--- a/packages/core/cache/src/FSCache.js
+++ b/packages/core/cache/src/FSCache.js
@@ -105,6 +105,12 @@ export class FSCache implements Cache {
   async setLargeBlob(key: string, contents: Buffer | string): Promise<void> {
     const chunks = Math.ceil(contents.length / WRITE_LIMIT_CHUNK);
 
+    if (chunks === 1) {
+      // If there's one chunk, don't slice the content
+      await this.fs.writeFile(this.#getFilePath(key, 0), contents);
+      return;
+    }
+
     const writePromises: Promise<void>[] = [];
     for (let i = 0; i < chunks; i += 1) {
       writePromises.push(
diff --git a/packages/core/cache/src/LMDBCache.js b/packages/core/cache/src/LMDBCache.js
index 01cf6f74351..eab9dd24bb3 100644
--- a/packages/core/cache/src/LMDBCache.js
+++ b/packages/core/cache/src/LMDBCache.js
@@ -114,6 +114,12 @@ export class LMDBCache implements Cache {
   async setLargeBlob(key: string, contents: Buffer | string): Promise<void> {
     const chunks = Math.ceil(contents.length / WRITE_LIMIT_CHUNK);
 
+    if (chunks === 1) {
+      // If there's one chunk, don't slice the content
+      await this.fs.writeFile(this.#getFilePath(key, 0), contents);
+      return;
+    }
+
     const writePromises: Promise<void>[] = [];
     for (let i = 0; i < chunks; i += 1) {
       writePromises.push(
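The fast path added here slots into the earlier standalone sketch as below. The rationale is my reading of the change rather than anything stated in the commit: Buffer.subarray is a zero-copy view but String.prototype.slice copies, and the common case is a blob that fits in a single chunk, so skipping the slicing machinery avoids both the copy and the type test:

async function setLargeBlob(dir, key, contents /*: Buffer | string */) {
  const chunks = Math.ceil(contents.length / WRITE_LIMIT_CHUNK);
  if (chunks === 1) {
    // Common case: a single chunk; write the contents through untouched
    await fs.writeFile(chunkPath(dir, key, 0), contents);
    return;
  }
  const writes = [];
  for (let i = 0; i < chunks; i += 1) {
    const start = i * WRITE_LIMIT_CHUNK;
    const end = (i + 1) * WRITE_LIMIT_CHUNK;
    writes.push(
      fs.writeFile(
        chunkPath(dir, key, i),
        typeof contents === 'string'
          ? contents.slice(start, end) // copies (code units, see note above)
          : contents.subarray(start, end), // zero-copy view (bytes)
      ),
    );
  }
  await Promise.all(writes);
}

One edge worth noting: an empty blob yields chunks === 0, so no key-0 file is written and hasLargeBlob reports the key as absent, behaviour the fast path leaves unchanged.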