From 873a44622f8cc2a8060d73b6ded03ff3f70cbd15 Mon Sep 17 00:00:00 2001 From: wuuxigh Date: Wed, 23 Oct 2024 16:36:51 -0700 Subject: [PATCH] feat: optional checksum algorithm for upload (#13939) Co-authored-by: AllanZhengYP --- packages/aws-amplify/package.json | 2 +- .../apis/uploadData/multipartHandlers.test.ts | 55 +++--- .../s3/apis/uploadData/putObjectJob.test.ts | 169 +++++++++++------- .../s3/utils/client/S3/cases/listParts.ts | 6 +- .../providers/s3/utils/crc32.native.test.ts | 13 -- .../providers/s3/utils/crc32.test.ts | 5 - .../s3/utils/getCombinedCrc32.native.test.ts | 108 +++++++++++ .../s3/utils/getCombinedCrc32.test.ts | 108 +++++++++++ .../providers/s3/utils/md5.native.test.ts | 131 -------------- .../s3/utils/readFile.native.test.ts | 119 ++++++++++++ .../providers/s3/utils/readFile.test.ts | 90 ++++++++++ .../uploadData/multipart/initialUpload.ts | 33 ++-- .../uploadData/multipart/uploadCache.ts | 4 +- .../uploadData/multipart/uploadHandlers.ts | 8 + .../apis/internal/uploadData/putObjectJob.ts | 12 +- .../storage/src/providers/s3/types/options.ts | 8 + .../utils/client/utils/deserializeHelpers.ts | 1 - .../src/providers/s3/utils/constants.ts | 2 + .../src/providers/s3/utils/crc32.native.ts | 11 -- .../storage/src/providers/s3/utils/crc32.ts | 69 ++++--- .../s3/utils/getCombinedCrc32.native.ts | 46 +++++ .../providers/s3/utils/getCombinedCrc32.ts | 34 ++++ .../src/providers/s3/utils/hexUtils.ts | 13 ++ .../storage/src/providers/s3/utils/md5.ts | 16 +- .../{md5.native.ts => readFile.native.ts} | 19 +- .../src/providers/s3/utils/readFile.ts | 17 ++ tsconfig.json | 2 +- 27 files changed, 764 insertions(+), 337 deletions(-) delete mode 100644 packages/storage/__tests__/providers/s3/utils/crc32.native.test.ts create mode 100644 packages/storage/__tests__/providers/s3/utils/getCombinedCrc32.native.test.ts create mode 100644 packages/storage/__tests__/providers/s3/utils/getCombinedCrc32.test.ts delete mode 100644 packages/storage/__tests__/providers/s3/utils/md5.native.test.ts create mode 100644 packages/storage/__tests__/providers/s3/utils/readFile.native.test.ts create mode 100644 packages/storage/__tests__/providers/s3/utils/readFile.test.ts delete mode 100644 packages/storage/src/providers/s3/utils/crc32.native.ts create mode 100644 packages/storage/src/providers/s3/utils/getCombinedCrc32.native.ts create mode 100644 packages/storage/src/providers/s3/utils/getCombinedCrc32.ts create mode 100644 packages/storage/src/providers/s3/utils/hexUtils.ts rename packages/storage/src/providers/s3/utils/{md5.native.ts => readFile.native.ts} (68%) create mode 100644 packages/storage/src/providers/s3/utils/readFile.ts diff --git a/packages/aws-amplify/package.json b/packages/aws-amplify/package.json index a629f3d5a51..30a90ed816d 100644 --- a/packages/aws-amplify/package.json +++ b/packages/aws-amplify/package.json @@ -497,7 +497,7 @@ "name": "[Storage] uploadData (S3)", "path": "./dist/esm/storage/index.mjs", "import": "{ uploadData }", - "limit": "22.39 kB" + "limit": "22.54 kB" } ] } diff --git a/packages/storage/__tests__/providers/s3/apis/uploadData/multipartHandlers.test.ts b/packages/storage/__tests__/providers/s3/apis/uploadData/multipartHandlers.test.ts index 06771dce52e..944f6d4f550 100644 --- a/packages/storage/__tests__/providers/s3/apis/uploadData/multipartHandlers.test.ts +++ b/packages/storage/__tests__/providers/s3/apis/uploadData/multipartHandlers.test.ts @@ -1,9 +1,6 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
 // SPDX-License-Identifier: Apache-2.0

-import { Blob as BlobPolyfill, File as FilePolyfill } from 'node:buffer';
-import { WritableStream as WritableStreamPolyfill } from 'node:stream/web';
-
 import { AWSCredentials } from '@aws-amplify/core/internals/utils';
 import { Amplify, defaultStorage } from '@aws-amplify/core';

@@ -20,17 +17,16 @@ import {
 	StorageValidationErrorCode,
 	validationErrorMap,
 } from '../../../../../src/errors/types/validation';
-import { UPLOADS_STORAGE_KEY } from '../../../../../src/providers/s3/utils/constants';
-import { byteLength } from '../../../../../src/providers/s3/apis/internal/uploadData/byteLength';
+import {
+	CHECKSUM_ALGORITHM_CRC32,
+	UPLOADS_STORAGE_KEY,
+} from '../../../../../src/providers/s3/utils/constants';
 import { CanceledError } from '../../../../../src/errors/CanceledError';
 import { StorageOptions } from '../../../../../src/types';
 import '../testUtils';
 import { calculateContentCRC32 } from '../../../../../src/providers/s3/utils/crc32';
 import { calculateContentMd5 } from '../../../../../src/providers/s3/utils';
-
-global.Blob = BlobPolyfill as any;
-global.File = FilePolyfill as any;
-global.WritableStream = WritableStreamPolyfill as any;
+import { byteLength } from '../../../../../src/providers/s3/apis/internal/uploadData/byteLength';

 jest.mock('@aws-amplify/core');
 jest.mock('../../../../../src/providers/s3/utils/client/s3data');
@@ -47,9 +43,10 @@ const bucket = 'bucket';
 const region = 'region';
 const defaultKey = 'key';
 const defaultContentType = 'application/octet-stream';
-const defaultCacheKey = '8388608_application/octet-stream_bucket_public_key';
+const defaultCacheKey =
+	'Jz3O2w==_8388608_application/octet-stream_bucket_public_key';
 const testPath = 'testPath/object';
-const testPathCacheKey = `8388608_${defaultContentType}_${bucket}_custom_${testPath}`;
+const testPathCacheKey = `Jz3O2w==_8388608_${defaultContentType}_${bucket}_custom_${testPath}`;

 const mockCreateMultipartUpload = jest.mocked(createMultipartUpload);
 const mockUploadPart = jest.mocked(uploadPart);
@@ -83,10 +80,6 @@ const mockCalculateContentCRC32Mock = () => {
 		seed: 0,
 	});
 };
-const mockCalculateContentCRC32Undefined = () => {
-	mockCalculateContentCRC32.mockReset();
-	mockCalculateContentCRC32.mockResolvedValue(undefined);
-};
 const mockCalculateContentCRC32Reset = () => {
 	mockCalculateContentCRC32.mockReset();
 	mockCalculateContentCRC32.mockImplementation(
@@ -291,6 +284,9 @@ describe('getMultipartUploadHandlers with key', () => {
 			const { multipartUploadJob } = getMultipartUploadHandlers({
 				key: defaultKey,
 				data: twoPartsPayload,
+				options: {
+					checksumAlgorithm: CHECKSUM_ALGORITHM_CRC32,
+				},
 			});
 			await multipartUploadJob();

@@ -301,9 +297,11 @@
 			 *
 			 * uploading each part calls calculateContentCRC32 1 time each
 			 *
-			 * these steps results in 5 calls in total
+			 * 1 time for optionsHash
+			 *
+			 * these steps result in 6 calls in total
 			 */
-			expect(calculateContentCRC32).toHaveBeenCalledTimes(5);
+			expect(calculateContentCRC32).toHaveBeenCalledTimes(6);
 			expect(calculateContentMd5).not.toHaveBeenCalled();
 			expect(mockUploadPart).toHaveBeenCalledTimes(2);
 			expect(mockUploadPart).toHaveBeenCalledWith(
@@ -317,8 +315,7 @@
 			},
 		);

-	it('should use md5 if crc32 is returning undefined', async () => {
-		mockCalculateContentCRC32Undefined();
+	it('should use md5 if not using crc32', async () => {
 		mockMultipartUploadSuccess();
 		Amplify.libraryOptions = {
 			Storage: {
@@ -372,6 +369,9 @@ describe('getMultipartUploadHandlers with key', () => {
 				{
 					key: defaultKey,
 					data: file,
+					options: {
+						checksumAlgorithm: CHECKSUM_ALGORITHM_CRC32,
+					},
 				},
 				file.size,
 			);
@@ -615,7 +615,7 @@ describe('getMultipartUploadHandlers with key', () => {
 			expect(Object.keys(cacheValue)).toEqual([
 				expect.stringMatching(
 					// \d{13} is the file lastModified property of a file
					/someName_\d{13}_Jz3O2w==_8388608_application\/octet-stream_bucket_public_key/,
 				),
 			]);
 		});
@@ -979,6 +979,9 @@ describe('getMultipartUploadHandlers with path', () => {
 			const { multipartUploadJob } = getMultipartUploadHandlers({
 				path: testPath,
 				data: twoPartsPayload,
+				options: {
+					checksumAlgorithm: CHECKSUM_ALGORITHM_CRC32,
+				},
 			});
 			await multipartUploadJob();

@@ -989,9 +992,11 @@
 			 *
 			 * uploading each part calls calculateContentCRC32 1 time each
 			 *
-			 * these steps results in 5 calls in total
+			 * 1 time for optionsHash
+			 *
+			 * these steps result in 6 calls in total
 			 */
-			expect(calculateContentCRC32).toHaveBeenCalledTimes(5);
+			expect(calculateContentCRC32).toHaveBeenCalledTimes(6);
 			expect(calculateContentMd5).not.toHaveBeenCalled();
 			expect(mockUploadPart).toHaveBeenCalledTimes(2);
 			expect(mockUploadPart).toHaveBeenCalledWith(
@@ -1005,8 +1010,7 @@
 			},
 		);

-	it('should use md5 if crc32 is returning undefined', async () => {
-		mockCalculateContentCRC32Undefined();
+	it('should use md5 if not using crc32', async () => {
 		mockMultipartUploadSuccess();
 		Amplify.libraryOptions = {
 			Storage: {
@@ -1060,6 +1064,9 @@
 				{
 					path: testPath,
 					data: file,
+					options: {
+						checksumAlgorithm: CHECKSUM_ALGORITHM_CRC32,
+					},
 				},
 				file.size,
 			);
diff --git a/packages/storage/__tests__/providers/s3/apis/uploadData/putObjectJob.test.ts b/packages/storage/__tests__/providers/s3/apis/uploadData/putObjectJob.test.ts
index 143b4a20316..1ad28b2dc20 100644
--- a/packages/storage/__tests__/providers/s3/apis/uploadData/putObjectJob.test.ts
+++ b/packages/storage/__tests__/providers/s3/apis/uploadData/putObjectJob.test.ts
@@ -1,9 +1,6 @@
 // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 -import { Blob as BlobPolyfill, File as FilePolyfill } from 'node:buffer'; -import { WritableStream as WritableStreamPolyfill } from 'node:stream/web'; - import { AWSCredentials } from '@aws-amplify/core/internals/utils'; import { Amplify } from '@aws-amplify/core'; @@ -15,10 +12,8 @@ import { calculateContentMd5 } from '../../../../../src/providers/s3/utils'; import * as CRC32 from '../../../../../src/providers/s3/utils/crc32'; import { putObjectJob } from '../../../../../src/providers/s3/apis/internal/uploadData/putObjectJob'; import '../testUtils'; - -global.Blob = BlobPolyfill as any; -global.File = FilePolyfill as any; -global.WritableStream = WritableStreamPolyfill as any; +import { UploadDataChecksumAlgorithm } from '../../../../../src/providers/s3/types/options'; +import { CHECKSUM_ALGORITHM_CRC32 } from '../../../../../src/providers/s3/utils/constants'; jest.mock('../../../../../src/providers/s3/utils/client/s3data'); jest.mock('../../../../../src/providers/s3/utils', () => { @@ -75,66 +70,79 @@ mockPutObject.mockResolvedValue({ /* TODO Remove suite when `key` parameter is removed */ describe('putObjectJob with key', () => { beforeEach(() => { + mockPutObject.mockClear(); jest.spyOn(CRC32, 'calculateContentCRC32').mockRestore(); }); - it('should supply the correct parameters to putObject API handler', async () => { - const abortController = new AbortController(); - const inputKey = 'key'; - const data = 'data'; - const mockContentType = 'contentType'; - const contentDisposition = 'contentDisposition'; - const contentEncoding = 'contentEncoding'; - const mockMetadata = { key: 'value' }; - const onProgress = jest.fn(); - const useAccelerateEndpoint = true; + it.each<{ checksumAlgorithm: UploadDataChecksumAlgorithm | undefined }>([ + { checksumAlgorithm: CHECKSUM_ALGORITHM_CRC32 }, + { checksumAlgorithm: undefined }, + ])( + 'should supply the correct parameters to putObject API handler with checksumAlgorithm as $checksumAlgorithm', + async ({ checksumAlgorithm }) => { + const abortController = new AbortController(); + const inputKey = 'key'; + const data = 'data'; + const mockContentType = 'contentType'; + const contentDisposition = 'contentDisposition'; + const contentEncoding = 'contentEncoding'; + const mockMetadata = { key: 'value' }; + const onProgress = jest.fn(); + const useAccelerateEndpoint = true; - const job = putObjectJob( - { + const job = putObjectJob( + { + key: inputKey, + data, + options: { + contentDisposition, + contentEncoding, + contentType: mockContentType, + metadata: mockMetadata, + onProgress, + useAccelerateEndpoint, + checksumAlgorithm, + }, + }, + abortController.signal, + ); + const result = await job(); + expect(result).toEqual({ key: inputKey, - data, - options: { - contentDisposition, - contentEncoding, - contentType: mockContentType, - metadata: mockMetadata, - onProgress, - useAccelerateEndpoint, + eTag: 'eTag', + versionId: 'versionId', + contentType: 'contentType', + metadata: { key: 'value' }, + size: undefined, + }); + expect(mockPutObject).toHaveBeenCalledTimes(1); + await expect(mockPutObject).toBeLastCalledWithConfigAndInput( + { + credentials, + region, + abortSignal: abortController.signal, + onUploadProgress: expect.any(Function), + useAccelerateEndpoint: true, + userAgentValue: expect.any(String), }, - }, - abortController.signal, - ); - const result = await job(); - expect(result).toEqual({ - key: inputKey, - eTag: 'eTag', - versionId: 'versionId', - contentType: 'contentType', - metadata: { key: 
'value' },
-			size: undefined,
-		});
-		expect(mockPutObject).toHaveBeenCalledTimes(1);
-		await expect(mockPutObject).toBeLastCalledWithConfigAndInput(
-			{
-				credentials,
-				region,
-				abortSignal: abortController.signal,
-				onUploadProgress: expect.any(Function),
-				useAccelerateEndpoint: true,
-				userAgentValue: expect.any(String),
-			},
-			{
-				Bucket: bucket,
-				Key: `public/${inputKey}`,
-				Body: data,
-				ContentType: mockContentType,
-				ContentDisposition: contentDisposition,
-				ContentEncoding: contentEncoding,
-				Metadata: mockMetadata,
-				ChecksumCRC32: 'rfPzYw==',
-			},
-		);
-	});
+				{
+					Bucket: bucket,
+					Key: `public/${inputKey}`,
+					Body: data,
+					ContentType: mockContentType,
+					ContentDisposition: contentDisposition,
+					ContentEncoding: contentEncoding,
+					Metadata: mockMetadata,
+
+					// ChecksumCRC32 is set when putObjectJob() is called with checksumAlgorithm: 'crc-32'
+					ChecksumCRC32:
+						checksumAlgorithm === CHECKSUM_ALGORITHM_CRC32
+							? 'rfPzYw=='
+							: undefined,
+				},
+			);
+		},
+	);

 	it('should set ContentMD5 if object lock is enabled', async () => {
 		jest
@@ -193,7 +201,6 @@ describe('putObjectJob with key', () => {
 				Key: 'public/key',
 				Body: data,
 				ContentType: 'application/octet-stream',
-				ChecksumCRC32: 'rfPzYw==',
 			},
 		);
 	});
@@ -225,7 +232,6 @@
 				Key: 'public/key',
 				Body: data,
 				ContentType: 'application/octet-stream',
-				ChecksumCRC32: 'rfPzYw==',
 			},
 		);
 	});
@@ -238,18 +244,34 @@ describe('putObjectJob with path', () => {
 		jest.spyOn(CRC32, 'calculateContentCRC32').mockRestore();
 	});

-	test.each([
+	test.each<{
+		path: string | (() => string);
+		expectedKey: string;
+		checksumAlgorithm: UploadDataChecksumAlgorithm | undefined;
+	}>([
 		{
 			path: testPath,
 			expectedKey: testPath,
+			checksumAlgorithm: CHECKSUM_ALGORITHM_CRC32,
 		},
 		{
 			path: () => testPath,
 			expectedKey: testPath,
+			checksumAlgorithm: CHECKSUM_ALGORITHM_CRC32,
+		},
+		{
+			path: testPath,
+			expectedKey: testPath,
+			checksumAlgorithm: undefined,
+		},
+		{
+			path: () => testPath,
+			expectedKey: testPath,
+			checksumAlgorithm: undefined,
 		},
 	])(
-		'should supply the correct parameters to putObject API handler when path is $path',
-		async ({ path: inputPath, expectedKey }) => {
+		'should supply the correct parameters to putObject API handler when path is $path and checksumAlgorithm is $checksumAlgorithm',
+		async ({ path: inputPath, expectedKey, checksumAlgorithm }) => {
 			const abortController = new AbortController();
 			const data = 'data';
 			const mockContentType = 'contentType';
@@ -270,6 +292,7 @@
 					metadata: mockMetadata,
 					onProgress,
 					useAccelerateEndpoint,
+					checksumAlgorithm,
 				},
 			},
 			abortController.signal,
@@ -301,7 +324,12 @@
 					ContentDisposition: contentDisposition,
 					ContentEncoding: contentEncoding,
 					Metadata: mockMetadata,
-					ChecksumCRC32: 'rfPzYw==',
+
+					// ChecksumCRC32 is set when putObjectJob() is called with checksumAlgorithm: 'crc-32'
+					ChecksumCRC32:
+						checksumAlgorithm === CHECKSUM_ALGORITHM_CRC32
+							? 'rfPzYw=='
+							: undefined,
+				},
+			);
+		},
 	);
@@ -439,7 +467,6 @@ describe('putObjectJob with path', () => {
 				Key: 'path/',
 				Body: data,
 				ContentType: 'application/octet-stream',
-				ChecksumCRC32: 'rfPzYw==',
 			},
 		);
 	});
@@ -471,7 +498,6 @@
 				Key: 'path/',
 				Body: data,
 				ContentType: 'application/octet-stream',
-				ChecksumCRC32: 'rfPzYw==',
 			},
 		);
 	});
diff --git a/packages/storage/__tests__/providers/s3/utils/client/S3/cases/listParts.ts b/packages/storage/__tests__/providers/s3/utils/client/S3/cases/listParts.ts
index 396035c09dd..059dfcaec0a 100644
--- a/packages/storage/__tests__/providers/s3/utils/client/S3/cases/listParts.ts
+++ b/packages/storage/__tests__/providers/s3/utils/client/S3/cases/listParts.ts
@@ -38,11 +38,13 @@ const listPartsHappyCase: ApiFunctionalTestCase<typeof listParts> = [
 			'<PartNumber>1</PartNumber>' +
 			'<ETag>etag1</ETag>' +
 			'<Size>5242880</Size>' +
+			'<ChecksumCRC32>checksum1</ChecksumCRC32>' +
 			'</Part>' +
 			'<Part>' +
 			'<PartNumber>2</PartNumber>' +
 			'<ETag>etag2</ETag>' +
 			'<Size>1024</Size>' +
+			'<ChecksumCRC32>checksum2</ChecksumCRC32>' +
 			'</Part>' +
 			'</ListPartsResult>',
 	},
@@ -53,12 +55,12 @@
 			{
 				PartNumber: 1,
 				ETag: 'etag1',
-				Size: 5242880,
+				ChecksumCRC32: 'checksum1',
 			},
 			{
 				PartNumber: 2,
 				ETag: 'etag2',
-				Size: 1024,
+				ChecksumCRC32: 'checksum2',
 			},
 		],
 	},
diff --git a/packages/storage/__tests__/providers/s3/utils/crc32.native.test.ts b/packages/storage/__tests__/providers/s3/utils/crc32.native.test.ts
deleted file mode 100644
index 0f4c1adce27..00000000000
--- a/packages/storage/__tests__/providers/s3/utils/crc32.native.test.ts
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import { calculateContentCRC32 } from '../../../../src/providers/s3/utils/crc32.native';
-
-const MB = 1024 * 1024;
-const getBlob = (size: number) => new Blob(['1'.repeat(size)]);
-
-describe('calculate crc32 native', () => {
-	it('should return undefined', async () => {
-		expect(await calculateContentCRC32(getBlob(8 * MB))).toEqual(undefined);
-	});
-});
diff --git a/packages/storage/__tests__/providers/s3/utils/crc32.test.ts b/packages/storage/__tests__/providers/s3/utils/crc32.test.ts
index e2195ddf5e4..28058a1fc1d 100644
--- a/packages/storage/__tests__/providers/s3/utils/crc32.test.ts
+++ b/packages/storage/__tests__/providers/s3/utils/crc32.test.ts
@@ -1,8 +1,6 @@
 // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0

-import { Blob as BlobPolyfill, File as FilePolyfill } from 'node:buffer';
-import { WritableStream as WritableStreamPolyfill } from 'node:stream/web';
 import {
 	TextDecoder as TextDecoderPolyfill,
 	TextEncoder as TextEncoderPolyfill,
 } from 'node:util';

 import { calculateContentCRC32 } from '../../../../src/providers/s3/utils/crc32';

-global.Blob = BlobPolyfill as any;
-global.File = FilePolyfill as any;
-global.WritableStream = WritableStreamPolyfill as any;
 global.TextEncoder = TextEncoderPolyfill as any;
 global.TextDecoder = TextDecoderPolyfill as any;
diff --git a/packages/storage/__tests__/providers/s3/utils/getCombinedCrc32.native.test.ts b/packages/storage/__tests__/providers/s3/utils/getCombinedCrc32.native.test.ts
new file mode 100644
index 00000000000..d0de37089b9
--- /dev/null
+++ b/packages/storage/__tests__/providers/s3/utils/getCombinedCrc32.native.test.ts
@@ -0,0 +1,108 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 + +import { + TextDecoder as TextDecoderPolyfill, + TextEncoder as TextEncoderPolyfill, +} from 'node:util'; + +import { getCombinedCrc32 } from '../../../../src/providers/s3/utils/getCombinedCrc32.native'; +import { byteLength } from '../../../../src/providers/s3/apis/internal/uploadData/byteLength'; + +global.TextEncoder = TextEncoderPolyfill as any; +global.TextDecoder = TextDecoderPolyfill as any; + +const MB = 1024 * 1024; +const getBlob = (size: number) => new Blob(['1'.repeat(size)]); +const encoder = new TextEncoder(); + +describe('calculate crc32', () => { + describe.each([ + { + type: 'file', + size: '4B', + data: new File(['data'], 'someName'), + expected: { + checksum: 'wu1R0Q==-1', + }, + }, + { + type: 'blob', + size: '4B', + data: new Blob(['data']), + expected: { + checksum: 'wu1R0Q==-1', + }, + }, + { + type: 'string', + size: '4B', + data: 'data', + expected: { + checksum: 'wu1R0Q==-1', + }, + }, + { + type: 'arrayBuffer', + size: '4B', + data: new Uint8Array(encoder.encode('data')).buffer, + expected: { + checksum: 'wu1R0Q==-1', + }, + }, + { + type: 'arrayBufferView', + size: '4B', + data: new DataView(encoder.encode('1234 data 5678').buffer, 5, 4), + expected: { + checksum: 'wu1R0Q==-1', + }, + }, + { + type: 'file', + size: '8MB', + data: new File([getBlob(8 * MB)], 'someName'), + expected: { + checksum: 'hwOICA==-2', + }, + }, + { + type: 'blob', + size: '8MB', + data: getBlob(8 * MB), + expected: { + checksum: 'hwOICA==-2', + }, + }, + { + type: 'string', + size: '8MB', + data: '1'.repeat(8 * MB), + expected: { + checksum: 'hwOICA==-2', + }, + }, + { + type: 'arrayBuffer', + size: '8MB', + data: new Uint8Array(encoder.encode('1'.repeat(8 * MB))).buffer, + expected: { + checksum: 'hwOICA==-2', + }, + }, + { + type: 'arrayBufferView', + size: '8MB', + data: encoder.encode('1'.repeat(8 * MB)), + expected: { + checksum: 'hwOICA==-2', + }, + }, + ])('output for data type of $type with size $size', ({ data, expected }) => { + it('should match expected checksum results', async () => { + expect((await getCombinedCrc32(data, byteLength(data)))!).toEqual( + expected.checksum, + ); + }); + }); +}); diff --git a/packages/storage/__tests__/providers/s3/utils/getCombinedCrc32.test.ts b/packages/storage/__tests__/providers/s3/utils/getCombinedCrc32.test.ts new file mode 100644 index 00000000000..299bd8d90e5 --- /dev/null +++ b/packages/storage/__tests__/providers/s3/utils/getCombinedCrc32.test.ts @@ -0,0 +1,108 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { + TextDecoder as TextDecoderPolyfill, + TextEncoder as TextEncoderPolyfill, +} from 'node:util'; + +import { getCombinedCrc32 } from '../../../../src/providers/s3/utils/getCombinedCrc32'; +import { byteLength } from '../../../../src/providers/s3/apis/internal/uploadData/byteLength'; + +global.TextEncoder = TextEncoderPolyfill as any; +global.TextDecoder = TextDecoderPolyfill as any; + +const MB = 1024 * 1024; +const getBlob = (size: number) => new Blob(['1'.repeat(size)]); +const encoder = new TextEncoder(); + +describe('calculate crc32', () => { + describe.each([ + { + type: 'file', + size: '4B', + data: new File(['data'], 'someName'), + expected: { + checksum: 'wu1R0Q==-1', + }, + }, + { + type: 'blob', + size: '4B', + data: new Blob(['data']), + expected: { + checksum: 'wu1R0Q==-1', + }, + }, + { + type: 'string', + size: '4B', + data: 'data', + expected: { + checksum: 'wu1R0Q==-1', + }, + }, + { + type: 'arrayBuffer', + size: '4B', + data: new Uint8Array(encoder.encode('data')).buffer, + expected: { + checksum: 'wu1R0Q==-1', + }, + }, + { + type: 'arrayBufferView', + size: '4B', + data: new DataView(encoder.encode('1234 data 5678').buffer, 5, 4), + expected: { + checksum: 'wu1R0Q==-1', + }, + }, + { + type: 'file', + size: '8MB', + data: new File([getBlob(8 * MB)], 'someName'), + expected: { + checksum: 'hwOICA==-2', + }, + }, + { + type: 'blob', + size: '8MB', + data: getBlob(8 * MB), + expected: { + checksum: 'hwOICA==-2', + }, + }, + { + type: 'string', + size: '8MB', + data: '1'.repeat(8 * MB), + expected: { + checksum: 'hwOICA==-2', + }, + }, + { + type: 'arrayBuffer', + size: '8MB', + data: new Uint8Array(encoder.encode('1'.repeat(8 * MB))).buffer, + expected: { + checksum: 'hwOICA==-2', + }, + }, + { + type: 'arrayBufferView', + size: '8MB', + data: encoder.encode('1'.repeat(8 * MB)), + expected: { + checksum: 'hwOICA==-2', + }, + }, + ])('output for data type of $type with size $size', ({ data, expected }) => { + it('should match expected checksum results', async () => { + expect((await getCombinedCrc32(data, byteLength(data)))!).toEqual( + expected.checksum, + ); + }); + }); +}); diff --git a/packages/storage/__tests__/providers/s3/utils/md5.native.test.ts b/packages/storage/__tests__/providers/s3/utils/md5.native.test.ts deleted file mode 100644 index ec70d0a8e14..00000000000 --- a/packages/storage/__tests__/providers/s3/utils/md5.native.test.ts +++ /dev/null @@ -1,131 +0,0 @@ -import { Buffer } from 'buffer'; - -import { Md5 } from '@smithy/md5-js'; - -import { calculateContentMd5 } from '../../../../src/providers/s3/utils/md5.native'; -import { toBase64 } from '../../../../src/providers/s3/utils/client/utils'; - -jest.mock('@smithy/md5-js'); -jest.mock('../../../../src/providers/s3/utils/client/utils'); -jest.mock('buffer'); - -interface MockFileReader { - error?: any; - result?: any; - onload?(): void; - onabort?(): void; - onerror?(): void; - readAsArrayBuffer?(): void; - readAsDataURL?(): void; -} - -// The FileReader in React Native 0.71 did not support `readAsArrayBuffer`. This native implementation accomodates this -// by attempting to use `readAsArrayBuffer` and changing the file reading strategy if it throws an error. 
-// TODO: This file should be removable when we drop support for React Native 0.71 -describe('calculateContentMd5 (native)', () => { - const stringContent = 'string-content'; - const base64data = 'base-64-data'; - const fileReaderResult = new ArrayBuffer(8); - const fileReaderBase64Result = `data:foo/bar;base64,${base64data}`; - const fileReaderError = new Error(); - // assert mocks - const mockBufferFrom = Buffer.from as jest.Mock; - const mockToBase64 = toBase64 as jest.Mock; - const mockMd5 = Md5 as jest.Mock; - // create mocks - const mockSuccessfulFileReader: MockFileReader = { - readAsArrayBuffer: jest.fn(() => { - mockSuccessfulFileReader.result = fileReaderResult; - mockSuccessfulFileReader.onload?.(); - }), - }; - const mockAbortedFileReader: MockFileReader = { - readAsArrayBuffer: jest.fn(() => { - mockAbortedFileReader.onabort?.(); - }), - }; - const mockFailedFileReader: MockFileReader = { - readAsArrayBuffer: jest.fn(() => { - mockFailedFileReader.error = fileReaderError; - mockFailedFileReader.onerror?.(); - }), - }; - const mockPartialFileReader: MockFileReader = { - readAsArrayBuffer: jest.fn(() => { - throw new Error('Not implemented'); - }), - readAsDataURL: jest.fn(() => { - mockPartialFileReader.result = fileReaderBase64Result; - mockPartialFileReader.onload?.(); - }), - }; - - beforeAll(() => { - mockBufferFrom.mockReturnValue(fileReaderResult); - }); - - afterEach(() => { - jest.clearAllMocks(); - mockMd5.mockReset(); - }); - - it.each([ - { type: 'string', content: stringContent }, - { type: 'ArrayBuffer view', content: new Uint8Array() }, - { type: 'ArrayBuffer', content: new ArrayBuffer(8) }, - ])('calculates MD5 for content type: $type', async ({ content }) => { - await calculateContentMd5(content); - const [mockMd5Instance] = mockMd5.mock.instances; - expect(mockMd5Instance.update.mock.calls[0][0]).toBe(content); - expect(mockToBase64).toHaveBeenCalled(); - }); - - it('calculates MD5 for content type: blob', async () => { - Object.defineProperty(global, 'FileReader', { - writable: true, - value: jest.fn(() => mockSuccessfulFileReader), - }); - await calculateContentMd5(new Blob([stringContent])); - const [mockMd5Instance] = mockMd5.mock.instances; - expect(mockMd5Instance.update.mock.calls[0][0]).toBe(fileReaderResult); - expect(mockSuccessfulFileReader.readAsArrayBuffer).toHaveBeenCalled(); - expect(mockToBase64).toHaveBeenCalled(); - }); - - it('rejects on file reader abort', async () => { - Object.defineProperty(global, 'FileReader', { - writable: true, - value: jest.fn(() => mockAbortedFileReader), - }); - await expect( - calculateContentMd5(new Blob([stringContent])), - ).rejects.toThrow('Read aborted'); - expect(mockAbortedFileReader.readAsArrayBuffer).toHaveBeenCalled(); - expect(mockToBase64).not.toHaveBeenCalled(); - }); - - it('rejects on file reader error', async () => { - Object.defineProperty(global, 'FileReader', { - writable: true, - value: jest.fn(() => mockFailedFileReader), - }); - await expect( - calculateContentMd5(new Blob([stringContent])), - ).rejects.toThrow(fileReaderError); - expect(mockFailedFileReader.readAsArrayBuffer).toHaveBeenCalled(); - expect(mockToBase64).not.toHaveBeenCalled(); - }); - - it('tries again using a different strategy if readAsArrayBuffer is unavailable', async () => { - Object.defineProperty(global, 'FileReader', { - writable: true, - value: jest.fn(() => mockPartialFileReader), - }); - await calculateContentMd5(new Blob([stringContent])); - const [mockMd5Instance] = mockMd5.mock.instances; - 
expect(mockMd5Instance.update.mock.calls[0][0]).toBe(fileReaderResult); - expect(mockPartialFileReader.readAsDataURL).toHaveBeenCalled(); - expect(mockBufferFrom).toHaveBeenCalledWith(base64data, 'base64'); - expect(mockToBase64).toHaveBeenCalled(); - }); -}); diff --git a/packages/storage/__tests__/providers/s3/utils/readFile.native.test.ts b/packages/storage/__tests__/providers/s3/utils/readFile.native.test.ts new file mode 100644 index 00000000000..cdd9aeff616 --- /dev/null +++ b/packages/storage/__tests__/providers/s3/utils/readFile.native.test.ts @@ -0,0 +1,119 @@ +import { Buffer } from 'buffer'; + +import { readFile } from '../../../../src/providers/s3/utils/readFile.native'; + +jest.mock('buffer', () => ({ + Buffer: { + from: jest.fn(() => new Uint8Array()), + }, +})); + +describe('readFile', () => { + let mockFileReader: any; + + beforeEach(() => { + mockFileReader = { + onload: null, + onabort: null, + onerror: null, + readAsArrayBuffer: jest.fn(), + readAsDataURL: jest.fn(), + result: null, + }; + + (global as any).FileReader = jest.fn(() => mockFileReader); + }); + + afterEach(() => { + jest.resetAllMocks(); + }); + + it('should read file as ArrayBuffer when supported', async () => { + const mockFile = new Blob(['test content']); + const mockArrayBuffer = new ArrayBuffer(8); + + mockFileReader.readAsArrayBuffer.mockImplementation(() => { + mockFileReader.result = mockArrayBuffer; + mockFileReader.onload(); + }); + + const result = await readFile(mockFile); + + expect(mockFileReader.readAsArrayBuffer).toHaveBeenCalledWith(mockFile); + expect(result).toBe(mockArrayBuffer); + }); + + it('should fallback to readAsDataURL when readAsArrayBuffer is not supported', async () => { + const mockFile = new Blob(['test content']); + const mockBase64Data = 'base64encodeddata'; + const mockDataURL = `data:application/octet-stream;base64,${mockBase64Data}`; + + mockFileReader.readAsArrayBuffer.mockImplementation(() => { + throw new Error('readAsArrayBuffer not supported'); + }); + + mockFileReader.readAsDataURL.mockImplementation(() => { + mockFileReader.result = mockDataURL; + mockFileReader.onload(); + }); + + await readFile(mockFile); + + expect(mockFileReader.readAsArrayBuffer).toHaveBeenCalledWith(mockFile); + expect(mockFileReader.readAsDataURL).toHaveBeenCalledWith(mockFile); + expect(Buffer.from).toHaveBeenCalledWith(mockBase64Data, 'base64'); + }); + + it('should reject when read is aborted', async () => { + const mockFile = new Blob(['test content']); + + mockFileReader.readAsArrayBuffer.mockImplementation(() => { + mockFileReader.onabort(); + }); + + await expect(readFile(mockFile)).rejects.toThrow('Read aborted'); + }); + + it('should reject when an error occurs during reading', async () => { + const mockFile = new Blob(['test content']); + const mockError = new Error('Read error'); + + mockFileReader.readAsArrayBuffer.mockImplementation(() => { + mockFileReader.error = mockError; + mockFileReader.onerror(); + }); + + await expect(readFile(mockFile)).rejects.toThrow(mockError); + }); + + it('should handle empty files', async () => { + const mockFile = new Blob([]); + const mockArrayBuffer = new ArrayBuffer(0); + + mockFileReader.readAsArrayBuffer.mockImplementation(() => { + mockFileReader.result = mockArrayBuffer; + mockFileReader.onload(); + }); + + const result = await readFile(mockFile); + + expect(result).toBeInstanceOf(ArrayBuffer); + expect(result.byteLength).toBe(0); + }); + + it('should handle large files', async () => { + const largeContent = 'a'.repeat(1024 * 1024 * 10); 
// 10MB of data + const mockFile = new Blob([largeContent]); + const mockArrayBuffer = new ArrayBuffer(1024 * 1024 * 10); + + mockFileReader.readAsArrayBuffer.mockImplementation(() => { + mockFileReader.result = mockArrayBuffer; + mockFileReader.onload(); + }); + + const result = await readFile(mockFile); + + expect(result).toBe(mockArrayBuffer); + expect(result.byteLength).toBe(1024 * 1024 * 10); + }); +}); diff --git a/packages/storage/__tests__/providers/s3/utils/readFile.test.ts b/packages/storage/__tests__/providers/s3/utils/readFile.test.ts new file mode 100644 index 00000000000..81baac510fc --- /dev/null +++ b/packages/storage/__tests__/providers/s3/utils/readFile.test.ts @@ -0,0 +1,90 @@ +import { readFile } from '../../../../src/providers/s3/utils/readFile'; + +describe('readFile', () => { + let mockFileReader: any; + + beforeEach(() => { + mockFileReader = { + onload: null, + onabort: null, + onerror: null, + readAsArrayBuffer: jest.fn(), + readAsDataURL: jest.fn(), + result: null, + }; + + (global as any).FileReader = jest.fn(() => mockFileReader); + }); + + afterEach(() => { + jest.resetAllMocks(); + }); + + it('should read file as ArrayBuffer when supported', async () => { + const mockFile = new Blob(['test content']); + const mockArrayBuffer = new ArrayBuffer(8); + + mockFileReader.readAsArrayBuffer.mockImplementation(() => { + mockFileReader.result = mockArrayBuffer; + mockFileReader.onload(); + }); + + const result = await readFile(mockFile); + + expect(mockFileReader.readAsArrayBuffer).toHaveBeenCalledWith(mockFile); + expect(result).toBe(mockArrayBuffer); + }); + + it('should reject when read is aborted', async () => { + const mockFile = new Blob(['test content']); + + mockFileReader.readAsArrayBuffer.mockImplementation(() => { + mockFileReader.onabort(); + }); + + await expect(readFile(mockFile)).rejects.toThrow('Read aborted'); + }); + + it('should reject when an error occurs during reading', async () => { + const mockFile = new Blob(['test content']); + const mockError = new Error('Read error'); + + mockFileReader.readAsArrayBuffer.mockImplementation(() => { + mockFileReader.error = mockError; + mockFileReader.onerror(); + }); + + await expect(readFile(mockFile)).rejects.toThrow(mockError); + }); + + it('should handle empty files', async () => { + const mockFile = new Blob([]); + const mockArrayBuffer = new ArrayBuffer(0); + + mockFileReader.readAsArrayBuffer.mockImplementation(() => { + mockFileReader.result = mockArrayBuffer; + mockFileReader.onload(); + }); + + const result = await readFile(mockFile); + + expect(result).toBeInstanceOf(ArrayBuffer); + expect(result.byteLength).toBe(0); + }); + + it('should handle large files', async () => { + const largeContent = 'a'.repeat(1024 * 1024 * 10); // 10MB of data + const mockFile = new Blob([largeContent]); + const mockArrayBuffer = new ArrayBuffer(1024 * 1024 * 10); + + mockFileReader.readAsArrayBuffer.mockImplementation(() => { + mockFileReader.result = mockArrayBuffer; + mockFileReader.onload(); + }); + + const result = await readFile(mockFile); + + expect(result).toBe(mockArrayBuffer); + expect(result.byteLength).toBe(1024 * 1024 * 10); + }); +}); diff --git a/packages/storage/src/providers/s3/apis/internal/uploadData/multipart/initialUpload.ts b/packages/storage/src/providers/s3/apis/internal/uploadData/multipart/initialUpload.ts index ff7c181f7fe..95b13f2c6e8 100644 --- a/packages/storage/src/providers/s3/apis/internal/uploadData/multipart/initialUpload.ts +++ 
b/packages/storage/src/providers/s3/apis/internal/uploadData/multipart/initialUpload.ts
@@ -9,19 +9,20 @@ import {
 import {
 	ContentDisposition,
 	ResolvedS3Config,
+	UploadDataChecksumAlgorithm,
 } from '../../../../types/options';
 import { StorageUploadDataPayload } from '../../../../../../types';
 import { Part, createMultipartUpload } from '../../../../utils/client/s3data';
 import { logger } from '../../../../../../utils';
-import { calculateContentCRC32 } from '../../../../utils/crc32';
 import { constructContentDisposition } from '../../../../utils/constructContentDisposition';
+import { CHECKSUM_ALGORITHM_CRC32 } from '../../../../utils/constants';
+import { getCombinedCrc32 } from '../../../../utils/getCombinedCrc32';
 import {
 	cacheMultipartUpload,
 	findCachedUploadParts,
 	getUploadsCacheKey,
 } from './uploadCache';
-import { getDataChunker } from './getDataChunker';

 interface LoadOrCreateMultipartUploadOptions {
 	s3Config: ResolvedS3Config;
@@ -36,6 +37,8 @@
 	metadata?: Record<string, string>;
 	size?: number;
 	abortSignal?: AbortSignal;
+	checksumAlgorithm?: UploadDataChecksumAlgorithm;
+	optionsHash: string;
 	resumableUploadsCache?: KeyValueStorageInterface;
 	expectedBucketOwner?: string;
 }
@@ -65,6 +68,8 @@
 	contentEncoding,
 	metadata,
 	abortSignal,
+	checksumAlgorithm,
+	optionsHash,
 	resumableUploadsCache,
 	expectedBucketOwner,
 }: LoadOrCreateMultipartUploadOptions): Promise => {
@@ -92,6 +97,7 @@
 		bucket,
 		accessLevel,
 		key,
+		optionsHash,
 	});

 	const cachedUploadParts = await findCachedUploadParts({
@@ -113,7 +119,10 @@
 			finalCrc32: cachedUpload.finalCrc32,
 		};
 	} else {
-		const finalCrc32 = await getCombinedCrc32(data, size);
+		const finalCrc32 =
+			checksumAlgorithm === CHECKSUM_ALGORITHM_CRC32
+				? await getCombinedCrc32(data, size)
+				: undefined;

 		const { UploadId } = await createMultipartUpload(
 			{
@@ -150,6 +159,7 @@
 			bucket,
 			accessLevel,
 			key,
+			optionsHash,
 		});
 		await cacheMultipartUpload(resumableUploadsCache, uploadCacheKey, {
 			uploadId: UploadId!,
@@ -166,20 +176,3 @@
 		};
 	}
 };
-
-const getCombinedCrc32 = async (
-	data: StorageUploadDataPayload,
-	size: number | undefined,
-) => {
-	const crc32List: ArrayBuffer[] = [];
-	const dataChunker = getDataChunker(data, size);
-	for (const { data: checkData } of dataChunker) {
-		const checksumArrayBuffer = (await calculateContentCRC32(checkData))
-			?.checksumArrayBuffer;
-		if (checksumArrayBuffer === undefined) return undefined;
-
-		crc32List.push(checksumArrayBuffer);
-	}
-
-	return `${(await calculateContentCRC32(new Blob(crc32List)))?.checksum}-${crc32List.length}`;
-};
diff --git a/packages/storage/src/providers/s3/apis/internal/uploadData/multipart/uploadCache.ts b/packages/storage/src/providers/s3/apis/internal/uploadData/multipart/uploadCache.ts
index 6c05a967d7b..22f80e741a7 100644
--- a/packages/storage/src/providers/s3/apis/internal/uploadData/multipart/uploadCache.ts
+++ b/packages/storage/src/providers/s3/apis/internal/uploadData/multipart/uploadCache.ts
@@ -103,6 +103,7 @@ interface UploadsCacheKeyOptions {
 	accessLevel?: StorageAccessLevel;
 	key: string;
 	file?: File;
+	optionsHash: string;
 }

 /**
@@ -117,6 +118,7 @@ export const getUploadsCacheKey = ({
 	bucket,
 	accessLevel,
 	key,
+	optionsHash,
 }: UploadsCacheKeyOptions) => {
 	let levelStr;
 	const resolvedContentType =
@@ -129,7 +131,7 @@
 		levelStr = accessLevel === 'guest' ? 'public' : accessLevel;
 	}

-	const baseId = `${size}_${resolvedContentType}_${bucket}_${levelStr}_${key}`;
+	const baseId = `${optionsHash}_${size}_${resolvedContentType}_${bucket}_${levelStr}_${key}`;

 	if (file) {
 		return `${file.name}_${file.lastModified}_${baseId}`;
diff --git a/packages/storage/src/providers/s3/apis/internal/uploadData/multipart/uploadHandlers.ts b/packages/storage/src/providers/s3/apis/internal/uploadData/multipart/uploadHandlers.ts
index c23d2cc5052..a17d9b1e086 100644
--- a/packages/storage/src/providers/s3/apis/internal/uploadData/multipart/uploadHandlers.ts
+++ b/packages/storage/src/providers/s3/apis/internal/uploadData/multipart/uploadHandlers.ts
@@ -36,6 +36,7 @@ import {
 import { getStorageUserAgentValue } from '../../../../utils/userAgent';
 import { logger } from '../../../../../../utils';
 import { validateObjectNotExists } from '../validateObjectNotExists';
+import { calculateContentCRC32 } from '../../../../utils/crc32';
 import { StorageOperationOptionsInput } from '../../../../../../types/inputs';

 import { uploadPartExecutor } from './uploadPartExecutor';
@@ -149,6 +150,10 @@
 				resolvedAccessLevel = resolveAccessLevel(accessLevel);
 			}

+			const optionsHash = (
+				await calculateContentCRC32(JSON.stringify(uploadDataOptions))
+			).checksum;
+
 			if (!inProgressUpload) {
 				const { uploadId, cachedParts, finalCrc32 } =
 					await loadOrCreateMultipartUpload({
@@ -164,6 +169,8 @@
 						data,
 						size,
 						abortSignal: abortController.signal,
+						checksumAlgorithm: uploadDataOptions?.checksumAlgorithm,
+						optionsHash,
 						resumableUploadsCache,
 						expectedBucketOwner,
 					});
@@ -182,6 +189,7 @@
 						bucket: resolvedBucket!,
 						size,
 						key: objectKey,
+						optionsHash,
 					})
 				: undefined;
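The hash above is what ties a cached multipart upload to the exact options it was started with: `JSON.stringify` drops function-valued options such as `onProgress`, so only serializable option values influence the key, and changing something like `checksumAlgorithm` between attempts produces a different key rather than resuming an incompatible upload. A rough sketch of the resulting key shape (illustrative, not code from this change; `calculateContentCRC32` is the helper extended later in this patch, and the literal in the comment comes from the tests above):

    import { calculateContentCRC32 } from '../../../../utils/crc32';

    const buildCacheKey = async (
      options: object,
      size: number,
      contentType: string,
      bucket: string,
      levelStr: string,
      key: string,
    ) => {
      // CRC32 of the serialized options, base64-encoded, e.g. 'Jz3O2w=='.
      const { checksum: optionsHash } = await calculateContentCRC32(
        JSON.stringify(options),
      );

      // e.g. 'Jz3O2w==_8388608_application/octet-stream_bucket_public_key'
      return `${optionsHash}_${size}_${contentType}_${bucket}_${levelStr}_${key}`;
    };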
diff --git a/packages/storage/src/providers/s3/apis/internal/uploadData/putObjectJob.ts b/packages/storage/src/providers/s3/apis/internal/uploadData/putObjectJob.ts
index 8ebd49f9c4a..577282ffb4c 100644
--- a/packages/storage/src/providers/s3/apis/internal/uploadData/putObjectJob.ts
+++ b/packages/storage/src/providers/s3/apis/internal/uploadData/putObjectJob.ts
@@ -16,7 +16,10 @@ import {
 import { ItemWithKey, ItemWithPath } from '../../../types/outputs';
 import { putObject } from '../../../utils/client/s3data';
 import { getStorageUserAgentValue } from '../../../utils/userAgent';
-import { STORAGE_INPUT_KEY } from '../../../utils/constants';
+import {
+	CHECKSUM_ALGORITHM_CRC32,
+	STORAGE_INPUT_KEY,
+} from '../../../utils/constants';
 import { calculateContentCRC32 } from '../../../utils/crc32';
 import { constructContentDisposition } from '../../../utils/constructContentDisposition';

@@ -62,11 +65,16 @@
 			contentType = 'application/octet-stream',
 			preventOverwrite,
 			metadata,
+			checksumAlgorithm,
 			onProgress,
 			expectedBucketOwner,
 		} = uploadDataOptions ?? {};

-		const checksumCRC32 = await calculateContentCRC32(data);
+		const checksumCRC32 =
+			checksumAlgorithm === CHECKSUM_ALGORITHM_CRC32
+				? await calculateContentCRC32(data)
+				: undefined;
+
 		const contentMD5 =
 			// check if checksum exists. ex: should not exist in react native
 			!checksumCRC32 && isObjectLockEnabled
diff --git a/packages/storage/src/providers/s3/types/options.ts b/packages/storage/src/providers/s3/types/options.ts
index 0ff007451bb..39891185185 100644
--- a/packages/storage/src/providers/s3/types/options.ts
+++ b/packages/storage/src/providers/s3/types/options.ts
@@ -198,6 +198,8 @@ export type DownloadDataOptions = CommonOptions &
 export type DownloadDataWithKeyOptions = ReadOptions & DownloadDataOptions;
 export type DownloadDataWithPathOptions = DownloadDataOptions;

+export type UploadDataChecksumAlgorithm = 'crc-32';
+
 export type UploadDataOptions = CommonOptions &
 	TransferOptions & {
 		/**
@@ -228,6 +230,12 @@
 		 * @default false
 		 */
 		preventOverwrite?: boolean;
+		/**
+		 * The algorithm used to compute a checksum for the object. Used to verify that the data received by S3
+		 * matches what was originally sent. Disabled by default.
+		 * @default undefined
+		 */
+		checksumAlgorithm?: UploadDataChecksumAlgorithm;
 	};

 /** @deprecated Use {@link UploadDataWithPathOptions} instead. */
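The `checksumAlgorithm` option above is the only new public API surface in this change. A minimal caller-side sketch of opting in (illustrative, not from this patch; it assumes an app already configured via `Amplify.configure()`, and the object path is made up):

    import { uploadData } from 'aws-amplify/storage';

    const uploadWithChecksum = async (file: File) => {
      const { result } = uploadData({
        path: 'public/photo.jpg', // hypothetical path
        data: file,
        options: {
          // Opt in to CRC-32 integrity checking. When omitted (the default),
          // no checksum is computed, and ContentMD5 is only attached for
          // object-lock-enabled buckets, as before.
          checksumAlgorithm: 'crc-32',
        },
      });

      return result;
    };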
diff --git a/packages/storage/src/providers/s3/utils/client/utils/deserializeHelpers.ts b/packages/storage/src/providers/s3/utils/client/utils/deserializeHelpers.ts
index ea90d46939a..0c2e3d2c7c0 100644
--- a/packages/storage/src/providers/s3/utils/client/utils/deserializeHelpers.ts
+++ b/packages/storage/src/providers/s3/utils/client/utils/deserializeHelpers.ts
@@ -214,7 +214,6 @@ export const deserializeCompletedPartList = (input: any[]): CompletedPart[] =>
 		map(item, {
 			PartNumber: ['PartNumber', deserializeNumber],
 			ETag: 'ETag',
-			Size: ['Size', deserializeNumber],
 			ChecksumCRC32: 'ChecksumCRC32',
 		}),
 	);
diff --git a/packages/storage/src/providers/s3/utils/constants.ts b/packages/storage/src/providers/s3/utils/constants.ts
index 538f3a902ff..72a58b778de 100644
--- a/packages/storage/src/providers/s3/utils/constants.ts
+++ b/packages/storage/src/providers/s3/utils/constants.ts
@@ -28,3 +28,5 @@ export const STORAGE_INPUT_KEY = 'key';
 export const STORAGE_INPUT_PATH = 'path';

 export const DEFAULT_DELIMITER = '/';
+
+export const CHECKSUM_ALGORITHM_CRC32 = 'crc-32';
diff --git a/packages/storage/src/providers/s3/utils/crc32.native.ts b/packages/storage/src/providers/s3/utils/crc32.native.ts
deleted file mode 100644
index 389cb5fc87b..00000000000
--- a/packages/storage/src/providers/s3/utils/crc32.native.ts
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import { CRC32Checksum } from './crc32';
-
-export const calculateContentCRC32 = async (
-	content: Blob | string | ArrayBuffer | ArrayBufferView,
-	_seed = 0,
-): Promise<CRC32Checksum | undefined> => {
-	return undefined;
-};
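The rewrite of `calculateContentCRC32` below replaces the old `blob.stream().pipeTo(new WritableStream(...))` hashing (which required polyfills in the Node test environment and is unavailable in React Native, hence the deleted `crc32.native.ts` stub above) with explicit 1 MB chunks. Chunking is correct because the `crc-32` package threads its running state through the seed argument, so hashing piecewise gives the same result as hashing the whole buffer. A small self-contained check, not part of the diff:

    import crc32 from 'crc-32';

    const a = new Uint8Array([1, 2, 3]);
    const b = new Uint8Array([4, 5, 6]);

    // CRC of the full buffer in one call...
    const whole = crc32.buf(new Uint8Array([...a, ...b]), 0) >>> 0;
    // ...equals the CRC of the second chunk seeded with the CRC of the first.
    const chunked = crc32.buf(b, crc32.buf(a, 0) >>> 0) >>> 0;

    console.log(whole === chunked); // true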
diff --git a/packages/storage/src/providers/s3/utils/crc32.ts b/packages/storage/src/providers/s3/utils/crc32.ts
index 6d9e194c3af..b11e97085ba 100644
--- a/packages/storage/src/providers/s3/utils/crc32.ts
+++ b/packages/storage/src/providers/s3/utils/crc32.ts
@@ -3,6 +3,11 @@
 import crc32 from 'crc-32';

+import { hexToArrayBuffer, hexToBase64 } from './hexUtils';
+import { readFile } from './readFile';
+
+const CHUNK_SIZE = 1024 * 1024; // 1MB chunks
+
 export interface CRC32Checksum {
 	checksumArrayBuffer: ArrayBuffer;
 	checksum: string;
@@ -12,23 +17,51 @@
 export const calculateContentCRC32 = async (
 	content: Blob | string | ArrayBuffer | ArrayBufferView,
 	seed = 0,
-): Promise<CRC32Checksum | undefined> => {
+): Promise<CRC32Checksum> => {
 	let internalSeed = seed;

-	let blob: Blob;
-	if (content instanceof Blob) {
-		blob = content;
+	if (content instanceof ArrayBuffer || ArrayBuffer.isView(content)) {
+		let uint8Array: Uint8Array;
+
+		if (content instanceof ArrayBuffer) {
+			uint8Array = new Uint8Array(content);
+		} else {
+			uint8Array = new Uint8Array(
+				content.buffer,
+				content.byteOffset,
+				content.byteLength,
+			);
+		}
+
+		let offset = 0;
+		while (offset < uint8Array.length) {
+			const end = Math.min(offset + CHUNK_SIZE, uint8Array.length);
+			const chunk = uint8Array.slice(offset, end);
+			internalSeed = crc32.buf(chunk, internalSeed) >>> 0;
+			offset = end;
+		}
 	} else {
-		blob = new Blob([content]);
+		let blob: Blob;
+
+		if (content instanceof Blob) {
+			blob = content;
+		} else {
+			blob = new Blob([content]);
+		}
+
+		let offset = 0;
+		while (offset < blob.size) {
+			const end = Math.min(offset + CHUNK_SIZE, blob.size);
+			const chunk = blob.slice(offset, end);
+			const arrayBuffer = await readFile(chunk);
+			const uint8Array = new Uint8Array(arrayBuffer);
+
+			internalSeed = crc32.buf(uint8Array, internalSeed) >>> 0;
+
+			offset = end;
+		}
 	}

-	await blob.stream().pipeTo(
-		new WritableStream({
-			write(chunk) {
-				internalSeed = crc32.buf(chunk, internalSeed) >>> 0;
-			},
-		}),
-	);
 	const hex = internalSeed.toString(16).padStart(8, '0');

 	return {
@@ -37,15 +70,3 @@
 		seed: internalSeed,
 	};
 };
-
-const hexToArrayBuffer = (hexString: string) =>
-	new Uint8Array((hexString.match(/\w{2}/g)! ?? []).map(h => parseInt(h, 16)))
-		.buffer;
-
-const hexToBase64 = (hexString: string) =>
-	btoa(
-		hexString
-			.match(/\w{2}/g)!
-			.map((a: string) => String.fromCharCode(parseInt(a, 16)))
-			.join(''),
-	);
diff --git a/packages/storage/src/providers/s3/utils/getCombinedCrc32.native.ts b/packages/storage/src/providers/s3/utils/getCombinedCrc32.native.ts
new file mode 100644
index 00000000000..f15b4fec3a9
--- /dev/null
+++ b/packages/storage/src/providers/s3/utils/getCombinedCrc32.native.ts
@@ -0,0 +1,46 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { StorageUploadDataPayload } from '../../../types';
+import { getDataChunker } from '../apis/internal/uploadData/multipart/getDataChunker';
+
+import { calculateContentCRC32 } from './crc32';
+
+/**
+ * Calculates a combined CRC32 checksum for the given data.
+ *
+ * This function chunks the input data, calculates CRC32 for each chunk,
+ * and then combines these checksums into a single value.
+ *
+ * @async
+ * @param {StorageUploadDataPayload} data - The data to calculate the checksum for.
+ * @param {number | undefined} size - The total size of the data, used to determine how it is chunked. If undefined, a default chunk size will be used.
+ * @returns {Promise<string>} A promise that resolves to a string containing the combined CRC32 checksum
+ * and the number of chunks, separated by a hyphen.
+ */
+export const getCombinedCrc32 = async (
+	data: StorageUploadDataPayload,
+	size: number | undefined,
+) => {
+	const crc32List: Uint8Array[] = [];
+	const dataChunker = getDataChunker(data, size);
+
+	let totalLength = 0;
+	for (const { data: checkData } of dataChunker) {
+		const checksum = new Uint8Array(
+			(await calculateContentCRC32(checkData)).checksumArrayBuffer,
+		);
+		totalLength += checksum.length;
+		crc32List.push(checksum);
+	}
+
+	// Combine all Uint8Arrays into a single Uint8Array
+	const combinedArray = new Uint8Array(totalLength);
+	let offset = 0;
+	for (const crc32Hash of crc32List) {
+		combinedArray.set(crc32Hash, offset);
+		offset += crc32Hash.length;
+	}
+
+	return `${(await calculateContentCRC32(combinedArray.buffer)).checksum}-${crc32List.length}`;
+};
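The implementation above (and its web counterpart just below) follows the composite-checksum convention S3 applies to multipart uploads: CRC32 each part, concatenate the resulting 4-byte checksums, CRC32 that buffer once more, then append `-<partCount>` to the base64 value. A standalone sketch of the format, assuming only the `crc-32` package (the `wu1R0Q==-1` vector asserted by the new tests falls out of this construction for the single 4-byte part 'data'):

    import crc32 from 'crc-32';

    // Big-endian 4-byte CRC32 of one chunk, matching hexToArrayBuffer below.
    const crc32Bytes = (bytes: Uint8Array): Uint8Array => {
      const out = new Uint8Array(4);
      new DataView(out.buffer).setUint32(0, crc32.buf(bytes, 0) >>> 0);

      return out;
    };

    const combinedCrc32 = (parts: Uint8Array[]): string => {
      // Concatenate the per-part checksums and hash that buffer once more.
      const concatenated = new Uint8Array(parts.length * 4);
      parts.forEach((part, i) => concatenated.set(crc32Bytes(part), i * 4));
      const finalCrc = crc32Bytes(concatenated);

      return `${btoa(String.fromCharCode(...finalCrc))}-${parts.length}`;
    };

    // Single part 'data' -> 'wu1R0Q==-1', as asserted by the tests above.
    console.log(combinedCrc32([new TextEncoder().encode('data')]));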
diff --git a/packages/storage/src/providers/s3/utils/getCombinedCrc32.ts b/packages/storage/src/providers/s3/utils/getCombinedCrc32.ts
new file mode 100644
index 00000000000..91082038523
--- /dev/null
+++ b/packages/storage/src/providers/s3/utils/getCombinedCrc32.ts
@@ -0,0 +1,34 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { StorageUploadDataPayload } from '../../../types';
+import { getDataChunker } from '../apis/internal/uploadData/multipart/getDataChunker';
+
+import { calculateContentCRC32 } from './crc32';
+
+/**
+ * Calculates a combined CRC32 checksum for the given data.
+ *
+ * This function chunks the input data, calculates CRC32 for each chunk,
+ * and then combines these checksums into a single value.
+ *
+ * @async
+ * @param {StorageUploadDataPayload} data - The data to calculate the checksum for.
+ * @param {number | undefined} size - The total size of the data, used to determine how it is chunked. If undefined, a default chunk size will be used.
+ * @returns {Promise<string>} A promise that resolves to a string containing the combined CRC32 checksum
+ * and the number of chunks, separated by a hyphen.
+ */
+export const getCombinedCrc32 = async (
+	data: StorageUploadDataPayload,
+	size: number | undefined,
+) => {
+	const crc32List: ArrayBuffer[] = [];
+	const dataChunker = getDataChunker(data, size);
+	for (const { data: checkData } of dataChunker) {
+		const { checksumArrayBuffer } = await calculateContentCRC32(checkData);
+
+		crc32List.push(checksumArrayBuffer);
+	}
+
+	return `${(await calculateContentCRC32(new Blob(crc32List))).checksum}-${crc32List.length}`;
+};
diff --git a/packages/storage/src/providers/s3/utils/hexUtils.ts b/packages/storage/src/providers/s3/utils/hexUtils.ts
new file mode 100644
index 00000000000..febb0d42e62
--- /dev/null
+++ b/packages/storage/src/providers/s3/utils/hexUtils.ts
@@ -0,0 +1,13 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { toBase64 } from './client/runtime';
+
+export const hexToUint8Array = (hexString: string) =>
+	new Uint8Array((hexString.match(/\w{2}/g)! ?? []).map(h => parseInt(h, 16)));
+
+export const hexToArrayBuffer = (hexString: string) =>
+	hexToUint8Array(hexString).buffer;
+
+export const hexToBase64 = (hexString: string) =>
+	toBase64(hexToUint8Array(hexString));
diff --git a/packages/storage/src/providers/s3/utils/md5.ts b/packages/storage/src/providers/s3/utils/md5.ts
index 98e04fdaf99..05cb09a4a5b 100644
--- a/packages/storage/src/providers/s3/utils/md5.ts
+++ b/packages/storage/src/providers/s3/utils/md5.ts
@@ -4,6 +4,7 @@
 import { Md5 } from '@smithy/md5-js';

 import { toBase64 } from './client/utils';
+import { readFile } from './readFile';

 export const calculateContentMd5 = async (
 	content: Blob | string | ArrayBuffer | ArrayBufferView,
@@ -15,18 +16,3 @@

 	return toBase64(digest);
 };
-
-const readFile = (file: Blob): Promise<ArrayBuffer> =>
-	new Promise((resolve, reject) => {
-		const reader = new FileReader();
-		reader.onload = () => {
-			resolve(reader.result as ArrayBuffer);
-		};
-		reader.onabort = () => {
-			reject(new Error('Read aborted'));
-		};
-		reader.onerror = () => {
-			reject(reader.error);
-		};
-		reader.readAsArrayBuffer(file);
-	});
diff --git a/packages/storage/src/providers/s3/utils/md5.native.ts b/packages/storage/src/providers/s3/utils/readFile.native.ts
similarity index 68%
rename from packages/storage/src/providers/s3/utils/md5.native.ts
rename to packages/storage/src/providers/s3/utils/readFile.native.ts
index a0c5a2365d8..29ccbfa5966 100644
--- a/packages/storage/src/providers/s3/utils/md5.native.ts
+++ b/packages/storage/src/providers/s3/utils/readFile.native.ts
@@ -3,25 +3,10 @@

 import { Buffer } from 'buffer';

-import { Md5 } from '@smithy/md5-js';
-
-import { toBase64 } from './client/utils';
-
-// The FileReader in React Native 0.71 did not support `readAsArrayBuffer`. This native implementation accomodates this
+// The FileReader in React Native 0.71 did not support `readAsArrayBuffer`. This native implementation accommodates this
 // by attempting to use `readAsArrayBuffer` and changing the file reading strategy if it throws an error.
 // TODO: This file should be removable when we drop support for React Native 0.71
-export const calculateContentMd5 = async (
-	content: Blob | string | ArrayBuffer | ArrayBufferView,
-): Promise<string> => {
-	const hasher = new Md5();
-	const buffer = content instanceof Blob ? await readFile(content) : content;
-	hasher.update(buffer);
-	const digest = await hasher.digest();
-
-	return toBase64(digest);
-};
-
-const readFile = (file: Blob): Promise<ArrayBuffer> =>
+export const readFile = (file: Blob): Promise<ArrayBuffer> =>
 	new Promise((resolve, reject) => {
 		const reader = new FileReader();
 		reader.onload = () => {
diff --git a/packages/storage/src/providers/s3/utils/readFile.ts b/packages/storage/src/providers/s3/utils/readFile.ts
new file mode 100644
index 00000000000..5d3782569d2
--- /dev/null
+++ b/packages/storage/src/providers/s3/utils/readFile.ts
@@ -0,0 +1,17 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+export const readFile = (file: Blob): Promise<ArrayBuffer> =>
+	new Promise((resolve, reject) => {
+		const reader = new FileReader();
+		reader.onload = () => {
+			resolve(reader.result as ArrayBuffer);
+		};
+		reader.onabort = () => {
+			reject(new Error('Read aborted'));
+		};
+		reader.onerror = () => {
+			reject(reader.error);
+		};
+		reader.readAsArrayBuffer(file);
+	});
diff --git a/tsconfig.json b/tsconfig.json
index 7a38e92756a..53556e642d3 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -25,5 +25,5 @@
 		"module": "es2020",
 		"types": ["node", "jest"]
 	},
-	"exclude": ["node_modules", "dist", ".eslintrc.js", "scripts"]
+	"exclude": ["node_modules", "dist", ".eslintrc.js"]
 }
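As a closing worked example tying the helpers together: the single-object vector `rfPzYw==` asserted throughout the putObjectJob tests is simply the CRC32 of the string 'data' (0xadf3f363) rendered through the `hexToBase64` path added in hexUtils.ts. Sketch, not code from the patch:

    const hex = 'adf3f363'; // CRC32('data') as produced by calculateContentCRC32
    const bytes = hex.match(/\w{2}/g)!.map(h => parseInt(h, 16));

    console.log(btoa(String.fromCharCode(...bytes))); // 'rfPzYw=='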