From 5d5d202fa1f7c1ac923ecd1ed49e0d655cdaa0ed Mon Sep 17 00:00:00 2001 From: Levko Kravets Date: Thu, 15 Feb 2024 23:48:54 +0200 Subject: [PATCH] Update volume ingestion e2e tests Signed-off-by: Levko Kravets --- .github/workflows/main.yml | 3 + lib/DBSQLSession.ts | 4 + tests/e2e/staging/.gitignore | 1 - tests/e2e/staging_ingestion.test.js | 124 ++++++++++++++++++---------- tests/e2e/utils/config.js | 6 +- 5 files changed, 93 insertions(+), 45 deletions(-) delete mode 100644 tests/e2e/staging/.gitignore diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 030d063e..64a5dfd0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -74,6 +74,9 @@ jobs: E2E_PATH: ${{ secrets.TEST_PECO_WAREHOUSE_HTTP_PATH }} E2E_ACCESS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} E2E_TABLE_SUFFIX: ${{github.sha}} + E2E_CATALOG: peco + E2E_SCHEMA: default + E2E_VOLUME: e2etests cache-name: cache-node-modules NYC_REPORT_DIR: coverage_e2e diff --git a/lib/DBSQLSession.ts b/lib/DBSQLSession.ts index 8ccf4dea..9863bc0c 100644 --- a/lib/DBSQLSession.ts +++ b/lib/DBSQLSession.ts @@ -280,6 +280,10 @@ export default class DBSQLSession implements IDBSQLSession { const agent = await connectionProvider.getAgent(); const response = await fetch(presignedUrl, { method: 'DELETE', headers, agent }); + // Looks that AWS and Azure have a different behavior of HTTP `DELETE` for non-existing files + // AWS assumes that - since file already doesn't exist - the goal is achieved, and returns HTTP 200 + // Azure, on the other hand, is somewhat stricter and check if file exists before deleting it. And if + // file doesn't exist - Azure returns HTTP 404 if (!response.ok) { throw new StagingError(`HTTP error ${response.status} ${response.statusText}`); } diff --git a/tests/e2e/staging/.gitignore b/tests/e2e/staging/.gitignore deleted file mode 100644 index 72e8ffc0..00000000 --- a/tests/e2e/staging/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* diff --git a/tests/e2e/staging_ingestion.test.js b/tests/e2e/staging_ingestion.test.js index f7bba4f9..bf083c0b 100644 --- a/tests/e2e/staging_ingestion.test.js +++ b/tests/e2e/staging_ingestion.test.js @@ -1,9 +1,32 @@ -const { expect, AssertionError } = require('chai'); +const { expect } = require('chai'); +const fs = require('fs'); +const path = require('path'); +const os = require('os'); +const uuid = require('uuid'); const config = require('./utils/config'); const { DBSQLClient } = require('../..'); -const fs = require('fs'); +const StagingError = require('../../dist/errors/StagingError').default; describe('Staging Test', () => { + const catalog = config.database[0]; + const schema = config.database[1]; + const volume = config.volume; + + const localPath = fs.mkdtempSync(path.join(os.tmpdir(), 'databricks-sql-tests-')); + + before(() => { + expect(catalog).to.not.be.undefined; + expect(schema).to.not.be.undefined; + expect(volume).to.not.be.undefined; + }); + + after(() => { + fs.rmSync(localPath, { + recursive: true, + force: true, + }); + }); + it('put staging data and receive it', async () => { const client = new DBSQLClient(); await client.connect({ @@ -11,23 +34,28 @@ describe('Staging Test', () => { path: config.path, token: config.token, }); - let tempPath = 'tests/e2e/staging/data'; - fs.writeFileSync(tempPath, 'Hello World!'); const session = await client.openSession({ - initialCatalog: config.database[0], - initialSchema: config.database[1], + initialCatalog: catalog, + initialSchema: schema, + }); + + const expectedData = 'Hello World!'; + const stagingFileName = `/Volumes/${catalog}/${schema}/${volume}/${uuid.v4()}.csv`; + const localFile = path.join(localPath, `${uuid.v4()}.csv`); + + fs.writeFileSync(localFile, expectedData); + await session.executeStatement(`PUT '${localFile}' INTO '${stagingFileName}' OVERWRITE`, { + stagingAllowedLocalPath: [localPath], + }); + fs.rmSync(localFile); + + await session.executeStatement(`GET '${stagingFileName}' TO '${localFile}'`, { + stagingAllowedLocalPath: [localPath], }); - await session.executeStatement( - `PUT '${tempPath}' INTO '/Volumes/${config.database[0]}/${config.database[1]}/e2etests/file1.csv' OVERWRITE`, - { stagingAllowedLocalPath: ['tests/e2e/staging'] }, - ); - await session.executeStatement( - `GET '/Volumes/${config.database[0]}/${config.database[1]}/e2etests/file1.csv' TO 'tests/e2e/staging/file'`, - { stagingAllowedLocalPath: ['tests/e2e/staging'] }, - ); - let result = fs.readFileSync('tests/e2e/staging/file'); - expect(result.toString() === 'Hello World!').to.be.true; + const result = fs.readFileSync(localFile); + fs.rmSync(localFile); + expect(result.toString() === expectedData).to.be.true; }); it('put staging data and remove it', async () => { @@ -37,31 +65,37 @@ describe('Staging Test', () => { path: config.path, token: config.token, }); - let tempPath = 'tests/e2e/staging/data'; - fs.writeFileSync(tempPath, (data = 'Hello World!')); - let session = await client.openSession({ - initialCatalog: config.database[0], - initialSchema: config.database[1], + const session = await client.openSession({ + initialCatalog: catalog, + initialSchema: schema, }); - await session.executeStatement( - `PUT '${tempPath}' INTO '/Volumes/${config.database[0]}/${config.database[1]}/e2etests/file1.csv' OVERWRITE`, - { stagingAllowedLocalPath: ['tests/e2e/staging'] }, - ); - await session.executeStatement(`REMOVE '/Volumes/${config.database[0]}/${config.database[1]}/e2etests/file1.csv'`, { - stagingAllowedLocalPath: ['tests/e2e/staging'], + + const expectedData = 'Hello World!'; + const stagingFileName = `/Volumes/${catalog}/${schema}/${volume}/${uuid.v4()}.csv`; + const localFile = path.join(localPath, `${uuid.v4()}.csv`); + + fs.writeFileSync(localFile, expectedData); + await session.executeStatement(`PUT '${localFile}' INTO '${stagingFileName}' OVERWRITE`, { + stagingAllowedLocalPath: [localPath], }); + fs.rmSync(localFile); + + await session.executeStatement(`REMOVE '${stagingFileName}'`, { stagingAllowedLocalPath: [localPath] }); try { - await session.executeStatement( - `GET '/Volumes/${config.database[0]}/${config.database[1]}/e2etests/file1.csv' TO 'tests/e2e/staging/file'`, - { stagingAllowedLocalPath: ['tests/e2e/staging'] }, - ); + await session.executeStatement(`GET '${stagingFileName}' TO '${localFile}'`, { + stagingAllowedLocalPath: [localPath], + }); } catch (error) { - if (error instanceof AssertionError) { + if (error instanceof StagingError) { + // File should not exist after deleting + expect(error.message).to.contain('404'); + } else { throw error; } - expect(error.message).to.contain('404'); // File should not exist after deleting + } finally { + fs.rmSync(localFile, { force: true }); } }); @@ -72,16 +106,22 @@ describe('Staging Test', () => { path: config.path, token: config.token, }); - let tempPath = 'tests/e2e/staging/data'; - fs.writeFileSync(tempPath, (data = 'Hello World!')); - let session = await client.openSession({ - initialCatalog: config.database[0], - initialSchema: config.database[1], + const session = await client.openSession({ + initialCatalog: catalog, + initialSchema: schema, }); - await session.executeStatement( - `REMOVE '/Volumes/${config.database[0]}/${config.database[1]}/e2etests/non_existing.csv'`, - { stagingAllowedLocalPath: ['tests/e2e/staging'] }, - ); + + const stagingFileName = `/Volumes/${catalog}/${schema}/${volume}/${uuid.v4()}.csv`; + + try { + await session.executeStatement(`REMOVE '${stagingFileName}'`, { stagingAllowedLocalPath: [localPath] }); + } catch (error) { + if (error instanceof StagingError) { + expect(error.message).to.contain('404'); + } else { + throw error; + } + } }); }); diff --git a/tests/e2e/utils/config.js b/tests/e2e/utils/config.js index bc62ef1e..bc3cff8e 100644 --- a/tests/e2e/utils/config.js +++ b/tests/e2e/utils/config.js @@ -4,7 +4,7 @@ try { } catch (e) {} const catalog = process.env.E2E_CATALOG || undefined; -const database = process.env.E2E_DATABASE || undefined; +const schema = process.env.E2E_SCHEMA || undefined; // Create file named `config.local.js` in the same directory and override config there module.exports = { @@ -17,7 +17,9 @@ module.exports = { // Access token: dapi******************************** token: process.env.E2E_ACCESS_TOKEN, // Catalog and database to use for testing; specify both or leave array empty to use defaults - database: catalog || database ? [catalog, database] : [], + database: catalog || schema ? [catalog, schema] : [], + // Volume to use for testing + volume: process.env.E2E_VOLUME, // Suffix used for tables that will be created during tests tableSuffix: process.env.E2E_TABLE_SUFFIX, ...overrides,