diff --git a/data_rows/DatabaseStorageUsageHistory.js b/data_rows/DatabaseStorageUsageHistory.js
new file mode 100644
index 0000000..c1c2302
--- /dev/null
+++ b/data_rows/DatabaseStorageUsageHistory.js
@@ -0,0 +1,89 @@
+import DataRow from "./DataRow";
+import { TYPES } from "./Types";
+import { randomFromArray } from "../helpers";
+
+class InitializeError extends Error {}
+
+/**
+ * DATABASE_STORAGE_USAGE_HISTORY View
+ *
+ * https://docs.snowflake.net/manuals/sql-reference/account-usage/database_storage_usage_history.html
+ */
+export class DatabaseStorageUsageHistory extends DataRow {
+  constructor(databaseID, databaseName, date) {
+    super();
+    if (!this.constructor._sizes) {
+      throw new InitializeError(
+        `Must call ${this.constructor.name}.initialize() first`
+      );
+    }
+    this.USAGE_DATE = date.toISOString();
+    this.DATABASE_ID = databaseID;
+    this.DATABASE_NAME = databaseName;
+    this._setAverageDatabaseBytes();
+    this._setAverageFailsafeBytes();
+  }
+
+  _setAverageDatabaseBytes() {
+    let size = this.constructor._sizes[this.DATABASE_NAME];
+    if (Math.random() < 0.1) {
+      const changeDegree = Math.random() * (0.2 - 0.05) + 0.05;
+      const change = Math.round(size * changeDegree);
+      if (Math.random() < 0.5) {
+        size -= change;
+      } else {
+        size += change;
+      }
+    }
+    this.AVERAGE_DATABASE_BYTES = size;
+  }
+
+  _setAverageFailsafeBytes() {
+    this.AVERAGE_FAILSAFE_BYTES = 0;
+    if (Math.random() < 0.1) {
+      this.AVERAGE_FAILSAFE_BYTES = Math.round(
+        Math.random() * this.AVERAGE_DATABASE_BYTES
+      );
+    }
+  }
+
+  static initialize(dbs) {
+    if (this._sizes) {
+      throw new InitializeError(
+        `Must call ${this.name}.initialize() only once`
+      );
+    }
+    const seedSizes = [1e12, 1e13, 1e14, 1e15];
+    const sizes = {};
+    for (const db of dbs) {
+      const seed = randomFromArray(seedSizes);
+      sizes[db.DATABASE_NAME] = Math.round(Math.random() * seed);
+    }
+    this._sizes = sizes;
+  }
+
+  static types() {
+    return [
+      {
+        name: "USAGE_DATE",
+        type: TYPES.timestamp
+      },
+      {
+        name: "DATABASE_ID",
+        type: TYPES.integer
+      },
+      {
+        name: "DATABASE_NAME",
+        type: TYPES.string
+      },
+      {
+        name: "AVERAGE_DATABASE_BYTES",
+        type: TYPES.integer
+      },
+      {
+        name: "AVERAGE_FAILSAFE_BYTES",
+        type: TYPES.integer
+      }
+    ];
+  }
+}
diff --git a/data_rows/Databases.js b/data_rows/Databases.js
new file mode 100644
index 0000000..c0a7566
--- /dev/null
+++ b/data_rows/Databases.js
@@ -0,0 +1,40 @@
+import DataRow from "./DataRow";
+import { TYPES } from "./Types";
+
+/**
+ * DATABASES View
+ *
+ * https://docs.snowflake.net/manuals/sql-reference/account-usage/databases.html
+ */
+export class Databases extends DataRow {
+  constructor(name, id) {
+    super();
+    this.DATABASE_NAME = name;
+    this.DATABASE_ID = id;
+  }
+
+  static generate() {
+    const names = [
+      "squiggly_database",
+      "jims_database",
+      "jacksonbase",
+      "prod",
+      "staging_db",
+      "dev1"
+    ];
+    return names.map((name, idx) => new this(name, idx + 1));
+  }
+
+  static types() {
+    return [
+      {
+        name: "DATABASE_NAME",
+        type: TYPES.string
+      },
+      {
+        name: "DATABASE_ID",
+        type: TYPES.integer
+      }
+    ];
+  }
+}
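Every row class in this change extends DataRow and declares its columns with TYPES, but neither data_rows/DataRow.js nor data_rows/Types.js is part of this diff. The sketch below is a hypothetical reconstruction inferred only from the call sites later in this change (static types() on each subclass, oddsNew()/rollDice() used by LoadHistory, QueryHistory, and main.js, and snowflakeColumns() used by main.js); the real modules may differ.

// data_rows/Types.js and data_rows/DataRow.js -- NOT part of this diff.
// Hypothetical sketch; the TYPES values and snowflakeColumns() return shape
// are guesses.
export const TYPES = {
  string: "STRING",
  integer: "INTEGER",
  float: "FLOAT",
  timestamp: "TIMESTAMP"
};

export default class DataRow {
  // Subclasses define static types() describing their columns.

  // Probability that a new row is emitted for a given tick; LoadHistory (0.2)
  // and QueryHistory (0.3) override this via oddsNew().
  static oddsNew() {
    return 1;
  }

  // main.js calls e.g. models.QueryHistory.rollDice() once per minute.
  static rollDice() {
    return Math.random() < this.oddsNew();
  }

  // main.js passes the result to snowflakeService.createTable(); assumed to be
  // a "COLUMN TYPE" list derived from the subclass's types().
  static snowflakeColumns() {
    return this.types()
      .map(column => `${column.name} ${column.type}`)
      .join(", ");
  }
}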
diff --git a/data_rows/LoadHistory.js b/data_rows/LoadHistory.js
new file mode 100644
index 0000000..4a3dc6a
--- /dev/null
+++ b/data_rows/LoadHistory.js
@@ -0,0 +1,42 @@
+import DataRow from "./DataRow";
+import { TYPES } from "./Types";
+
+/**
+ * LOAD_HISTORY View
+ *
+ * https://docs.snowflake.net/manuals/sql-reference/account-usage/load_history.html
+ */
+export class LoadHistory extends DataRow {
+  constructor(tableID, date) {
+    super();
+    this.TABLE_ID = tableID;
+    this.LAST_LOAD_TIME = date.toISOString();
+    this.ROW_COUNT = Math.round(Math.random() * 100);
+    this.ERROR_COUNT = Math.random() < 0.1 ? Math.round(Math.random() * 10) : 0;
+  }
+
+  static oddsNew() {
+    return 0.2;
+  }
+
+  static types() {
+    return [
+      {
+        name: "TABLE_ID",
+        type: TYPES.integer
+      },
+      {
+        name: "LAST_LOAD_TIME",
+        type: TYPES.timestamp
+      },
+      {
+        name: "ROW_COUNT",
+        type: TYPES.integer
+      },
+      {
+        name: "ERROR_COUNT",
+        type: TYPES.integer
+      }
+    ];
+  }
+}
diff --git a/data_rows/LoginHistory.js b/data_rows/LoginHistory.js
new file mode 100644
index 0000000..5d37359
--- /dev/null
+++ b/data_rows/LoginHistory.js
@@ -0,0 +1,54 @@
+import DataRow from "./DataRow";
+import { TYPES } from "./Types";
+import * as helpers from "../helpers";
+
+/**
+ * LOGIN_HISTORY View
+ *
+ * https://docs.snowflake.net/manuals/sql-reference/account-usage/login_history.html
+ */
+export class LoginHistory extends DataRow {
+  constructor(date) {
+    super();
+    this.EVENT_ID = helpers.getID();
+    this.EVENT_TIMESTAMP = date.toISOString();
+    const user = helpers.randomFromArray(this.constructor.getUsers());
+    this.USER_NAME = user.name;
+    this.REPORTED_CLIENT_TYPE = user.driver;
+    this.IS_SUCCESS = Math.random() < 0.9 ? "YES" : "NO";
+  }
+
+  static getUsers() {
+    return [
+      { name: "WEB_CLIENT", driver: "JAVASCRIPT_DRIVER", querySpeed: 1.2 },
+      { name: "BOB", driver: "OTHER", querySpeed: 2.0 },
+      { name: "BI_APP", driver: "JDBC_DRIVER", querySpeed: 0.8 },
+      { name: "JANE", driver: "SNOWFLAKE_UI", querySpeed: 1.0 }
+    ];
+  }
+
+  static types() {
+    return [
+      {
+        name: "EVENT_ID",
+        type: TYPES.string
+      },
+      {
+        name: "EVENT_TIMESTAMP",
+        type: TYPES.timestamp
+      },
+      {
+        name: "USER_NAME",
+        type: TYPES.string
+      },
+      {
+        name: "REPORTED_CLIENT_TYPE",
+        type: TYPES.string
+      },
+      {
+        name: "IS_SUCCESS",
+        type: TYPES.string
+      }
+    ];
+  }
+}
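LoginHistory above and QueryHistory below lean on a small ../helpers module (getID, randomFromArray, and a weighted pick) that is not included in this diff. The sketch below shows what those helpers are assumed to do, based purely on how they are called in this change; the ID format and the weighted-draw strategy are guesses.

// helpers.js -- NOT part of this diff; behavior inferred from usage.
let nextID = 0;

// LoginHistory uses this for EVENT_ID; any unique string would do.
export function getID() {
  nextID += 1;
  return String(nextID);
}

// Uniform pick from an array.
export function randomFromArray(arr) {
  return arr[Math.floor(Math.random() * arr.length)];
}

// Weighted pick over items shaped like { weight: 0.9, name: "SUCCESS" }.
// The weights in QueryHistory.setExecutionStatus() are relative (they do not
// sum to 1), so a cumulative draw over the total weight is assumed, returning
// the selected item's name.
export function randomFromArrayByWeight(items) {
  const total = items.reduce((sum, item) => sum + item.weight, 0);
  let draw = Math.random() * total;
  for (const item of items) {
    draw -= item.weight;
    if (draw <= 0) {
      return item.name;
    }
  }
  return items[items.length - 1].name;
}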
diff --git a/data_rows/QueryHistory.js b/data_rows/QueryHistory.js
new file mode 100644
index 0000000..886737a
--- /dev/null
+++ b/data_rows/QueryHistory.js
@@ -0,0 +1,147 @@
+import uuid4 from "uuid/v4";
+
+import DataRow from "./DataRow";
+import { LoginHistory } from "./LoginHistory";
+import { TYPES } from "./Types";
+import * as helpers from "../helpers";
+
+/**
+ * QUERY_HISTORY View
+ *
+ * https://docs.snowflake.net/manuals/sql-reference/account-usage/query_history.html
+ */
+export class QueryHistory extends DataRow {
+  constructor(databaseName, date) {
+    super();
+    this.QUERY_ID = uuid4();
+    this.setQueryText();
+    this.DATABASE_NAME = databaseName;
+    const queryType = helpers.randomFromArray(this._getQueryTypes());
+    this.QUERY_TYPE = queryType.type;
+    const user = helpers.randomFromArray(LoginHistory.getUsers());
+    this.USER_NAME = user.name;
+    [this.WAREHOUSE_NAME, this.WAREHOUSE_SIZE] = helpers.randomFromArray([
+      ["BIG_WH", "X-Large"],
+      ["SMALL_WH", "Small"]
+    ]);
+    this.setExecutionStatus();
+    this.START_TIME = date.toISOString();
+    this.COMPILATION_TIME = Math.round(Math.random() * 1000);
+    this.setExecutionTime(user.querySpeed, queryType.querySpeed);
+    this.QUEUED_REPAIR_TIME = 0;
+    this.QUEUED_OVERLOAD_TIME =
+      Math.random() < 0.2 ? Math.round(Math.random() * 1000) : 0;
+    this.setTransactionBlockedTime();
+  }
+
+  setQueryText() {
+    const queries = ["SHOW USERS", "SHOW WAREHOUSES", "SELECT foo FROM bar"];
+    this.QUERY_TEXT = helpers.randomFromArray(queries);
+  }
+
+  _getQueryTypes() {
+    return [
+      { type: "WITH", querySpeed: 1.2 },
+      { type: "REPLACE", querySpeed: 0.5 },
+      { type: "SHOW", querySpeed: 0.2 },
+      { type: "CREATE", querySpeed: 0.1 },
+      { type: "COPY", querySpeed: 1.5 },
+      { type: "SELECT", querySpeed: 1.9 },
+      { type: "UNKNOWN", querySpeed: 2.2 }
+    ];
+  }
+
+  setExecutionStatus() {
+    const statuses = [
+      { weight: 0.9, name: "SUCCESS" },
+      { weight: 0.31, name: "RUNNING" },
+      { weight: 0.28, name: "QUEUED" },
+      { weight: 0.24, name: "BLOCKED" },
+      { weight: 0.19, name: "RESUMING_WAREHOUSE" },
+      { weight: 0.15, name: "FAILED_WITH_ERROR" },
+      { weight: 0.1, name: "FAILED_WITH_INCIDENT" }
+    ];
+    this.EXECUTION_STATUS = helpers.randomFromArrayByWeight(statuses);
+  }
+
+  setExecutionTime(userQuerySpeed, queryTypeSpeed) {
+    const warehouseQuerySpeed = {
+      BIG_WH: 0.5,
+      SMALL_WH: 1.5
+    }[this.WAREHOUSE_NAME];
+    const factor = warehouseQuerySpeed * userQuerySpeed * queryTypeSpeed * 1000;
+    this.EXECUTION_TIME = Math.round(Math.random() * factor);
+  }
+
+  setTransactionBlockedTime() {
+    this.TRANSACTION_BLOCKED_TIME = 0;
+    if (Math.random() < 0.05) {
+      this.TRANSACTION_BLOCKED_TIME = Math.round(Math.random() * 1000);
+    }
+  }
+
+  static oddsNew() {
+    return 0.3;
+  }
+
+  static types() {
+    return [
+      {
+        name: "QUERY_ID",
+        type: TYPES.string
+      },
+      {
+        name: "QUERY_TEXT",
+        type: TYPES.string
+      },
+      {
+        name: "DATABASE_NAME",
+        type: TYPES.string
+      },
+      {
+        name: "QUERY_TYPE",
+        type: TYPES.string
+      },
+      {
+        name: "USER_NAME",
+        type: TYPES.string
+      },
+      {
+        name: "WAREHOUSE_NAME",
+        type: TYPES.string
+      },
+      {
+        name: "WAREHOUSE_SIZE",
+        type: TYPES.string
+      },
+      {
+        name: "EXECUTION_STATUS",
+        type: TYPES.string
+      },
+      {
+        name: "START_TIME",
+        type: TYPES.timestamp
+      },
+      {
+        name: "COMPILATION_TIME",
+        type: TYPES.integer
+      },
+      {
+        name: "EXECUTION_TIME",
+        type: TYPES.integer
+      },
+      {
+        name: "QUEUED_REPAIR_TIME",
+        type: TYPES.integer
+      },
+      {
+        name: "QUEUED_OVERLOAD_TIME",
+        type: TYPES.integer
+      },
+      {
+        name: "TRANSACTION_BLOCKED_TIME",
+        type: TYPES.integer
+      }
+    ];
+  }
+}
diff --git a/data_rows/StorageUsage.js b/data_rows/StorageUsage.js
new file mode 100644
index 0000000..46a9bed
--- /dev/null
+++ b/data_rows/StorageUsage.js
@@ -0,0 +1,44 @@
+import DataRow from "./DataRow";
+import { TYPES } from "./Types";
+
+/**
+ * STORAGE_USAGE View
+ *
+ * https://docs.snowflake.net/manuals/sql-reference/account-usage/storage_usage.html#storage-usage-view
+ */
+export class StorageUsage extends DataRow {
+  constructor(dbStorageUsageHistories, date) {
+    super();
+    this.USAGE_DATE = date.toISOString();
+    this.STORAGE_BYTES = dbStorageUsageHistories.reduce(
+      (sum, current) => sum + current.AVERAGE_DATABASE_BYTES,
+      0
+    );
+    this.STAGE_BYTES = Math.round(Math.random() * this.STORAGE_BYTES);
+    this.FAILSAFE_BYTES = 0;
+    if (Math.random() < 0.1) {
+      this.FAILSAFE_BYTES = Math.round(this.STORAGE_BYTES / 10);
+    }
+  }
+
+  static types() {
+    return [
+      {
+        name: "USAGE_DATE",
+        type: TYPES.timestamp
+      },
+      {
+        name: "STORAGE_BYTES",
+        type: TYPES.integer
+      },
+      {
+        name: "STAGE_BYTES",
+        type: TYPES.integer
+      },
+      {
+        name: "FAILSAFE_BYTES",
+        type: TYPES.integer
+      }
+    ];
+  }
+}
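To make the interplay of the three speed factors in QueryHistory.setExecutionTime() concrete: a SELECT (querySpeed 1.9) issued by BI_APP (0.8) on BIG_WH (0.5) gives factor = 0.5 * 0.8 * 1.9 * 1000 = 760, so EXECUTION_TIME is drawn uniformly from 0 to 760 ms; the same SELECT issued by BOB (2.0) on SMALL_WH (1.5) gives 1.5 * 2.0 * 1.9 * 1000 = 5700, i.e. up to 5.7 s. Larger querySpeed values therefore mean slower synthetic queries.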
diff --git a/data_rows/Tables.js b/data_rows/Tables.js
new file mode 100644
index 0000000..c473e7c
--- /dev/null
+++ b/data_rows/Tables.js
@@ -0,0 +1,23 @@
+import DataRow from "./DataRow";
+import { TYPES } from "./Types";
+
+export class Tables extends DataRow {
+  constructor(id) {
+    super();
+    this.ID = id;
+  }
+
+  static generate() {
+    const ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    return ids.map(id => new this(id));
+  }
+
+  static types() {
+    return [
+      {
+        name: "ID",
+        type: TYPES.integer
+      }
+    ];
+  }
+}
diff --git a/data_rows/WarehouseMeteringHistory.js b/data_rows/WarehouseMeteringHistory.js
new file mode 100644
index 0000000..884f64c
--- /dev/null
+++ b/data_rows/WarehouseMeteringHistory.js
@@ -0,0 +1,49 @@
+import DataRow from "./DataRow";
+import { TYPES } from "./Types";
+import { randomFromArray } from "../helpers";
+
+/**
+ * WAREHOUSE_METERING_HISTORY View
+ *
+ * https://docs.snowflake.net/manuals/sql-reference/account-usage/warehouse_metering_history.html
+ */
+export class WarehouseMeteringHistory extends DataRow {
+  constructor(date) {
+    super();
+    this.setStartAndEnd(date);
+    this.WAREHOUSE_NAME = randomFromArray(["BIG_WH", "SMALL_WH"]);
+    this.CREDITS_USED = Math.random() * 100;
+    if (this.WAREHOUSE_NAME === "SMALL_WH") {
+      this.CREDITS_USED *= 0.3;
+    }
+    this.CREDITS_USED = Number(this.CREDITS_USED.toFixed(2));
+  }
+
+  setStartAndEnd(date) {
+    this.START_TIME = date.toISOString();
+    const end = new Date(date);
+    end.setHours(date.getHours() + 1);
+    this.END_TIME = end.toISOString();
+  }
+
+  static types() {
+    return [
+      {
+        name: "START_TIME",
+        type: TYPES.timestamp
+      },
+      {
+        name: "END_TIME",
+        type: TYPES.timestamp
+      },
+      {
+        name: "WAREHOUSE_NAME",
+        type: TYPES.string
+      },
+      {
+        name: "CREDITS_USED",
+        type: TYPES.float
+      }
+    ];
+  }
+}
diff --git a/data_rows/index.js b/data_rows/index.js
new file mode 100644
index 0000000..0566a76
--- /dev/null
+++ b/data_rows/index.js
@@ -0,0 +1,41 @@
+import { Databases } from "./Databases";
+import { DatabaseStorageUsageHistory } from "./DatabaseStorageUsageHistory";
+import { LoadHistory } from "./LoadHistory";
+import { LoginHistory } from "./LoginHistory";
+import { QueryHistory } from "./QueryHistory";
+import { StorageUsage } from "./StorageUsage";
+import { Tables } from "./Tables";
+import { WarehouseMeteringHistory } from "./WarehouseMeteringHistory";
+
+export const TABLE_NAMES = [
+  "DATABASE_STORAGE_USAGE_HISTORY",
+  "DATABASES",
+  "LOAD_HISTORY",
+  "LOGIN_HISTORY",
+  "QUERY_HISTORY",
+  "STORAGE_USAGE",
+  "TABLES",
+  "WAREHOUSE_METERING_HISTORY"
+];
+
+export const TABLE_MAP = {
+  DATABASE_STORAGE_USAGE_HISTORY: DatabaseStorageUsageHistory,
+  DATABASES: Databases,
+  LOAD_HISTORY: LoadHistory,
+  LOGIN_HISTORY: LoginHistory,
+  QUERY_HISTORY: QueryHistory,
+  STORAGE_USAGE: StorageUsage,
+  TABLES: Tables,
+  WAREHOUSE_METERING_HISTORY: WarehouseMeteringHistory
+};
+
+export {
+  Databases,
+  DatabaseStorageUsageHistory,
+  LoadHistory,
+  LoginHistory,
+  QueryHistory,
+  StorageUsage,
+  Tables,
+  WarehouseMeteringHistory
+};
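main.js below writes every generated row to CSV through a Writer that is also exported from data_rows/DataRow.js and not shown in this diff. A minimal sketch, assuming write(path, row) simply appends one CSV line per row in the column order declared by the row class's types() (field quoting omitted, since the generated values contain no commas):

// data_rows/DataRow.js (excerpt) -- NOT part of this diff; hypothetical
// Writer consistent with filewriter.write("./out/FILE.csv", row) in main.js.
import fs from "fs";

export class Writer {
  async write(filePath, row) {
    // Column order is assumed to follow the class's types() declaration.
    const values = row.constructor.types().map(column => row[column.name]);
    await fs.promises.appendFile(filePath, values.join(",") + "\n");
  }
}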
diff --git a/main.js b/main.js
index 6b2b3db..df5d70a 100644
--- a/main.js
+++ b/main.js
@@ -1 +1,214 @@
-console.log("hello world");
+import fs from "fs";
+import path from "path";
+
+import s3 from "./services/s3";
+import Snowflake from "./services/snowflake";
+import { Writer } from "./data_rows/DataRow";
+import * as models from "./data_rows";
+import * as helpers from "./helpers";
+
+const dbs = [];
+const tables = [];
+const loadHistories = [];
+const queryHistories = [];
+const storageUsages = [];
+const dbStorageUsageHistories = [];
+const loginHistories = [];
+const warehouseMeteringHistories = [];
+
+/**
+ * Create one-time data and kickoff daily data generation
+ */
+async function generateData(endDate, daysToGen) {
+  const startDate = new Date(
+    new Date(endDate).setDate(endDate.getDate() - daysToGen)
+  );
+  startDate.setHours(0, 0, 0, 0);
+
+  dbs.push(...models.Databases.generate());
+  models.DatabaseStorageUsageHistory.initialize(dbs);
+  tables.push(...models.Tables.generate());
+
+  for (
+    let processingDay = new Date(startDate);
+    processingDay < endDate;
+    processingDay.setDate(processingDay.getDate() + 1)
+  ) {
+    generateDailyData(processingDay);
+  }
+  await writeCSVFiles();
+}
+
+/**
+ * Create daily data and kickoff hourly data generation
+ */
+function generateDailyData(processingDay) {
+  console.log(
+    `${new Date().toISOString()} Generating data for: ` +
+      `${processingDay.toISOString()}`
+  );
+  const todaysDBStorageHistories = [];
+  for (const db of dbs) {
+    const dsuh = new models.DatabaseStorageUsageHistory(
+      db.DATABASE_ID,
+      db.DATABASE_NAME,
+      processingDay
+    );
+    dbStorageUsageHistories.push(dsuh);
+    todaysDBStorageHistories.push(dsuh);
+  }
+  storageUsages.push(
+    new models.StorageUsage(todaysDBStorageHistories, processingDay)
+  );
+
+  const endHour = new Date(processingDay).setDate(processingDay.getDate() + 1);
+  for (
+    let processingHour = new Date(processingDay);
+    processingHour < endHour;
+    processingHour.setHours(processingHour.getHours() + 1)
+  ) {
+    generateHourlyData(processingHour);
+  }
+}
+
+/**
+ * Create hourly data and kickoff per-minute data generation
+ */
+function generateHourlyData(processingHour) {
+  warehouseMeteringHistories.push(
+    new models.WarehouseMeteringHistory(processingHour)
+  );
+  if (models.LoadHistory.rollDice()) {
+    const tableID = helpers.randomFromArray(tables).ID;
+    loadHistories.push(new models.LoadHistory(tableID, processingHour));
+  }
+  loginHistories.push(new models.LoginHistory(processingHour));
+
+  const endMinute = new Date(processingHour).setHours(
+    processingHour.getHours() + 1
+  );
+  for (
+    let processingMinute = new Date(processingHour);
+    processingMinute < endMinute;
+    processingMinute.setMinutes(processingMinute.getMinutes() + 1)
+  ) {
+    generatePerMinuteData(processingMinute, queryHistories);
+  }
+}
+
+function generatePerMinuteData(processingMinute, queryHistories) {
+  if (models.QueryHistory.rollDice()) {
+    const dbName = helpers.randomFromArray(dbs).DATABASE_NAME;
+    queryHistories.push(new models.QueryHistory(dbName, processingMinute));
+  }
+}
+
+async function writeCSVFiles() {
+  const filewriter = new Writer();
+  for (const db of dbs) {
+    await filewriter.write("./out/DATABASES.csv", db);
+  }
+  for (const table of tables) {
+    await filewriter.write("./out/TABLES.csv", table);
+  }
+  for (const loadHistory of loadHistories) {
+    await filewriter.write("./out/LOAD_HISTORY.csv", loadHistory);
+  }
+  for (const loginHistory of loginHistories) {
+    await filewriter.write("./out/LOGIN_HISTORY.csv", loginHistory);
+  }
+  for (const queryHistory of queryHistories) {
+    await filewriter.write("./out/QUERY_HISTORY.csv", queryHistory);
+  }
+  for (const storageUsage of storageUsages) {
+    await filewriter.write("./out/STORAGE_USAGE.csv", storageUsage);
+  }
+  for (const dbStorageUsageHistory of dbStorageUsageHistories) {
+    await filewriter.write(
+      "./out/DATABASE_STORAGE_USAGE_HISTORY.csv",
+      dbStorageUsageHistory
+    );
+  }
+  for (const warehouseMeteringHistory of warehouseMeteringHistories) {
+    await filewriter.write(
+      "./out/WAREHOUSE_METERING_HISTORY.csv",
+      warehouseMeteringHistory
+    );
+  }
+}
+
+async function deleteLocalFiles() {
+  const directory = path.join(__dirname, "./out");
+  const files = fs.readdirSync(directory);
+  for (const file of files) {
+    const fullPath = path.join(directory, file);
+    console.log("deleting: " + fullPath);
+    fs.unlinkSync(fullPath);
+  }
+}
+
+async function uploadSessionDataSnowflake(snowflakeService) {
+  const BUCKET_NAME = "looker-applications-demo-loading-data";
+  const STAGE_NAME = "lookerApplicationsDemoStage";
+  await snowflakeService.createStage(STAGE_NAME, BUCKET_NAME);
+  const s3service = new s3();
+
+  const csvFiles = fs.readdirSync("./out");
+  for (const csvFile of csvFiles) {
+    console.log("uploading to s3: " + csvFile);
+    const tablename = csvFile.split(".")[0];
+    await s3service.putFile(`./out/${csvFile}`, BUCKET_NAME, csvFile);
+    await snowflakeService.copyInto(tablename, STAGE_NAME, csvFile);
+  }
+}
+
+async function createSnowflakeTables(snowflakeService, tableNames) {
+  for (const tablename of tableNames) {
+    const columns = models.TABLE_MAP[tablename].snowflakeColumns();
+    const resp = await snowflakeService.createTable(tablename, columns);
+    console.log(resp);
+  }
+}
+
+async function deleteSnowflakeTables(snowflakeService, tableNames) {
+  for (const tablename of tableNames) {
+    const resp = await snowflakeService.dropTable(tablename);
+    console.log(resp);
+  }
+}
+
+async function generate(daysAgoToEnd, daysToGenerate) {
+  try {
+    const endDate = new Date();
+    endDate.setHours(0, 0, 0, 0);
+    endDate.setDate(endDate.getDate() - daysAgoToEnd);
+
+    await deleteLocalFiles();
+
+    await generateData(endDate, daysToGenerate);
+
+    const snowflakeService = new Snowflake();
+    await snowflakeService.doAllGrants("data_apps_load_role");
+    await snowflakeService.createSchema("account_usage");
+    await deleteSnowflakeTables(snowflakeService, models.TABLE_NAMES);
+    await createSnowflakeTables(snowflakeService, models.TABLE_NAMES);
+    await uploadSessionDataSnowflake(snowflakeService);
+  } catch (e) {
+    console.log(e);
+    throw e;
+  }
+}
+
+async function backfill() {
+  const DAYS_TO_BACKFILL = 360;
+  const BATCH_SIZE = 360;
+
+  let overallDay = DAYS_TO_BACKFILL;
+  while (overallDay > 0) {
+    console.log(overallDay);
+    overallDay -= BATCH_SIZE;
+    await generate(overallDay, BATCH_SIZE);
+  }
+}
+
+backfill().catch(e => console.log(e));
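The services imported at the top of main.js (./services/s3 and ./services/snowflake) are likewise outside this diff. For reference, the surface main.js relies on, expressed as hypothetical skeletons listing only the methods actually called above; the parameter names are guesses and the real modules may differ.

// services/s3.js and services/snowflake.js -- NOT part of this diff.
// Each class below is assumed to be the default export of its own module;
// method names and arities come from the call sites in main.js, bodies omitted.

class S3 {
  // e.g. putFile("./out/DATABASES.csv", BUCKET_NAME, "DATABASES.csv")
  async putFile(localPath, bucketName, key) {}
}

class Snowflake {
  async doAllGrants(roleName) {}
  async createSchema(schemaName) {}
  async createStage(stageName, bucketName) {}
  async createTable(tableName, columns) {}
  async dropTable(tableName) {}
  async copyInto(tableName, stageName, fileName) {}
}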