diff --git a/data_rows/DataRow.js b/data_rows/DataRow.js index 70d332a..fd49dd6 100644 --- a/data_rows/DataRow.js +++ b/data_rows/DataRow.js @@ -2,7 +2,7 @@ import stringify from "csv-stringify"; import fs from "graceful-fs"; export default class DataRow { - oddsNew() { + static oddsNew() { return 1; } diff --git a/data_rows/DatabaseStorageUsageHistory.js b/data_rows/DatabaseStorageUsageHistory.js new file mode 100644 index 0000000..7c72f66 --- /dev/null +++ b/data_rows/DatabaseStorageUsageHistory.js @@ -0,0 +1,90 @@ +import DataRow from "./DataRow"; +import TYPES from "./Types"; +import { randomFromArray } from "../helpers"; + +class InitializeError extends Error {} + +/** + * DATABASE_STORAGE_USAGE_HISTORY View + * + * https://docs.snowflake.net/manuals/sql-reference/account-usage/database_storage_usage_history.html + */ +export default class DatabaseStorageUsageHistory extends DataRow { + constructor(databaseID, databaseName, date) { + super(); + if (!this.constructor._sizes) { + throw new InitializeError( + `Must call ${this.constructor.name}.initialize() first` + ); + } + this.USAGE_DATE = date.toISOString(); + this.DATABASE_ID = databaseID; + this.DATABASE_NAME = databaseName; + this._setAverageDatabaseBytes(); + this._setAverageFailsafeBytes(); + } + + _setAverageDatabaseBytes() { + let size = this.constructor._sizes[this.DATABASE_NAME]; + if (Math.random() < 0.1) { + const changeDegree = Math.random() * (0.2 - 0.05) + 0.05; + const change = Math.round(size * changeDegree); + if (Math.random() < 0.3) { + size -= change; + } else { + size += change; + } + } + this.constructor._sizes[this.DATABASE_NAME] = size; + this.AVERAGE_DATABASE_BYTES = size; + } + + _setAverageFailsafeBytes() { + this.AVERAGE_FAILSAFE_BYTES = 0; + if (Math.random() < 0.1) { + this.AVERAGE_FAILSAFE_BYTES = Math.round( + Math.random() * this.AVERAGE_DATABASE_BYTES + ); + } + } + + static initialize(dbs) { + if (this._sizes) { + throw new InitializeError( + `Must call ${this.name}.initialize() only once` + ); + } + const seedSizes = [1.1e15, 1.2e15, 1.3e15, 1.4e15]; + const sizes = {}; + for (const db of dbs) { + const seed = randomFromArray(seedSizes); + sizes[db.DATABASE_NAME] = Math.round(Math.random() * seed); + } + this._sizes = sizes; + } + + static types() { + return [ + { + name: "USAGE_DATE", + type: TYPES.timestamp + }, + { + name: "DATABASE_ID", + type: TYPES.integer + }, + { + name: "DATABASE_NAME", + type: TYPES.string + }, + { + name: "AVERAGE_DATABASE_BYTES", + type: TYPES.integer + }, + { + name: "AVERAGE_FAILSAFE_BYTES", + type: TYPES.integer + } + ]; + } +} diff --git a/data_rows/Databases.js b/data_rows/Databases.js new file mode 100644 index 0000000..69993a8 --- /dev/null +++ b/data_rows/Databases.js @@ -0,0 +1,40 @@ +import DataRow from "./DataRow"; +import TYPES from "./Types"; + +/** + * DATABASES View + * + * https://docs.snowflake.net/manuals/sql-reference/account-usage/databases.html + */ +export default class Databases extends DataRow { + constructor(name, id) { + super(); + this.DATABASE_NAME = name; + this.DATABASE_ID = id; + } + + static generate() { + const names = [ + "squiggly_database", + "jims_database", + "jacksonbase", + "prod", + "staging_db", + "dev1" + ]; + return names.map((name, idx) => new this(name, idx + 1)); + } + + static types() { + return [ + { + name: "DATABASE_NAME", + type: TYPES.string + }, + { + name: "DATABASE_ID", + type: TYPES.integer + } + ]; + } +} diff --git a/data_rows/LoadHistory.js b/data_rows/LoadHistory.js new file mode 100644 index 0000000..6f309e2 --- /dev/null +++ b/data_rows/LoadHistory.js @@ -0,0 +1,44 @@ +import DataRow from "./DataRow"; +import TYPES from "./Types"; + +const ODDS_NEW = 0.2; + +/** + * LOAD_HISTORY View + * + * https://docs.snowflake.net/manuals/sql-reference/account-usage/load_history.html + */ +export default class LoadHistory extends DataRow { + constructor(tableID, date) { + super(); + this.TABLE_ID = tableID; + this.LAST_LOAD_TIME = date.toISOString(); + this.ROW_COUNT = Math.round(Math.random() * 100); + this.ERROR_COUNT = Math.random() < 0.1 ? Math.round(Math.random() * 10) : 0; + } + + static oddsNew() { + return ODDS_NEW; + } + + static types() { + return [ + { + name: "TABLE_ID", + type: TYPES.integer + }, + { + name: "LAST_LOAD_TIME", + type: TYPES.timestamp + }, + { + name: "ROW_COUNT", + type: TYPES.integer + }, + { + name: "ERROR_COUNT", + type: TYPES.integer + } + ]; + } +} diff --git a/data_rows/LoginHistory.js b/data_rows/LoginHistory.js new file mode 100644 index 0000000..30d64cc --- /dev/null +++ b/data_rows/LoginHistory.js @@ -0,0 +1,54 @@ +import DataRow from "./DataRow"; +import TYPES from "./Types"; +import * as helpers from "../helpers"; + +/** + * LOGIN_HISTORY View + * + * https://docs.snowflake.net/manuals/sql-reference/account-usage/login_history.html + */ +export default class LoginHistory extends DataRow { + constructor(date) { + super(); + this.EVENT_ID = helpers.getID(); + this.EVENT_TIMESTAMP = date.toISOString(); + const user = helpers.randomFromArray(this.constructor.getUsers()); + this.USER_NAME = user.name; + this.REPORTED_CLIENT_TYPE = user.driver; + this.IS_SUCCESS = Math.random() < 0.9 ? "YES" : "NO"; + } + + static getUsers() { + return [ + { name: "WEB_CLIENT", driver: "JAVASCRIPT_DRIVER", querySpeed: 1.2 }, + { name: "BOB", driver: "OTHER", querySpeed: 2.0 }, + { name: "BI_APP", driver: "JDBC_DRIVER", querySpeed: 0.8 }, + { name: "JANE", driver: "SNOWFLAKE_UI", querySpeed: 1.0 } + ]; + } + + static types() { + return [ + { + name: "EVENT_ID", + type: TYPES.string + }, + { + name: "EVENT_TIMESTAMP", + type: TYPES.timestamp + }, + { + name: "USER_NAME", + type: TYPES.string + }, + { + name: "REPORTED_CLIENT_TYPE", + type: TYPES.string + }, + { + name: "IS_SUCCESS", + type: TYPES.string + } + ]; + } +} diff --git a/data_rows/QueryHistory.js b/data_rows/QueryHistory.js new file mode 100644 index 0000000..5f546a5 --- /dev/null +++ b/data_rows/QueryHistory.js @@ -0,0 +1,172 @@ +import uuid4 from "uuid/v4"; + +import DataRow from "./DataRow"; +import LoginHistory from "./LoginHistory"; +import TYPES from "./Types"; +import * as helpers from "../helpers"; + +const ODDS_NEW = 0.3; + +/** + * QUERY_HISTORY View + * + * https://docs.snowflake.net/manuals/sql-reference/account-usage/query_history.html + */ +export default class QueryHistory extends DataRow { + constructor(databaseName, date) { + super(); + this.QUERY_ID = uuid4(); + this.DATABASE_NAME = databaseName; + const queryInfo = helpers.randomFromArrayByWeight( + this._getQueryInfo(), + true + ); + this.QUERY_TEXT = queryInfo.text; + this.QUERY_TYPE = queryInfo.type; + const user = helpers.randomFromArray(LoginHistory.getUsers()); + this.USER_NAME = user.name; + [this.WAREHOUSE_NAME, this.WAREHOUSE_SIZE] = helpers.randomFromArray([ + ["BIG_WH", "X-Large"], + ["SMALL_WH", "Small"] + ]); + this._setExecutionStatus(); + this.START_TIME = date.toISOString(); + this.COMPILATION_TIME = Math.round(Math.random() * 1000); + this._setExecutionTime(user.querySpeed, queryInfo.querySpeed); + this.QUEUED_REPAIR_TIME = 0; + this.QUEUED_OVERLOAD_TIME = + Math.random() < 0.2 ? Math.round(Math.random() * 1000) : 0; + this._setTransactionBlockedtime(); + } + + _getQueryInfo() { + return [ + { + weight: 0.02, + type: "UNKNOWN", + querySpeed: 2.2, + text: "What's all this then?" + }, + { + weight: 0.04, + type: "CREATE", + querySpeed: 0.1, + text: "CREATE DATABASE" + }, + { weight: 0.06, type: "COPY", querySpeed: 1.5, text: "COPY TABLE" }, + { weight: 0.08, type: "SHOW", querySpeed: 0.2, text: "SHOW USERS" }, + { + weight: 0.1, + type: "REPLACE", + querySpeed: 0.5, + text: "REPLACE INTO fooTable" + }, + { + weight: 0.2, + type: "WITH", + querySpeed: 1.2, + text: "WITH fooTable (bar)" + }, + { + weight: 0.5, + type: "SELECT", + querySpeed: 1.9, + text: "SELECT * FROM fooTable" + } + ]; + } + + _setExecutionStatus() { + const statuses = [ + { weight: 0.9, name: "SUCCESS" }, + { weight: 0.31, name: "RUNNING" }, + { weight: 0.28, name: "QUEUED" }, + { weight: 0.24, name: "BLOCKED" }, + { weight: 0.19, name: "RESUMING_WAREHOUSE" }, + { weight: 0.15, name: "FAILED_WITH_ERROR" }, + { weight: 0.1, name: "FAILED_WITH_INCIDENT" } + ]; + this.EXECUTION_STATUS = helpers.randomFromArrayByWeight(statuses); + } + + _setExecutionTime(userQuerySpeed, queryTypeSpeed) { + const warehouseQuerySpeed = { + BIG_WH: 0.5, + SMALL_WH: 1.5 + }[this.WAREHOUSE_NAME]; + const factor = warehouseQuerySpeed * userQuerySpeed * queryTypeSpeed * 1000; + this.EXECUTION_TIME = Math.round(Math.random() * factor); + } + + _setTransactionBlockedtime() { + this.TRANSACTION_BLOCKED_TIME = 0; + if (Math.random() < 0.05) { + this.TRANSACTION_BLOCKED_TIME = Math.round(Math.random() * 1000); + } + } + + static oddsNew() { + return ODDS_NEW; + } + + static types() { + return [ + { + name: "QUERY_ID", + type: TYPES.string + }, + { + name: "QUERY_TEXT", + type: TYPES.string + }, + { + name: "DATABASE_NAME", + type: TYPES.string + }, + { + name: "QUERY_TYPE", + type: TYPES.string + }, + { + name: "USER_NAME", + type: TYPES.string + }, + { + name: "WAREHOUSE_NAME", + type: TYPES.string + }, + { + name: "WAREHOUSE_SIZE", + type: TYPES.string + }, + { + name: "EXECUTION_STATUS", + type: TYPES.string + }, + { + name: "START_TIME", + type: TYPES.timestamp + }, + { + name: "COMPILATION_TIME", + type: TYPES.integer + }, + { + name: "EXECUTION_TIME", + type: TYPES.integer + }, + { + name: "QUEUED_REPAIR_TIME", + type: TYPES.integer + }, + { + name: "QUEUED_OVERLOAD_TIME", + type: TYPES.integer + }, + { + name: "TRANSACTION_BLOCKED_TIME", + type: TYPES.integer + } + ]; + } +} diff --git a/data_rows/StorageUsage.js b/data_rows/StorageUsage.js new file mode 100644 index 0000000..1fbb169 --- /dev/null +++ b/data_rows/StorageUsage.js @@ -0,0 +1,44 @@ +import DataRow from "./DataRow"; +import TYPES from "./Types"; + +/** + * STORAGE_USAGE View + * + * https://docs.snowflake.net/manuals/sql-reference/account-usage/storage_usage.html#storage-usage-view + */ +export default class StorageUsage extends DataRow { + constructor(dbStorageUsageHistories, date) { + super(); + this.USAGE_DATE = date.toISOString(); + this.STORAGE_BYTES = dbStorageUsageHistories.reduce( + (sum, current) => sum + current.AVERAGE_DATABASE_BYTES, + 0 + ); + this.STAGE_BYTES = Math.round(Math.random() * this.STORAGE_BYTES); + this.FAILSAFE_BYTES = 0; + if (Math.random() < 0.1) { + this.FAILSAFE_BYTES = Math.round(this.STORAGE_BYTES / 10); + } + } + + static types() { + return [ + { + name: "USAGE_DATE", + type: TYPES.timestamp + }, + { + name: "STORAGE_BYTES", + type: TYPES.integer + }, + { + name: "STAGE_BYTES", + type: TYPES.integer + }, + { + name: "FAILSAFE_BYTES", + type: TYPES.integer + } + ]; + } +} diff --git a/data_rows/Tables.js b/data_rows/Tables.js new file mode 100644 index 0000000..2ba1e89 --- /dev/null +++ b/data_rows/Tables.js @@ -0,0 +1,23 @@ +import DataRow from "./DataRow"; +import TYPES from "./Types"; + +export default class Tables extends DataRow { + constructor(id) { + super(); + this.ID = id; + } + + static generate() { + const ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + return ids.map(id => new this(id)); + } + + static types() { + return [ + { + name: "ID", + type: TYPES.integer + } + ]; + } +} diff --git a/data_rows/Types.js b/data_rows/Types.js index 422f614..22ca70c 100644 --- a/data_rows/Types.js +++ b/data_rows/Types.js @@ -1,8 +1,10 @@ -export const TYPES = { - boolean: 'BOOLEAN', - integer: 'INTEGER', - string: 'STRING', - timestamp: 'TIMESTAMP', - float: 'FLOAT', - date: 'DATE' -} +const TYPES = { + boolean: "BOOLEAN", + integer: "INTEGER", + string: "STRING", + timestamp: "TIMESTAMP", + float: "FLOAT", + date: "DATE" +}; + +export default TYPES; diff --git a/data_rows/WarehouseMeteringHistory.js b/data_rows/WarehouseMeteringHistory.js new file mode 100644 index 0000000..7d59538 --- /dev/null +++ b/data_rows/WarehouseMeteringHistory.js @@ -0,0 +1,49 @@ +import DataRow from "./DataRow"; +import TYPES from "./Types"; +import { randomFromArray } from "../helpers"; + +/** + * WAREHOUSE_METERING_HISTORY View + * + * https://docs.snowflake.net/manuals/sql-reference/account-usage/warehouse_metering_history.html + */ +export default class WarehouseMeteringHistory extends DataRow { + constructor(date) { + super(); + this._setStartAndEnd(date); + this.WAREHOUSE_NAME = randomFromArray(["BIG_WH", "SMALL_WH"]); + let credits_used = Math.random(); + if (this.WAREHOUSE_NAME == "SMALL_WH") { + credits_used *= 0.3; + } + this.CREDITS_USED = credits_used.toFixed(2); + } + + _setStartAndEnd(date) { + this.START_TIME = date.toISOString(); + const end = new Date(date); + end.setHours(date.getHours() + 1); + this.END_TIME = end; + } + + static types() { + return [ + { + name: "START_TIME", + type: TYPES.timestamp + }, + { + name: "END_TIME", + type: TYPES.timestamp + }, + { + name: "WAREHOUSE_NAME", + type: TYPES.string + }, + { + name: "CREDITS_USED", + type: TYPES.float + } + ]; + } +} diff --git a/data_rows/index.js b/data_rows/index.js new file mode 100644 index 0000000..1799071 --- /dev/null +++ b/data_rows/index.js @@ -0,0 +1,41 @@ +import Databases from "./Databases"; +import DatabaseStorageUsageHistory from "./DatabaseStorageUsageHistory"; +import LoadHistory from "./LoadHistory"; +import LoginHistory from "./LoginHistory"; +import QueryHistory from "./QueryHistory"; +import StorageUsage from "./StorageUsage"; +import Tables from "./Tables"; +import WarehouseMeteringHistory from "./WarehouseMeteringHistory"; + +export const TABLE_NAMES = [ + "DATABASE_STORAGE_USAGE_HISTORY", + "DATABASES", + "LOAD_HISTORY", + "LOGIN_HISTORY", + "QUERY_HISTORY", + "STORAGE_USAGE", + "TABLES", + "WAREHOUSE_METERING_HISTORY" +]; + +export const TABLE_MAP = { + DATABASE_STORAGE_USAGE_HISTORY: DatabaseStorageUsageHistory, + DATABASES: Databases, + LOAD_HISTORY: LoadHistory, + LOGIN_HISTORY: LoginHistory, + QUERY_HISTORY: QueryHistory, + STORAGE_USAGE: StorageUsage, + TABLES: Tables, + WAREHOUSE_METERING_HISTORY: WarehouseMeteringHistory +}; + +export { + Databases, + DatabaseStorageUsageHistory, + LoadHistory, + LoginHistory, + QueryHistory, + StorageUsage, + Tables, + WarehouseMeteringHistory +}; diff --git a/helpers.js b/helpers.js index f78fbfc..c10207f 100644 --- a/helpers.js +++ b/helpers.js @@ -2,12 +2,12 @@ export function randomFromArray(theArray) { return theArray[Math.floor(Math.random() * theArray.length)]; } -export function randomFromArrayByWeight(theArray) { +export function randomFromArrayByWeight(theArray, all = false) { let sumWeight = 0; const r = Math.random(); for (let item of theArray) { sumWeight += item.weight; - if (r <= sumWeight) return item.name; + if (r <= sumWeight) return all ? item : item.name; } } diff --git a/main.js b/main.js index 6b2b3db..d086ef1 100644 --- a/main.js +++ b/main.js @@ -1 +1,203 @@ -console.log("hello world"); +import fs from "fs"; +import path from "path"; + +import s3 from "./services/s3"; +import Snowflake from "./services/snowflake"; +import { Writer } from "./data_rows/DataRow"; +import * as models from "./data_rows"; +import * as helpers from "./helpers"; + +const dbs = []; +const tables = []; +const loadHistories = []; +const queryHistories = []; +const storageUsages = []; +const dbStorageUsageHistories = []; +const loginHistories = []; +const warehouseMeteringHistories = []; + +/** + * Create one-time data and kickoff daily data generation + */ +async function generateData(endDate, daysToGen) { + const startDate = new Date( + new Date(endDate).setDate(endDate.getDate() - daysToGen) + ); + startDate.setHours(0, 0, 0, 0); + + dbs.push(...models.Databases.generate()); + models.DatabaseStorageUsageHistory.initialize(dbs); + tables.push(...models.Tables.generate()); + + for ( + let processingDay = new Date(startDate); + processingDay < endDate; + processingDay.setDate(processingDay.getDate() + 1) + ) { + generateDailyData(processingDay); + } + await writeCSVFiles(); +} + +/** + * Create daily data and kickoff hourly data generation + */ +function generateDailyData(processingDay) { + console.log( + `${new Date().toISOString()} Generating data for: ` + + `${processingDay.toISOString()}` + ); + const todaysDBStorageHistories = []; + for (const db of dbs) { + const dsuh = new models.DatabaseStorageUsageHistory( + db.DATABASE_ID, + db.DATABASE_NAME, + processingDay + ); + dbStorageUsageHistories.push(dsuh); + todaysDBStorageHistories.push(dsuh); + } + storageUsages.push( + new models.StorageUsage(todaysDBStorageHistories, processingDay) + ); + + const endHour = new Date(processingDay).setDate(processingDay.getDate() + 1); + for ( + let processingHour = new Date(processingDay); + processingHour < endHour; + processingHour.setHours(processingHour.getHours() + 1) + ) { + generateHourlyData(processingHour); + } +} + +/** + * Create hourly data and kickoff per-minute data generation + */ +function generateHourlyData(processingHour) { + warehouseMeteringHistories.push( + new models.WarehouseMeteringHistory(processingHour) + ); + if (models.LoadHistory.rollDice()) { + const tableID = helpers.randomFromArray(tables).ID; + loadHistories.push(new models.LoadHistory(tableID, processingHour)); + } + loginHistories.push(new models.LoginHistory(processingHour)); + + const endMinute = new Date(processingHour).setHours( + processingHour.getHours() + 1 + ); + for ( + let processingMinute = new Date(processingHour); + processingMinute < endMinute; + processingMinute.setMinutes(processingMinute.getMinutes() + 1) + ) { + generatePerMinuteData(processingMinute, queryHistories); + } +} + +function generatePerMinuteData(processingMinute, queryHistories) { + if (models.QueryHistory.rollDice()) { + const dbName = helpers.randomFromArray(dbs).DATABASE_NAME; + queryHistories.push(new models.QueryHistory(dbName, processingMinute)); + } +} + +async function writeCSVFiles() { + const filewriter = new Writer(); + for (const db of dbs) { + await filewriter.write("./out/DATABASES.csv", db); + } + for (const table of tables) { + await filewriter.write("./out/TABLES.csv", table); + } + for (const loadHistory of loadHistories) { + await filewriter.write("./out/LOAD_HISTORY.csv", loadHistory); + } + for (const loginHistory of loginHistories) { + await filewriter.write("./out/LOGIN_HISTORY.csv", loginHistory); + } + for (const queryHistory of queryHistories) { + await filewriter.write("./out/QUERY_HISTORY.csv", queryHistory); + } + for (const storageUsage of storageUsages) { + await filewriter.write("./out/STORAGE_USAGE.csv", storageUsage); + } + for (const dbStorageUsageHistory of dbStorageUsageHistories) { + await filewriter.write( + "./out/DATABASE_STORAGE_USAGE_HISTORY.csv", + dbStorageUsageHistory + ); + } + for (const warehouseMeteringHistory of warehouseMeteringHistories) { + await filewriter.write( + "./out/WAREHOUSE_METERING_HISTORY.csv", + warehouseMeteringHistory + ); + } +} + +async function deleteLocalFiles() { + const directory = path.join(__dirname, "./out"); + const files = fs.readdirSync(directory); + for (const file of files) { + const fullPath = path.join(directory, file); + console.log("deleting: " + fullPath); + fs.unlinkSync(fullPath); + } +} + +async function uploadSessionDataSnowflake(snowflakeService) { + const BUCKET_NAME = "looker-applications-demo-loading-data"; + const STAGE_NAME = "lookerApplicationsDemoStage"; + await snowflakeService.createStage(STAGE_NAME, BUCKET_NAME); + const s3service = new s3(); + + const csvFiles = fs.readdirSync("./out"); + for (const csvFile of csvFiles) { + console.log("uploading to s3: " + csvFile); + const tablename = csvFile.split(".")[0]; + await s3service.putFile(`./out/${csvFile}`, BUCKET_NAME, csvFile); + await snowflakeService.copyInto(tablename, STAGE_NAME, csvFile); + } +} + +async function createSnowflakeTables(snowflakeService, tables) { + for (const tablename of tables) { + const columns = models.TABLE_MAP[tablename].snowflakeColumns(); + const resp = await snowflakeService.createTable(tablename, columns); + console.log(resp); + } +} + +async function deleteSnowflakeTables(snowflakeService, tables) { + for (const tablename of tables) { + const resp = await snowflakeService.dropTable(tablename); + console.log(resp); + } +} + +async function generate(daysAgoToEnd, daysToGenerate) { + try { + const endDate = new Date(); + endDate.setHours(0, 0, 0, 0); + endDate.setDate(endDate.getDate() - daysAgoToEnd); + + await deleteLocalFiles(); + + await generateData(endDate, daysToGenerate); + + const snowflakeService = new Snowflake(); + await snowflakeService.doAllGrants("data_apps_load_role"); + await snowflakeService.createSchema("account_usage"); + await deleteSnowflakeTables(snowflakeService, models.TABLE_NAMES); + await createSnowflakeTables(snowflakeService, models.TABLE_NAMES); + await snowflakeService.doAllGrants("looker_role"); + await uploadSessionDataSnowflake(snowflakeService); + } catch (e) { + console.log(e); + throw e; + } +} + +generate(0, 360).catch(e => console.log(e));