From 4d449c075944a9d98c6172897bca65fac27bb35b Mon Sep 17 00:00:00 2001
From: ecole
Date: Mon, 15 Feb 2021 20:10:28 +0700
Subject: [PATCH] added azure terraform deployment module

Updated as per pull request comments

fixed dns prefix
---
 assets/azure/az.user_data.sh           | 199 +++++++++++++++++++++++++
 packer/README.md                       |   3 +
 packer/prestoclients.json              |  19 +++
 packer/prestoclients/update-machine.sh |   2 +-
 terraform-azure/README.md              |  88 +++++++++++
 terraform-azure/clients-lb.tf          |  97 ++++++++++++
 terraform-azure/clients.tf             | 113 ++++++++++++++
 terraform-azure/coordinator-lb.tf      |  73 +++++++++
 terraform-azure/coordinator.tf         |  87 +++++++++++
 terraform-azure/images.tf              |  11 ++
 terraform-azure/main.tf                |  32 ++++
 terraform-azure/outputs.tf             |  33 ++++
 terraform-azure/variables.tf           | 125 ++++++++++++++++
 terraform-azure/workers-spot.tf        |  71 +++++++++
 terraform-azure/workers.tf             |  84 +++++++++++
 15 files changed, 1036 insertions(+), 1 deletion(-)
 create mode 100644 assets/azure/az.user_data.sh
 create mode 100644 terraform-azure/README.md
 create mode 100644 terraform-azure/clients-lb.tf
 create mode 100644 terraform-azure/clients.tf
 create mode 100644 terraform-azure/coordinator-lb.tf
 create mode 100644 terraform-azure/coordinator.tf
 create mode 100644 terraform-azure/images.tf
 create mode 100644 terraform-azure/main.tf
 create mode 100644 terraform-azure/outputs.tf
 create mode 100644 terraform-azure/variables.tf
 create mode 100644 terraform-azure/workers-spot.tf
 create mode 100644 terraform-azure/workers.tf

diff --git a/assets/azure/az.user_data.sh b/assets/azure/az.user_data.sh
new file mode 100644
index 0000000..6624744
--- /dev/null
+++ b/assets/azure/az.user_data.sh
@@ -0,0 +1,199 @@
+#!/usr/bin/env bash
+set -ex
+
+exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
+
+function setup_hive_metastore {
+
+  echo "Setting up the Hive Metastore..."
+
+  # Mount persistent storage and apply the Hive Metastore schema if needed
+  DEVICE_NAME="/dev/disk/azure/scsi1/lun0"
+  MOUNT_PATH=/var/lib/mysql
+
+  sudo mv $MOUNT_PATH /tmp/mysql.backup
+  sudo mkdir -p $MOUNT_PATH
+
+  if sudo mount -o defaults -t ext4 "$DEVICE_NAME" $MOUNT_PATH; then
+    echo 'Successfully mounted existing disk'
+  else
+    echo 'Trying to mount a fresh disk'
+    sudo mkfs.ext4 -m 0 -F -E lazy_itable_init=0,lazy_journal_init=0,discard "$DEVICE_NAME"
+    sudo mount -o defaults -t ext4 "$DEVICE_NAME" $MOUNT_PATH && echo 'Successfully mounted a fresh disk'
+    sudo cp -ar /tmp/mysql.backup/* $MOUNT_PATH/
+  fi
+
+  sudo chown mysql:mysql -R $MOUNT_PATH
+  sudo chmod 700 $MOUNT_PATH
+
+  service mysql start
+  systemctl enable mysql
+
+  . /etc/environment
+  export HADOOP_HOME=$HADOOP_HOME
+
+  if ! "$HIVE_HOME"/bin/schematool -validate -dbType mysql; then
+    echo "MySQL schema is not valid"
+    "$HIVE_HOME"/bin/schematool -dbType mysql -initSchema
+  fi
+
+  echo "Initializing Hive Metastore ($HIVE_HOME)..."
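+
+  # Start the metastore only after MySQL is up and the schema has been
+  # validated (or freshly initialized on an empty disk) above, since
+  # hive-metastore needs a reachable, valid schema at boot.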
+ service hive-metastore start + systemctl enable hive-metastore +} + + + +cat <<'EOF' >/etc/security/limits.d/100-presto-nofile.conf +presto soft nofile 16384 +presto hard nofile 16384 +EOF + +/usr/bin/printf " +node.environment=${environment_name} +node.id=$(hostname) +node.data-dir=/var/lib/presto/ +" > /etc/presto/node.properties + +/usr/bin/printf "-server +-Xmx${heap_size}G +-XX:-UseBiasedLocking +-XX:+UseG1GC +-XX:G1HeapRegionSize=32M +-XX:+ExplicitGCInvokesConcurrent +-XX:+HeapDumpOnOutOfMemoryError +-XX:+ExitOnOutOfMemoryError +-XX:+UseGCOverheadLimit +-XX:ReservedCodeCacheSize=512M +-Djdk.attach.allowAttachSelf=true +-Djdk.nio.maxCachedBufferSize=2000000 +-Duser.timezone=UTC +" > /etc/presto/jvm.config + + +# +# Configure as COORDINATOR +# +if [[ "${mode_presto}" == "coordinator" ]]; then + echo "Configuring node as a [${mode_presto}]..." + + /usr/bin/printf " +# +# coordinator +# +coordinator=true +discovery-server.enabled=true +discovery.uri=http://localhost:${http_port} +node-scheduler.include-coordinator=false + +http-server.http.port=${http_port} +# query.max-memory-per-node has to be <= query.max-total-memory-per-node +#query.max-memory-per-node=${query_max_memory_per_node}GB +#query.max-total-memory-per-node=${query_max_total_memory_per_node}GB +query.max-memory=${query_max_memory}GB +# query.max-total-memory defaults to query.max-memory * 2 so we are good +${extra_worker_configs} +" > /etc/presto/config.properties + + setup_hive_metastore +fi + + + +# +# Configure as WORKER +# +if [[ "${mode_presto}" == "worker" ]]; then + echo "Configuring node as a [${mode_presto}]..." + + /usr/bin/printf " +# +# worker +# +coordinator=false +discovery.uri=http://${address_presto_coordinator}:${http_port} +node-scheduler.include-coordinator=false + +http-server.http.port=${http_port} +# query.max-memory-per-node has to be <= query.max-total-memory-per-node +#query.max-memory-per-node=${query_max_memory_per_node}GB +#query.max-total-memory-per-node=${query_max_total_memory_per_node}GB +query.max-memory=${query_max_memory}GB +# query.max-total-memory defaults to query.max-memory * 2 so we are good +${extra_worker_configs} +" > /etc/presto/config.properties +fi + +# +# Configure as BOTH coordinator and worker +# +if [[ "${mode_presto}" == "coordinator-worker" ]]; then + echo "Configuring node as a [${mode_presto}]..." + + /usr/bin/printf " +# +# coordinator-worker +# +coordinator=true +discovery-server.enabled=true +discovery.uri=http://localhost:${http_port} +node-scheduler.include-coordinator=true + +http-server.http.port=${http_port} +# query.max-memory-per-node has to be <= query.max-total-memory-per-node +#query.max-memory-per-node=${query_max_memory_per_node}GB +#query.max-total-memory-per-node=${query_max_total_memory_per_node}GB +query.max-memory=${query_max_memory}GB +# query.max-total-memory defaults to query.max-memory * 2 so we are good +${extra_worker_configs} +" > /etc/presto/config.properties + + setup_hive_metastore +fi + +if [[ "${mode_presto}" == "worker" ]]; then + echo "Waiting for Presto Coordinator to come online at: http://${address_presto_coordinator}:${http_port}" + while ! nc -z ${address_presto_coordinator} ${http_port}; do + sleep 5 + done +fi + + +AZURE_ACCOUNT="${az_account_name}" +AZURE_KEY="${az_access_key}" + +if [ ! -z "$AZURE_ACCOUNT" ] && [ ! 
-z "$AZURE_KEY" ]; then
+  # Update hive-site.xml
+  /usr/bin/printf "
+  <property>
+    <name>fs.azure.account.key.$AZURE_ACCOUNT.blob.core.windows.net</name>
+    <value>$AZURE_KEY</value>
+  </property>
+  " > /tmp/hive-site-partial.txt
+  sudo sed -i "s/<\/configuration>/$(sed 's@[/\&]@\\&@g;$!s/$/\\/' /tmp/hive-site-partial.txt)<\/configuration>/g" /usr/local/apache-hive-*-bin/conf/hive-site.xml
+  rm /tmp/hive-site-partial.txt
+
+  # Update hive.properties
+  /usr/bin/printf "\nhive.allow-drop-table=true" >> /etc/presto/catalog/hive.properties
+  /usr/bin/printf "\nhive.non-managed-table-writes-enabled=true" >> /etc/presto/catalog/hive.properties
+  /usr/bin/printf "\n#hive.time-zone=UTC" >> /etc/presto/catalog/hive.properties
+  /usr/bin/printf "\nhive.azure.wasb-storage-account=$AZURE_ACCOUNT" >> /etc/presto/catalog/hive.properties
+  /usr/bin/printf "\nhive.azure.wasb-access-key=$AZURE_KEY" >> /etc/presto/catalog/hive.properties
+  /usr/bin/printf "\n" >> /etc/presto/catalog/hive.properties
+fi
+
+
+echo "Starting presto..."
+systemctl enable presto.service
+systemctl start presto.service
+
+if [[ "${mode_presto}" == "coordinator" ]] || [[ "${mode_presto}" == "coordinator-worker" ]]; then
+  echo "Waiting for Presto Coordinator to start"
+  while ! presto --execute='select * from system.runtime.nodes'; do
+    sleep 10
+  done
+  echo "Presto Coordinator is now online"
+fi
diff --git a/packer/README.md b/packer/README.md
index 8855c7c..f412201 100644
--- a/packer/README.md
+++ b/packer/README.md
@@ -94,6 +94,9 @@ Building the AMIs is done using the following commands:
 
 ```bash
 packer build -only=amazon-ebs -var-file=variables.json presto.json
+
+packer build -only=azure-arm -var-file=variables.json presto.json
+
 ```
 
 Override the aws_region and aws_az variables to change the target region and
diff --git a/packer/prestoclients.json b/packer/prestoclients.json
index 463dcb8..edcad4a 100644
--- a/packer/prestoclients.json
+++ b/packer/prestoclients.json
@@ -36,6 +36,25 @@
     },
     "spot_price_auto_product": "Linux/UNIX (Amazon VPC)",
     "spot_price": "auto"
+  },
+  {
+    "type": "azure-arm",
+
+    "client_id": "{{user `azure_client_id`}}",
+    "client_secret": "{{user `azure_client_secret`}}",
+    "tenant_id": "{{user `azure_tenant_id`}}",
+    "subscription_id": "{{user `azure_subscription_id`}}",
+
+    "managed_image_resource_group_name": "{{user `azure_resource_group_name`}}",
+    "managed_image_name": "prestoclients-{{isotime \"2006-01-02T030405\"}}",
+
+    "os_type": "Linux",
+    "image_publisher": "Canonical",
+    "image_offer": "UbuntuServer",
+    "image_sku": "18.04-LTS",
+
+    "location": "{{user `azure_location`}}",
+    "vm_size": "Standard_DS2_v2"
   }
 ],
 "provisioners": [
diff --git a/packer/prestoclients/update-machine.sh b/packer/prestoclients/update-machine.sh
index 8e88289..0340345 100644
--- a/packer/prestoclients/update-machine.sh
+++ b/packer/prestoclients/update-machine.sh
@@ -26,7 +26,7 @@ cd /opt/certs
 openssl genrsa -des3 -passout pass:xxxx -out keypair 2048
 openssl rsa -passin pass:xxxx -in keypair -out server.key
 rm keypair
-touch /home/ubuntu/.rnd
+touch ~/.rnd
 openssl req -new -key server.key -out server.csr -subj "/CN=*"
 openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt
 rm server.csr
diff --git a/terraform-azure/README.md b/terraform-azure/README.md
new file mode 100644
index 0000000..a66abac
--- /dev/null
+++ b/terraform-azure/README.md
@@ -0,0 +1,88 @@
+# Azure deployment
+
+## Create the machine images with Packer
+
+Go to the `packer` folder and follow the README there. Once the machine images have been built, return here and continue with the next steps.
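+
+As a quick sanity check before continuing, you can list the built images with the Azure CLI (a sketch; `packer-presto-images` is the resource group `images.tf` looks the images up in):
+
+```bash
+az image list --resource-group packer-presto-images --query "[].name" --output table
+```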
+
+## Create a key-pair or use your own
+
+This deployment is configured to use your default SSH keys as machine credentials. If you want to use other keys, change the path to the keys you want to use (look for `key_path` in variables.tf). Use [this guide](https://help.github.com/articles/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent/) to generate new keys if needed.
+
+## Configurations
+
+Edit `variables.tf` or create a separate `terraform.tfvars` file to specify the following:
+
+* `azure_location` - the Azure location to launch the cluster in.
+* `azure_subscription_id`, `azure_client_id`, `azure_client_secret`, `azure_tenant_id` - the same credentials used in the Packer step. See the README there for instructions on how to retrieve them.
+* `presto_cluster` - the name of the Presto cluster to launch.
+* `key_path` - the filesystem path to the SSH public key to use as the virtual machines' login credential.
+* `coordinator_instance_type`, `worker_instance_type`, `client_instance_type` - the Azure instance types to use for each machine role in the cluster.
+
+The rest of the configurations mostly concern cluster topology and machine types and sizes.
+
+### Cluster topology
+
+Two modes of deployment are supported:
+
+* The recommended configuration, with a dedicated coordinator node plus worker and client nodes. This is a production-ready, best-practice configuration.
+* Coordinator+worker mode - a single node acting as both coordinator and worker, mostly useful for experimentation.
+
+The default is the single-node coordinator+worker mode. To switch to the recommended configuration, edit `variables.tf` and set the number of worker and client nodes to at least 1.
+
+Nodes with the `coordinator` or `client` role are attached to an Azure load balancer, so all client nodes can be reached via the DNS name it exposes.
+
+## Launch the cluster with Terraform
+
+```bash
+terraform init
+terraform plan
+terraform apply
+```
+
+When terraform is done, you should see a lot of output ending with something like this:
+
+```
+Apply complete! Resources: 14 added, 0 changed, 0 destroyed.
+
+The state of your infrastructure has been saved to the path
+below. This state is required to modify and destroy your
+infrastructure, so keep it safe. To inspect the complete state
+use the `terraform show` command.
+
+State path: terraform.tfstate
+
+Outputs:
+
+public_dns = presto-cluster-foo.eastus.cloudapp.azure.com
+vm_password = rBTKoLsf7x8ODZVd
+```
+
+Note `public_dns` and `vm_password` - that's your entry point to the cluster and the login password for the default `ubuntu` user on the virtual machines.
+
+### Look around
+
+The client nodes are the ones exposed to external networks. They provide endpoints for Redash, Superset, Zeppelin and direct Presto access. By default, client nodes are accessible via their public IPs and the DNS name of the load balancer they are attached to (see above).
+
+Client nodes listen on port 8080 and are password protected. Access is managed by nginx, which expects a username and password pair. The default username is exampleuser and the password is generated automatically when deploying. You can change those defaults by editing [this file](https://github.com/synhershko/presto-cloud-deploy/blob/master/packer/install-nginx.sh) and running Packer again.
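+
+For example, to read the generated credentials back after the apply (output names as defined in `outputs.tf`):
+
+```bash
+terraform output vm_password       # VM admin password for the ubuntu user
+terraform output clients_password  # nginx password for the exampleuser user
+```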
+On client nodes you will find:
+
+* [Redash](https://redash.io) (data query / visualisation tool) is available on http://host:8500
+* [Superset](https://superset.apache.org/) (data exploration platform) is available on http://host:8600
+* [Zeppelin](https://zeppelin.apache.org/) (web-based data notebook) is available on http://host:8700
+
+The default username is `admin` or `admin@redash`, and the password is the one generated by Terraform during the deployment (it shows up as `clients_password` when you run `terraform output`).
+
+To ssh to one of the instances:
+
+```bash
+ssh ubuntu@{public IP / DNS of the instance or load balancer}
+```
+
+## Auto- and manual scale-out
+
+The entire stack is deployed using Azure scale sets, which are easy to scale up and down manually (from the Azure portal, from the command line, or using the same Terraform scripts), or automatically based on host and application metrics using [Azure scale-set features](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-autoscale-overview).
diff --git a/terraform-azure/clients-lb.tf b/terraform-azure/clients-lb.tf
new file mode 100644
index 0000000..d62e9f7
--- /dev/null
+++ b/terraform-azure/clients-lb.tf
@@ -0,0 +1,97 @@
+locals {
+  presto_clients_address                 = var.public_facing ? azurerm_public_ip.clients[0].ip_address : azurerm_lb.clients.private_ip_address
+  clients_private_ip                     = var.public_facing ? [] : [0]
+  clients_frontend_ip_configuration_name = var.public_facing ? "presto-${var.presto_cluster}-clients-public-ip" : "presto-${var.presto_cluster}-clients-private-ip"
+}
+
+resource "azurerm_public_ip" "clients" {
+  count = var.public_facing ? 1 : 0
+
+  name                = "presto-${var.presto_cluster}-clients-public-ip"
+  location            = var.azure_location
+  resource_group_name = azurerm_resource_group.presto.name
+  domain_name_label   = "presto-${var.presto_cluster}-clients"
+  allocation_method   = "Static"
+}
+
+resource "azurerm_lb" "clients" {
+  name                = "presto-${var.presto_cluster}-clients-lb"
+  location            = var.azure_location
+  resource_group_name = azurerm_resource_group.presto.name
+
+  dynamic "frontend_ip_configuration" {
+    for_each = azurerm_public_ip.clients
+    content {
+      name                 = "presto-${var.presto_cluster}-clients-public-ip"
+      public_ip_address_id = frontend_ip_configuration.value.id
+    }
+  }
+
+  dynamic "frontend_ip_configuration" {
+    for_each = local.clients_private_ip
+    content {
+      name                          = "presto-${var.presto_cluster}-clients-private-ip"
+      subnet_id                     = azurerm_subnet.presto_subnet.id
+      private_ip_address_allocation = "Dynamic"
+      public_ip_address_id          = null
+    }
+  }
+}
+
+resource "azurerm_lb_backend_address_pool" "clients-lb-backend" {
+  name                = "presto-${var.presto_cluster}-clients-lb-backend"
+  resource_group_name = azurerm_resource_group.presto.name
+  loadbalancer_id     = azurerm_lb.clients.id
+}
+
+resource "azurerm_lb_probe" "clients-httpprobe" {
+  name                = "presto-${var.presto_cluster}-clients-lb-probe"
+  port                = 8080
+  protocol            = "Http"
+  request_path        = "/status"
+  resource_group_name = azurerm_resource_group.presto.name
+  loadbalancer_id     = azurerm_lb.clients.id
+}
+
+resource "azurerm_lb_rule" "clients-lb-rule-redash" {
+  name          = "presto-${var.presto_cluster}-clients-lb-rule-redash"
+  backend_port  = 8500
+  frontend_port = 8500
+
+  frontend_ip_configuration_name = local.clients_frontend_ip_configuration_name
+  backend_address_pool_id        = azurerm_lb_backend_address_pool.clients-lb-backend.id
+  protocol                       = "Tcp"
+  loadbalancer_id                = azurerm_lb.clients.id
+  resource_group_name            = azurerm_resource_group.presto.name
+}
+
+resource "azurerm_lb_rule" "clients-lb-rule-superset" {
+  name          = "presto-${var.presto_cluster}-clients-lb-rule-superset"
+  backend_port  = 8600
+  frontend_port = 8600
+
+  frontend_ip_configuration_name = local.clients_frontend_ip_configuration_name
+  backend_address_pool_id        = azurerm_lb_backend_address_pool.clients-lb-backend.id
+  protocol                       = "Tcp"
+  loadbalancer_id                = azurerm_lb.clients.id
+  resource_group_name            = azurerm_resource_group.presto.name
+}
+
+resource "azurerm_lb_rule" "clients-lb-rule-zeppelin" {
+  name          = "presto-${var.presto_cluster}-clients-lb-rule-zeppelin"
+  backend_port  = 8700
+  frontend_port = 8700
+
+  frontend_ip_configuration_name = local.clients_frontend_ip_configuration_name
+  backend_address_pool_id        = azurerm_lb_backend_address_pool.clients-lb-backend.id
+  protocol                       = "Tcp"
+  loadbalancer_id                = azurerm_lb.clients.id
+  resource_group_name            = azurerm_resource_group.presto.name
+}
diff --git a/terraform-azure/clients.tf b/terraform-azure/clients.tf
new file mode 100644
index 0000000..482eae6
--- /dev/null
+++ b/terraform-azure/clients.tf
@@ -0,0 +1,113 @@
+data "template_file" "client-userdata-script" {
+  count    = var.count_clients != "0" ? 1 : 0
+  template = file("${path.module}/../assets/client_user_data.sh")
+
+  vars = {
+    presto_coordinator_host = azurerm_lb.coordinator.private_ip_address
+    coordinator_port        = var.http_port
+    admin_password          = var.count_clients != "0" ? 
random_string.clients-admin-password[0].result : "" + cert_pem = tls_self_signed_cert.presto-clients-cert.cert_pem + key_pem = tls_private_key.presto-clients-private-key.private_key_pem + } +} + +resource "random_string" "clients-admin-password" { + count = var.count_clients != "0" ? 1 : 0 + length = 16 + special = false +} + +resource "tls_private_key" "presto-clients-private-key" { + algorithm = "ECDSA" + ecdsa_curve = "P384" +} + +resource "tls_self_signed_cert" "presto-clients-cert" { + key_algorithm = "ECDSA" + private_key_pem = tls_private_key.presto-clients-private-key.private_key_pem + + subject { + common_name = "*" + } + + validity_period_hours = 48 + + allowed_uses = [ + "key_encipherment", + "digital_signature", + "server_auth", + ] +} + + +resource "azurerm_virtual_machine_scale_set" "clients" { + + name = "presto-${var.presto_cluster}-clients" + resource_group_name = azurerm_resource_group.presto.name + location = var.azure_location + upgrade_policy_mode = "Manual" + overprovision = false + + + sku { + name = var.client_instance_type + tier = "Standard" + capacity = var.count_clients + } + + + os_profile { + computer_name_prefix = "${var.presto_cluster}-client" + admin_username = "ubuntu" + admin_password = random_string.vm-login-password.result + custom_data = data.template_file.client-userdata-script[0].rendered + } + + network_profile { + name = "presto-${var.presto_cluster}-net-profile" + primary = true + + + ip_configuration { + name = "presto-${var.presto_cluster}-client-ip-profile" + subnet_id = azurerm_subnet.presto_subnet.id + primary = true + load_balancer_backend_address_pool_ids = [ + azurerm_lb_backend_address_pool.clients-lb-backend.id + ] + + public_ip_address_configuration { + domain_name_label = "presto-${var.presto_cluster}-client" + idle_timeout = 4 + name = "presto-${var.presto_cluster}-client" + } + } + } + + + storage_profile_image_reference { + id = data.azurerm_image.presto-client.id + } + + storage_profile_os_disk { + caching = "ReadWrite" + create_option = "FromImage" + managed_disk_type = "Standard_LRS" + } + + os_profile_linux_config { + disable_password_authentication = true + ssh_keys { + path = "/home/ubuntu/.ssh/authorized_keys" + key_data = file(var.key_path) + } + } + + storage_profile_data_disk { + lun = 0 + caching = "ReadWrite" + create_option = "Empty" + disk_size_gb = var.presto_coordinator_volume_size + managed_disk_type = "Standard_LRS" + } +} \ No newline at end of file diff --git a/terraform-azure/coordinator-lb.tf b/terraform-azure/coordinator-lb.tf new file mode 100644 index 0000000..357c09f --- /dev/null +++ b/terraform-azure/coordinator-lb.tf @@ -0,0 +1,73 @@ +locals { + presto_coordinator_address = var.public_facing ? azurerm_public_ip.coordinator[0].ip_address : azurerm_lb.coordinator.private_ip_address + coordinator_private_ip = var.public_facing ? [] : [0] + coordinator_frontend_ip_configuration_name = var.public_facing ? "presto-${var.presto_cluster}-public-ip" : "presto-${var.presto_cluster}-private-ip" + +} + + +resource "azurerm_public_ip" "coordinator" { + count = var.public_facing ? 
1 : 0

+  name                = "presto-${var.presto_cluster}-coordinator-public-ip"
+  location            = var.azure_location
+  resource_group_name = azurerm_resource_group.presto.name
+  domain_name_label   = "presto-${var.presto_cluster}-coordinator"
+  allocation_method   = "Static"
+}
+
+resource "azurerm_lb" "coordinator" {
+  name                = "presto-${var.presto_cluster}-coordinator-lb"
+  location            = var.azure_location
+  resource_group_name = azurerm_resource_group.presto.name
+
+  dynamic "frontend_ip_configuration" {
+    for_each = azurerm_public_ip.coordinator
+    content {
+      name                 = "presto-${var.presto_cluster}-public-ip"
+      public_ip_address_id = frontend_ip_configuration.value.id
+    }
+  }
+
+  dynamic "frontend_ip_configuration" {
+    for_each = local.coordinator_private_ip
+    content {
+      name                          = "presto-${var.presto_cluster}-private-ip"
+      subnet_id                     = azurerm_subnet.presto_subnet.id
+      private_ip_address_allocation = "Dynamic"
+      public_ip_address_id          = null
+    }
+  }
+}
+
+resource "azurerm_lb_backend_address_pool" "coordinator-lb-backend" {
+  name                = "presto-${var.presto_cluster}-coordinator-lb-backend"
+  resource_group_name = azurerm_resource_group.presto.name
+  loadbalancer_id     = azurerm_lb.coordinator.id
+}
+
+resource "azurerm_lb_probe" "coordinator-httpprobe" {
+  name                = "presto-${var.presto_cluster}-coordinator-lb-probe"
+  port                = 8080
+  protocol            = "Http"
+  request_path        = "/status"
+  resource_group_name = azurerm_resource_group.presto.name
+  loadbalancer_id     = azurerm_lb.coordinator.id
+}
+
+resource "azurerm_lb_rule" "coordinator-lb-rule" {
+  name          = "presto-${var.presto_cluster}-coordinator-lb-rule"
+  backend_port  = 8080
+  frontend_port = 8080
+
+  frontend_ip_configuration_name = local.coordinator_frontend_ip_configuration_name
+  backend_address_pool_id        = azurerm_lb_backend_address_pool.coordinator-lb-backend.id
+  protocol                       = "Tcp"
+  loadbalancer_id                = azurerm_lb.coordinator.id
+  resource_group_name            = azurerm_resource_group.presto.name
+}
diff --git a/terraform-azure/coordinator.tf b/terraform-azure/coordinator.tf
new file mode 100644
index 0000000..a8c8186
--- /dev/null
+++ b/terraform-azure/coordinator.tf
@@ -0,0 +1,87 @@
+data "template_file" "coordinator-userdata-script" {
+  template = templatefile("${path.module}/../assets/azure/az.user_data.sh", {
+    cloud_provider                  = "azure"
+    environment_name                = var.environment
+    http_port                       = var.http_port
+    mode_presto                     = var.count_workers == "0" && var.count_workers_spot == "0" ? "coordinator-worker" : "coordinator"
+    heap_size                       = var.coordinator_heap_size
+    query_max_memory_per_node       = ceil(var.worker_heap_size * 0.4)
+    query_max_total_memory_per_node = ceil(var.worker_heap_size * 0.6)
+    query_max_memory                = var.query_max_memory
+    az_account_name                 = var.azure_client_id == null ? "" : var.azure_client_id
+    az_access_key                   = var.azure_client_secret == null ? 
"" : var.azure_client_secret + address_presto_coordinator = local.presto_coordinator_address + extra_worker_configs = var.extra_worker_configs + + }) +} + +resource "azurerm_virtual_machine_scale_set" "coordinator" { + + name = "presto-${var.presto_cluster}-coordinator" + resource_group_name = azurerm_resource_group.presto.name + location = var.azure_location + sku { + name = var.coordinator_instance_type + tier = "Standard" + capacity = "1" + } + upgrade_policy_mode = "Manual" + overprovision = false + + os_profile { + computer_name_prefix = "${var.presto_cluster}-coordinator" + admin_username = "ubuntu" + admin_password = random_string.vm-login-password.result + custom_data = data.template_file.coordinator-userdata-script.rendered + } + + network_profile { + name = "presto-${var.presto_cluster}-net-profile" + primary = true + accelerated_networking = true + + ip_configuration { + name = "presto-${var.presto_cluster}-coordinator-ip-profile" + subnet_id = azurerm_subnet.presto_subnet.id + primary = true + load_balancer_backend_address_pool_ids = [ + azurerm_lb_backend_address_pool.coordinator-lb-backend.id + ] + + public_ip_address_configuration { + domain_name_label = "presto-${var.presto_cluster}-coordinator-vm" + idle_timeout = 4 + name = "presto-${var.presto_cluster}-coordinator-vm" + } + } + } + + + storage_profile_image_reference { + id = data.azurerm_image.presto.id + } + + storage_profile_os_disk { + caching = "ReadWrite" + create_option = "FromImage" + managed_disk_type = "Standard_LRS" + } + + os_profile_linux_config { + disable_password_authentication = true + ssh_keys { + path = "/home/ubuntu/.ssh/authorized_keys" + key_data = file(var.key_path) + } + } + + storage_profile_data_disk { + lun = 0 + caching = "ReadWrite" + create_option = "Empty" + disk_size_gb = var.presto_coordinator_volume_size + managed_disk_type = "Standard_LRS" + } +} \ No newline at end of file diff --git a/terraform-azure/images.tf b/terraform-azure/images.tf new file mode 100644 index 0000000..36edeae --- /dev/null +++ b/terraform-azure/images.tf @@ -0,0 +1,11 @@ +data "azurerm_image" "presto" { + resource_group_name = "packer-presto-images" + name_regex = "^presto-\\d{4,4}-\\d{2,2}-\\d{2,2}T\\d{6,6}" + sort_descending = true +} + +data "azurerm_image" "presto-client" { + resource_group_name = "packer-presto-images" + name_regex = "^prestoclients-\\d{4,4}-\\d{2,2}-\\d{2,2}T\\d{6,6}" + sort_descending = true +} diff --git a/terraform-azure/main.tf b/terraform-azure/main.tf new file mode 100644 index 0000000..7c05359 --- /dev/null +++ b/terraform-azure/main.tf @@ -0,0 +1,32 @@ +provider "azurerm" { + subscription_id = var.azure_subscription_id + client_id = var.azure_client_id + client_secret = var.azure_client_secret + tenant_id = var.azure_tenant_id + features {} +} + +resource "random_string" "vm-login-password" { + length = 16 + special = true + override_special = "!@#%&-_" +} + +resource "azurerm_resource_group" "presto" { + location = var.azure_location + name = "presto-cluster-${var.presto_cluster}" +} + +resource "azurerm_virtual_network" "presto_vnet" { + name = "presto-${var.presto_cluster}-vnet" + location = var.azure_location + resource_group_name = azurerm_resource_group.presto.name + address_space = ["10.1.0.0/24"] +} + +resource "azurerm_subnet" "presto_subnet" { + name = "presto-${var.presto_cluster}-subnet" + resource_group_name = azurerm_resource_group.presto.name + virtual_network_name = azurerm_virtual_network.presto_vnet.name + address_prefixes = ["10.1.0.0/24"] +} \ No newline at end 
of file
diff --git a/terraform-azure/outputs.tf b/terraform-azure/outputs.tf
new file mode 100644
index 0000000..e486467
--- /dev/null
+++ b/terraform-azure/outputs.tf
@@ -0,0 +1,33 @@
+output "presto_image_id" {
+  value = data.azurerm_image.presto.name
+}
+
+output "presto_clients_image_id" {
+  value = data.azurerm_image.presto-client.name
+}
+
+output "clients_public_dns" {
+  value = azurerm_public_ip.clients.*.fqdn
+}
+
+output "clients_public_ip_address" {
+  value = azurerm_public_ip.clients.*.ip_address
+}
+
+output "public_dns" {
+  value = azurerm_public_ip.coordinator.*.fqdn
+}
+
+output "public_ip_address" {
+  value = azurerm_public_ip.coordinator.*.ip_address
+}
+
+output "vm_password" {
+  value = random_string.vm-login-password.result
+}
+
+output "clients_password" {
+  value = join("", random_string.clients-admin-password.*.result)
+}
diff --git a/terraform-azure/variables.tf b/terraform-azure/variables.tf
new file mode 100644
index 0000000..eb7858f
--- /dev/null
+++ b/terraform-azure/variables.tf
@@ -0,0 +1,125 @@
+variable "azure_location" {
+  type    = string
+  default = "East US"
+}
+
+variable "azure_client_id" {
+  type    = string
+  default = ""
+}
+
+variable "azure_client_secret" {
+  type    = string
+  default = ""
+}
+
+variable "azure_subscription_id" {
+  type = string
+}
+
+variable "azure_tenant_id" {
+  type = string
+}
+
+variable "presto_cluster" {
+  description = "Name of the Presto cluster; used in resource naming and node discovery"
+  default     = "my-cluster"
+}
+
+variable "key_path" {
+  description = "Path to the SSH public key to install on the launched virtual machines."
+  default     = "~/.ssh/id_rsa.pub"
+}
+
+variable "environment" {
+  default = "default"
+}
+
+variable "http_port" {
+  description = "Port on which to expose the Presto UI."
+  type        = string
+  default     = "8080"
+}
+
+variable "query_max_memory" {
+  description = "Maximum amount of distributed memory a single query may consume across the cluster. Use this to ensure a single query cannot exhaust cluster resources; set it higher than typical queries are expected to need."
+  type        = number
+  default     = 500
+}
+
+variable "count_clients" {
+  description = "Number of client nodes (Redash, Superset and Zeppelin) to launch."
+  type        = string
+  default     = 1
+}
+
+variable "count_workers" {
+  description = "Number of workers to launch."
+  type        = string
+  default     = 0
+}
+
+variable "count_workers_spot" {
+  description = "Number of workers on spot instances to launch."
+  type        = string
+  default     = 0
+}
+
+variable "worker_spot_hourly_price" {
+  type    = string
+  default = "0.99"
+}
+
+variable "coordinator_instance_type" {
+  type    = string
+  default = "Standard_D12_v2"
+}
+
+variable "worker_instance_type" {
+  type    = string
+  default = "Standard_D12_v2"
+}
+
+variable "worker_spot_instance_type" {
+  type    = string
+  default = "Standard_D12_v2"
+}
+
+variable "client_instance_type" {
+  type    = string
+  default = "Standard_A2_v2"
+}
+
+variable "coordinator_heap_size" {
+  # needs to be ~80% of available machine memory
+  type    = string
+  default = 25
+}
+
+variable "presto_coordinator_volume_size" {
+  type    = string
+  default = 10
+}
+
+variable "worker_heap_size" {
+  description = "JVM heap size for workers. Recommended to set to 80% of instance memory"
+  type        = string
+  default     = 102 # 80% of available memory
+}
+
+variable "extra_worker_configs" {
+  type    = string
+  default = <<EOF
+EOF
+}
+
+variable "public_facing" {
+  description = "Whether to expose the coordinator and client load balancers via public IPs."
+  default     = true
+}
diff --git a/terraform-azure/workers-spot.tf b/terraform-azure/workers-spot.tf
new file mode 100644
--- /dev/null
+++ b/terraform-azure/workers-spot.tf
@@ -0,0 +1,71 @@
+data "template_file" "worker-spot-userdata-script" {
+  template = templatefile("${path.module}/../assets/azure/az.user_data.sh", {
+    cloud_provider                  = "azure"
+    mode_presto                     = "worker"
+    environment_name                = var.environment
+    http_port                       = var.http_port
+    address_presto_coordinator      = local.presto_coordinator_address
+    heap_size                       = var.worker_heap_size
+    az_account_name                 = var.azure_client_id == null ? "" : var.azure_client_id
+    az_access_key                   = var.azure_client_secret == null ? "" : var.azure_client_secret
+    query_max_memory_per_node       = ceil(var.worker_heap_size * 0.4)
+    query_max_total_memory_per_node = ceil(var.worker_heap_size * 0.6)
+    query_max_memory                = var.query_max_memory
+    extra_worker_configs            = var.extra_worker_configs
+  })
+}
+
+resource "azurerm_linux_virtual_machine_scale_set" "workers-spot" {
+  count = (var.count_workers_spot > 0) ? 
1 : 0 + + name = "presto-${var.presto_cluster}-spot-workers" + resource_group_name = azurerm_resource_group.presto.name + location = var.azure_location + sku = var.worker_spot_instance_type + overprovision = false + priority = "Spot" + eviction_policy = "Delete" + max_bid_price = var.worker_spot_hourly_price + source_image_id = data.azurerm_image.presto.id + admin_username = "ubuntu" + instances = var.count_workers_spot + custom_data = base64encode(data.template_file.worker-spot-userdata-script.rendered) + admin_password = random_string.vm-login-password.result + computer_name_prefix = "${var.presto_cluster}-worker-spot" + + admin_ssh_key { + username = "ubuntu" + public_key = file(var.key_path) + } + + os_disk { + caching = "ReadWrite" + storage_account_type = "Standard_LRS" + } + + network_interface { + name = "presto-${var.presto_cluster}-spot-net-profile" + primary = true + + ip_configuration { + name = "presto-${var.presto_cluster}-spot-worker-ip-profile" + subnet_id = azurerm_subnet.presto_subnet.id + primary = true + + public_ip_address { + domain_name_label = "presto-${var.presto_cluster}-spot-worker" + idle_timeout_in_minutes = 4 + name = "presto-${var.presto_cluster}-spot-worker" + } + } + } + + + data_disk { + lun = 0 + caching = "ReadWrite" + disk_size_gb = var.presto_coordinator_volume_size + storage_account_type = "Standard_LRS" + } +} \ No newline at end of file diff --git a/terraform-azure/workers.tf b/terraform-azure/workers.tf new file mode 100644 index 0000000..71474aa --- /dev/null +++ b/terraform-azure/workers.tf @@ -0,0 +1,84 @@ +data "template_file" "worker-userdata-script" { + template = templatefile("${path.module}/../assets/azure/az.user_data.sh", { + cloud_provider = "azure" + mode_presto = "worker" + environment_name = var.environment + http_port = var.http_port + address_presto_coordinator = local.presto_coordinator_address + heap_size = var.worker_heap_size + az_account_name = var.azure_client_id == null ? "" : var.azure_client_id + az_access_key = var.azure_client_secret == null ? "" : var.azure_client_secret + query_max_memory_per_node = ceil(var.worker_heap_size * 0.4) + query_max_total_memory_per_node = ceil(var.worker_heap_size * 0.6) + query_max_memory = var.query_max_memory + extra_worker_configs = var.extra_worker_configs + }) +} + + +resource "azurerm_virtual_machine_scale_set" "workers" { + + count = (var.count_workers > 0) ? 
1 : 0 + name = "presto-${var.presto_cluster}-workers" + resource_group_name = azurerm_resource_group.presto.name + location = var.azure_location + sku { + name = var.worker_instance_type + tier = "Standard" + capacity = var.count_workers + } + + upgrade_policy_mode = "Manual" + overprovision = false + + os_profile { + computer_name_prefix = "${var.presto_cluster}-worker" + admin_username = "ubuntu" + admin_password = random_string.vm-login-password.result + custom_data = data.template_file.worker-userdata-script.rendered + } + + network_profile { + name = "presto-${var.presto_cluster}-net-profile" + primary = true + accelerated_networking = true + + ip_configuration { + name = "presto-${var.presto_cluster}-worker-ip-profile" + subnet_id = azurerm_subnet.presto_subnet.id + primary = true + public_ip_address_configuration { + domain_name_label = "presto-${var.presto_cluster}-worker" + idle_timeout = 4 + name = "presto-${var.presto_cluster}-worker" + } + } + } + + + storage_profile_image_reference { + id = data.azurerm_image.presto.id + } + + storage_profile_os_disk { + caching = "ReadWrite" + create_option = "FromImage" + managed_disk_type = "Standard_LRS" + } + + os_profile_linux_config { + disable_password_authentication = true + ssh_keys { + path = "/home/ubuntu/.ssh/authorized_keys" + key_data = file(var.key_path) + } + } + + storage_profile_data_disk { + lun = 0 + caching = "ReadWrite" + create_option = "Empty" + disk_size_gb = var.presto_coordinator_volume_size + managed_disk_type = "Standard_LRS" + } +} \ No newline at end of file