From bdeb755d3d12409d1f330f094f94b0a7248d89ec Mon Sep 17 00:00:00 2001
From: Manuel Carrer
Date: Tue, 19 Nov 2024 17:17:14 +0100
Subject: [PATCH] WIP refactor 2

---
 ansible/.gitignore                            |   3 -
 ansible/{readme.md => README.md}              |  61 +++--
 ansible/bigip.yml                             |  19 +-
 ansible/configure.yml                         |  63 ++---
 ansible/deploy.yml                            |   3 -
 ansible/group_vars/servers/main.yml           |  27 ++
 ansible/migrate.yml                           |   1 +
 ansible/provision.yml                         |  14 +-
 ansible/roles/bigip/{vars => files}/bigip.sql |   0
 ansible/roles/bigip/tasks/main.yml            |  13 +
 ansible/roles/deploy/tasks/main.yml           |   4 +
 ansible/roles/ostack/defaults/main.yml        |  42 +--
 ansible/roles/ostack/tasks/create-primary.yml | 246 ++++++++++++++++++
 ansible/roles/ostack/tasks/create-standby.yml | 139 ++++++++++
 .../roles/ostack/tasks/move_floating_ip.yml   |  63 +++++
 ansible/roles/ostack/tasks/networks.yml       |  12 +
 .../tasks/networks/create-ipalias-network.yml |  42 +++
 .../tasks/networks/create-project-network.yml |  28 ++
 .../create-project-security-group.yml         |  21 ++
 .../ostack/tasks/networks/open-for-ping.yml   |  15 ++
 ansible/roles/ostack/tasks/vm_create.yml      |  92 +++++++
 ansible/roles/ostack/tasks/vm_format.yml      |   9 +
 .../tasks/vm_format/format-mount-disk.yml     |  45 ++++
 .../tasks/vm_format/install-postgres.yml      | 136 ++++++++++
 .../roles/ostack/tasks/vm_format/netplan.yml  |  57 ++++
 ansible/roles/ssh/defaults/main.yml           |   3 +
 .../ssh/tasks/{main.yml => postgres.yml}      |  24 +-
 ansible/roles/ssh/tasks/share-ssh-keys.yml    |   0
 ansible/roles/ssh/tasks/users.yml             |  10 +
 ansible/roles/switchover/defaults/main.yml    |   2 +
 ansible/roles/switchover/tasks/main.yml       |  61 ++++-
 ansible/roles/switchover/tasks/switchover.yml |  62 -----
 ansible/switchover.yml                        |  38 +--
 33 files changed, 1168 insertions(+), 187 deletions(-)
 rename ansible/{readme.md => README.md} (82%)
 create mode 100644 ansible/group_vars/servers/main.yml
 rename ansible/roles/bigip/{vars => files}/bigip.sql (100%)
 create mode 100644 ansible/roles/ostack/tasks/create-primary.yml
 create mode 100644 ansible/roles/ostack/tasks/create-standby.yml
 create mode 100644 ansible/roles/ostack/tasks/move_floating_ip.yml
 create mode 100644 ansible/roles/ostack/tasks/networks.yml
 create mode 100644 ansible/roles/ostack/tasks/networks/create-ipalias-network.yml
 create mode 100644 ansible/roles/ostack/tasks/networks/create-project-network.yml
 create mode 100644 ansible/roles/ostack/tasks/networks/create-project-security-group.yml
 create mode 100644 ansible/roles/ostack/tasks/networks/open-for-ping.yml
 create mode 100644 ansible/roles/ostack/tasks/vm_create.yml
 create mode 100644 ansible/roles/ostack/tasks/vm_format.yml
 create mode 100644 ansible/roles/ostack/tasks/vm_format/format-mount-disk.yml
 create mode 100644 ansible/roles/ostack/tasks/vm_format/install-postgres.yml
 create mode 100644 ansible/roles/ostack/tasks/vm_format/netplan.yml
 create mode 100644 ansible/roles/ssh/defaults/main.yml
 rename ansible/roles/ssh/tasks/{main.yml => postgres.yml} (71%)
 delete mode 100644 ansible/roles/ssh/tasks/share-ssh-keys.yml
 create mode 100644 ansible/roles/ssh/tasks/users.yml
 create mode 100644 ansible/roles/switchover/defaults/main.yml
 delete mode 100644 ansible/roles/switchover/tasks/switchover.yml

diff --git a/ansible/.gitignore b/ansible/.gitignore
index 89364e80..032107fb 100644
--- a/ansible/.gitignore
+++ b/ansible/.gitignore
@@ -2,6 +2,3 @@
 notes.txt
 ansible.cfg
 .yamlfmt
 .run.sh
-
-roles/deploy/files/resources
-roles/deploy/files/lard_ingestion
diff --git a/ansible/readme.md b/ansible/README.md
similarity index 82%
rename from ansible/readme.md
rename to ansible/README.md
index 4ca809da..066f34fe 100644
--- a/ansible/readme.md
+++ b/ansible/README.md
@@ -22,7 +22,7 @@ ansible-galaxy collection install -fr requirements.yml
 You need to create application credentials in the project you are going to
 create the instances in, so that the ansible scripts can connect to the right
-ostack_cloud which in our case needs to be called lard.
+`ostack_cloud` which in our case needs to be called lard.
 
 The file should exist in `~/.config/openstack/clouds.yml`. If have MET access
 see what is written at the start of the readme
 [here](https://gitlab.met.no/it/infra/ostack-ansible21x-examples)
@@ -34,32 +34,55 @@ Go to "Compute" then "Key Pairs" and import your public key for use in the provi
 
 ### Provision!
 
-The IPs in `inventory.yml` should correspond to floating ips you have requested
-in the network section of the open stack GUI. If you need to delete the old VMs
-(compute -> instances) and Volumes (volumes -> volumes) you can do so in the
-ostack GUI.
+The IPs associated with the hosts in `inventory.yml` should correspond to
+floating IPs you have requested in the network section of the OpenStack GUI.
+If you need to delete the old VMs (compute -> instances) and Volumes (volumes
+-> volumes) you can do so in the ostack GUI.
 
-> \[!CAUTION\] For some reason when deleting things to build up again one of the IPs
-> did not get disassociated properly, and I had to do this manually (network ->
-> floating IPs).
+> \[!CAUTION\] When deleting things to build up again, if for some reason one of the IPs
+> does not get disassociated properly, you have to do it manually from the GUI (network -> floating IPs).
 
-The vars for the network and addssh tasks are encrypted with ansible-vault
-(ansible-vault decrypt roles/networks/vars/main.yml, ansible-vault decrypt
-roles/addshhkeys/vars/main.yml, ansible-vault decrypt
-roles/vm_format/vars/main.yml). But if this has been setup before in the ostack
-project, these have likely already been run and therefore already exits so you
-could comment out this role from provision.yml. Passwords are in [ci_cd variables](https://gitlab.met.no/met/obsklim/bakkeobservasjoner/lagring-og-distribusjon/db-products/poda/-/settings/ci_cd).
+The vars for the `networks`, `addsshkeys`, and `vm_format` roles are encrypted
+with ansible-vault:
 
 ```terminal
-ansible-playbook -i inventory.yml -e ostack_key_name=xxx provision.yml
+ansible-vault decrypt roles/networks/vars/main.yml
+ansible-vault decrypt roles/addsshkeys/vars/main.yml
+ansible-vault decrypt roles/vm_format/vars/main.yml
+```
+
+But if this has been set up before in the ostack project, these roles have
+likely already been run and the resources already exist, so you could comment
+them out of `provision.yml`.
+Passwords are in [ci_cd variables](https://gitlab.met.no/met/obsklim/bakkeobservasjoner/lagring-og-distribusjon/db-products/poda/-/settings/ci_cd).
+
+```terminal
+ansible-playbook -i inventory.yml -e ostack_vm_key_name=xxx provision.yml
 ```
 
 After provisioning the next steps may need to ssh into the hosts, and thus you
 need to add them to your known hosts.
-Ansible appears to be crap at this, so its best to do it before running the next step by going:
-`ssh ubuntu@157.249.*.*`
-For all the VMs.
+Ansible appears to be crap at this, so it's best to do it before running the next step.
+First of all, it might be helpful to create host aliases and add them to your `~/.ssh/config` file,
+so you don't have to remember the IPs by heart.
+An example host alias looks like the following:
+
+```ssh
+Host lard-a
+  HostName 157.249.*.*
+  User ubuntu
+```
+
+Then run:
+
+```terminal
+ssh lard-a
+ssh lard-b
+```
+
 If cleaning up from tearing down a previous set of VMs you may also need to
 remove them first:
-`ssh-keygen -f "/home/louiseo/.ssh/known_hosts" -R "157.249.*.*"`
+
+```terminal
+ssh-keygen -f ~/.ssh/known_hosts -R lard-a
+ssh-keygen -f ~/.ssh/known_hosts -R lard-b
+```
 
 ### Configure!
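> Note (editor): the README above assumes application credentials in
> `~/.config/openstack/clouds.yml` for a cloud named `lard`. A minimal sketch of
> that file might look like the following — the `auth_url` is a placeholder, and
> the credential id/secret come from the ostack GUI:

```yaml
clouds:
  lard:
    auth_type: v3applicationcredential
    auth:
      auth_url: https://ostack2.example.met.no:5000/v3  # placeholder endpoint
      application_credential_id: "xxx"
      application_credential_secret: "yyy"
    region_name: Ostack2-EXT
```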
diff --git a/ansible/bigip.yml b/ansible/bigip.yml
index a92217d8..67347d43 100644
--- a/ansible/bigip.yml
+++ b/ansible/bigip.yml
@@ -6,23 +6,6 @@
     ostack_cloud: lard
     ostack_region: Ostack2-EXT
   gather_facts: false
-  pre_tasks:
-    # copy file, so we have an .sql file to apply locally
-    - name: Create a directory if it does not exist
-      ansible.builtin.file:
-        path: /etc/postgresql/16/db/bigip
-        state: directory
-        mode: '0755'
-      become: true
-
-    - name: Copy the schema to the remote 1
-      ansible.builtin.copy:
-        src: ./roles/bigip/vars/bigip.sql
-        dest: /etc/postgresql/16/db/bigip/bigip.sql
-        mode: '0755'
-      become: true
-
-  # loops over both servers
   roles:
+    # NOTE: it will fail to create table in the standby (since read only)
     - role: bigip
-      # will fail to create table in the standby (since read only)
diff --git a/ansible/configure.yml b/ansible/configure.yml
index ea369e6e..519ec72c 100644
--- a/ansible/configure.yml
+++ b/ansible/configure.yml
@@ -2,38 +2,39 @@
 - name: Mount disks and install stuff on the VMs
   hosts: servers
   remote_user: ubuntu
+  gather_facts: false
   vars:
-    ostack_cloud: lard
-    ipalias_network_name: ipalias
-    ostack_region: Ostack2-EXT
-  pre_tasks:
-    - name: List ansible_hosts_all difference from ansible_host (aka the vm not currently being iterated on)
-      ansible.builtin.debug:
-        msg: "{{ (ansible_play_hosts_all | difference([inventory_hostname])) | first }}"
-  roles:
-    - role: addsshkeys
-    - role: vm_format
-      vars:
-        name_stuff: "{{ inventory_hostname }}" # name of current vm for finding ipalias port
-    - role: ssh
-      vars:
-        vm_ip: "{{ ansible_host }}" # the current vm's ip
+    primary: lard-a
+    ostack_primary_floating_ip: # provide via cmd
+    ostack_db_password: # provide via cmd
+    ostack_repmgr_password: # provide via cmd
 
-- name: Setup primary and standby
-  vars:
-    ostack_cloud: lard
-    ostack_region: Ostack2-EXT
-  hosts: localhost
-  gather_facts: false
+  tasks:
+    - name: Add user SSH keys
+      ansible.builtin.include_role:
+        name: ssh
+        tasks_from: users.yml
+
+    - name: Format VM
+      ansible.builtin.include_role:
+        name: ostack
+        tasks_from: vm_format.yml
 
-  roles:
-    - role: primarystandbysetup
-      when: inventory_hostname == "lard-a"
+    - name: Share postgres SSH key between hosts
+      ansible.builtin.include_role:
+        name: ssh
+        tasks_from: postgres.yml
 
-    - role: standbysetup
-      when: inventory_hostname == "lard-b"
-      # vars:
-      # primary_name: lard-a
-      # primary_ip: '{{ ansible_host }}' # the first one is a
-      # standby_name: lard-b
-      # standby_ip: '{{ hostvars[groups["servers"][1]].ansible_host }}' # the second one is b
+    - name: Setup primary host
+      ansible.builtin.include_role:
+        name: ostack
+        tasks_from: create-primary.yml
+      when: inventory_hostname == primary
+
+    - name: Setup standby host
+      ansible.builtin.include_role:
+        name: ostack
+        tasks_from: create-standby.yml
+      vars:
+        ostack_primary_host_ip: "{{ hostvars[primary].ansible_host }}"
+      when: inventory_hostname != primary
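> Note (editor): `configure.yml` above expects the passwords and the primary's
> floating IP on the command line (variable names taken from the play; values
> here are placeholders):

```terminal
ansible-playbook -i inventory.yml \
  -e ostack_primary_floating_ip=157.249.x.x \
  -e ostack_db_password=xxx \
  -e ostack_repmgr_password=xxx \
  configure.yml
```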
diff --git a/ansible/deploy.yml b/ansible/deploy.yml
index 3863db58..e5f0d57e 100644
--- a/ansible/deploy.yml
+++ b/ansible/deploy.yml
@@ -1,10 +1,7 @@
 ---
 - name: Deploy binaries
-  # Deploy on both VMs, only the primary is "active"
   hosts: servers
   remote_user: ubuntu
   gather_facts: false
-  # All role tasks require root user
-  become: true
   roles:
     - role: deploy
diff --git a/ansible/group_vars/servers/main.yml b/ansible/group_vars/servers/main.yml
new file mode 100644
index 00000000..8cce8107
--- /dev/null
+++ b/ansible/group_vars/servers/main.yml
@@ -0,0 +1,27 @@
+---
+ostack_cloud: lard
+ostack_state: present
+ostack_region: Ostack2-EXT
+ostack2: true
+
+# networks
+ostack_network_name: "{{ vault_ostack_network_name }}"
+ostack_network_cidr: "{{ vault_ostack_network_cidr }}"
+ostack_network_dns: "{{ vault_ostack_network_dns }}"
+ostack_network_security_groups: "{{ vault_ostack_network_security_groups }}"
+ostack_ipalias_network_cidr: "{{ vault_ostack_ipalias_network_cidr }}"
+
+# vm_create
+ostack_vm_flavor: "{{ vault_ostack_flavor }}"
+ostack_vm_image: "{{ vault_ostack_image }}"
+ostack_vm_security_groups: "{{ vault_ostack_security_groups }}"
+ostack_vm_volume_type: "{{ vault_ostack_volume_type }}"
+ostack_vm_volume_size: "{{ vault_ostack_volume_size }}"
+# ostack_vm_key_name: provide via cmd
+
+# vm_format
+ostack_mount_device: "{{ vault_ostack_mount_device }}"
+ostack_mount_point: "/mnt/ssd-data"
+
+# ssh
+ssh_user_key_list: "{{ vault_ssh_user_key_list }}"
diff --git a/ansible/migrate.yml b/ansible/migrate.yml
index cf6019ca..722fac08 100644
--- a/ansible/migrate.yml
+++ b/ansible/migrate.yml
@@ -4,6 +4,7 @@
   remote_user: ubuntu
   gather_facts: false
   vars:
+    # TODO: is there a better way to get this fact automatically?
     primary: lard-a
 
   tasks:
diff --git a/ansible/provision.yml b/ansible/provision.yml
index 22f6ca2c..369d9ec6 100644
--- a/ansible/provision.yml
+++ b/ansible/provision.yml
@@ -1,16 +1,20 @@
 ---
-- name: Setup networks and 2 vms
+- name: Provision
   hosts: servers
   gather_facts: false
+  vars:
+    ostack_vm_key_name: # provide via cmd
 
   tasks:
-    - name: Setup networks # noqa: run-once[task]
+    - name: Setup networks
       ansible.builtin.include_role:
-        name: networks
+        name: ostack
+        tasks_from: networks.yml
       delegate_to: localhost
       run_once: true
 
-    - name: Setup VMs
+    - name: Create VMs
       ansible.builtin.include_role:
-        name: vm
+        name: ostack
+        tasks_from: vm_create.yml
       delegate_to: localhost
diff --git a/ansible/roles/bigip/vars/bigip.sql b/ansible/roles/bigip/files/bigip.sql
similarity index 100%
rename from ansible/roles/bigip/vars/bigip.sql
rename to ansible/roles/bigip/files/bigip.sql
diff --git a/ansible/roles/bigip/tasks/main.yml b/ansible/roles/bigip/tasks/main.yml
index a705cedb..f915dfc3 100644
--- a/ansible/roles/bigip/tasks/main.yml
+++ b/ansible/roles/bigip/tasks/main.yml
@@ -1,4 +1,17 @@
 ---
+- name: Create bigip directory if it does not exist
+  ansible.builtin.file:
+    path: /etc/postgresql/16/db/bigip
+    state: directory
+    mode: '0755'
+  become: true
+
+- name: Copy the bigip schema to the remote
+  ansible.builtin.copy:
+    src: bigip.sql
+    dest: /etc/postgresql/16/db/bigip/bigip.sql
+    mode: '0755'
+  become: true
+
+# TODO: add failed_when inventory_hostname != primary
 - name: Create bigip user and basic database
   # this is allowed to fail on the secondary, should work on the primary and be replicated over
   ignore_errors: true
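> Note (editor): `group_vars/servers/main.yml` above is keyed to the `servers`
> group, so the `inventory.yml` the README refers to is assumed to look roughly
> like this (host names from the playbooks; IPs are hypothetical placeholders):

```yaml
servers:
  hosts:
    lard-a:
      ansible_host: 157.249.x.x  # floating IP of lard-a
    lard-b:
      ansible_host: 157.249.y.y  # floating IP of lard-b
```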
diff --git a/ansible/roles/deploy/tasks/main.yml b/ansible/roles/deploy/tasks/main.yml
index fabd544b..a44b90bf 100644
--- a/ansible/roles/deploy/tasks/main.yml
+++ b/ansible/roles/deploy/tasks/main.yml
@@ -4,6 +4,7 @@
   ansible.builtin.group:
     name: lard
     state: present
+  become: true
 
 - name: Create lard user
   ansible.builtin.user:
@@ -13,6 +14,7 @@
     append: true
     state: present
     create_home: false
+  become: true
 
 # TODO: should we deploy in non root user?
 - name: Copy files to server
@@ -22,6 +24,7 @@
     mode: "{{ item.mode }}"
     owner: root
     group: root
+  become: true
   loop: "{{ deploy_files }}"
 
 - name: Start LARD ingestion service
   ansible.builtin.systemd_service:
     daemon_reload: true
     name: lard_ingestion
     state: restarted
     enabled: true
+  become: true
diff --git a/ansible/roles/ostack/defaults/main.yml b/ansible/roles/ostack/defaults/main.yml
index 9f555ef3..2b087e5b 100644
--- a/ansible/roles/ostack/defaults/main.yml
+++ b/ansible/roles/ostack/defaults/main.yml
@@ -1,28 +1,36 @@
 ---
-# TODO: separate what should be public and what private
-
-# public
+# PUBLIC
 ostack_cloud: lard
 ostack_region: Ostack2-EXT
 ostack_ipalias_network_name: ipalias
-# ostack_state: present
+ostack_state: present
 
-# private
-## networks
+# PRIVATE
+# networks
 ostack_network_name:
-
-# TODO: probably makes sense to move these to network if they are not reused
-# and networks_dns should be moved here since it depends on ostack_region
-ostack_cidr:
-ostack_ipalias_cidr:
-ostack_security_groups:
+ostack_network_cidr:
+ostack_network_dns: # dict[ostack_region -> list(ipv4)]
+ostack_network_security_groups:
   - name:
     rule:
       subnet:
       port:
+ostack_ipalias_network_cidr:
+
+# vm_create
+ostack_vm_image:
+ostack_vm_flavor:
+ostack_vm_key_name:
+ostack_vm_security_groups:
+ostack_vm_volume_type:
+ostack_vm_volume_size:
+ostack_availability_zone:
+
+# vm_format
+ostack_mount_device:
+ostack_mount_point:
+ostack_repmgr_password:
 
-## vm
-ostack_availability_zone:
-ostack_image:
-ostack_flavor:
-ostack_key_name:
+# create-primary / create-standby
+ostack_db_password:
+ostack_primary_floating_ip:
+ostack_primary_host_ip:
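> Note (editor): the `ostack_network_security_groups` default above only
> sketches the shape of the variable; a concrete (hypothetical, vaulted in
> practice) value would look like:

```yaml
ostack_network_security_groups:
  - name: ssh
    rule:
      subnet: 157.249.0.0/16  # example MET range
      port: 22
  - name: postgres
    rule:
      subnet: 157.249.0.0/16
      port: 5432
```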
diff --git a/ansible/roles/ostack/tasks/create-primary.yml b/ansible/roles/ostack/tasks/create-primary.yml
new file mode 100644
index 00000000..a81da6f4
--- /dev/null
+++ b/ansible/roles/ostack/tasks/create-primary.yml
@@ -0,0 +1,246 @@
+---
+# set up a role and provide suitable entries in pg_hba.conf with the database
+# field set to replication
+
+# ensure max_wal_senders is set to a sufficiently large value in the conf file
+# (also possibly max_replication_slots?) When running a standby server, you
+# must set this parameter to the same or higher value than on the primary
+# server. Otherwise, queries will not be allowed in the standby server.
+
+# set wal_keep_size to a value large enough to ensure that WAL segments are not
+# recycled too early, or configure a replication slot for the standby? if there
+# is a WAL archive accessible to the standby this may not be needed?
+
+# On systems that support the keepalive socket option, setting
+# tcp_keepalives_idle, tcp_keepalives_interval and tcp_keepalives_count helps
+# the primary promptly notice a broken connection.
+
+# example auth
+# Allow the user "foo" from host 192.168.1.100 to connect to the primary
+# as a replication standby if the user's password is correctly supplied.
+#
+# TYPE  DATABASE     USER  ADDRESS           METHOD
+# host  replication  foo   192.168.1.100/32  md5
+- name: Create a new database with name lard
+  community.postgresql.postgresql_db:
+    name: lard
+  become: true
+  become_user: postgres
+
+- name: Copy the db folder to the remote
+  ansible.builtin.copy:
+    src: "{{ playbook_dir }}/../db/"
+    dest: /etc/postgresql/16/db/
+    mode: "0755"
+  become: true
+
+# TODO: automatically loop over the sql files in order (needs prepending IDs?)
+# with_fileglob: "/etc/postgresql/16/db/*"
+- name: Create schemas and tables in lard
+  community.postgresql.postgresql_script:
+    db: lard
+    path: "/etc/postgresql/16/db/{{ item }}"
+  become: true
+  become_user: postgres
+  loop:
+    - public.sql
+    - partitions_generated.sql
+    - labels.sql
+    - flags.sql
+
+- name: Connect to lard database, create lard_user
+  community.postgresql.postgresql_user:
+    db: lard
+    name: lard_user
+    password: "{{ ostack_db_password }}"
+    role_attr_flags: SUPERUSER # not desired, but the privilege granting doesn't seem to work?
+  become: true
+  become_user: postgres
+
+# - name: Grant lard_user privileges on lard database
+#   community.postgresql.postgresql_privs:
+#     type: database
+#     db: lard
+#     privs: ALL
+#     role: lard_user
+#   become: true
+#   become_user: postgres
+
+# MAKE IT THE PRIMARY
+- name: Set wal_level parameter
+  community.postgresql.postgresql_set:
+    name: wal_level
+    value: replica # https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL
+  become: true
+  become_user: postgres
+
+- name: Set hot_standby parameter
+  community.postgresql.postgresql_set:
+    name: hot_standby
+    value: true
+  become: true
+  become_user: postgres
+
+- name: Set hot_standby_feedback parameter
+  community.postgresql.postgresql_set:
+    name: hot_standby_feedback
+    value: true
+  become: true
+  become_user: postgres
+
+- name: Set max_wal_senders parameter
+  community.postgresql.postgresql_set:
+    name: max_wal_senders
+    value: 10
+  become: true
+  become_user: postgres
+
+- name: Set wal_log_hints parameter # needs to be enabled to use pg_rewind
+  # https://www.postgresql.org/docs/current/app-pgrewind.html
+  community.postgresql.postgresql_set:
+    name: wal_log_hints
+    value: true
+  become: true
+  become_user: postgres
+
+- name: Set max_replication_slots parameter
+  community.postgresql.postgresql_set:
+    name: max_replication_slots
+    value: 10
+  become: true
+  become_user: postgres
+
+# make it SYNCHRONOUS REPLICATION (without the next two settings it would be asynchronous)
+- name: Set synchronous_standby_names parameter
+  community.postgresql.postgresql_set:
+    name: synchronous_standby_names # https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-SYNCHRONOUS-STANDBY-NAMES
+    value: "*" # all the standbys
+  become: true
+  become_user: postgres
+
+- name: Set synchronous_commit parameter
+  community.postgresql.postgresql_set:
+    name: synchronous_commit # https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-SYNCHRONOUS-COMMIT
+    value: "on" # will not give standby query consistency (tradeoff for better write performance), but will give standby durable commit after OS crash
+  become: true
+  become_user: postgres
+
+# repmgr
+# https://www.repmgr.org/docs/current/quickstart-repmgr-conf.html
+- name: Create a repmgr.conf if it does not exist
+  ansible.builtin.file:
+    path: /etc/repmgr.conf
+    state: touch
+    mode: "0755"
+  become: true
+
+- name: Set contents of repmgr.conf
+  ansible.builtin.copy:
+    dest: "/etc/repmgr.conf"
+    content: |
+      node_id=1
+      node_name='{{ inventory_hostname }}'
+      conninfo='host={{ ansible_host }} user=repmgr dbname=repmgr connect_timeout=2'
+      data_directory='{{ ostack_mount_point }}/16/main'
+      service_start_command='sudo /bin/systemctl start postgresql.service'
+      service_stop_command='sudo /bin/systemctl stop postgresql.service'
+      service_restart_command='sudo /bin/systemctl restart postgresql.service'
+      service_reload_command='sudo /bin/systemctl reload postgresql.service'
+    mode: "0755"
+  become: true
+# https://www.repmgr.org/docs/current/quickstart-primary-register.html
+- name: Run repmgr to register the primary # noqa no-changed-when
+  ansible.builtin.command: repmgr -f /etc/repmgr.conf primary register -F # only need -F if rerunning
+  become: true
+  become_user: postgres
+  register: register_primary_results
+
+- name: Print out the register_primary_results
+  ansible.builtin.debug:
+    msg: "repmgr {{ register_primary_results }}"
+
+# # STUFF FOR REPLICATION (do not need if using repmgr)
+# - name: Create replicator user with replication privileges
+#   community.postgresql.postgresql_user:
+#     name: replicator
+#     password: '{{ replicator_password }}'
+#     role_attr_flags: REPLICATION
+#   become: true
+#   become_user: postgres
+
+# # also specifically allow the replicator user
+# - name: Change hba conf to allow replicator to connect
+#   community.postgresql.postgresql_pg_hba:
+#     dest: /etc/postgresql/16/main/pg_hba.conf
+#     databases: replication
+#     contype: host
+#     users: replicator
+#     #address: all
+#     address: '{{ standby_host }}'
+#     method: trust # seems to hang with md5, how to make auth work?
+#   become: true
+
+# # create replication slot
+# - name: Create physical replication slot if doesn't exist
+#   become_user: postgres
+#   community.postgresql.postgresql_slot:
+#     slot_name: replication_slot
+#     #db: lard
+#   become: true
+
+# make sure these changes take effect?
+- name: Restart service postgres
+  ansible.builtin.systemd_service:
+    name: postgresql
+    state: restarted
+  become: true
+
+- name: Attach primary floating ip
+  delegate_to: localhost
+  block:
+    - name: Gather information about primary server
+      openstack.cloud.server_info:
+        cloud: "{{ ostack_cloud }}"
+        region_name: "{{ ostack_region }}"
+        name: "{{ inventory_hostname }}"
+      register: primary_server
+
+    - name: Print out the ipalias port information for the server
+      ansible.builtin.debug:
+        msg: "Server {{ primary_server.servers[0].addresses.ipalias }}"
+
+    # give the primary a particular floating ip
+    - name: Attach floating ip address that we keep connected to the primary
+      openstack.cloud.floating_ip:
+        cloud: "{{ ostack_cloud }}"
+        region_name: "{{ ostack_region }}"
+        server: "{{ primary_server.servers[0].id }}"
+        reuse: true
+        network: public
+        fixed_address: "{{ primary_server.servers[0].addresses.ipalias[0].addr }}"
+        floating_ip_address: "{{ ostack_primary_floating_ip }}"
+        wait: true
+      # unfortunately it seems that attaching the floating ip results in a
+      # timeout even though it actually succeeds
+      ignore_errors: true
+      register: attach_result
+      when: primary_server.servers[0].addresses.ipalias | length <= 1
+
+    - name: Print out result of attaching floating ip address
+      ansible.builtin.debug:
+        msg: "{{ attach_result }}"
+
+- name: Check floating ip is attached
+  openstack.cloud.floating_ip_info:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    floating_ip_address: "{{ ostack_primary_floating_ip }}"
+  register: fip
+  delegate_to: localhost
+
+# this will not run if the ip is not now on the vm
+- name: Print out the floating ip information to confirm it's ok
+  ansible.builtin.debug:
+    msg: "Floating ip {{ fip }}"
+  when: fip.floating_ips[0].port_details.device_id == primary_server.servers[0].id
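> Note (editor): a quick way to confirm on the host that the primary settings
> above took effect (plain psql, not part of the patch):

```terminal
sudo -u postgres psql -c 'SHOW wal_level;' -c 'SHOW synchronous_standby_names;'
```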
diff --git a/ansible/roles/ostack/tasks/create-standby.yml b/ansible/roles/ostack/tasks/create-standby.yml
new file mode 100644
index 00000000..0f3ab7ff
--- /dev/null
+++ b/ansible/roles/ostack/tasks/create-standby.yml
@@ -0,0 +1,139 @@
+---
+# create standby.signal file in data directory
+
+# configure streaming WAL: primary_conninfo needs a libpq connection string (ip
+# address + other details needed to connect to the primary server)
+
+# since we want the standby to be able to operate as the primary, we need to
+# configure the WAL archiving, connections, and auth like the primary, example:
+# primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass
+#                     options=''-c wal_sender_timeout=5000'''
+# restore_command = 'cp /path/to/archive/%f %p'
+# archive_cleanup_command = 'pg_archivecleanup /path/to/archive %r'
+
+# add the following line to the postgresql.conf file on the standby;
+# the standby connects to the primary that is running on host 192.168.1.50 and
+# port 5432 as the user "foo" whose password is "foopass":
+# primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass'
+
+# use the replication slot on the primary (in the file after primary_conninfo):
+# primary_slot_name = 'node_a_slot'
+
+# repmgr: https://www.repmgr.org/docs/current/quickstart-standby-clone.html
+# must be done before the standby is put into read only mode (therefore not
+# idempotent)
+- name: Create a repmgr.conf if it does not exist
+  ansible.builtin.file:
+    path: /etc/repmgr.conf
+    state: touch
+    mode: "0755"
+  become: true
+
+- name: Set contents of repmgr.conf
+  ansible.builtin.copy:
+    dest: "/etc/repmgr.conf"
+    content: |
+      node_id=2
+      node_name='{{ inventory_hostname }}'
+      conninfo='host={{ ansible_host }} user=repmgr dbname=repmgr connect_timeout=2'
+      data_directory='{{ ostack_mount_point }}/16/main'
+      service_start_command='sudo /bin/systemctl start postgresql.service'
+      service_stop_command='sudo /bin/systemctl stop postgresql.service'
+      service_restart_command='sudo /bin/systemctl restart postgresql.service'
+      service_reload_command='sudo /bin/systemctl reload postgresql.service'
+    mode: "0755"
+  become: true
+
+- name: Stop service postgres, if running
+  ansible.builtin.systemd_service:
+    name: postgresql
+    state: stopped
+  become: true
+
+# https://www.repmgr.org/docs/current/quickstart-standby-clone.html
+# TODO: can you use `ostack_primary_floating_ip` here?
+- name: Run repmgr to dry run clone # noqa no-changed-when
+  ansible.builtin.command: repmgr -h '{{ ostack_primary_host_ip }}' -U repmgr -d repmgr -f /etc/repmgr.conf standby clone --dry-run
+  become: true
+  become_user: postgres
+  register: dry_run_clone_results
+
+- name: Print out the dry_run_clone_results
+  ansible.builtin.debug:
+    msg: "repmgr {{ dry_run_clone_results }}"
+
+- name: Run repmgr to clone standby from primary # noqa no-changed-when
+  ansible.builtin.command: repmgr -h '{{ ostack_primary_host_ip }}' -U repmgr -d repmgr -f /etc/repmgr.conf standby clone -F
+  become: true
+  become_user: postgres
+  register: clone_results
+
+- name: Print out the clone_results
+  ansible.builtin.debug:
+    msg: "repmgr {{ clone_results }}"
+
+# try to clean up so can run standby clone ?
+# - name: Recursively remove directory
+#   ansible.builtin.file:
+#     path: /mnt/ssd-b/16/main
+#     state: absent
+#   become: true
+# - name: Create a main directory if it does not exist
+#   ansible.builtin.file:
+#     path: /mnt/ssd-b/16/main
+#     state: directory
+#     mode: '0700'
+#   become: true
+#   become_user: postgres
+
+# https://www.postgresql.org/docs/current/app-pgbasebackup.html
+# NOTE: this part is not idempotent, so if a db is already in the dir it will
+# fail; hence the cleanup above. This means it should not be run on a database
+# with data!!!
+# not needed if using repmgr, since clone calls this
+# - name: Run pg_basebackup to initialize the replica / standby
+#   ansible.builtin.shell: |
+#     export PGPASSWORD="{{ replicator_password }}" &&
+#     pg_basebackup --pgdata=/mnt/ssd-b/16/main -R --slot=replication_slot --user=replicator --host={{ primary_host }} --port=5432
+#   args:
+#     executable: /bin/bash
+#   become: true
+#   become_user: postgres
+#   register: basebackup_results
+
+# - name: Print out the basebackup_results
+#   debug: msg="backup {{ basebackup_results }}"
+
+- name: Restart service postgres
+  ansible.builtin.systemd_service:
+    name: postgresql
+    state: restarted
+  become: true
+
+- name: Wait for port 5432 to become available, don't check for the initial 10 seconds
+  ansible.builtin.wait_for:
+    host: 0.0.0.0
+    port: 5432
+    delay: 10
+    state: started
+
+# https://www.repmgr.org/docs/current/quickstart-register-standby.html
+- name: Run repmgr to register the standby # noqa no-changed-when
+  ansible.builtin.command: repmgr -f /etc/repmgr.conf standby register
+  become: true
+  become_user: postgres
+  register: register_standby_results
+
+- name: Print out the register_standby_results
+  ansible.builtin.debug:
+    msg: "repmgr {{ register_standby_results }}"
+
+# run some sql... to confirm clone?
+- name: Do some sql to test for the existence of lard...?
+  community.postgresql.postgresql_query:
+    db: lard
+    query: select count(*) from timeseries
+  become: true
+  become_user: postgres
+  register: query_results
+
+- name: Print out the query
+  ansible.builtin.debug:
+    msg: "Query {{ query_results }}"
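> Note (editor): once both nodes are registered, the replication state can be
> checked from either host with the same command the switchover role runs:

```terminal
sudo -u postgres repmgr -f /etc/repmgr.conf cluster show
```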
diff --git a/ansible/roles/ostack/tasks/move_floating_ip.yml b/ansible/roles/ostack/tasks/move_floating_ip.yml
new file mode 100644
index 00000000..02848b03
--- /dev/null
+++ b/ansible/roles/ostack/tasks/move_floating_ip.yml
@@ -0,0 +1,63 @@
+---
+# Switch over the primary's particular floating ip.
+# This makes sense to do after successfully switching over; however, it means
+# that the stuff writing to the primary needs to be robust enough to handle
+# being told the db is in a read only state for a short period.
+- name: Move primary floating ip
+  block:
+    # remove from old primary
+    - name: Detach floating ip address that we keep connected to the primary
+      openstack.cloud.floating_ip:
+        cloud: "{{ ostack_cloud }}"
+        region_name: "{{ ostack_region }}"
+        server: "{{ ostack_primary }}"
+        state: absent
+        network: public
+        floating_ip_address: "{{ ostack_primary_floating_ip }}"
+
+    - name: Gather information about new primary server
+      openstack.cloud.server_info:
+        cloud: "{{ ostack_cloud }}"
+        region_name: "{{ ostack_region }}"
+        name: "{{ ostack_standby }}"
+      register: new_primary_server
+
+    - name: Print out the ipalias port information for the server
+      ansible.builtin.debug:
+        msg: "Server {{ new_primary_server.servers[0].addresses.ipalias }}"
+
+    # add to what is now primary (used to be standby)
+    # unfortunately it seems that attaching the floating ip results in a timeout
+    # even though it actually succeeds
+    - name: Attach floating ip address that we keep connected to the primary
+      openstack.cloud.floating_ip:
+        cloud: "{{ ostack_cloud }}"
+        region_name: "{{ ostack_region }}"
+        server: "{{ new_primary_server.servers[0].id }}"
+        state: present
+        reuse: true
+        network: public
+        fixed_address: "{{ new_primary_server.servers[0].addresses.ipalias[0].addr }}"
+        floating_ip_address: "{{ ostack_primary_floating_ip }}"
+        wait: true
+        # timeout: 60
+      when: new_primary_server.servers[0].addresses.ipalias | length <= 1
+      ignore_errors: true
+      register: attach_result
+
+    - name: Print out result of attaching floating ip address
+      ansible.builtin.debug:
+        msg: "{{ attach_result }}"
+
+- name: Check floating ip is attached
+  openstack.cloud.floating_ip_info:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    floating_ip_address: "{{ ostack_primary_floating_ip }}"
+  register: fip
+
+# this will not run if the ip is not now on the right vm
+- name: Print out the floating ip information to confirm it's ok
+  ansible.builtin.debug:
+    msg: "Floating ip {{ fip }}"
+  when: fip.floating_ips[0].port_details.device_id == new_primary_server.servers[0].id
diff --git a/ansible/roles/ostack/tasks/networks.yml b/ansible/roles/ostack/tasks/networks.yml
new file mode 100644
index 00000000..348669e6
--- /dev/null
+++ b/ansible/roles/ostack/tasks/networks.yml
@@ -0,0 +1,12 @@
+---
+- name: Create the project network (if it doesn't exist)
+  ansible.builtin.import_tasks: networks/create-project-network.yml
+
+- name: Create the project security group (if it doesn't exist)
+  ansible.builtin.import_tasks: networks/create-project-security-group.yml
+
+- name: Create the ipalias network (if it doesn't exist)
+  ansible.builtin.import_tasks: networks/create-ipalias-network.yml
+
+- name: Create ping security group
+  ansible.builtin.import_tasks: networks/open-for-ping.yml
diff --git a/ansible/roles/ostack/tasks/networks/create-ipalias-network.yml b/ansible/roles/ostack/tasks/networks/create-ipalias-network.yml
new file mode 100644
index 00000000..df0250aa
--- /dev/null
+++ b/ansible/roles/ostack/tasks/networks/create-ipalias-network.yml
@@ -0,0 +1,42 @@
+---
+- name: Create ipalias network
+  openstack.cloud.network:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    name: ipalias
+    external: false
+    state: present
+  run_once: true
+
+- name: Create ipalias network subnet
+  openstack.cloud.subnet:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    network_name: ipalias
+    name: ipalias-subnet
+    cidr: "{{ ostack_ipalias_network_cidr }}"
+    state: present
+    dns_nameservers: "{{ ostack_network_dns[ostack_region] }}"
+  run_once: true
+
+- name: Connect ipalias network to public network
+  openstack.cloud.router:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    state: present
+    name: ipalias-router
+    network: public
+    interfaces:
+      - ipalias-subnet
+  run_once: true
+
+- name: Remove default gateway for subnet
+  openstack.cloud.subnet:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    network_name: ipalias
+    name: ipalias-subnet
+    cidr: "{{ ostack_ipalias_network_cidr }}"
+    no_gateway_ip: true
+    state: present
+  run_once: true
diff --git a/ansible/roles/ostack/tasks/networks/create-project-network.yml b/ansible/roles/ostack/tasks/networks/create-project-network.yml
new file mode 100644
index 00000000..62a97538
--- /dev/null
+++ b/ansible/roles/ostack/tasks/networks/create-project-network.yml
@@ -0,0 +1,28 @@
+---
+- name: Create private network
+  openstack.cloud.network:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    state: present
+    name: "{{ ostack_network_name }}"
+    external: false
+
+- name: Create private network subnet
+  openstack.cloud.subnet:
+    state: present
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    network_name: "{{ ostack_network_name }}"
+    name: "{{ ostack_network_name }}-subnet"
+    cidr: "{{ ostack_network_cidr }}"
+    dns_nameservers: "{{ ostack_network_dns[ostack_region] }}"
+
+- name: Connect private network to public network
+  openstack.cloud.router:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    state: present
+    name: public-router
+    network: public
+    interfaces:
+      - "{{ ostack_network_name }}-subnet"
diff --git a/ansible/roles/ostack/tasks/networks/create-project-security-group.yml b/ansible/roles/ostack/tasks/networks/create-project-security-group.yml
new file mode 100644
index 00000000..4fa115dd
--- /dev/null
+++ b/ansible/roles/ostack/tasks/networks/create-project-security-group.yml
@@ -0,0 +1,21 @@
+---
+- name: Create security groups
+  openstack.cloud.security_group:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    name: "{{ item }}"
+    description: Created with Ansible
+  loop: '{{ ostack_network_security_groups | map(attribute="name") | list | unique }}'
+
+- name: Populate security groups
+  openstack.cloud.security_group_rule:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    security_group: "{{ item.name }}"
+    protocol: tcp
+    port_range_max: "{{ item.rule.port }}"
+    port_range_min: "{{ item.rule.port }}"
+    remote_ip_prefix: "{{ item.rule.subnet }}"
+  loop: "{{ ostack_network_security_groups }}"
+  loop_control:
+    label: "updating security group {{ item.name }} with rule {{ item.rule }}"
diff --git a/ansible/roles/ostack/tasks/networks/open-for-ping.yml b/ansible/roles/ostack/tasks/networks/open-for-ping.yml
new file mode 100644
index 00000000..88de7ae8
--- /dev/null
+++ b/ansible/roles/ostack/tasks/networks/open-for-ping.yml
@@ -0,0 +1,15 @@
+---
+- name: Create ping security group
+  openstack.cloud.security_group:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    name: ping
+    description: Created with Ansible
+
+- name: Populate ping security group
+  openstack.cloud.security_group_rule:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    security_group: ping
+    protocol: icmp
+    remote_ip_prefix: "157.249.0.0/16"
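> Note (editor): the networks and security groups created above can be
> sanity-checked with the plain openstack CLI, using the same cloud/region as
> the tasks:

```terminal
openstack --os-cloud lard --os-region-name Ostack2-EXT network list
openstack --os-cloud lard --os-region-name Ostack2-EXT security group list
```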
diff --git a/ansible/roles/ostack/tasks/vm_create.yml b/ansible/roles/ostack/tasks/vm_create.yml
new file mode 100644
index 00000000..c825fcad
--- /dev/null
+++ b/ansible/roles/ostack/tasks/vm_create.yml
@@ -0,0 +1,92 @@
+---
+- name: Create VM
+  openstack.cloud.server:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    availability_zone: "{{ ostack_availability_zone }}"
+    name: "{{ inventory_hostname }}"
+    image: "{{ ostack_vm_image }}"
+    flavor: "{{ ostack_vm_flavor }}"
+    network: "{{ ostack_network_name }}"
+    key_name: "{{ ostack_vm_key_name }}"
+    security_groups: "{{ ostack_vm_security_groups }}"
+    state: "{{ ostack_state }}"
+    # do not give ip, since we want to assign a specific one in the next step
+    # (so as to reuse the ones we have)
+    auto_ip: false
+  register: server
+
+- name: Print out the server information
+  ansible.builtin.debug:
+    msg: "Server {{ lookup('ansible.builtin.dict', server) }}"
+
+- name: Attach floating ip address
+  openstack.cloud.floating_ip:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    server: "{{ server.server.id }}"
+    reuse: true
+    network: public
+    # TODO: check that this is correct
+    floating_ip_address: "{{ ansible_host }}"
+    wait: true
+    timeout: 60
+
+- name: Create Volume
+  openstack.cloud.volume:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    availability_zone: "{{ ostack_availability_zone }}"
+    name: "{{ inventory_hostname }}"
+    volume_type: "{{ ostack_vm_volume_type }}"
+    size: "{{ ostack_vm_volume_size }}"
+  register: volume
+
+- name: Print out the volume information
+  ansible.builtin.debug:
+    msg: "Volume {{ lookup('ansible.builtin.dict', volume) }}"
+
+- name: Attach a volume to a compute host
+  openstack.cloud.server_volume:
+    state: present
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    volume: "{{ volume.volume.id }}"
+    server: "{{ server.server.id }}"
+    device: /dev/vdb
+
+- name: Create port for ipalias and set security groups
+  openstack.cloud.port:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    state: present
+    name: "ipalias-{{ inventory_hostname }}"
+    network: "{{ ostack_ipalias_network_name }}"
+    security_groups: "{{ ostack_vm_security_groups }}"
+
+- name: Get port info
+  openstack.cloud.port_info:
+    cloud: "{{ ostack_cloud }}"
+    region_name: "{{ ostack_region }}"
+    port: "ipalias-{{ inventory_hostname }}"
+  register: ipalias_port
+
+- name: Print out the port information
+  ansible.builtin.debug:
+    msg: "Port {{ lookup('ansible.builtin.dict', ipalias_port) }}"
+# TODO: Can't this be done directly above
+# by assigning `server.server.id` to `device_id`?
+- name: Add the ipalias network to server
+  ansible.builtin.command: # noqa no-changed-when
+    argv:
+      - openstack
+      - --os-cloud
+      - "{{ ostack_cloud }}"
+      - --os-region-name
+      - "{{ ostack_region }}"
+      - server
+      - add
+      - port
+      - "{{ server.server.id }}"
+      - "ipalias-{{ inventory_hostname }}"
+  when: ipalias_port.ports.0.device_id | length <= 0
diff --git a/ansible/roles/ostack/tasks/vm_format.yml b/ansible/roles/ostack/tasks/vm_format.yml
new file mode 100644
index 00000000..67961468
--- /dev/null
+++ b/ansible/roles/ostack/tasks/vm_format.yml
@@ -0,0 +1,9 @@
+---
+- name: Netplan
+  ansible.builtin.import_tasks: vm_format/netplan.yml
+
+- name: Format and mount the disk
+  ansible.builtin.import_tasks: vm_format/format-mount-disk.yml
+
+- name: Install postgres
+  ansible.builtin.import_tasks: vm_format/install-postgres.yml
diff --git a/ansible/roles/ostack/tasks/vm_format/format-mount-disk.yml b/ansible/roles/ostack/tasks/vm_format/format-mount-disk.yml
new file mode 100644
index 00000000..a31da5d3
--- /dev/null
+++ b/ansible/roles/ostack/tasks/vm_format/format-mount-disk.yml
@@ -0,0 +1,45 @@
+---
+- name: Create mount point
+  ansible.builtin.file:
+    path: "{{ ostack_mount_point }}"
+    state: directory
+    owner: ubuntu # change to postgres?
+    group: ubuntu # change to postgres?
+    mode: 'u=rw,g=rws,o=r'
+  become: true
+
+- name: Create ext4 filesystem on mount device
+  community.general.filesystem:
+    dev: '{{ ostack_mount_device }}'
+    fstype: ext4
+  become: true
+
+- name: Read device information (always use unit when probing)
+  community.general.parted:
+    device: '{{ ostack_mount_device }}'
+    unit: MiB
+  register: sdb_info
+  become: true
+
+- name: Print out the device information
+  ansible.builtin.debug:
+    msg: "Partitions {{ sdb_info.partitions }}"
+
+# this also changes the fstab so it's still there when rebooted!
+- name: Mount the device on the mount point
+  ansible.posix.mount:
+    path: "{{ ostack_mount_point }}"
+    src: '{{ ostack_mount_device }}'
+    fstype: ext4
+    state: mounted
+  become: true
+
+- name: Fetch the UUID of mounted device
+  ansible.builtin.command: blkid --match-tag UUID --output value '{{ ostack_mount_device }}'
+  changed_when: false
+  register: blkid_cmd
+  become: true
+
+- name: Print out the UUID
+  ansible.builtin.debug:
+    msg: "UUID {{ blkid_cmd.stdout }}"
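> Note (editor): after this runs, the mount can be verified on the host —
> assuming the vaulted `ostack_mount_device` is the volume attached in
> `vm_create.yml` (e.g. `/dev/vdb`):

```terminal
findmnt /mnt/ssd-data
lsblk -f
```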
diff --git a/ansible/roles/ostack/tasks/vm_format/install-postgres.yml b/ansible/roles/ostack/tasks/vm_format/install-postgres.yml
new file mode 100644
index 00000000..ae858317
--- /dev/null
+++ b/ansible/roles/ostack/tasks/vm_format/install-postgres.yml
@@ -0,0 +1,136 @@
+---
+- name: Add postgres apt key by id from a keyserver
+  ansible.builtin.apt_key:
+    url: https://www.postgresql.org/media/keys/ACCC4CF8.asc
+    state: present
+  become: true
+
+- name: Add postgres repository into sources list
+  ansible.builtin.apt_repository:
+    repo: deb https://apt.postgresql.org/pub/repos/apt jammy-pgdg main
+    state: present
+  become: true
+
+- name: Install a list of packages
+  ansible.builtin.apt:
+    pkg:
+      - nano
+      - postgresql-16
+      - postgresql-16-repmgr # https://www.repmgr.org/docs/current/install-requirements.html
+      - pip # needed for installing psycopg2
+      - acl # needed for becoming unprivileged user (such as postgres)
+    update_cache: true
+  become: true
+
+- name: Install pip packages
+  ansible.builtin.pip:
+    name: "{{ item }}"
+  become: true
+  loop:
+    # TODO: should probably use psycopg3 instead, v2 is deprecated
+    - psycopg2-binary # dependency for community.postgresql
+    - openstacksdk
+
+# Make it so the data is actually kept on the ssd mount.
+# First stop the postgres service
+- name: Stop service postgres, if running
+  ansible.builtin.systemd_service:
+    name: postgresql
+    state: stopped
+  become: true
+
+- name: Rsync postgres directory to ssd mount
+  ansible.posix.synchronize:
+    archive: true
+    src: /var/lib/postgresql/
+    dest: "{{ ostack_mount_point }}"
+  become: true
+
+- name: Comment out original data_directory
+  ansible.builtin.replace:
+    dest: /etc/postgresql/16/main/postgresql.conf
+    regexp: '^data_directory'
+    replace: '#data_directory'
+  become: true
+
+- name: Modify postgresql config
+  ansible.builtin.blockinfile:
+    dest: /etc/postgresql/16/main/postgresql.conf
+    block: |
+      data_directory = "{{ ostack_mount_point }}/16/main"
+      listen_addresses = "*"
+  become: true
+
+# probably want to restrict this once we know what will connect?
+# but the security group rules should take care of limiting to met ranges
+- name: Change hba conf to allow connections
+  community.postgresql.postgresql_pg_hba:
+    dest: /etc/postgresql/16/main/pg_hba.conf
+    contype: host
+    address: all # can we put met internal ip range(s)?
+    method: md5
+    # users and database default to all
+  become: true
+
+# make sure these changes take effect
+- name: Start up postgres service again
+  ansible.builtin.systemd_service:
+    name: postgresql
+    state: started
+  become: true
+
+# REPMGR
+# https://www.repmgr.org/docs/current/quickstart-repmgr-user-database.html
+- name: Create repmgr user
+  community.postgresql.postgresql_user:
+    name: repmgr
+    password: '{{ ostack_repmgr_password }}'
+    role_attr_flags: SUPERUSER
+  become: true
+  become_user: postgres
+
+- name: Create a repmgr database, with owner repmgr
+  community.postgresql.postgresql_db:
+    name: repmgr
+    owner: repmgr
+  become: true
+  become_user: postgres
+
+- name: Change hba conf to allow repmgr to connect for replication
+  community.postgresql.postgresql_pg_hba:
+    dest: /etc/postgresql/16/main/pg_hba.conf
+    databases: replication
+    contype: host
+    users: repmgr
+    address: all
+    # address: '{{ standby_host }}'
+    method: trust
+  become: true
+
+- name: Change hba conf to allow repmgr to connect to the repmgr db
+  community.postgresql.postgresql_pg_hba:
+    dest: /etc/postgresql/16/main/pg_hba.conf
+    databases: repmgr
+    contype: host
+    users: repmgr
+    address: all
+    # address: '{{ standby_host }}'
+    method: trust
+  become: true
+
+- name: Restart postgres
+  ansible.builtin.systemd_service:
+    name: postgresql
+    state: restarted
+  become: true
+
+- name: Allow the postgres user to run /bin/systemctl restart, stop, start postgres
+  community.general.sudoers:
+    name: postgresql
+    user: postgres
+    commands:
+      - /bin/systemctl restart postgresql.service
+      - /bin/systemctl stop postgresql.service
+      - /bin/systemctl start postgresql.service
+      - /bin/systemctl reload postgresql.service
+    nopassword: true
+  become: true
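> Note (editor): the effective pg_hba rules written by the tasks above can be
> inspected from inside postgres via the standard `pg_hba_file_rules` view:

```terminal
sudo -u postgres psql -c 'TABLE pg_hba_file_rules;'
```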
diff --git a/ansible/roles/ostack/tasks/vm_format/netplan.yml b/ansible/roles/ostack/tasks/vm_format/netplan.yml
new file mode 100644
index 00000000..f7a6b4af
--- /dev/null
+++ b/ansible/roles/ostack/tasks/vm_format/netplan.yml
@@ -0,0 +1,57 @@
+---
+- name: Get port info
+  openstack.cloud.port_info:
+    cloud: '{{ ostack_cloud }}'
+    region_name: '{{ ostack_region }}'
+    port: 'ipalias-{{ inventory_hostname }}'
+  register: ipalias_port
+  delegate_to: localhost
+
+- name: Print out the port information
+  ansible.builtin.debug:
+    msg: "Port {{ lookup('ansible.builtin.dict', ipalias_port) }}"
+
+- name: IP alias netplan configuration
+  ansible.builtin.set_fact:
+    netplan_config:
+      network:
+        version: 2
+        ethernets:
+          ens6:
+            dhcp4: true
+            dhcp4-overrides:
+              use-routes: false
+            match:
+              macaddress: '{{ ipalias_port.ports.0.mac_address }}'
+            set-name: ens6
+            routes:
+              - to: 0.0.0.0/0
+                via: '{{ ostack_ipalias_network_cidr | ansible.utils.ipaddr("net") | ansible.utils.ipaddr("1") | ansible.utils.ipaddr("address") }}'
+                table: 102
+            routing-policy:
+              - from: '{{ ostack_ipalias_network_cidr }}'
+                table: 102
+
+- name: Copy out ipalias netplan config
+  ansible.builtin.copy:
+    content: '{{ netplan_config | to_nice_yaml }}'
+    dest: /etc/netplan/90-ansible-ipalias.yaml
+    mode: '0644'
+  register: netplan_config
+  become: true
+
+- name: Print out netplan config
+  ansible.builtin.debug:
+    msg: "Netplan {{ netplan_config }}"
+
+# https://gitlab.met.no/ansible-roles/ipalias/-/blob/master/tasks/netplan.yml?ref_type=heads
+# this times out and then the servers are unreachable?
+# - name: Reboot server to apply new netplan config, without hitting netplan bug
+#   ansible.builtin.reboot: # noqa no-handler
+#     reboot_timeout: 3600
+#   when: netplan_config is changed
+#   become: true
+
+- name: Apply netplan # noqa no-changed-when
+  ansible.builtin.command: sudo netplan apply
+  async: 45
+  poll: 0
diff --git a/ansible/roles/ssh/defaults/main.yml b/ansible/roles/ssh/defaults/main.yml
new file mode 100644
index 00000000..78126382
--- /dev/null
+++ b/ansible/roles/ssh/defaults/main.yml
@@ -0,0 +1,3 @@
+ssh_user_key_list:
+  - name:
+    key:
diff --git a/ansible/roles/ssh/tasks/main.yml b/ansible/roles/ssh/tasks/postgres.yml
similarity index 71%
rename from ansible/roles/ssh/tasks/main.yml
rename to ansible/roles/ssh/tasks/postgres.yml
index 327eca80..1428b475 100644
--- a/ansible/roles/ssh/tasks/main.yml
+++ b/ansible/roles/ssh/tasks/postgres.yml
@@ -1,12 +1,13 @@
 ---
-# find the other vm, that is not currently being iterated over (this will need to be changed if more than 2)
-- name: Setting host facts for other_vm
+# find the other vms, that are not currently being iterated over
+- name: Set host fact other_vms
   ansible.builtin.set_fact:
-    other_vm: "{{ (ansible_play_hosts_all | difference([inventory_hostname])) | first }}"
+    other_vms: "{{ (ansible_play_hosts_all | difference([inventory_hostname])) }}"
 
-- name: List other vm
+- name: List other VMs
   ansible.builtin.debug:
-    msg: "{{ other_vm }}"
+    msg: "{{ item }}"
+  loop: "{{ other_vms }}"
 
 - name: Create user postgres
   ansible.builtin.user:
@@ -17,6 +18,7 @@
     force: true
   register: ssh_keys
   become: true
+
 # Another way to generate a ssh key...
 # - name: Force regenerate an OpenSSH keypair if it already exists
 #   community.crypto.openssh_keypair:
@@ -36,7 +38,8 @@
     state: present
     key: "{{ ssh_keys.ssh_public_key }}"
   become: true
-  delegate_to: "{{ other_vm }}"
+  delegate_to: "{{ item }}"
+  loop: "{{ other_vms }}"
 
 - name: Get the host key
   ansible.builtin.set_fact:
@@ -48,13 +51,14 @@
 
 - name: List vm ip
   ansible.builtin.debug:
-    msg: "{{ vm_ip }}"
+    msg: "{{ ansible_host }}"
 
 - name: Add the vm to known_hosts on the other vm
   ansible.builtin.known_hosts:
     path: ~postgres/.ssh/known_hosts # need this for the postgres user
-    name: "{{ vm_ip }}"
-    key: "{{ vm_ip }} ecdsa-sha2-nistp256 {{ hostkey }}"
+    name: "{{ ansible_host }}"
+    key: "{{ ansible_host }} ecdsa-sha2-nistp256 {{ hostkey }}"
     state: present
   become: true
-  delegate_to: "{{ other_vm }}"
+  delegate_to: "{{ item }}"
+  loop: "{{ other_vms }}"
diff --git a/ansible/roles/ssh/tasks/share-ssh-keys.yml b/ansible/roles/ssh/tasks/share-ssh-keys.yml
deleted file mode 100644
index e69de29b..00000000
diff --git a/ansible/roles/ssh/tasks/users.yml b/ansible/roles/ssh/tasks/users.yml
new file mode 100644
index 00000000..a1c46cef
--- /dev/null
+++ b/ansible/roles/ssh/tasks/users.yml
@@ -0,0 +1,10 @@
+---
+- name: Add users keys to authorized_keys
+  ansible.posix.authorized_key:
+    # username on the remote host whose authorized keys are being modified
+    user: ubuntu
+    state: present
+    key: "{{ item.key }}"
+  loop: "{{ ssh_user_key_list }}"
+  loop_control:
+    label: "adding {{ item.name }} key to authorized_keys"
diff --git a/ansible/roles/switchover/defaults/main.yml b/ansible/roles/switchover/defaults/main.yml
new file mode 100644
index 00000000..696c0315
--- /dev/null
+++ b/ansible/roles/switchover/defaults/main.yml
@@ -0,0 +1,2 @@
+---
+switchover_primary:
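> Note (editor): the `ssh` role's `ssh_user_key_list` (vaulted in `group_vars`)
> is assumed to be a list of name/key pairs along these lines (hypothetical
> entry):

```yaml
ssh_user_key_list:
  - name: jane
    key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5... jane@met.no"
```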
diff --git a/ansible/roles/switchover/tasks/main.yml b/ansible/roles/switchover/tasks/main.yml
index 8c68fd75..93918a58 100644
--- a/ansible/roles/switchover/tasks/main.yml
+++ b/ansible/roles/switchover/tasks/main.yml
@@ -1,4 +1,59 @@
 ---
-# roles/switchover/tasks/main.yml
-- name: Switchover
-  ansible.builtin.import_tasks: switchover.yml
+# TODO: maybe restart in separate task? So we only have standby code here?
+
+# assume the db is already there and synched,
+# so now we want to turn it into a standby / replica
+# and turn the current standby into the primary
+- name: Restart service postgres (primary)
+  ansible.builtin.systemd_service:
+    name: postgresql
+    state: restarted
+  become: true
+  delegate_to: "{{ switchover_primary }}"
+
+# try to avoid issue: https://github.com/EnterpriseDB/repmgr/issues/703
+- name: Restart service postgres (standby)
+  ansible.builtin.systemd_service:
+    name: postgresql
+    state: restarted
+  become: true
+
+# can now just do this with repmgr
+# https://www.repmgr.org/docs/current/preparing-for-switchover.html
+# need the two instances to be able to ssh to each other!
+# siblings-follow only really needed if we have multiple standbys...
+- name: Dry run of switching the standby and primary # noqa no-changed-when
+  ansible.builtin.command: repmgr standby switchover -f /etc/repmgr.conf --siblings-follow --dry-run
+  become: true
+  become_user: postgres
+  register: switchover_dry_run_results
+
+- name: Print out the switchover_dry_run_results
+  ansible.builtin.debug:
+    msg: "repmgr {{ switchover_dry_run_results }}"
+
+# see "preparing for switchover" if things go wrong despite the dry run; there
+# is mention of --force-rewind, which would use pg_rewind to try to fix
+# divergent timelines...
+
+# https://www.repmgr.org/docs/current/switchover-execution.html
+# https://www.repmgr.org/docs/current/switchover-troubleshooting.html
+- name: Switch the standby and primary # noqa no-changed-when
+  ansible.builtin.command: repmgr standby switchover -f /etc/repmgr.conf --siblings-follow
+  become: true
+  become_user: postgres
+  register: switchover_results
+
+- name: Print out the switchover_results
+  ansible.builtin.debug:
+    msg: "repmgr {{ switchover_results }}"
+
+- name: Check cluster # noqa no-changed-when
+  ansible.builtin.command: repmgr -f /etc/repmgr.conf cluster show
+  become: true
+  become_user: postgres
+  register: status_results
+
+- name: Print out the status_results
+  ansible.builtin.debug:
+    msg: "repmgr {{ status_results }}"
diff --git a/ansible/roles/switchover/tasks/switchover.yml b/ansible/roles/switchover/tasks/switchover.yml
deleted file mode 100644
index 00eec259..00000000
--- a/ansible/roles/switchover/tasks/switchover.yml
+++ /dev/null
@@ -1,62 +0,0 @@
----
-# assume the db is already there and synched, so now want to turn into a standby / replica
-# and want to turn the current standby into the primary
-- name: Restart service postgres (primary)
-  ansible.builtin.systemd_service:
-    name: postgresql
-    state: restarted
-  become: true
-  delegate_to: "{{ primary_ip }}"
-  remote_user: ubuntu
-
-# try to avoid issue: https://github.com/EnterpriseDB/repmgr/issues/703
-- name: Restart service postgres (standby)
-  ansible.builtin.systemd_service:
-    name: postgresql
-    state: restarted
-  become: true
-  delegate_to: "{{ standby_ip }}"
-  remote_user: ubuntu
-
-# can now just do this with repmgr
-# https://www.repmgr.org/docs/current/preparing-for-switchover.html
-# need the two instances to be able to ssh to each other!
-# siblings-follow only really needed if have multiple standbys...
-- name: Dry run of switching the standby and primary
-  ansible.builtin.command: repmgr standby switchover -f /etc/repmgr.conf --siblings-follow --dry-run
-  become: true
-  become_user: postgres
-  delegate_to: "{{ standby_ip }}"
-  remote_user: ubuntu
-  register: switchover_dry_run_results
-
-- name: Print out the switchover_dry_run_results
-  ansible.builtin.debug:
-    msg: "repmgr {{ switchover_dry_run_results }}"
-## see preparing for switchover if things go wrong despite dry run, there is mention of --force-rewind
-## which would use pg_rewind to try to fix divergent timelines...
-
-## https://www.repmgr.org/docs/current/switchover-execution.html
-## https://www.repmgr.org/docs/current/switchover-troubleshooting.html
-- name: Switch the standby and primary
-  ansible.builtin.command: repmgr standby switchover -f /etc/repmgr.conf --siblings-follow
-  become: true
-  become_user: postgres
-  delegate_to: "{{ standby_ip }}"
-  remote_user: ubuntu
-  register: switchover_results
-- name: Print out the switchover_results
-  ansible.builtin.debug:
-    msg: "repmgr {{ switchover_results }}"
-
-- name: Check cluster
-  ansible.builtin.command: repmgr -f /etc/repmgr.conf cluster show
-  become: true
-  become_user: postgres
-  delegate_to: "{{ standby_ip }}"
-  remote_user: ubuntu
-  register: status_results
-
-- name: Print out the status_results
-  ansible.builtin.debug:
-    msg: "repmgr {{ status_results }}"
diff --git a/ansible/switchover.yml b/ansible/switchover.yml
index e7cd16c4..44f6ff6e 100644
--- a/ansible/switchover.yml
+++ b/ansible/switchover.yml
@@ -1,22 +1,28 @@
 ---
 - name: Switch the primary and standby / replica
-  hosts: localhost
-  vars:
-    ostack_cloud: lard
-    ostack_region: Ostack2-EXT
+  hosts: "{{ standby }}"
   gather_facts: false
-  pre_tasks:
-    - name: Find primary ip from inventory
-      ansible.builtin.set_fact:
-        primary_ip: "{{ item }}"
-      with_inventory_hostnames: "{{ name_primary }}"
-
-    - name: Find standby ip from inventory
-      ansible.builtin.set_fact:
-        standby_ip: "{{ item }}"
-      with_inventory_hostnames: "{{ name_standby }}"
+  remote_user: ubuntu
+  vars:
+    primary: lard-a
+    standby: lard-b
 
   roles:
-    # ensure the names are passed in the right way around for the current state!
     - role: switchover
-  - role: movefloatingip
+      switchover_primary: "{{ primary }}"
+
+- name: Move floating IP to new primary
+  hosts: localhost
+  gather_facts: false
+  vars:
+    primary: lard-a
+    standby: lard-b
+
+  tasks:
+    - name: Perform IP switchover
+      ansible.builtin.include_role:
+        name: ostack
+        tasks_from: move_floating_ip.yml
+      vars:
+        ostack_primary: "{{ primary }}"
+        ostack_standby: "{{ standby }}"
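> Note (editor): the switchover playbook hardcodes `primary: lard-a` and
> `standby: lard-b`, so after a successful switchover the two vars must be
> swapped before the next run — either by editing the file or by overriding
> them on the command line (extra vars take precedence over play vars):

```terminal
ansible-playbook -i inventory.yml switchover.yml
# next time around, the roles are reversed:
ansible-playbook -i inventory.yml -e primary=lard-b -e standby=lard-a switchover.yml
```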