diff --git a/ansible/.gitignore b/ansible/.gitignore
index 99ed0d43..229318aa 100644
--- a/ansible/.gitignore
+++ b/ansible/.gitignore
@@ -1 +1,3 @@
 notes.txt
+ansible.cfg
+.run.sh
diff --git a/ansible/.yamlfmt b/ansible/.yamlfmt
new file mode 100644
index 00000000..1e6ef5d7
--- /dev/null
+++ b/ansible/.yamlfmt
@@ -0,0 +1,6 @@
+formatter:
+  type: basic
+  include_document_start: true
+  retain_line_breaks_single: true
+  scan_folded_as_literal: true
+  trim_trailing_whitespace: true
diff --git a/ansible/README.md b/ansible/README.md
new file mode 100644
index 00000000..5b1257a7
--- /dev/null
+++ b/ansible/README.md
@@ -0,0 +1,289 @@
+# LARD on OpenStack 2
+
+## Get access to OpenStack
+
+You need to create application credentials in the project you are going to
+create the instances in, so that the Ansible scripts can connect to the right
+`ostack_cloud` (in our case `lard`).
+
+The credentials file should be located at `~/.config/openstack/clouds.yml`.
+If you have MET access, see the start of the readme [here](https://gitlab.met.no/it/infra/ostack-ansible21x-examples)
+or the authentication section [here](https://gitlab.met.no/it/infra/ostack-doc/-/blob/master/ansible-os.md?ref_type=heads).
+
+## Dependencies
+
+- Python 3.10+
+
+- In your terminal, run the following:
+
+  ```terminal
+  python3 -m venv ~/.venv/lard
+  source ~/.venv/lard/bin/activate
+
+  pip install -r requirements.txt
+  ansible-galaxy collection install -fr requirements.yml
+  ```
+
+- [yamlfmt](https://github.com/google/yamlfmt) for formatting
+
+## Setup
+
+> [!IMPORTANT]
+> Add your public key to the OpenStack GUI.
+> Go to `Compute → Key Pairs` and import your public key; it will be needed during provisioning.
+
+The IPs associated with the hosts in `inventory.yml` should correspond to
+floating IPs you have requested in the network section of the OpenStack GUI.
+These IPs are stored in the `ansible_host` variables inside each `host_vars/<host>.yml`.
+
+Private variables are encrypted with `ansible-vault` and stored inside different files in `group_vars/servers/vault`.
+You can either decrypt them beforehand or pass the `-J` flag to `ansible-playbook` when running the playbooks.
+Passwords can be found in [CICD variables](https://gitlab.met.no/met/obsklim/bakkeobservasjoner/lagring-og-distribusjon/db-products/poda/-/settings/ci_cd).
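+For example, assuming you have the vault password at hand:
+
+```terminal
+# print the decrypted contents of one of the vault files
+ansible-vault view group_vars/servers/vault/networks.yml
+
+# or decrypt them in place (take care not to commit the decrypted files)
+ansible-vault decrypt group_vars/servers/vault/*.yml
+```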
+
+### 1. Provision!
+
+Here we create the network and the VMs.
+
+```terminal
+ansible-playbook -i inventory.yml -e key_name=... provision.yml
+```
+
+> [!NOTE]
+> If the network has already been set up and you only need to rebuild the VMs, you can do so with
+>
+> ```terminal
+> ansible-playbook -i inventory.yml -e key_name=... provision.yml --skip-tags network
+> ```
+
+### 2. Configure!
+
+In this step we format the VMs, exchange their SSH keys, set up postgres
+replication, and associate a floating IP with the primary host. This IP is
+moved to one of the standbys when doing a switchover.
+
+> [!NOTE]
+> The floating IP association times out, but the error is ignored as it is a known bug.
+
+```terminal
+ansible-playbook -i inventory.yml -e db_password=... -e repmgr_password=... configure.yml
+```
+
+The parts to do with the floating IP that belongs to the primary (ipalias) are based on this [repo](https://gitlab.met.no/ansible-roles/ipalias/-/tree/master?ref_type=heads).
+
+### 3. SSH into the VMs and connect to postgres
+
+It might be helpful to create host aliases and add them to your `~/.ssh/config` file,
+so you don't have to remember the IPs by heart.
+An example host alias looks like the following:
+
+```ssh
+Host lard-a
+    HostName 
+    User ubuntu
+```
+
+Then you can simply run:
+
+```terminal
+ssh lard-a
+```
+
+To connect to postgres you can define a [service](https://www.postgresql.org/docs/current/libpq-pgservice.html) in
+`~/.pg_service.conf`, like so:
+
+```ini
+[lard-a]
+host=
+port=5432
+user=lard_user
+dbname=lard
+password=...
+```
+
+And then:
+
+```terminal
+psql service=lard-a
+```
+
+### 4. Checking the status of the cluster
+
+After `ssh`ing into the server and becoming the postgres user (`sudo su postgres`), you can check the repmgr status with:
+
+```terminal
+postgres@lard-a:/home/ubuntu$ repmgr -f /etc/repmgr.conf node check
+Node "lard-a":
+  Server role: OK (node is primary)
+  Replication lag: OK (N/A - node is primary)
+  WAL archiving: OK (0 pending archive ready files)
+  Upstream connection: OK (N/A - node is primary)
+  Downstream servers: OK (1 of 1 downstream nodes attached)
+  Replication slots: OK (node has no physical replication slots)
+  Missing physical replication slots: OK (node has no missing physical replication slots)
+  Configured data directory: OK (configured "data_directory" is "/mnt/ssd-data/16/main")
+```
+
+```terminal
+postgres@lard-b:/home/ubuntu$ repmgr -f /etc/repmgr.conf node check
+Node "lard-b":
+  Server role: OK (node is standby)
+  Replication lag: OK (0 seconds)
+  WAL archiving: OK (0 pending archive ready files)
+  Upstream connection: OK (node "lard-b" (ID: 2) is attached to expected upstream node "lard-a" (ID: 1))
+  Downstream servers: OK (this node has no downstream nodes)
+  Replication slots: OK (node has no physical replication slots)
+  Missing physical replication slots: OK (node has no missing physical replication slots)
+  Configured data directory: OK (configured "data_directory" is "/mnt/ssd-data/16/main")
+```
+
+While a few of the configuration options can be found in `/etc/postgresql/16/main/postgresql.conf`, many of them
+can only be seen in `/mnt/ssd-data/16/main/postgresql.auto.conf` (you need `sudo` to view its contents).
+
+### 5. Deploy LARD ingestor
+
+This is as simple as running:
+
+```terminal
+ansible-playbook -i inventory.yml deploy.yml
+```
+
+### 6. Teardown
+
+If you need to delete the old VMs (`Compute → Instances`) and volumes (`Volumes → Volumes`) you can do so in the OpenStack GUI.
+
+> [!CAUTION]
+> When deleting things in order to rebuild, one of the IPs occasionally fails to
+> disassociate properly; in that case you have to do it manually from the GUI (`Network → Floating IPs`).
+
+## Switchover
+
+> [!NOTE]
+> In the following we assume the primary is `lard-a` and the standby is `lard-b`.
+
+### 1. Planned downtime
+
+This should only be used when both VMs are up and running, e.g. in the case of planned maintenance in one data room.
+Use this playbook ahead of time to move the primary to the data room that will stay available.
+
+**Make sure you know which node is the primary, and pass the names the right way around in this call.**
+
+```terminal
+ansible-playbook -i inventory.yml -e primary=lard-a -e standby=lard-b switchover.yml
+```
+
+It is also possible to do this manually, but you might need to follow what is done in the ansible script (i.e. restart postgres on both VMs)
+before performing the switchover (as the `postgres` user):
+
+```terminal
+repmgr standby switchover -f /etc/repmgr.conf --siblings-follow
+```
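+
+If you are unsure whether the switchover will go through cleanly, repmgr can
+rehearse it first: the same command with `--dry-run` only checks the
+preconditions without changing anything.
+
+```terminal
+repmgr standby switchover -f /etc/repmgr.conf --siblings-follow --dry-run
+```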
+
+### 2. Unplanned downtime
+
+This is used in the case where the primary has gone down (e.g. unplanned downtime of a data room).
+Make sure you know which node you want to promote!
+
+```terminal
+ansible-playbook -i inventory.yml -e primary=lard-a -e standby=lard-b rejoin.yml
+```
+
+This can also be done manually by following these steps:
+
+#### A. Promote standby node to primary
+
+1. `ssh` into the standby and become the `postgres` user.
+
+1. Check the status:
+
+   ```terminal
+   postgres@lard-b:~$ repmgr -f /etc/repmgr.conf cluster show
+   ```
+
+   The primary should be marked as **unreachable**.
+
+1. Then promote the standby to primary:
+
+   ```terminal
+   postgres@lard-b:~$ repmgr -f /etc/repmgr.conf standby promote
+   ```
+
+1. You can then check the status again (now the old primary will be marked as **failed**).
+
+1. Then move the IP in the OpenStack GUI (`Network → Floating IPs`, disassociate
+   it, then associate it with the ipalias port on the other VM).
+
+1. Restart the LARD ingestion service on the new primary:
+
+   ```terminal
+   ubuntu@lard-b:~$ sudo systemctl start lard_ingestion.service
+   ```
+
+#### B. Rejoin old primary
+
+The cluster will be in a slightly confused state, because this VM still thinks
+it's a primary (although repmgr tells it that the other one is running as a primary
+as well). If replication is asynchronous, we could lose data that wasn't copied
+over before the crash; if it is synchronous, there should be no data loss.
+
+1. `ssh` into the new primary:
+
+   ```terminal
+   postgres@lard-b:~$ repmgr -f /etc/repmgr.conf cluster show
+   ...
+   node "lard-b" (ID: 2) is running but the repmgr node record is inactive
+   ```
+
+1. `ssh` into the old primary:
+
+   ```terminal
+   postgres@lard-a:~$ repmgr -f /etc/repmgr.conf cluster show
+   ...
+   node "lard-a" (ID: 1) is registered as standby but running as primary
+   ```
+
+1. Rejoin it with a **playbook**:
+
+   ```terminal
+   ansible-playbook -i inventory.yml -e primary=lard-a -e standby=lard-b rejoin.yml --skip-tags promote
+   ```
+
+   where `primary` is the host name of the primary node that has been down and should now become a standby.
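+
+Or **manually**: make sure postgres is stopped on the old primary (see the fast
+stop command under Testing below), then dry-run and perform the rejoin as the
+`postgres` user on the old primary. These commands mirror what the rejoin
+playbook does; replace `<new-primary-ip>` with the IP of the new primary:
+
+```terminal
+repmgr node rejoin -f /etc/repmgr.conf \
+    -d 'host=<new-primary-ip> user=repmgr dbname=repmgr connect_timeout=2' \
+    --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose --dry-run
+
+repmgr node rejoin -f /etc/repmgr.conf \
+    -d 'host=<new-primary-ip> user=repmgr dbname=repmgr connect_timeout=2' \
+    --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose
+```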
+
+#### Testing
+
+1. Take down one of the replicas (alternatively, shut off the instance in the OpenStack GUI):
+
+   ```terminal
+   sudo pg_ctlcluster 16 main -m fast stop
+   ```
+
+1. To bring it back up (or turn it back on):
+
+   ```terminal
+   sudo pg_ctlcluster 16 main start
+   ```
+
+### Load balancing
+
+The `bigip` role creates a user and a basic database that the load balancer uses to test the health
+of the lard database.
+The database is created only on the primary node and replicated over to the standby.
+The hba conf change needs to be run on both nodes.
+
+To run the bigip role on the VMs use:
+
+```terminal
+ansible-playbook -i inventory.yml -e bigip_password=... bigip.yml
+```
+
+### Links:
+
+https://www.enterprisedb.com/postgres-tutorials/postgresql-replication-and-automatic-failover-tutorial#replication
+
+### Useful ansible commands:
+
+```terminal
+ansible-inventory -i inventory.yml --graph
+
+ansible servers -m ping -u ubuntu -i inventory.yml
+```
diff --git a/ansible/bigip.yml b/ansible/bigip.yml
index 5fc6dcf8..e36c8c7c 100644
--- a/ansible/bigip.yml
+++ b/ansible/bigip.yml
@@ -1,52 +1,7 @@
-- name: Copy schema for bigip
-  vars:
-    ostack_cloud: lard
-    ostack_region: Ostack2-EXT
-  hosts: localhost # need to seperate this since done from localhost
-  gather_facts: false
-  pre_tasks:
-    # copy file, so we have an .sql file to apply locally
-    - name: Create a directory if it does not exist
-      ansible.builtin.file:
-        path: /etc/postgresql/16/db/bigip
-        state: directory
-        mode: '0755'
-      become: true
-      delegate_to: '{{ hostvars[groups["servers"][0]].ansible_host }}'
-      remote_user: ubuntu
-    - name: Copy the schema to the remote 1
-      ansible.builtin.copy:
-        src: ./roles/bigip/vars/bigip.sql
-        dest: /etc/postgresql/16/db/bigip/bigip.sql
-        mode: '0755'
-      become: true
-      delegate_to: '{{ hostvars[groups["servers"][0]].ansible_host }}'
-      remote_user: ubuntu
-    - name: Create a directory if it does not exist
-      ansible.builtin.file:
-        path: /etc/postgresql/16/db/bigip
-        state: directory
-        mode: '0755'
-      become: true
-      delegate_to: '{{ hostvars[groups["servers"][1]].ansible_host }}'
-      remote_user: ubuntu
-    - name: Copy the schema to the remote 2
-      ansible.builtin.copy:
-        src: ./roles/bigip/vars/bigip.sql
-        dest: /etc/postgresql/16/db/bigip/bigip.sql
-        mode: '0755'
-      become: true
-      delegate_to: '{{ hostvars[groups["servers"][1]].ansible_host }}'
-      remote_user: ubuntu
-
+---
 - name: Create what is needed for the bigip load balancers
   hosts: servers
   remote_user: ubuntu
-  vars:
-    ostack_cloud: lard
-    ostack_region: Ostack2-EXT
   gather_facts: false
-  # loops over both servers
   roles:
-    - role: bigip
-      # will fail to create table in the standby (since read only)
\ No newline at end of file
+    - role: bigip
diff --git a/ansible/configure.yml b/ansible/configure.yml
index ff586fd0..b3f38259 100644
--- a/ansible/configure.yml
+++ b/ansible/configure.yml
@@ -1,35 +1,48 @@
-- name: Mount disks and install stuff on the VMs
+---
+- name: Configure VMs
   hosts: servers
   remote_user: ubuntu
+  gather_facts: true
   vars:
-    ostack_cloud: lard
-    ipalias_network_name: ipalias
-    ostack_region: Ostack2-EXT
-  # loops over both servers
-  pre_tasks:
-    - name: List ansible_hosts_all difference from ansible_host (aka the vm not currently being iterated on)
-      ansible.builtin.debug:
-        msg: "{{ (ansible_play_hosts_all | difference([inventory_hostname])) | first }}"
-  roles:
-    - role: addsshkeys
-    - role: vm_format
-      vars:
-        name_stuff: '{{ inventory_hostname }}' # name of current vm for finding ipalias port
-    - role: ssh
+    primary: lard-a # or provide via cmd
+    db_password: # provide via cmd
+    repmgr_password: # provide via cmd
+
+  tasks:
+    - name: Add keys to local known_hosts
+      ansible.builtin.include_role:
+        name: ssh
+        tasks_from: localhost.yml
+
+    - name: Add user SSH keys
+      ansible.builtin.include_role:
+        name: ssh
+        tasks_from: users.yml
+
+    - name: Format VM
+      ansible.builtin.include_role:
+        name: ostack
+        tasks_from: vm_format.yml
       vars:
-        vm_ip: '{{ ansible_host }}' # the current vm's ip
+        ostack_repmgr_password: "{{ repmgr_password }}"
 
-- name: Setup primary and standby
-  vars:
-    ostack_cloud: lard
-    ostack_region: Ostack2-EXT
-  hosts: localhost
-  gather_facts: false
+    - name: Share 
postgres SSH key between hosts + ansible.builtin.include_role: + name: ssh + tasks_from: postgres.yml + + - name: Setup primary node + ansible.builtin.include_role: + name: ostack + tasks_from: create_primary.yml + when: inventory_hostname == primary + vars: + ostack_db_password: "{{ db_password }}" - roles: - - role: primarystandbysetup + - name: Setup standby node(s) + ansible.builtin.include_role: + name: ostack + tasks_from: create_standby.yml + when: inventory_hostname != primary vars: - primary_name: lard-a - primary_ip: '{{ hostvars[groups["servers"][0]].ansible_host }}' # the first one is a - standby_name: lard-b - standby_ip: '{{ hostvars[groups["servers"][1]].ansible_host }}' # the second one is b \ No newline at end of file + ostack_primary_ip: "{{ hostvars[primary].ansible_host }}" diff --git a/ansible/deploy.yml b/ansible/deploy.yml index e17750af..9cba0fff 100644 --- a/ansible/deploy.yml +++ b/ansible/deploy.yml @@ -1,7 +1,19 @@ --- -- name: Deploy binaries - # Deploy on both VMs, only the primary is "active" +- name: Build LARD binary + hosts: localhost + gather_facts: false + tasks: + - name: Build + ansible.builtin.command: + chdir: .. + cmd: cargo build --release --workspace --exclude lard_tests + register: output + changed_when: output.stderr_lines | length > 1 + +- name: Deploy LARD binary hosts: servers remote_user: ubuntu + gather_facts: false + roles: - role: deploy diff --git a/ansible/group_vars/servers.yaml b/ansible/group_vars/servers.yaml deleted file mode 100644 index abfaab5d..00000000 --- a/ansible/group_vars/servers.yaml +++ /dev/null @@ -1,15 +0,0 @@ -$ANSIBLE_VAULT;1.1;AES256 -39323433373866646663333962396637323937663436333763373663306264363337383838313531 -3237623337373630343264663232346366316332326564330a343062633165363564616663373366 -38303633346231626433393232313332373933626432613534646538326638623339323830613465 -3135643661323930370a656136326637373933353665316462373938396338383831353039323731 -61363032653830613438313564303432613531636436306662666336383838616132666234616538 -34313861306432626237383464623134386565643831396537343232646237323764346633373461 -30333265653634313436323735633733623032333039303461633931383337333664333636386532 -66383830323265303334323163313862393466383362646634653764373230613464393332363336 -63346438306666633835316333323464623261643861646636316637346363626162303662303039 -38616335356663343762356465653635623330383532656464633537333933613632336433653838 -36633130356262383464653935653864323233346162656639303861643533643563376464633530 -62343336663961363566383438393866353336366438343365363663623162313838396666343539 -39363766316532626463363533653561333933663130353632353934393534333965346637626636 -3138623135623031386437353434643736323166623661666263 diff --git a/ansible/group_vars/servers/main.yml b/ansible/group_vars/servers/main.yml new file mode 100644 index 00000000..673f564e --- /dev/null +++ b/ansible/group_vars/servers/main.yml @@ -0,0 +1,33 @@ +--- +ostack_cloud: lard +ostack_state: present +ostack_region: Ostack2-EXT +ostack2: true + +# IP that is always associated with the primary node +ostack_floating_ip: "{{ vault_ostack_floating_ip }}" + +# networks +ostack_network_name: "{{ vault_ostack_network_name }}" +ostack_network_cidr: "{{ vault_ostack_network_cidr }}" +ostack_network_dns: "{{ vault_ostack_network_dns }}" +ostack_network_security_groups: "{{ vault_ostack_network_security_groups }}" +ostack_ipalias_network_cidr: "{{ vault_ostack_ipalias_network_cidr }}" + +# vm_create +ostack_vm_flavor: "{{ vault_ostack_vm_flavor 
}}" +ostack_vm_image: "{{ vault_ostack_vm_image }}" +ostack_vm_security_groups: "{{ vault_ostack_vm_security_groups }}" +ostack_vm_volume_type: "{{ vault_ostack_vm_volume_type }}" +ostack_vm_volume_size: "{{ vault_ostack_vm_volume_size }}" + +# vm_format +ostack_mount_device: "{{ vault_ostack_mount_device }}" +ostack_mount_point: "/mnt/ssd-data" + +# ssh +ssh_user_key_list: "{{ vault_ssh_user_key_list }}" + +# bigip +bigip_password: "{{ vault_bigip_password }}" +bigip_load_balancer_ips: "{{ vault_bigip_load_balancer_ips }}" diff --git a/ansible/group_vars/servers/vault/bigip.yml b/ansible/group_vars/servers/vault/bigip.yml new file mode 100644 index 00000000..de91a7f1 --- /dev/null +++ b/ansible/group_vars/servers/vault/bigip.yml @@ -0,0 +1,11 @@ +$ANSIBLE_VAULT;1.1;AES256 +33316334666236623733366639316339643236633034333464333163313066363338303061383966 +6261393335643832643461303363386366666362333232390a356530346638656438336330366265 +33653361666233653632323937613730363563613261306139303131303962346632353163303934 +3532626333643866360a343539383861313963613937616162336632623733363034633236373235 +33356632303362316231333563636536663336326362336564313263303339333837333936383834 +37656339323963336534396530626533356536303565613431316265613661313465343866653561 +65626339313432346561353762336666383136363337353237633363313461653334623336376561 +35373234663334383164643862353432343665356362636633386532356433316537366133333630 +37393632363662613137353334643339646634616662333032613362336634333936303035366266 +3264366561346337346437353738663535313533623131323330 diff --git a/ansible/group_vars/servers/vault/networks.yml b/ansible/group_vars/servers/vault/networks.yml new file mode 100644 index 00000000..8eb73604 --- /dev/null +++ b/ansible/group_vars/servers/vault/networks.yml @@ -0,0 +1,102 @@ +$ANSIBLE_VAULT;1.1;AES256 +32366166643830646532373861376535653835613537373237313133383636333639613438643536 +6634313563366538363235666666646235633831353734620a343437623466663131646633646661 +38343432303865386138616330306234656437623966383533343930663437333732393736376635 +3164646665656333310a646231616666653034363432656438356135396339353266626666383838 +61333639386435306330323034336339623536326431373731366332643135346362663538353035 +36393264636232646431373966353131316439633562663439326264343631306561363564623432 +64323134383362616631353166653065363861626132313033323663333763393039336363386431 +66373162396133376335653963623234373764363038343165386362383963346137333430383936 +65383135303864666561396338393763653637393561653764343735626330373539323232323531 +37663162643838383836316161616532613062373437346338303431363963646332643632346265 +38393165643137343733303031333333336230656165646234306639663836326531316235666538 +61643066383666613434353966306337336335663830653836393539323563656438613935343035 +36623739306239343864363833356263336330303037393533643165633561323738326137366339 +39616330623738343430643561623032313038376639303065383032623739633662623232643931 +39373934623132663662633163646165666264376632346432353935666533323564323466633764 +31616535393630643138616433656139306463613866336231376630353135306431343764313364 +64346361633736393236393363366238306366666565653962393962633462613764333530356666 +34343831376437623431343865353666613337386532393937353933666234356162356461653564 +61396236373136303265633639633331313765616565653235656262363832336335653666346436 +63366638613334336235666663616437613537626436303735303139333534643632333438626565 
+61326462326134376664663439333561396332333036346330363935333631353562373839336233 +30303438623732366338393636656330306135613363383236383635396137386664353733303165 +31643132313633613363313566633463626462336464663161643331653735353063633136663338 +64333936333062633965333731636336323961313263363439383034633036666239626363393466 +37313861653031393938636234636561626661323161633563633035323664336431313936643839 +37363233653731383163366138613938333537373936623138363463643136363735373965653966 +65626139376139383663396663626630363537613238316439633931343631393265656130376634 +38643262333039353633343463373465656433376561336263343862613630666464653037313266 +63383739653437376162383831666235353533316663633534373633346264356139633332356630 +62643631646563663136396636306432363162666630303965316432303263633361613030613863 +34343163623162663339346163363536333563613931663664373132393761346637333737326439 +62633566636332636639353137393132623265336638336537346564373837666339316563303836 +37656164343230323664323735633937303264343135303936376362633463396261313236303534 +66663331333762363133663662333637653238356537376434636333666636386164656536366563 +64636437616531316366303330623635663233346632313239363538363966333230353666393762 +30316262373837653331633034363863323364626632366335663663393838643037376333643039 +65386434323862653939373162646537653061313362353466306535656631336633313435633732 +30636137613261666633323631613933633834616464373731646566626233343266343264626163 +64393435306637653563646437666365626339636130373862666364336231633536363938623830 +36393538323565636533663537633761353464373730653936313164626234303637303362663165 +66363935333364646662373164396163633334363137356162376134363932363936333062326436 +38333766383035616439396231313334373437653530396639303936393439653736613736386133 +62623032636666316465303937616131313662363734303530316233333862393231616662353431 +39373137366439343230373930396261663162366630393063303265313965333536623333313433 +38303563336632333133393039333739656337653132656563623932383263636264623466646165 +61386537336237653539313938646335313139306239366636653534343561373363623061323530 +65323638396665626433663932623364316465383734343235666132303531346632323731376530 +37356663623564646661653030393763326162666338336161663066646233363637653437656437 +66313063356535323266613666616565636337663264663738626235663337376435316535343466 +63616538663832386365303266386537653731663262323834333338383938643834623065653934 +32623131393138633362623163643861336265396563613639653336613833386437393139636461 +38316236373733633336343561653335346164373834653436393337366139343437363439306363 +61373837376237666336363732646564643539666233663037343239636562333136646239356637 +64323135346162393930373635326335343336346161356361336365666666613234313535343562 +66653337306139613633376630373836363730313866373637633834613565366664343064386435 +38323262396638346337323932363537323364353134303436393734333631646630633238306532 +63626365343264323737316634353365393331643264393339653266366532393039663338353735 +30326431343964646135333033303661373037356463353832313263323533373762363635656361 +65303533616330663732643730323961623066316665323239316237663462333232396463386536 +32393262333333326338623934633261383237376462303665343363346437366564633563643861 +36313836386438333435613561616463363063393164383963646163343162313563393238623634 +37366336376135666232323735363934376637653939633938643838333366363962373434316135 +39666663353438303238356333646136346564663966623563663963306635343837356462366164 
+62393739333836326265626138373238633032343938313232303732613063373762346136313533 +64373965306430323730346535383530663032386231653734633265653332373338306232346537 +31346531643335623037363730353437663035363636373831623663646664333963353130343164 +62663036393532613835666262373266376437623761336332373562393737306430383561383937 +30353936363830323231336235393264313434396163653631383434653763626162666238643635 +30613865346336386136313461366230666332383630646466333130616437356461363862643463 +65613866663932396432653863366462373032616461353966623763616566643732396137393137 +32333461656665303538643737613231626532393036363932333764653234666633363364663233 +36653935626366346236623561613233343464613138643266326536626561323261626330326536 +33623835656266343236623838343331643635633962323762633761333038313861323133323563 +62343732653265383262323832393037316365316234653064613862393161633238336637326563 +31373930323836666166306532663265616637326137333661333436663961633338313336393631 +61376539333839663538623631333962373264386338393230356638323363313131316530663937 +34346239356438393936303334366232313663633630656130633333313032346134333838323937 +38396634336532616639396465353635343836323863373561366138376131326630623562623631 +31616463633562373464626535626237633366626137663539383939633261313435623666643036 +37633339383933626361633932343234643165326363646566636338393635366439353465313462 +30346530653836363536336339313234303836393962316535633832623763306362366633363435 +63306230373234353432336163386534383033316331653962643438346165663033383565633030 +66313466393830616264643633346130633161356635656466373734393537323063353931386132 +61336333616137373539366261323162303766346534643332356163303861613633646535353534 +38383061633066656536613735346631626439353336316333653931303663626538383564313062 +38373263623064666161303365343962633734643965346336313363623761663362363861373662 +65376161613763663337343437353936343135353439396164353530336265326530383232346634 +65653235333934396662333962393165393936626363623136373363343430383561346633653437 +31366538656465353261373931306664316361646334393833356465313031663334363465363665 +33636563316365636437396137326262363263386162346161316135353332346430303165383066 +38343033396136663733653339643334316536356635306434343261383734633535316431326261 +38626238613331333138383739376465303738626430326635633431333539333636646535643739 +37363037623335323862303462313636396536393934376662396634636331663566336262326666 +36633137386138336532626633346535396130326236653565653939353064636566306139313432 +37363461616562363039653234653762643532666138376366373066363164633064643936336435 +36363032346332386463653162656362623863306561393263303663313164626363393337623731 +33653836393964396132373433366666373531383132396464353762353061666532353865663038 +34393362653838613738313163616561326235386331373335346230303935343661373438666162 +66653133643961303361366266666663373366396364623234383730303933636635623034313266 +39373566336364306337653930303938623938396334643239646666306630373463626661313736 +63323634626565333062373539396162343637363734663335396534363766303061 diff --git a/ansible/group_vars/servers/vault/ssh.yml b/ansible/group_vars/servers/vault/ssh.yml new file mode 100644 index 00000000..6c592ef2 --- /dev/null +++ b/ansible/group_vars/servers/vault/ssh.yml @@ -0,0 +1,41 @@ +$ANSIBLE_VAULT;1.1;AES256 +37386239323433303261343038393562393066333364643163393461616638303135626536336362 +3832663063656635386635666638343736663538343730320a633337363631623938306163363964 
+30386334393866636439313431653832623537623339666135333738613531313639393339346436 +3839333432316434640a666235636237613139646135306262383435343765333239336333383663 +65306632353964393866316636343262356636653538303832386236643735333566373237336362 +63326563623839613939646335616666626134303865356330343731643533363332616235343166 +61623262333062333661316563323066363738346233376361656237396338623962396331613836 +39336230353335303862373730343531336331623634363436613063316230663839366461393666 +64373238353861356332363335353336373132316633363139636235343134643330623465636466 +39366531393734653538343335666430613337343436656133633733343931303633376661383537 +31333865323533343763623133306265343061646638616435636364623533623666373437383136 +66613232373930366562383464653532383864383063313837623164353233323534613934383735 +33386361663362363632386566393832393266616337613639323132613430336235303639386565 +32333031643464646231393237643335356136336633313866613661366238363037613633343238 +39396532306563353435363239623834353536656265653661393737386464383234356363363634 +38636164613366366266346431363536656133346461356134386363343365363366353761343866 +34383536656337376338623562376131656131326338623365633939343331366331313264386233 +32343732316233326263393733666539346439323830336361323365343938326331633931313964 +30666533373538643961656335336365363231326530306335366338306436393234383830646534 +32383063623835386534613630393638666538393263383039303362343538613866313232303061 +63623930356632313466386535616565643735333132623264383335663163366338663232383963 +31333633613635656266366566326534316563613164626131336237316231663130613738313261 +61303535306438383862303830303339616636343839626166363130643864333435646262323931 +32613132646266313566646666356531373937666639346566656438313438316236363162633633 +63366538306639363435646539346131396233643663373962626137346430636639616238326661 +31376338356631626433306130643232353561623664656365353736613535613765366162396532 +34613362366636383238636665353932303639306339393066613036323537643362396139626538 +37343963306364376437616535323738643662326535316565333565653462353062666562306162 +64346134393332663431316362623931396238393631353137316462386266656236666231386230 +39373161366637353832343534646434363366356335343730663136633666363334633736386133 +65393638663566356232613164633238353030613331363838336130396530356263646238643734 +62383939633833343333306132346133303936643766613330623437326662346664386135356564 +33383561383764363764353935346662303465626664363533343532393864613366653534313335 +38386437363766386663613661396438386439353939386562346336643233643937663033623964 +63363333316131616465313736613162333831663436333661343461313936306565336661636666 +61383031316633343966333536316365306439643631336162633661626238626339356462316534 +35316535663431626430616539346563336134616530313961643339333965663332363063383837 +63393538623962616164383638346165373631346330653239653533656365346133323263623735 +65313932643737376666633538346233393535333236653861626238316462383533613338636564 +65373137383838663439 diff --git a/ansible/group_vars/servers/vault/vm_create.yml b/ansible/group_vars/servers/vault/vm_create.yml new file mode 100644 index 00000000..14ecd9be --- /dev/null +++ b/ansible/group_vars/servers/vault/vm_create.yml @@ -0,0 +1,18 @@ +$ANSIBLE_VAULT;1.1;AES256 +33623937623330646233623838396161343166333464333339663033323236333234336666373832 +3737643036643363366436616566333234613639383264340a336638613864353531613861326231 
+31323031643739343731346438666135326166303366303637313933353731653234323736613434 +6431383434663964380a373364353335643137663365633930646165336634303434356538616466 +36643931313433386266396436633833356561653237363833383564376136623132393365316434 +35336338346138313436326365633430373338356163666165356231663239326564396463373864 +62336235653838393537663635373963636234323333323532633963373061343361353633633632 +34306331373362366462346561333335326532306462306562616664366533366666636261663338 +61343733613564343963386239613162653163383734656539616162316663376332646663303463 +63616631353734373932353064383537323432326265316363636433666331653736303434353735 +61386530383866343237616461636665643361626331636433386361306337326438323032306663 +66383862343261646666386163666265643566643134333362643733306431303336316332643731 +38373961363865373939383662343738626264376334613164396135633632613430313639303333 +63613564646431303930643364383432343835396364366563396136313761633561633231333662 +36363464663637353036636266616331383163333833316334636463363963383762656365643534 +63383130323132323931316538346462626465323235343962346164643862656566613734346662 +3336 diff --git a/ansible/group_vars/servers/vault/vm_format.yml b/ansible/group_vars/servers/vault/vm_format.yml new file mode 100644 index 00000000..fa50a5ce --- /dev/null +++ b/ansible/group_vars/servers/vault/vm_format.yml @@ -0,0 +1,9 @@ +$ANSIBLE_VAULT;1.1;AES256 +65333266616266386464393963343862613461313434363935393835653932376564343366353837 +3230626238386131636166383661353035353733353261650a343239663664316262396532613930 +36396436316361373430343966316538316135663938643634366137363039656462653232643830 +3733376661623239320a373066326331373863363839376332376665643164323266363465663235 +62623630393862633463623836326335323562626137303136383266313132333664343166366365 +62643763303637663631663234643863653739363963653231653161636566303734303263393961 +62336464353661376264653565643337336532323630666365636239376534356661373238623164 +66343430303065613034 diff --git a/ansible/host_vars/lard-a.yml b/ansible/host_vars/lard-a.yml new file mode 100644 index 00000000..4c27d206 --- /dev/null +++ b/ansible/host_vars/lard-a.yml @@ -0,0 +1,9 @@ +--- +ansible_host: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 31643933383935663537663361333834376232386566326531313161336238633262386432663939 + 6231613431336130396635393563373666323735643932360a653261373935373365333433313938 + 38663032303035646139306532666131383132333230633464643632333561323565353538323530 + 6563633966623536620a636634316463393763326237656365623636633264663765623439613430 + 3865 +ostack_availability_zone: ext-a diff --git a/ansible/host_vars/lard-b.yaml b/ansible/host_vars/lard-b.yaml new file mode 100644 index 00000000..d550fca3 --- /dev/null +++ b/ansible/host_vars/lard-b.yaml @@ -0,0 +1,9 @@ +--- +ansible_host: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 64376433646538343437656539363938333262306538323264623139393263666239376438323830 + 6162646538323466656135393235613865366530653465340a343434346233303030376463616561 + 36323436393364363364333066316463363937343838636134346134656636393230356639613462 + 3535383662326334630a316630366265313038313563323664376464333566303531343239636431 + 6536 +ostack_availability_zone: ext-b diff --git a/ansible/inventory.yml b/ansible/inventory.yml index a0a62a07..a57d71e6 100644 --- a/ansible/inventory.yml +++ b/ansible/inventory.yml @@ -1,6 +1,5 @@ +--- servers: hosts: lard-a: - ansible_host: 157.249.*.* lard-b: - ansible_host: 157.249.*.* \ No newline at end of file diff --git 
a/ansible/migrate.yml b/ansible/migrate.yml index cf6019ca..659a8002 100644 --- a/ansible/migrate.yml +++ b/ansible/migrate.yml @@ -1,10 +1,12 @@ --- +# TODO: update this when we change file names in db directory - name: Perform DB migration hosts: servers remote_user: ubuntu gather_facts: false vars: - primary: lard-a + # TODO: is there a better way to get this fact automatically? + primary: lard-a # or provide via cmd tasks: - name: Copy the db folder to the remote @@ -14,17 +16,13 @@ mode: "0755" become: true - - name: Migrate primary - when: inventory_hostname == primary + - name: Update schema in lard + community.postgresql.postgresql_script: + db: lard + path: "/etc/postgresql/16/db/{{ item }}" + loop: + - public.sql + - partitions_generated.sql become: true become_user: postgres - block: - - name: Update public schema in lard - community.postgresql.postgresql_script: - db: lard - path: /etc/postgresql/16/db/public.sql - - - name: Create partition tables in lard - community.postgresql.postgresql_script: - db: lard - path: /etc/postgresql/16/db/partitions_generated.sql + when: inventory_hostname == primary diff --git a/ansible/openstack.yaml b/ansible/openstack.yaml new file mode 100644 index 00000000..65064ab8 --- /dev/null +++ b/ansible/openstack.yaml @@ -0,0 +1,13 @@ +--- +# TODO: this file can be used instead of inventory.yml +# It queries all the instances, key pairs, and networks info directly from openstack +plugin: openstack.cloud.openstack + +all_projects: false +expand_hostvars: true +fail_on_errors: true +groups: + servers: inventory_hostname.startswith('lard') +only_clouds: + - "lard" +strict: true diff --git a/ansible/provision.yml b/ansible/provision.yml index 11bd242b..62625932 100644 --- a/ansible/provision.yml +++ b/ansible/provision.yml @@ -1,20 +1,26 @@ -- name: setup networks and 2 vms - vars: - ostack_cloud: lard - ipalias_network_name: ipalias - ostack_region: Ostack2-EXT +--- +- name: Provision 1 hosts: localhost gather_facts: false + tags: + - network + tasks: + - name: Setup networks + ansible.builtin.include_role: + name: ostack + tasks_from: networks.yml - roles: - - role: networks - - role: vm # in A - vars: - name_stuff: lard-a - availability_zone: ext-a - vm_ip: '{{ hostvars[groups["servers"][0]].ansible_host }}' - - role: vm # in B +- name: Provision 2 + hosts: servers + gather_facts: false + vars: + key_name: # provide via cmd + tasks: + - name: Create VMs + ansible.builtin.include_role: + name: ostack + tasks_from: vm_create.yml + apply: + delegate_to: localhost vars: - name_stuff: lard-b - availability_zone: ext-b - vm_ip: '{{ hostvars[groups["servers"][1]].ansible_host }}' + ostack_vm_key_name: "{{ key_name }}" diff --git a/ansible/readme.md b/ansible/readme.md deleted file mode 100644 index f2f0a5f1..00000000 --- a/ansible/readme.md +++ /dev/null @@ -1,182 +0,0 @@ -## README for LARD setup on openstack(2) - -#### Useful ansible commands: -``` -ansible-inventory -i inventory.yml --graph - -ansible servers -m ping -u ubuntu -i inventory.yml -``` - -#### Dependencies to install -``` -pip3 install wheel # so you can allow downloading of binary python packages - -pip install -r requirements.txt - -ansible-galaxy collection install openstack.cloud - -ansible-galaxy collection install community.postgresql - -ansible-galaxy collection install community.general - -ansible-galaxy collection install ansible.posix - -ansible-galaxy collection install ansible.utils - -``` - -### Get access to OpenStack -You need to create application credentials in the project 
you are going to create the instances in, so that the ansible scripts can connect to the right ostack_cloud which in our case needs to be called lard. - -The file should exist here: -~/.config/openstack/clouds.yml - -If have MET access see what is written at the start of the readme here: -https://gitlab.met.no/it/infra/ostack-ansible21x-examples - -Or in the authentication section here: -https://gitlab.met.no/it/infra/ostack-doc/-/blob/master/ansible-os.md?ref_type=heads - -### Add your public key to the Ostack GUI -Go to "Compute" then "Key Pairs" and import your public key for use in the provisioning step. - -### Provision! -The IPs in inventory.yml should correspond to floating ips you have requested in the network section of the open stack GUI. If you need to delete the old VMs (compute -> instances) and Volumes (volumes -> volumes) you can do so in the ostack GUI. *For some reason when deleting things to build up again one of the IPs did not get disassociated properly, and I had to do this manually (network -> floating IPs).* - -The vars for the network and addssh tasks are encrypted with ansible-vault (ansible-vault decrypt roles/networks/vars/main.yml, ansible-vault decrypt roles/addshhkeys/vars/main.yml, ansible-vault decrypt roles/vm_format/vars/main.yml). -But if this has been setup before in the ostack project, these have likely already been run and therefore already exits so you could comment out this role from provision.yml. -Passwords are in ci_cd variables https://gitlab.met.no/met/obsklim/bakkeobservasjoner/lagring-og-distribusjon/db-products/poda/-/settings/ci_cd - -``` -ansible-playbook -i inventory.yml -e ostack_key_name=xxx provision.yml -``` - -After provisioning the next steps may need to ssh into the hosts, and thus you need to add them to your known hosts. Ansible appears to be crap at this, so its best to do it before running the next step by going: -`ssh ubuntu@157.249.*.*` -For all the VMs. -If cleaning up from tearing down a previous set of VMs you may also need to remove them first: -`ssh-keygen -f "/home/louiseo/.ssh/known_hosts" -R "157.249.*.*"` - -### Configure! -The third IP being passed in here is the one that gets associated with the primary, and moved when doing a switchover. -*NOTE:* The floating IP association times out, but this is ignored as it is a known bug. 
- -``` -ansible-playbook -i inventory.yml -e primary_floating_ip='157.249.*.*' -e db_password=xxx -e repmgr_password=xxx configure.yml -``` - -The parts to do with the floating ip that belongs to the primary (ipalias) are based on: -https://gitlab.met.no/ansible-roles/ipalias/-/tree/master?ref_type=heads - -### Connect to database -``` -PGPASSWORD=xxx psql -h 157.249.*.* -p 5432 -U lard_user -d lard -``` - -### Checking the cluster - -Become postgres user: sudo su postgres -``` -postgres@lard-b:/home/ubuntu$ repmgr -f /etc/repmgr.conf node check -Node "lard-b": - Server role: OK (node is primary) - Replication lag: OK (N/A - node is primary) - WAL archiving: OK (0 pending archive ready files) - Upstream connection: OK (N/A - node is primary) - Downstream servers: OK (1 of 1 downstream nodes attached) - Replication slots: OK (node has no physical replication slots) - Missing physical replication slots: OK (node has no missing physical replication slots) - Configured data directory: OK (configured "data_directory" is "/mnt/ssd-b/16/main") -``` -``` -postgres@lard-a:/home/ubuntu$ repmgr -f /etc/repmgr.conf node check -Node "lard-a": - Server role: OK (node is standby) - Replication lag: OK (0 seconds) - WAL archiving: OK (0 pending archive ready files) - Upstream connection: OK (node "lard-a" (ID: 1) is attached to expected upstream node "lard-b" (ID: 2)) - Downstream servers: OK (this node has no downstream nodes) - Replication slots: OK (node has no physical replication slots) - Missing physical replication slots: OK (node has no missing physical replication slots) - Configured data directory: OK (configured "data_directory" is "/mnt/ssd-b/16/main") -``` - -While a few of the configurations are found in /etc/postgresql/16/main/postgresql.conf (particularly in the ansible block at the end), many of them -can only be seen in /mnt/ssd-b/16/main/postgresql.auto.conf (need sudo to see contents). - -### Perform switchover -This should only be used when both VMs are up and running, like in the case of planned maintenance on one datarom. -Then we would use this script to switch the primary to the datarom that will stay available ahead of time. - -*Make sure you are aware which one is the master, and put the names the right way around in this call.* - -``` -ansible-playbook -i inventory.yml -e name_primary=lard-a -e name_standby=lard-b -e primary_floating_ip='157.249.*.*' switchover.yml -``` - -This should also be possible to do manually, but might need to follow what is done in the ansible script (aka restarting postgres on both VMs), then performing the switchover: -`repmgr standby switchover -f /etc/repmgr.conf --siblings-follow` (need to be postgres user) - -### Promote standby (assuming the primary is down) -Make sure you are know which one you want to promote! -This is used in the case where the primary has gone down (e.g. unplanned downtime of a datarom). 
- -**Manually:** -SSH into the standby -`repmgr -f /etc/repmgr.conf cluster show` -Check the status (The primary should say its 'uncreachable') -`repmgr -f /etc/repmgr.conf standby promote` -Then promote the primary (while ssh-ed into that VM) -You can the check the status again (and now the old primary will say failed) - -Then move the ip in the ostack gui (see in network -> floating ips, dissasociate it then associated it with the ipalias port on the other VM) - -#### Later, when the old primary comes back up -The cluster will be in a slightly confused state, because this VM still thinks its a primary (although repmgr tells it the other one is running as a primary as well). If the setup is running as asynchronous we could lose data that wasn't copied over before the crash, if running synchronously then there should be no data loss. - -SSH into the new primary -`repmgr -f /etc/repmgr.conf cluster show` -says: -- node "lard-a" (ID: 1) is running but the repmgr node record is inactive - -SSH into the old primary -`repmgr -f /etc/repmgr.conf cluster show` -says: -- node "lard-b" (ID: 2) is registered as standby but running as primary - - -With a **playbook** (rejoin_ip is the ip of the node that has been down and should now be a standby not a primary): -``` -ansible-playbook -i inventory.yml -e rejoin_ip=157.249.*.* -e primary_ip=157.249.*.* rejoin.yml -``` - -Or **manually**: -Make sure the pg process is stopped (see fast stop command) if it isn't already - -Become postgres user: -`sudo su postgres` -Test the rejoin (host is the IP of the new / current primary, aka the other VM) -`repmgr node rejoin -f /etc/repmgr.conf -d 'host=157.249.*.* user=repmgr dbname=repmgr connect_timeout=2' --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose --dry-run` -Perform a rejoin -`repmgr node rejoin -f /etc/repmgr.conf -d 'host=157.249.*.* user=repmgr dbname=repmgr connect_timeout=2' --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose` - -### for testing: -Take out one of the replicas (or can shut off instance in the openstack GUI): -`sudo pg_ctlcluster 16 main -m fast stop` -For bringing it back up (or turn it back on): -`sudo pg_ctlcluster 16 main start` - -### for load balancing at MET -This role creates a user and basic db for the loadbalancer to test the health of the db. Part of the role is allowed to fail on the secondary ("cannot execute ___ in a read-only transaction"), as it should pass on the primary and be replicated over. The hba conf change needs to be run on both. 
-The vars are encrypted, so run: ansible-vault decrypt roles/bigip/vars/main.yml
-
-Then run the bigip role on the VMs:
-
-```
-ansible-playbook -i inventory.yml -e bigip_password=xxx bigip.yml
-```
-
-### Links:
-
-https://www.enterprisedb.com/postgres-tutorials/postgresql-replication-and-automatic-failover-tutorial#replication
\ No newline at end of file
diff --git a/ansible/rejoin.yml b/ansible/rejoin.yml
index 701d1cb9..0ead329d 100644
--- a/ansible/rejoin.yml
+++ b/ansible/rejoin.yml
@@ -1,9 +1,46 @@
+---
 - name: Rejoin
-  hosts: servers
+  hosts: localhost
   remote_user: ubuntu
-  # loops over both servers
-  roles:
-    - role: rejoin
+  vars:
+    # Old primary host that went down
+    primary: # provide via cmd
+    # Old standby that will be promoted to primary
+    standby: # provide via cmd
+
+  tasks:
+    - name: Promote standby
+      ansible.builtin.include_role:
+        name: rejoin
+        tasks_from: promote.yml
+        apply:
+          # delegate to the host named by the `standby` variable, not a literal host called "standby"
+          delegate_to: "{{ standby }}"
+      tags: "promote"
+
+    - name: Perform IP switchover
+      ansible.builtin.include_role:
+        name: ostack
+        tasks_from: move_floating_ip.yml
       vars:
-        vm_ip: '{{ ansible_host }}' # the current vm's ip
-      when: ansible_host == rejoin_ip # only run on the one that needs to be rejoined
\ No newline at end of file
+        ostack_primary: "{{ primary }}"
+        ostack_standby: "{{ standby }}"
+      tags: "promote"
+
+    # TODO: should this happen before or after rejoining the old primary
+    - name: Restart LARD ingestion service
+      ansible.builtin.systemd_service:
+        name: lard_ingestion
+        state: restarted
+      become: true
+      delegate_to: "{{ standby }}"
+      tags: "promote"
+
+    - name: Rejoin old primary
+      ansible.builtin.include_role:
+        name: rejoin
+        tasks_from: rejoin.yml
+        apply:
+          delegate_to: "{{ primary }}"
+      vars:
+        # TODO: this should be done via DNS once we have those set up
+        rejoin_primary_ip: "{{ hostvars[standby].ansible_host }}"
diff --git a/ansible/requirements.txt b/ansible/requirements.txt
index 29772cb6..32a84a67 100644
--- a/ansible/requirements.txt
+++ b/ansible/requirements.txt
@@ -1,8 +1,9 @@
-ansible-core~=2.15.0
-ansible-lint~=6.17.0
+ansible-core~=2.17.4
+ansible-lint~=24.9.2
 powerline-status
 powerline-gitstatus
 netaddr~=0.7.19
 openstacksdk~=1.3.0
 python-openstackclient~=6.2.0
-psycopg2-binary
\ No newline at end of file
+psycopg2-binary
+wheel
diff --git a/ansible/requirements.yml b/ansible/requirements.yml
new file mode 100644
index 00000000..45323db2
--- /dev/null
+++ b/ansible/requirements.yml
@@ -0,0 +1,7 @@
+---
+collections:
+  - ansible.posix
+  - ansible.utils
+  - community.general
+  - community.postgresql
+  - openstack.cloud
diff --git a/ansible/roles/addsshkeys/tasks/main.yml b/ansible/roles/addsshkeys/tasks/main.yml
deleted file mode 100644
index 5881bf29..00000000
--- a/ansible/roles/addsshkeys/tasks/main.yml
+++ /dev/null
@@ -1,9 +0,0 @@
----
-- name: Add users keys to authorized_keys
-  ansible.builtin.authorized_key:
-    user: ubuntu # this is the username on the remotehost whose authorized keys are being modified
-    state: present
-    key: "{{ item.key }}"
-  loop: '{{ authorized_keys_list }}'
-  loop_control:
-    label: "adding {{ item.name }} key to authorized_keys"
\ No newline at end of file
diff --git a/ansible/roles/addsshkeys/vars/main.yml b/ansible/roles/addsshkeys/vars/main.yml
deleted file mode 100644
index 8ed8d280..00000000
--- a/ansible/roles/addsshkeys/vars/main.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-$ANSIBLE_VAULT;1.1;AES256
-38656433653332383463656338306632666166636637363236383138306164393837633762363633
-3662633839613338366235343733633534613235323435380a346464663937373266636362376265 -36356165393035623866386563626635373964333661626166346462623434346362616562656639 -3238313131316232390a343562336665303364636136343138346134653362663164373234363732 -65666339306464346466366166666133613532386331643566343534663764376166663734356138 -32633361363765306233653639316336363361316362633430333539306565663033666261613836 -63646364383763613763323836363063393330653033333539326666613533623336666633323665 -35383464363762346136393362306136613164646234643861653965363139363234363362656163 -31333162336434356434653439343436343863316237323065333764303766663732333838646337 -66356432366161633431316334653532633666613431313738353930623764353333393838393230 -34396666643232653036626635613732633265353532646563623733303439633861373233623535 -37366531303131653234643338643232623034663763373764383563393732376138306130653862 -64306637333230333736633361356338383561633166623735376432336265353565636635646661 -37336161373236613839323335643163386162663837353236656539353635356231666265323064 -63646139383961396330653865353362643934643331646263313761353866336538353865323334 -39633438666564363331336162376664303036663638393339353637326132376466316639653238 -63313361333165633033373934336236613538336437343033383437626664316562303533663337 -35393565333336323537656436373339343330353536313262626535333434373939323561353061 -32363566393930353734376163633063386439376365613039383334616462316636313339646462 -65356266353537373534333464333631383264633636366234313337656130313761343539363361 -62313531316339353037386330363438346331323539616531326265396330623866393130306265 -31353137383931333737353664626639303033653137326264393762306532386466343239653561 -37396632633230383138666366353133613364666666646136363038323762623930363034376233 -38636365633333323861376666346534336564373039313166393062373939383438656234396135 -64313964383539666137366334303937366437363430393437333531326132326364396362613737 -32323634396263363330323535396662663834336337373563383235393132303262643961396236 -37613935613330356635653664343264396230336437663138643861373930313364306330376363 -31363137656364323638373761613735356637353731386663303766663830396664376230333532 -31356134353864643762303639343062636661313263353938393865306665313866663333636631 -64623132663632333534386466393933373764363534376465353435646438643665666561383135 -31636337376234323263363737633835343339313335343865353363653165636637343930343431 -35333938323335613162313730373065643762376632626664323535623064363439633161623335 -33313834396164313131346536646133623630393831623363306638626133633136356163653432 -38633239336264316539633566633935663532333236613664663361316232393961633464333936 -30623539633535613137363036396665663536646135323731356461663030616666303263366133 -33323065323138633032316434353333393764323435626633326538616432653330353961353730 -65343636656231616165336363653535653437623761623863343833643764383930353434386339 -32653130633839643939383337363031353464313466393539326437633962356138366534303834 -64643464353765663536626233333730613038383662643366303764363462646265346163333237 -34353533346138643435326130636464626465373466633562656237353965623530656335373665 -30323238623033313766386338313537396663623963633334386437616461623130326638363737 -37366637363233313635396433623932303363373032623664356164306232623036396437663336 -306530323762306438626534346563346135 diff --git a/ansible/roles/bigip/defaults/main.yml b/ansible/roles/bigip/defaults/main.yml new file mode 100644 index 00000000..660c97c9 --- /dev/null +++ 
b/ansible/roles/bigip/defaults/main.yml
@@ -0,0 +1,4 @@
+---
+bigip_password:
+bigip_load_balancer_ips:
+  - address:
diff --git a/ansible/roles/bigip/vars/bigip.sql b/ansible/roles/bigip/files/bigip.sql
similarity index 100%
rename from ansible/roles/bigip/vars/bigip.sql
rename to ansible/roles/bigip/files/bigip.sql
diff --git a/ansible/roles/bigip/tasks/main.yml b/ansible/roles/bigip/tasks/main.yml
index a0813d75..3477e064 100644
--- a/ansible/roles/bigip/tasks/main.yml
+++ b/ansible/roles/bigip/tasks/main.yml
@@ -1,41 +1,69 @@
----
+---
+- name: Create bigip directory if it does not exist
+  ansible.builtin.file:
+    path: /etc/postgresql/16/db/bigip
+    state: directory
+    mode: '0755'
+
+- name: Copy the bigip schema to the remote
+  ansible.builtin.copy:
+    src: bigip.sql
+    dest: /etc/postgresql/16/db/bigip/bigip.sql
+    mode: '0755'
+
+- name: Run repmgr node check
+  ansible.builtin.command: repmgr -f /etc/repmgr.conf node check
+  become: true
+  become_user: postgres
+  register: node_check
+  changed_when: false
+
+# TODO: can do this automatically whenever we need to differentiate
+# between `primary` and `standby` instead of passing them in manually
+- name: Extract server role
+  ansible.builtin.set_fact:
+    server_role: "{{ node_check.stdout | regex_search('node is ([a-z]+)', '\\1') | first }}"
+
 - name: Create bigip user and basic database
-  block:
-    # create user
-    - name: Create bigip user
-      community.postgresql.postgresql_user:
-        name: bigip
-        #db: bigip
-        password: '{{ bigip_password }}'
-      become: true
-      become_user: postgres
-    # create database
-    - name: Create a bigip database, with owner bigip
-      community.postgresql.postgresql_db:
-        name: bigip
-        owner: bigip
-      become: true
-      become_user: postgres
-    # create the schema
-    - name: Create the schema in bigip
-      community.postgresql.postgresql_script:
-        db: bigip
-        path: /etc/postgresql/16/db/bigip/bigip.sql
-      become: true
-      become_user: postgres
-    - name: Grant bigip priveleges on bigip database for table test
-      community.postgresql.postgresql_privs:
-        database: bigip
-        objs: test # only have rights on table test
-        privs: SELECT
-        role: bigip
-        grant_option: true
-      become: true
-      become_user: postgres
-  # this is allowed to fail on the secondary, should work on the primary and be replicated over
-  ignore_errors: true
-
-# loop over the two ips of the load balancers, to add to hba conf
+  # It would fail on the standby
+  when: server_role == "primary"
+  block:
+    # TODO: maybe user needs to be created on both, maybe not
+    - name: Create bigip user
+      community.postgresql.postgresql_user:
+        name: bigip
+        # db: bigip
+        password: "{{ bigip_password }}"
+      become: true
+      become_user: postgres
+
+    - name: Create a bigip database, with owner bigip
+      community.postgresql.postgresql_db:
+        name: bigip
+        owner: bigip
+      become: true
+      become_user: postgres
+
+    - name: Create the schema in bigip
+      community.postgresql.postgresql_script:
+        db: bigip
+        path: /etc/postgresql/16/db/bigip/bigip.sql
+      become: true
+      become_user: postgres
+
+    - name: Grant bigip privileges on bigip database for table test
+      community.postgresql.postgresql_privs:
+        database: bigip
+        objs: test # only have rights on table test
+        privs: SELECT
+        role: bigip
+        grant_option: true
+      become: true
+      become_user: postgres
+
+# loop over the two ips of the load balancers, to add to hba conf
+# TODO: should postgres be restarted after changing the pg_hba.conf file?
+# TODO: does repmgr synchronize this file? 
- name: Change hba conf to allow connections from bigip (load balancer) without an encrypted password community.postgresql.postgresql_pg_hba: dest: /etc/postgresql/16/main/pg_hba.conf @@ -45,6 +73,6 @@ databases: bigip users: bigip become: true - loop: '{{ load_balancer_ips }}' + loop: "{{ bigip_load_balancer_ips }}" loop_control: - label: "adding {{ item.address }} to hba conf" \ No newline at end of file + label: "adding {{ item.address }} to hba conf" diff --git a/ansible/roles/bigip/vars/main.yml b/ansible/roles/bigip/vars/main.yml deleted file mode 100644 index 8faa3fee..00000000 --- a/ansible/roles/bigip/vars/main.yml +++ /dev/null @@ -1,11 +0,0 @@ -$ANSIBLE_VAULT;1.1;AES256 -36376631623862333537653933356438333031333865343038316533323235363363646164643333 -6265643437643762363432323136363737366564393937640a633931626463303062353439333966 -61303262666137663839316334653763373036373064373234316563393232636438323761303833 -3237663365633164370a373334623862383963633461616466393339303333386632623330303737 -61346539613732336432616539616335383531386165333435653263323033393939343133393333 -37616665636662343062623235626163356635303531356164383264623162383365656632613137 -32383165626364356263616531383262373666653635343461366665323635353233623561323732 -31306262353866306539613638356161633533393261333936363562626361303038646139353737 -61356132313066623738373064646138343730633364653633366234386635613735373566643631 -3338383732343363326131643438626634383731373464393332 diff --git a/ansible/roles/deploy/defaults/main.yml b/ansible/roles/deploy/defaults/main.yml deleted file mode 100644 index 6d4b19a4..00000000 --- a/ansible/roles/deploy/defaults/main.yml +++ /dev/null @@ -1,15 +0,0 @@ ---- -deploy_envars: - - LARD_CONN_STRING: - - STINFO_CONN_STRING: - -deploy_files: - - src: lard_ingestion.service - dest: /etc/systemd/system - mode: "0664" - - src: "{{ playbook_dir }}/../target/release/lard_ingestion" - dest: /usr/local/bin - mode: "0755" - - src: "{{ playbook_dir }}/../ingestion/resources" - dest: /usr/local/bin - mode: "0755" diff --git a/ansible/roles/deploy/files/lard_ingestion.service b/ansible/roles/deploy/files/lard_ingestion.service index 7048c363..1540766c 100644 --- a/ansible/roles/deploy/files/lard_ingestion.service +++ b/ansible/roles/deploy/files/lard_ingestion.service @@ -5,6 +5,7 @@ Description=lard ingestion service User=lard Group=lard WorkingDirectory=/usr/local/bin +EnvironmentFile=/etc/systemd/lard_ingestion.var ExecStart=/usr/local/bin/lard_ingestion lard Restart=on-failure diff --git a/ansible/roles/deploy/files/var_file b/ansible/roles/deploy/files/var_file new file mode 100644 index 00000000..f5079531 --- /dev/null +++ b/ansible/roles/deploy/files/var_file @@ -0,0 +1,14 @@ +$ANSIBLE_VAULT;1.1;AES256 +37623036386130373332373637663933303031613765363336333633306238333135653335643530 +3462363932316336646165613739306361653639386565650a313439343636306561356466623962 +65313032656161333762333165663663623065646564303862653163656432326336643664343564 +6238363039306130610a316534363435353730303066663932363038633665396633343866313762 +37316132353730623336643735653162636335323936353935363939383234373763386164343663 +65666338376232363132626466303261646666643530626134653563616536336532656338313961 +38386338366233643066333362646566323138343936323363363764663438626530376262666438 +38333136373532393034353735366131376533386336393162613334353362653037613363326439 +34353665323530666566393662653130663239623031366432626263396339373034383137663865 
+62633237353666663632366639396564393037633664353863623238383439613565636634613234 +35303235613931393039656465633234336638333137393264383136333836386537633532366462 +39316163633035646436373663633436383737333563383339353835666139353536333033373536 +3135 diff --git a/ansible/roles/deploy/tasks/main.yml b/ansible/roles/deploy/tasks/main.yml index 222dfa5a..d60cfa53 100644 --- a/ansible/roles/deploy/tasks/main.yml +++ b/ansible/roles/deploy/tasks/main.yml @@ -3,6 +3,7 @@ ansible.builtin.group: name: lard state: present + become: true - name: Create lard user ansible.builtin.user: @@ -12,34 +13,36 @@ append: true state: present create_home: false + become: true -# TODO: should we deploy in non root user? - name: Copy files to server ansible.builtin.copy: src: "{{ item.src }}" dest: "{{ item.dest }}" mode: "{{ item.mode }}" + # TODO: should these belong to 'lard'? owner: root group: root become: true - loop: "{{ deploy_files }}" - -- name: Import environment variables # noqa: command-instead-of-module - ansible.builtin.command: systemctl import-environment LARD_CONN_STRING STINFO_CONN_STRING - # TODO: ansible docs say that 'environment:' is "not a recommended way to pass in confidential data." - environment: "{{ deploy_envars }}" - become: true - changed_when: false + loop: + # TODO: these permissions are probably wrong? + - src: lard_ingestion.service + dest: /etc/systemd/system + mode: "0664" + - src: var_file + dest: /etc/systemd/lard_ingestion.var + mode: "0600" + - src: "{{ playbook_dir }}/../target/release/lard_ingestion" + dest: /usr/local/bin + mode: "0755" + - src: "{{ playbook_dir }}/../ingestion/resources" + dest: /usr/local/bin + mode: "0755" - name: Start LARD ingestion service - ansible.builtin.systemd: + ansible.builtin.systemd_service: daemon_reload: true name: lard_ingestion state: restarted enabled: true become: true - -- name: Unset environment variables # noqa: command-instead-of-module - ansible.builtin.command: systemctl unset-environment LARD_CONN_STRING STINFO_CONN_STRING - become: true - changed_when: false diff --git a/ansible/roles/movefloatingip/tasks/main.yml b/ansible/roles/movefloatingip/tasks/main.yml deleted file mode 100644 index a627098a..00000000 --- a/ansible/roles/movefloatingip/tasks/main.yml +++ /dev/null @@ -1,3 +0,0 @@ -# roles/movefloatingip/tasks/main.yml -- name: Movefloatingip - import_tasks: movefloatingip.yml \ No newline at end of file diff --git a/ansible/roles/movefloatingip/tasks/movefloatingip.yml b/ansible/roles/movefloatingip/tasks/movefloatingip.yml deleted file mode 100644 index 26ab05ce..00000000 --- a/ansible/roles/movefloatingip/tasks/movefloatingip.yml +++ /dev/null @@ -1,59 +0,0 @@ ---- -# Switch over the primary's particular floating ip -# this makes sense to do after successfully switching over, -# however it means that the stuff writing to the primary needs to be -# robust enough to handle getting told the db is in a read only state for a short period. 
-- name: Move primary floating ip - block: - # remove from old primary - - name: Detach floating ip address that we keep connected to the primary - openstack.cloud.floating_ip: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - server: '{{ name_primary }}' - state: absent - network: public - floating_ip_address: '{{ primary_floating_ip }}' - - - name: Gather information about new primary server - openstack.cloud.server_info: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - name: '{{ name_standby }}' - register: new_primary_server - - - name: Print out the ipalias port information for the server - ansible.builtin.debug: - msg: "Server {{ new_primary_server.servers[0].addresses.ipalias }}" - - # add to what is now primary (used to be standby) - - name: Attach floating ip address that we keep connected to the primary - openstack.cloud.floating_ip: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - server: '{{ new_primary_server.servers[0].id }}' - state: present - reuse: true - network: public - fixed_address: '{{ new_primary_server.servers[0].addresses.ipalias[0].addr }}' - floating_ip_address: '{{ primary_floating_ip }}' - wait: true - timeout: 60 - when: new_primary_server.servers[0].addresses.ipalias | length <=1 - # unfortunately it seems that attaching the floating ip results in a timeout - # even though it actually succeeds - ignore_errors: true - -- name: Check floating ip is attached - openstack.cloud.floating_ip_info: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - floating_ip_address: '{{ primary_floating_ip }}' - register: fip - -# this will not run if the ip is not now on the right vm -- name: Print out the floating ip information to confirm its ok - ansible.builtin.debug: - msg: "Floating ip {{ fip }}" - become: true - when: fip.floating_ips[0].port_details.device_id == new_primary_server.servers[0].id \ No newline at end of file diff --git a/ansible/roles/networks/tasks/create-ipalias-network.yml b/ansible/roles/networks/tasks/create-ipalias-network.yml deleted file mode 100644 index 7b8ee6dc..00000000 --- a/ansible/roles/networks/tasks/create-ipalias-network.yml +++ /dev/null @@ -1,42 +0,0 @@ ---- - - name: Create ipalias network - openstack.cloud.network: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - name: '{{ ipalias_network_name }}' - external: false - state: present - run_once: true - - - name: Create ipalias network subnet - openstack.cloud.subnet: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - network_name: '{{ ipalias_network_name }}' - name: '{{ ipalias_network_name }}-subnet' - cidr: 192.168.20.0/24 - state: present - dns_nameservers: '{{ met_dns[ostack_region] }}' - run_once: true - - - name: Connect ipalias network to public network - openstack.cloud.router: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - state: present - name: ipalias-router - network: public - interfaces: - - '{{ ipalias_network_name }}-subnet' - run_once: true - - - name: Remove default gateway for subnet - openstack.cloud.subnet: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - network_name: '{{ ipalias_network_name }}' - name: '{{ ipalias_network_name }}-subnet' - cidr: '{{ ipalias_ostack_network_cidr }}' - no_gateway_ip: true - state: present - run_once: true diff --git a/ansible/roles/networks/tasks/create-project-network.yml b/ansible/roles/networks/tasks/create-project-network.yml deleted file mode 100644 index 1eff31cc..00000000 
--- a/ansible/roles/networks/tasks/create-project-network.yml +++ /dev/null @@ -1,28 +0,0 @@ ---- - - name: Create private network - openstack.cloud.network: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - state: present - name: '{{ ostack_network_name }}' - external: false - - - name: Create private network subnet - openstack.cloud.subnet: - state: present - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - network_name: '{{ ostack_network_name }}' - name: '{{ ostack_network_name }}-subnet' - cidr: '{{ ostack_network_cidr }}' - dns_nameservers: '{{ met_dns[ostack_region] }}' - - - name: Connect private network to public network - openstack.cloud.router: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - state: present - name: public-router - network: public - interfaces: - - '{{ ostack_network_name }}-subnet' diff --git a/ansible/roles/networks/tasks/create-project-security-group.yml b/ansible/roles/networks/tasks/create-project-security-group.yml deleted file mode 100644 index e4ebe625..00000000 --- a/ansible/roles/networks/tasks/create-project-security-group.yml +++ /dev/null @@ -1,21 +0,0 @@ ---- - - name: Create security groups - openstack.cloud.security_group: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - name: '{{ item }}' - description: Created with Ansible - loop: '{{ security_groups | map(attribute="name") | list | unique }}' - - - name: Populate security groups - openstack.cloud.security_group_rule: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - security_group: '{{ item.name }}' - protocol: tcp - port_range_max: "{{ item.rule.port }}" - port_range_min: "{{ item.rule.port }}" - remote_ip_prefix: "{{ item.rule.subnet }}" - loop: '{{ security_groups }}' - loop_control: - label: "updating security group {{ item.name }} with rule {{ item.rule }}" diff --git a/ansible/roles/networks/tasks/main.yml b/ansible/roles/networks/tasks/main.yml deleted file mode 100644 index 10a5623c..00000000 --- a/ansible/roles/networks/tasks/main.yml +++ /dev/null @@ -1,12 +0,0 @@ -# roles/networks/tasks/main.yml - - name: Create the project network (if it doesn't exist) - import_tasks: create-project-network.yml - - - name: Create the project security group (if it doesn't exist) - import_tasks: create-project-security-group.yml - - - name: Create the ipalias network (if it doesn't exist) - import_tasks: create-ipalias-network.yml - - - name: Create ping security group - import_tasks: open-for-ping.yml \ No newline at end of file diff --git a/ansible/roles/networks/vars/main.yml b/ansible/roles/networks/vars/main.yml deleted file mode 100644 index 4071b6ba..00000000 --- a/ansible/roles/networks/vars/main.yml +++ /dev/null @@ -1,102 +0,0 @@ -$ANSIBLE_VAULT;1.1;AES256 -63316462303232663161396533306631623963643536613865363931316530333935323339343165 -3334393663303564313730656263323461313133336263350a663637386337626430306430353138 -64326339663966643130363865373431656663333234666465363630616366376235346662373366 -3365333964303362660a646139336135343265613032353636616561323435366530633865323635 -63626239636465633662666262356665333162653563376266373530323562396561626535363366 -37333131353737353033313461613839366161623936656666396131646239303464623165616630 -66333866316231356161333464383062383634383530636162366464353361316532393033373963 -61646134623938633761303536646432616237316364323434646634393465363438616532313236 -61613639663835313635306265663263373639333062383937633932636166333437626461663932 
-36643666613763626261316333386332623166333433306661623531623232323662396364643133 -31356665663935343436646136393935653439306332656332303834363730623937363930353737 -36323564396333326433653463313136663331623862663239663234613135333162316466383136 -39376638633063643732343764366535323166663063323034306637376665336632653264613763 -36613139376237373035393236336331653235656435303631323730323163373938666635383038 -66323662316137393137613235373261626639623331376632653935343066303034653534316236 -35653339313864313835316462383164323633323332366133343839376463623966353635363934 -35616636326432336631613736363638663439336232623064643631663830313330623736366632 -36643031383032353738653131643262383936396166353331336137333265393561626163633464 -37616662646363663933656530366633626338386530613835626533306164396336383561616533 -39636539653134366232346330386239396133313132363437313238306333623232356462386434 -37343662646562353031616535346336613131343838616532623366613136386639363763323734 -33646138313162393763653363633435623965376332633463313264333636663238366666376161 -36626365333937323430363035353439653338633838326331303830666261653866323634633434 -32343738636636356539643762393534333739623539326234613639633435636165626433616337 -35666564636463623765393232336432386636343133383537363061343064613336663665316666 -33386535376165303966306530653161383735323761363534326335653732346633333865366135 -63303464303138653937646264316164613265353934316334366335626231363832613365363532 -65343636643937376136386235366235643363343166353462663730346161393362623730643965 -39303062666266376431326333663933356465373233653835363866636237363565376662643430 -31656236623131646633643632366233633066653762323438383538366232363634353331313366 -66396331326434343362663931353866373234306663623631383330333533656139623565313336 -36303136333535613537393231613135613935323436303037376134653831353530666266376130 -32353834343461393133646134333065663239326535313831366630303361333566376532346462 -37363635366634323531616536393431656365613436666433616530356538376531656130366531 -37656130303132356432363930626632356336653235396362643062363662336530646333356538 -30373738353836363137306363613433376232366239623134643035343066653863623766653837 -62313039663666313033636331346131386632303430623034326664396663356262336363366265 -31393937373261353963623064653737343137376461353231656365373934326263376464643964 -33336566643131643163636162343862646665623139653639643439613261323366333634326438 -63633932333866346164616166343063386234383732333863333034346436356637653665626463 -34366234643339343162373663623136303236313266356164373362636237393631303866383034 -62616630663132613566336663633265356561646662333764383563353966383930613137653833 -62383661643739313230316332626236366435326662303831343936336166313033373561363037 -39393239613531643437346466383234393263643034363066366262316535313532326639356637 -66313762626232373839626638343465663935333061383839373963353833623932616433373336 -30363465623362326466323166353266346239326134376230633631653739393430326663316133 -61356431393665646664623135306538326430336137383931316165663561306262353239653765 -30636563626665363337623135346663383330626663373633336337623662353562393732646665 -37633636336564386364343632636532376536366165623032636266363765343864306234613735 -32306431393261313230326666616162303664396464303236643666336566313065663562613766 -65316132613339343864383635636433333933356664336435343134666536396162663031353532 -32373765323733656533353965333564393132656238333136663838396137336439393730303738 
-34653130386130333038643833656235633531333839663462656262336262396362643766653064 -36633832346431346538306263356366613661393535356333386537383464373436623339623334 -34353038383563393334373134353734666564353639643763346166373862323866613839373539 -38643130346665336634393466356263383733613134333162653265393065633434616261323462 -65346264376534343735643039396538376637326639663966643939656663373636396566643638 -37366666623031323138356164363038393538383261313832366262636535643163663832613037 -31336136626134336661626464623439636533303731643639353664343163346332623032396430 -64383433643832343962343130636230626165376466386635363332633563333865633830383830 -66626334626433626339363837633235316636393163383464373638316132386363393739306230 -34343033393533303135343830333531626238393964306137323564623962313032633562366139 -35323261323531663335613039613764353262343433646537393830356135333265326238396663 -32636261623163633737666565666631663736333964363839373234633663343662366364646161 -63613365386335373637353633326434373632393334613131386439303339346530316334326364 -30336662653037656339393230323866643536643366383232393038323138323532636235653832 -61376338323839383539313364633936643934303264616131626233396563656163383836653132 -39393131393730343935663562386537313032383835663963653365343738373437303263313435 -32316365633333326131363034323463373065653930376365633834396137653634303038323364 -30303739363230353235666233636464373635396433616535643364666638656339653065366637 -35303531656665333334636535613631623133303662373235393231396234333566396435633839 -34663063366163653761336661386633656664313464663437323036373533323464373634616237 -64633666663033623234376630393361616638303166393230626336666236643462363565656431 -30626239323963376361353065383261383033326238613635643062373439616266313361306633 -64393263343130663765326562366266366538373130316638613734613134333030613831383938 -62393263343337306230363733326638366538393230313631383033313738346536656361623338 -34323131356230376530623035613133636434643766383162623363633464366661353031303863 -31396135333236373631363162326235313037343461656430376330383266613733656162616431 -31373231653361313465653233613537386661303737633730613033633334343964336665623639 -63393763343962346439653335333366346238643435666631356338366637316634373861383631 -38316563313866663561626632306635383062633237343038653032396266666666336436636138 -31666330323531393362366535326538626463633439393237633131376366393136386264306433 -33663434373662383632653264386566643132613938373062333635666138393136353035663666 -61636539353038363331306465383336303564633664623061326665383565616334363336313635 -37336664313334663237343762373362306239303362613966313765396666656663646636376338 -34633266343763306566633261343535653238663433613238633331306135626165366265613539 -35313334353238633532636139663363383130373066643230653535613964323061373862633433 -66343661323030666534373866363130316265346535303266616663316333666665626432386334 -36323865313661313365353666663563313232316531373761323534366266353462656132373738 -61393134656139393966636334326338643434626134333637626364326263333534643338383038 -34313339626263613566376539633737333532356131363561626364303738653066366337343935 -33323235616564316538356431623164373836356365323766613136323266616365646465613134 -30326161623665636166383636653266323739663236326162356238663865303463663964383463 -35396535623263316364366537626630643131633866396639386139373137663366636332373034 -66366231393932373230363161623039623463353732323962393361643238613130633835386231 
-66373534363562663163333532653664313664306539303362346535663131303037383231616362 -30663635343563393163616333396534366637303430633264643161653865643264386262396166 -64626562396238643566326361336538646436353166343639383533386635356436306666396531 -38333836353961626431646635343032346232613464336531633862386439353131376130656632 -35356639303162663862663036396337336233613534613431303165646239316466366535613834 -3839 diff --git a/ansible/roles/ostack/defaults/main.yml b/ansible/roles/ostack/defaults/main.yml new file mode 100644 index 00000000..a37fdef6 --- /dev/null +++ b/ansible/roles/ostack/defaults/main.yml @@ -0,0 +1,42 @@ +--- +# PUBLIC VARS +ostack_cloud: lard +ostack_region: Ostack2-EXT +ostack_ipalias_network_name: ipalias +ostack_state: present +ostack_key_name: +# ostack_key_file: + +# PRIVATE VARS +# networks +ostack_network_name: +ostack_network_cidr: +ostack_network_dns: # dict[ostack_region -> list(ipv4)] +ostack_network_security_groups: + - name: + rule: + subnet: + port: +ostack_ipalias_network_cidr: + +# vm_create +ostack_availability_zone: +ostack_vm_image: +ostack_vm_flavor: +ostack_vm_security_groups: +ostack_vm_volume_type: +ostack_vm_volume_size: + +# vm_format +ostack_mount_device: +ostack_mount_point: +ostack_repmgr_password: + +# create_primary / create_standby +ostack_db_password: +ostack_floating_ip: +ostack_primary_ip: + +# move_floating_ip +ostack_primary: +ostack_standby: diff --git a/ansible/roles/primarystandbysetup/readme.md b/ansible/roles/ostack/replication_setup.md similarity index 100% rename from ansible/roles/primarystandbysetup/readme.md rename to ansible/roles/ostack/replication_setup.md diff --git a/ansible/roles/ostack/tasks/create_primary.yml b/ansible/roles/ostack/tasks/create_primary.yml new file mode 100644 index 00000000..3acbebf7 --- /dev/null +++ b/ansible/roles/ostack/tasks/create_primary.yml @@ -0,0 +1,249 @@ +--- +# set up a role and provide suitable entries in pg_hba.conf with the database +# field set to replication + +# ensure max_wal_senders is set to a sufficiently large value in the conf file +# (also possibly max_replication_slots?) When running a standby server, you +# must set this parameter to the same or higher value than on the primary +# server. Otherwise, queries will not be allowed in the standby server. + +# set wal_keep_size to a value large enough to ensure that WAL segments are not +# recycled too early, or configure a replication slot for the standby? if there +# is a WAL archive accessible to the standby this may not be needed? + +# On systems that support the keepalive socket option, setting +# tcp_keepalives_idle, tcp_keepalives_interval and tcp_keepalives_count helps +# the primary promptly notice a broken connection. + +# example auth +# Allow the user "foo" from host 192.168.1.100 to connect to the primary +# as a replication standby if the user's password is correctly supplied. +# +# TYPE DATABASE USER ADDRESS METHOD +# host replication foo 192.168.1.100/32 md5 +- name: Create a new database with name lard + community.postgresql.postgresql_db: + name: lard + become: true + become_user: postgres + +- name: Copy the db folder to the remote + ansible.builtin.copy: + src: "{{ playbook_dir }}/../db/" + dest: /etc/postgresql/16/db/ + mode: "0755" + become: true + +# TODO: automatically loop over the sql files in order (needs prepending IDs?)
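# Editor's sketch for the TODO above, building on the with_fileglob hint
# that follows: prefix the sql files with numeric IDs so lexical order
# matches creation order, glob them on the controller (fileglob always runs
# locally), and map each file to the path it was copied to on the remote.
# The loop below is illustrative, not part of the role:
#
# - name: Create schemas and tables in lard
#   community.postgresql.postgresql_script:
#     db: lard
#     path: "/etc/postgresql/16/db/{{ item | basename }}"
#   become: true
#   become_user: postgres
#   loop: "{{ query('ansible.builtin.fileglob', playbook_dir ~ '/../db/*.sql') | sort }}"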
+# with_fileglob: "/etc/postgresql/16/db/*" +- name: Create schemas and tables in lard + community.postgresql.postgresql_script: + db: lard + path: "/etc/postgresql/16/db/{{ item }}" + become: true + become_user: postgres + loop: + - public.sql + - partitions_generated.sql + - labels.sql + - flags.sql + +- name: Connect to lard database, create lard_user + community.postgresql.postgresql_user: + db: lard + name: lard_user + password: "{{ ostack_db_password }}" + role_attr_flags: SUPERUSER # not desired, but the privilege granting doesn't seem to work? + become: true + become_user: postgres + +# - name: Grant lard_user privileges on lard database +# community.postgresql.postgresql_privs: +# type: database +# db: lard +# privs: ALL +# role: lard_user +# become: true +# become_user: postgres + +# MAKE IT THE PRIMARY +- name: Set wal_level parameter + community.postgresql.postgresql_set: + name: wal_level + value: replica # https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL + become: true + become_user: postgres + +- name: Set hot_standby parameter + community.postgresql.postgresql_set: + name: hot_standby + value: true + become: true + become_user: postgres + +- name: Set hot_standby_feedback parameter + community.postgresql.postgresql_set: + name: hot_standby_feedback + value: true + become: true + become_user: postgres + +- name: Set max_wal_senders parameter + community.postgresql.postgresql_set: + name: max_wal_senders + value: 10 + become: true + become_user: postgres + +- name: Set wal_log_hints parameter # needs to be enabled to use pg_rewind + # https://www.postgresql.org/docs/current/app-pgrewind.html + community.postgresql.postgresql_set: + name: wal_log_hints + value: true + become: true + become_user: postgres + +- name: Set max_replication_slots parameter + community.postgresql.postgresql_set: + name: max_replication_slots + value: 10 + become: true + become_user: postgres + +# make it SYNCHRONOUS REPLICATION (without the next two settings it would be asynchronous) +- name: Set synchronous_standby_names parameter + community.postgresql.postgresql_set: + name: synchronous_standby_names # https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-SYNCHRONOUS-STANDBY-NAMES + value: "*" # all the standbys + become: true + become_user: postgres + +- name: Set synchronous_commit parameter + community.postgresql.postgresql_set: + name: synchronous_commit # https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-SYNCHRONOUS-COMMIT + value: "on" # will not give standby query consistency (tradeoff for better write performance), but will give standby durable commit after OS crash + become: true + become_user: postgres + +# repmgr +# https://www.repmgr.org/docs/current/quickstart-repmgr-conf.html +- name: Create a repmgr.conf if it does not exist + ansible.builtin.file: + path: /etc/repmgr.conf + state: touch + mode: "0755" + become: true + +# TODO: add use_replication_slots=true +- name: Set contents of repmgr.conf + ansible.builtin.copy: + dest: "/etc/repmgr.conf" + content: | + node_id=1 + node_name='{{ inventory_hostname }}' + conninfo='host={{ ansible_host }} user=repmgr dbname=repmgr connect_timeout=2' + data_directory='{{ ostack_mount_point }}/16/main' + service_start_command='sudo /bin/systemctl start postgresql.service' + service_stop_command='sudo /bin/systemctl stop postgresql.service' + service_restart_command='sudo /bin/systemctl restart postgresql.service' + service_reload_command='sudo /bin/systemctl reload 
postgresql.service' + mode: "0755" + become: true + +# https://www.repmgr.org/docs/current/quickstart-primary-register.html +- name: Run repmgr to register the primary + ansible.builtin.command: repmgr -f /etc/repmgr.conf primary register -F # only need -F if rerunning + become: true + become_user: postgres + register: register_results + # changed_when: "'NOTICE: primary node record (ID: 1) registered' in register_results.stderr_lines" + changed_when: true + +- name: Print out the register_primary_results + ansible.builtin.debug: + msg: "repmgr {{ register_results }}" + +# # STUFF FOR REPLICATION (do not need if using repmgr) +# - name: Create replicator user with replication privileges +# community.postgresql.postgresql_user: +# name: replicator +# password: '{{ replicator_password }}' +# role_attr_flags: REPLICATION +# become: true +# become_user: postgres + +# # also specifically allow the replicator user +# - name: Change hba conf to allow replicator to connect +# community.postgresql.postgresql_pg_hba: +# dest: /etc/postgresql/16/main/pg_hba.conf +# databases: replication +# contype: host +# users: replicator +# #address: all +# address: '{{ standby_host }}' +# method: trust # seems to hang with md5, how to make auth work? +# become: true + +# # create replication slot +# - name: Create physical replication slot if doesn't exist +# become_user: postgres +# community.postgresql.postgresql_slot: +# slot_name: replication_slot +# #db: lard +# become: true + +# make sure these changes take effect? +- name: Restart service postgres + ansible.builtin.systemd_service: + name: postgresql + state: restarted + become: true + +- name: Attach primary floating ip + delegate_to: localhost + block: + - name: Gather information about primary server + openstack.cloud.server_info: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + name: "{{ inventory_hostname }}" + register: primary_server + + - name: Print out the ipalias port information for the server + ansible.builtin.debug: + msg: "Server {{ primary_server.servers[0].addresses.ipalias }}" + + # give the primary a particular floating ip + - name: Attach floating ip address that we keep connected to the primary + openstack.cloud.floating_ip: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + server: "{{ primary_server.servers[0].id }}" + reuse: true + network: public + fixed_address: "{{ primary_server.servers[0].addresses.ipalias[0].addr }}" + floating_ip_address: "{{ ostack_floating_ip }}" + wait: true + # unfortunately it seems that attaching the floating ip results in a + # timeout even though it actually succeeds + ignore_errors: true + register: attach_result + when: primary_server.servers[0].addresses.ipalias | length <=1 + + - name: Print out result of attaching floating ip address + ansible.builtin.debug: + msg: "{{ attach_result }}" + +- name: Check floating ip is attached + openstack.cloud.floating_ip_info: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + floating_ip_address: "{{ ostack_floating_ip }}" + register: fip_info + delegate_to: localhost + +# this will not run if the ip is not now on the vm +- name: Print out the floating ip information to confirm it's ok + ansible.builtin.debug: + msg: "Floating ip {{ fip_info }}" + when: fip_info.floating_ips[0].port_details.device_id == primary_server.servers[0].id diff --git a/ansible/roles/ostack/tasks/create_standby.yml b/ansible/roles/ostack/tasks/create_standby.yml new file mode 100644 index 00000000..e3cd080d --- /dev/null +++ 
b/ansible/roles/ostack/tasks/create_standby.yml @@ -0,0 +1,144 @@ +--- +# create standby.signal file in data directory + +# configure streaming WAL primary_conninfo needs a libpq connection string (ip +# address + other details needed to connect to primary server) + +# since we want the standby to be able to operate as the primary, we need to +# configure the WAL archiving, connections, and auth like the primary example: +# primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass +# options=''-c wal_sender_timeout=5000''' restore_command = 'cp +# /path/to/archive/%f %p' archive_cleanup_command = 'pg_archivecleanup +# /path/to/archive %r' + +# add the following line to the postgresql.conf file on the standby The standby +# connects to the primary that is running on host 192.168.1.50 and port 5432 as +# the user "foo" whose password is "foopass". primary_conninfo = +# 'host=192.168.1.50 port=5432 user=foo password=foopass' + +# use the replication slot on the primary (in file after the primary_conninfo) +# primary_slot_name = 'node_a_slot' repmgr +# https://www.repmgr.org/docs/current/quickstart-standby-clone.html must be +# done before the standby is put into read only mode (therefore not idempotent) +- name: Create a repmgr.conf if it does not exist + ansible.builtin.file: + path: /etc/repmgr.conf + state: touch + mode: "0755" + become: true + +- name: Set contents of repmgr.conf + ansible.builtin.copy: + dest: "/etc/repmgr.conf" + content: | + node_id=2 + node_name='{{ inventory_hostname }}' + conninfo='host={{ ansible_host }} user=repmgr dbname=repmgr connect_timeout=2' + data_directory='{{ ostack_mount_point }}/16/main' + service_start_command='sudo /bin/systemctl start postgresql.service' + service_stop_command='sudo /bin/systemctl stop postgresql.service' + service_restart_command='sudo /bin/systemctl restart postgresql.service' + service_reload_command='sudo /bin/systemctl reload postgresql.service' + mode: "0755" + become: true + +- name: Stop service postgres, if running + ansible.builtin.systemd_service: + name: postgresql + state: stopped + become: true + +# https://www.repmgr.org/docs/current/quickstart-standby-clone.html +# TODO: can you use `ostack_floating_ip` here? +- name: Run repmgr to dry run clone + ansible.builtin.command: repmgr -h '{{ ostack_primary_ip }}' -U repmgr -d repmgr -f /etc/repmgr.conf standby clone --dry-run + become: true + become_user: postgres + register: dry_run_clone_results + changed_when: false + +- name: Print out the dry_run_clone_results + ansible.builtin.debug: + msg: "repmgr {{ dry_run_clone_results }}" + +- name: Run repmgr to clone standby from primary + ansible.builtin.command: repmgr -h '{{ ostack_primary_ip }}' -U repmgr -d repmgr -f /etc/repmgr.conf standby clone -F + become: true + register: clone_results + become_user: postgres + # changed_when: "'NOTICE: standby clone (using pg_basebackup) complete' in clone_results.stderr_lines" + changed_when: true + +- name: Print out the clone_results + ansible.builtin.debug: + msg: "repmgr {{ clone_results }}" + +# try to clean up so can run standby clone ? 
+# - name: Recursively remove directory +# ansible.builtin.file: +# path: /mnt/ssd-b/16/main +# state: absent +# become: true +# - name: Create a main directory if it does not exist +# ansible.builtin.file: +# path: /mnt/ssd-b/16/main +# state: directory +# mode: '0700' +# become: true +# become_user: postgres + +# https://www.postgresql.org/docs/current/app-pgbasebackup.html +# NOTE: this part is not idempotent, so if a db is already in the dir, it will +# fail hence the stuff above that means this should not be run on a database with +# data!!! not needed if using repmgr, since clone calls this +# - name: Run pg_basebackup to initialize the replica / standby +# ansible.builtin.shell: | +# export PGPASSWORD="{{ replicator_password }}" && +# pg_basebackup --pgdata=/mnt/ssd-b/16/main -R --slot=replication_slot --user=replicator --host={{ primary_host }} --port=5432 +# args: +# executable: /bin/bash +# become: true +# become_user: postgres +# register: basebackup_results + +# - name: Print out the basebackup_results +# debug: msg="backup {{ basebackup_results }}" + +- name: Restart service postgres + ansible.builtin.systemd_service: + name: postgresql + state: restarted + become: true + +- name: Waits for port 5432 to be available, don't check for initial 10 seconds + ansible.builtin.wait_for: + host: 0.0.0.0 + port: 5432 + delay: 10 + state: started + +# https://www.repmgr.org/docs/current/quickstart-register-standby.html +- name: Run repmgr to register the standby + ansible.builtin.command: repmgr -f /etc/repmgr.conf standby register + become: true + become_user: postgres + register: register_results + # changed_when: "'INFO: standby registration complete' in register_results.stderr_lines" + changed_when: true + +- name: Print out the register_standby_results + ansible.builtin.debug: + msg: "repmgr {{ register_results.stderr }}" + +# run some sql... to confirm clone? +- name: Do some sql to test for the existence of lard...? + community.postgresql.postgresql_query: + db: lard + query: select count(*) from timeseries + register: query_results + become: true + become_user: postgres + +- name: Print out the query + ansible.builtin.debug: + msg: "Query {{ query_results }}" diff --git a/ansible/roles/ostack/tasks/move_floating_ip.yml b/ansible/roles/ostack/tasks/move_floating_ip.yml new file mode 100644 index 00000000..d49318ee --- /dev/null +++ b/ansible/roles/ostack/tasks/move_floating_ip.yml @@ -0,0 +1,63 @@ +--- +# Switch over the primary's particular floating ip +# this makes sense to do after successfully switching over, +# however it means that the stuff writing to the primary needs to be +# robust enough to handle getting told the db is in a read only state for a short period. 
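# (Editor's note, an assumption about the ingestion side rather than
# anything configured here: postgres reports writes against a standby as
# SQLSTATE 25006, "cannot execute ... in a read-only transaction", so a
# writer that retries on that error code with a short backoff should ride
# out the switchover window.)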
+- name: Move primary floating ip + block: + # remove from old primary + - name: Detach floating ip address that we keep connected to the primary + openstack.cloud.floating_ip: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + server: "{{ ostack_primary }}" + state: absent + network: public + floating_ip_address: "{{ ostack_floating_ip }}" + + - name: Gather information about new primary server + openstack.cloud.server_info: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + name: "{{ ostack_standby }}" + register: new_primary_server + + - name: Print out the ipalias port information for the server + ansible.builtin.debug: + msg: "Server {{ new_primary_server.servers[0].addresses.ipalias }}" + + # add to what is now primary (used to be standby) + # unfortunately it seems that attaching the floating ip results in a timeout + # even though it actually succeeds + - name: Attach floating ip address that we keep connected to the primary + openstack.cloud.floating_ip: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + server: "{{ new_primary_server.servers[0].id }}" + state: present + reuse: true + network: public + fixed_address: "{{ new_primary_server.servers[0].addresses.ipalias[0].addr }}" + floating_ip_address: "{{ ostack_floating_ip }}" + wait: true + # timeout: 60 + ignore_errors: true + register: attach_result + when: new_primary_server.servers[0].addresses.ipalias | length <=1 + + - name: Print out result of attaching floating ip address + ansible.builtin.debug: + msg: "{{ attach_result }}" + +- name: Check floating ip is attached + openstack.cloud.floating_ip_info: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + floating_ip_address: "{{ ostack_floating_ip }}" + register: fip_info + +# this will not run if the ip is not now on the right vm +- name: Print out the floating ip information to confirm it's ok + ansible.builtin.debug: + msg: "Floating ip {{ fip_info }}" + when: fip_info.floating_ips[0].port_details.device_id == new_primary_server.servers[0].id diff --git a/ansible/roles/ostack/tasks/networks.yml b/ansible/roles/ostack/tasks/networks.yml new file mode 100644 index 00000000..3642bdfe --- /dev/null +++ b/ansible/roles/ostack/tasks/networks.yml @@ -0,0 +1,12 @@ +--- +- name: Create the project network (if it doesn't exist) + ansible.builtin.import_tasks: networks/create_project_network.yml + +- name: Create the project security group (if it doesn't exist) + ansible.builtin.import_tasks: networks/create_project_security_group.yml + +- name: Create the ipalias network (if it doesn't exist) + ansible.builtin.import_tasks: networks/create_ipalias_network.yml + +- name: Create ping security group + ansible.builtin.import_tasks: networks/open_for_ping.yml diff --git a/ansible/roles/ostack/tasks/networks/create_ipalias_network.yml b/ansible/roles/ostack/tasks/networks/create_ipalias_network.yml new file mode 100644 index 00000000..df0250aa --- /dev/null +++ b/ansible/roles/ostack/tasks/networks/create_ipalias_network.yml @@ -0,0 +1,42 @@ +--- +- name: Create ipalias network + openstack.cloud.network: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + name: ipalias + external: false + state: present + run_once: true + +- name: Create ipalias network subnet + openstack.cloud.subnet: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + network_name: ipalias + name: ipalias-subnet + cidr: "{{ ostack_ipalias_network_cidr }}" + state: present + dns_nameservers: "{{ 
ostack_network_dns[ostack_region] }}" + run_once: true + +- name: Connect ipalias network to public network + openstack.cloud.router: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + state: present + name: ipalias-router + network: public + interfaces: + - ipalias-subnet + run_once: true + +- name: Remove default gateway for subnet + openstack.cloud.subnet: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + network_name: ipalias + name: ipalias-subnet + cidr: "{{ ostack_ipalias_network_cidr }}" + no_gateway_ip: true + state: present + run_once: true diff --git a/ansible/roles/ostack/tasks/networks/create_project_network.yml b/ansible/roles/ostack/tasks/networks/create_project_network.yml new file mode 100644 index 00000000..62a97538 --- /dev/null +++ b/ansible/roles/ostack/tasks/networks/create_project_network.yml @@ -0,0 +1,28 @@ +--- +- name: Create private network + openstack.cloud.network: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + state: present + name: "{{ ostack_network_name }}" + external: false + +- name: Create private network subnet + openstack.cloud.subnet: + state: present + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + network_name: "{{ ostack_network_name }}" + name: "{{ ostack_network_name }}-subnet" + cidr: "{{ ostack_network_cidr }}" + dns_nameservers: "{{ ostack_network_dns[ostack_region] }}" + +- name: Connect private network to public network + openstack.cloud.router: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + state: present + name: public-router + network: public + interfaces: + - "{{ ostack_network_name }}-subnet" diff --git a/ansible/roles/ostack/tasks/networks/create_project_security_group.yml b/ansible/roles/ostack/tasks/networks/create_project_security_group.yml new file mode 100644 index 00000000..4fa115dd --- /dev/null +++ b/ansible/roles/ostack/tasks/networks/create_project_security_group.yml @@ -0,0 +1,21 @@ +--- +- name: Create security groups + openstack.cloud.security_group: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + name: "{{ item }}" + description: Created with Ansible + loop: '{{ ostack_network_security_groups | map(attribute="name") | list | unique }}' + +- name: Populate security groups + openstack.cloud.security_group_rule: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + security_group: "{{ item.name }}" + protocol: tcp + port_range_max: "{{ item.rule.port }}" + port_range_min: "{{ item.rule.port }}" + remote_ip_prefix: "{{ item.rule.subnet }}" + loop: "{{ ostack_network_security_groups }}" + loop_control: + label: "updating security group {{ item.name }} with rule {{ item.rule }}" diff --git a/ansible/roles/networks/tasks/open-for-ping.yml b/ansible/roles/ostack/tasks/networks/open_for_ping.yml similarity index 54% rename from ansible/roles/networks/tasks/open-for-ping.yml rename to ansible/roles/ostack/tasks/networks/open_for_ping.yml index 0e383f15..88de7ae8 100644 --- a/ansible/roles/networks/tasks/open-for-ping.yml +++ b/ansible/roles/ostack/tasks/networks/open_for_ping.yml @@ -1,15 +1,15 @@ -### stuff needed for ping +--- - name: Create ping security group openstack.cloud.security_group: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" name: ping description: Created with Ansible - name: Populate ping security group openstack.cloud.security_group_rule: - cloud: '{{ ostack_cloud }}' - region_name: '{{ 
ostack_region }}' + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" security_group: ping protocol: icmp - remote_ip_prefix: "157.249.0.0/16" \ No newline at end of file + remote_ip_prefix: "157.249.0.0/16" diff --git a/ansible/roles/ostack/tasks/vm_create.yml b/ansible/roles/ostack/tasks/vm_create.yml new file mode 100644 index 00000000..de14aeb8 --- /dev/null +++ b/ansible/roles/ostack/tasks/vm_create.yml @@ -0,0 +1,106 @@ +--- +# TODO: add key pair without need of using the web GUI +# - name: Add key pair # noqa run-once +# openstack.cloud.keypair: +# cloud: "{{ ostack_cloud }}" +# name: "{{ key_name }}" +# state: present +# public_key_file: "{{ ostack_key_file }}" +# run_once: true +# tags: +# - add_key +# +- name: Create VM + openstack.cloud.server: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + availability_zone: "{{ ostack_availability_zone }}" + name: "{{ inventory_hostname }}" + image: "{{ ostack_vm_image }}" + flavor: "{{ ostack_vm_flavor }}" + network: "{{ ostack_network_name }}" + key_name: "{{ ostack_key_name }}" + security_groups: "{{ ostack_vm_security_groups }}" + state: "{{ ostack_state }}" + # do not give ip, since want to assign a specific one in next step (so as to reuse the ones we have) + auto_ip: false + register: server + +- name: Print out the server information + ansible.builtin.debug: + msg: "Server {{ lookup('ansible.builtin.dict', server) }}" + +# Assign a static IP to the VM +# TODO: this can be assigned automatically if `floating_ip_address` is not specified here? +# So we don't have to keep them in this repo? +- name: Attach floating ip address + openstack.cloud.floating_ip: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + server: "{{ server.server.id }}" + reuse: true + network: public + floating_ip_address: "{{ ansible_host }}" + wait: true + timeout: 60 + +- name: Create Volume + openstack.cloud.volume: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + availability_zone: "{{ ostack_availability_zone }}" + name: "{{ inventory_hostname }}" + volume_type: "{{ ostack_vm_volume_type }}" + size: "{{ ostack_vm_volume_size }}" + register: volume + +- name: Print out the volume information + ansible.builtin.debug: + msg: "Volume {{ lookup('ansible.builtin.dict', volume) }}" + +- name: Attach a volume to a compute host + openstack.cloud.server_volume: + state: present + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + volume: "{{ volume.volume.id }}" + server: "{{ server.server.id }}" + device: /dev/vdb + +- name: Create port for ipalias and set security groups + openstack.cloud.port: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + state: present + name: "ipalias-{{ inventory_hostname }}" + network: "{{ ostack_ipalias_network_name }}" + security_groups: "{{ ostack_vm_security_groups }}" + +- name: Get port info + openstack.cloud.port_info: + cloud: "{{ ostack_cloud }}" + region_name: "{{ ostack_region }}" + port: "ipalias-{{ inventory_hostname }}" + register: ipalias_port + +- name: Print out the port information + ansible.builtin.debug: + msg: "Port {{ lookup('ansible.builtin.dict', ipalias_port) }}" + +# TODO: Can't this be done directly above +# by assigning `server.server.id` to `device_id`? 
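# (Editor's note on the TODO above: openstack.cloud.port appears to accept
# a device_id, but setting it through Neutron only records which device owns
# the port, it does not hot-plug the interface into the running instance.
# `openstack server add port` goes through Nova's interface-attach, which
# does both, so the CLI call below is probably still needed.)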
+- name: Add the ipalias network to server + ansible.builtin.command: + argv: + - openstack + - --os-cloud + - "{{ ostack_cloud }}" + - --os-region-name + - "{{ ostack_region }}" + - server + - add + - port + - "{{ server.server.id }}" + - "ipalias-{{ inventory_hostname }}" + when: ipalias_port.ports.0.device_id | length <=0 + changed_when: true diff --git a/ansible/roles/ostack/tasks/vm_format.yml b/ansible/roles/ostack/tasks/vm_format.yml new file mode 100644 index 00000000..0bb73873 --- /dev/null +++ b/ansible/roles/ostack/tasks/vm_format.yml @@ -0,0 +1,9 @@ +--- +- name: Netplan + ansible.builtin.import_tasks: vm_format/netplan.yml + +- name: Format and mount the disk + ansible.builtin.import_tasks: vm_format/mount_disk.yml + +- name: Install postgres + ansible.builtin.import_tasks: vm_format/install_postgres.yml diff --git a/ansible/roles/ostack/tasks/vm_format/install_postgres.yml b/ansible/roles/ostack/tasks/vm_format/install_postgres.yml new file mode 100644 index 00000000..376b1c7a --- /dev/null +++ b/ansible/roles/ostack/tasks/vm_format/install_postgres.yml @@ -0,0 +1,140 @@ +--- +- name: Add postgres apt key by id from a keyserver + ansible.builtin.apt_key: + url: https://www.postgresql.org/media/keys/ACCC4CF8.asc + state: present + become: true + +- name: Add postgres repository into sources list + ansible.builtin.apt_repository: + repo: deb https://apt.postgresql.org/pub/repos/apt jammy-pgdg main + state: present + become: true + +- name: Install apt packages + ansible.builtin.apt: + pkg: + - nano + - postgresql-16 + - postgresql-16-repmgr # https://www.repmgr.org/docs/current/install-requirements.html + - pip # needed for installing psycopg2 + - acl # needed for becoming unprivileged user (such as postgres) + update_cache: true + become: true + +- name: Install pip packages + ansible.builtin.pip: + name: "{{ item }}" + become: true + loop: + # TODO: should probably use psycopg3 instead, v2 is deprecated + - psycopg2-binary # dependency for community.postgresql + - openstacksdk + +# Make sure the data is actually kept on the ssd mount +# First stop postgres service +- name: Stop service postgres, if running + ansible.builtin.systemd_service: + name: postgresql + state: stopped + become: true + +- name: Rsync postgres directory to ssd mount + ansible.posix.synchronize: + archive: true + src: /var/lib/postgresql/ + dest: "{{ ostack_mount_point }}" + become: true + # synchronize runs by default on localhost + delegate_to: "{{ inventory_hostname }}" + +- name: Comment out original data_directory + ansible.builtin.replace: + dest: /etc/postgresql/16/main/postgresql.conf + regexp: '^data_directory' + replace: '#data_directory' + become: true + +- name: Modify postgresql config + ansible.builtin.blockinfile: + dest: /etc/postgresql/16/main/postgresql.conf + # NOTE: single quotes are mandatory here! + block: | + data_directory = '{{ ostack_mount_point }}/16/main' + listen_addresses = '*' + become: true + +# probably want to restrict this once we know what will connect? +# but the security group rules should take care of limiting to met ranges +- name: Change hba conf to allow connections + community.postgresql.postgresql_pg_hba: + dest: /etc/postgresql/16/main/pg_hba.conf + contype: host + address: all # can we put met internal ip range(s)? 
+ method: md5 + # users and database default to all + become: true + +# make sure these changes take effect +- name: Start up postgres service again + ansible.builtin.systemd_service: + name: postgresql + state: started + become: true + +# REPMGR +- name: Create repmgr user # https://www.repmgr.org/docs/current/quickstart-repmgr-user-database.html + community.postgresql.postgresql_user: + name: repmgr + # NOTE: single quotes are mandatory here! + password: '{{ ostack_repmgr_password }}' + role_attr_flags: SUPERUSER + become: true + become_user: postgres + +- name: Create a repmgr database, with owner repmgr + community.postgresql.postgresql_db: + name: repmgr + owner: repmgr + become: true + become_user: postgres + +- name: Change hba conf to allow repmgr to connect for replication + community.postgresql.postgresql_pg_hba: + dest: /etc/postgresql/16/main/pg_hba.conf + databases: replication + contype: host + users: repmgr + address: all + # address: '{{ standby_host }}' + method: trust + become: true + +- name: Change hba conf to allow repmgr to connect to the repmgr db + community.postgresql.postgresql_pg_hba: + dest: /etc/postgresql/16/main/pg_hba.conf + databases: repmgr + contype: host + users: repmgr + address: all + # address: '{{ standby_host }}' + method: trust + become: true + +- name: Restart postgres + ansible.builtin.systemd_service: + name: postgresql + state: restarted + become: true + +- name: Allow the postgres user to run /bin/systemctl restart, stop, start postgres + community.general.sudoers: + name: postgresql + user: postgres + commands: + - /bin/systemctl restart postgresql.service + - /bin/systemctl stop postgresql.service + - /bin/systemctl start postgresql.service + - /bin/systemctl reload postgresql.service + nopassword: true + become: true diff --git a/ansible/roles/ostack/tasks/vm_format/mount_disk.yml b/ansible/roles/ostack/tasks/vm_format/mount_disk.yml new file mode 100644 index 00000000..a31da5d3 --- /dev/null +++ b/ansible/roles/ostack/tasks/vm_format/mount_disk.yml @@ -0,0 +1,45 @@ +--- +- name: Create mount point + ansible.builtin.file: + path: "{{ ostack_mount_point }}" + state: directory + owner: ubuntu # change to postgres? + group: ubuntu # change to postgres? + mode: 'u=rw,g=rws,o=r' + become: true + +- name: Create ext4 filesystem on mount device + community.general.filesystem: + dev: '{{ ostack_mount_device }}' + fstype: ext4 + become: true + +- name: Read device information (always use unit when probing) + community.general.parted: + device: '{{ ostack_mount_device }}' + unit: MiB + register: sdb_info + become: true + +- name: Print out the device information + ansible.builtin.debug: + msg: "Partitions {{ sdb_info.partitions }}" + +# this also changes the fstab so it's still there when rebooted!
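# (Editor's note: with state: mounted, ansible.posix.mount both mounts the
# device and persists it in /etc/fstab, which ends up with a line roughly
# like the following, shown here with illustrative values for the two
# variables: /dev/vdb /mnt/ssd-data ext4 defaults 0 0)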
+- name: Mount the device on the mount point + ansible.posix.mount: + path: "{{ ostack_mount_point }}" + src: '{{ ostack_mount_device }}' + fstype: ext4 + state: mounted + become: true + +- name: Fetch the UUID of mounted device + ansible.builtin.command: blkid --match-tag UUID --output value '{{ ostack_mount_device }}' + changed_when: false + register: blkid_cmd + become: true + +- name: Print out the UUID + ansible.builtin.debug: + msg: "UUID {{ blkid_cmd.stdout }}" diff --git a/ansible/roles/ostack/tasks/vm_format/netplan.yml b/ansible/roles/ostack/tasks/vm_format/netplan.yml new file mode 100644 index 00000000..2a8790bb --- /dev/null +++ b/ansible/roles/ostack/tasks/vm_format/netplan.yml @@ -0,0 +1,61 @@ +--- +- name: Get port info + openstack.cloud.port_info: + cloud: '{{ ostack_cloud }}' + region_name: '{{ ostack_region }}' + port: 'ipalias-{{ inventory_hostname }}' + register: ipalias_port + delegate_to: localhost + +- name: Print out the port information + ansible.builtin.debug: + msg: "Port {{ lookup('ansible.builtin.dict', ipalias_port) }}" + +- name: IP alias netplan configuration + ansible.builtin.set_fact: + netplan_config: + network: + version: 2 + ethernets: + ens6: + dhcp4: true + dhcp4-overrides: + use-routes: false + match: + macaddress: '{{ ipalias_port.ports.0.mac_address }}' + set-name: ens6 + routes: + - to: 0.0.0.0/0 + via: '{{ ostack_ipalias_network_cidr | ansible.utils.ipaddr("net") | ansible.utils.ipaddr("1") | ansible.utils.ipaddr("address") }}' + table: 102 + routing-policy: + - from: '{{ ostack_ipalias_network_cidr }}' + table: 102 + +- name: Copy out ipalias netplan config + ansible.builtin.copy: + content: '{{ netplan_config | to_nice_yaml }}' + dest: /etc/netplan/90-ansible-ipalias.yaml + mode: '0644' + register: netplan_config + become: true + +- name: Print out netplan config + ansible.builtin.debug: + msg: "Netplan {{ netplan_config }}" + # https://gitlab.met.no/ansible-roles/ipalias/-/blob/master/tasks/netplan.yml?ref_type=heads + # this times out and then the servers are unreachable? + # - name: Reboot server to apply new netplan config, without hitting netplan bug + # ansible.builtin.reboot: # noqa no-handler + # reboot_timeout: 3600 + # when: netplan_config is changed + # become: true + +- name: Apply netplan + ansible.builtin.command: sudo netplan apply + async: 45 + poll: 0 + register: netplan_output + # TODO: need to check output of netplan apply to determine + # changed_when condition + changed_when: true diff --git a/ansible/roles/primarystandbysetup/tasks/create-primary.yml b/ansible/roles/primarystandbysetup/tasks/create-primary.yml deleted file mode 100644 index 94d364f6..00000000 --- a/ansible/roles/primarystandbysetup/tasks/create-primary.yml +++ /dev/null @@ -1,262 +0,0 @@ -# set up a role and provide suitable entries in pg_hba.conf with the database field set to replication - -# ensure max_wal_senders is set to a sufficiently large value in the conf file (also possibly max_replication_slots?) -# When running a standby server, you must set this parameter to the same or higher value than on the primary server. Otherwise, queries will not be allowed in the standby server. - -# set wal_keep_size to a value large enough to ensure that WAL segments are not recycled too early, or configure a replication slot for the standby? -# if there is a WAL archive accessible to the standby this may not be needed?
- -# On systems that support the keepalive socket option, setting tcp_keepalives_idle, tcp_keepalives_interval and tcp_keepalives_count helps the primary promptly notice a broken connection. - -# example auth -# Allow the user "foo" from host 192.168.1.100 to connect to the primary -# as a replication standby if the user's password is correctly supplied. -# -# TYPE DATABASE USER ADDRESS METHOD -# host replication foo 192.168.1.100/32 md5 ---- - - name: Create a new database with name lard - community.postgresql.postgresql_db: - name: lard - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - - name: Copy the db folder to the remote - ansible.builtin.copy: - src: ../../../../db/ - dest: /etc/postgresql/16/db/ - mode: '0755' - become: true - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - - name: Create the public schema in lard - community.postgresql.postgresql_script: - db: lard - path: /etc/postgresql/16/db/public.sql - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - - name: Create the labels schema in lard - community.postgresql.postgresql_script: - db: lard - path: /etc/postgresql/16/db/labels.sql - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - - name: Connect to lard database, create user - community.postgresql.postgresql_user: - db: lard - name: lard_user - password: '{{ db_password }}' - role_attr_flags: SUPERUSER # not desired, but the privelege granting doesn't seem to work? - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - # - name: Grant lard_user priveleges on lard database - # community.postgresql.postgresql_privs: - # type: database - # db: lard - # privs: ALL - # role: lard_user - # become: true - # become_user: postgres - - # MAKE IT THE PRIMARY - - name: Set wal_level parameter - community.postgresql.postgresql_set: - name: wal_level - value: replica # https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set hot_standby parameter - community.postgresql.postgresql_set: - name: hot_standby - value: true - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set hot_standby_feedback parameter - community.postgresql.postgresql_set: - name: hot_standby_feedback - value: true - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set max_wal_senders parameter - community.postgresql.postgresql_set: - name: max_wal_senders - value: 10 - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set wal_log_hints parameter # needs to be enabled to use pg_rewind - # https://www.postgresql.org/docs/current/app-pgrewind.html - community.postgresql.postgresql_set: - name: wal_log_hints - value: true - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set max_replication_slots parameter - community.postgresql.postgresql_set: - name: max_replication_slots - value: 10 - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - # make it SYNCHRONOUS REPLICATION (without the next two settings it would be asynchronous) - - name: Set synchronous_standby_names parameter - community.postgresql.postgresql_set: - name: 
synchronous_standby_names # https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-SYNCHRONOUS-STANDBY-NAMES - value: "*" # all the standbys - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set synchronous_commit parameter - community.postgresql.postgresql_set: - name: synchronous_commit # https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-SYNCHRONOUS-COMMIT - value: on # will not give standby query consistency (tradeoff for better write performance), but will give standby durable commit after OS crash - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - # repmgr - # https://www.repmgr.org/docs/current/quickstart-repmgr-conf.html - - name: Create a repmgr.conf if it does not exist - ansible.builtin.file: - path: /etc/repmgr.conf - state: touch - mode: '0755' - become: true - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - name: Set contents of repmgr.conf - ansible.builtin.copy: - dest: "/etc/repmgr.conf" - content: | - node_id=1 - node_name='{{ primary_name }}' - conninfo='host={{ primary_ip }} user=repmgr dbname=repmgr connect_timeout=2' - data_directory='/mnt/ssd-b/16/main' - service_start_command='sudo /bin/systemctl start postgresql.service' - service_stop_command='sudo /bin/systemctl stop postgresql.service' - service_restart_command='sudo /bin/systemctl restart postgresql.service' - service_reload_command='sudo /bin/systemctl reload postgresql.service' - mode: '0755' - become: true - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - # https://www.repmgr.org/docs/current/quickstart-primary-register.html - - name: Run repmgr to register the primary - ansible.builtin.command: repmgr -f /etc/repmgr.conf primary register -F # only need -F if rerunning - become: true - become_user: postgres - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - register: register_primary_results - - name: Print out the register_primary_results - ansible.builtin.debug: - msg: "repmgr {{ register_primary_results }}" - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - # # STUFF FOR REPLICATION (do not need if using repmgr) - # - name: Create replicator user with replication priveleges - # community.postgresql.postgresql_user: - # name: replicator - # password: '{{ replicator_password }}' - # role_attr_flags: REPLICATION - # become: true - # become_user: postgres - - # # also specifically allow the replicator user - # - name: Change hba conf to allow replicator to connect - # community.postgresql.postgresql_pg_hba: - # dest: /etc/postgresql/16/main/pg_hba.conf - # databases: replication - # contype: host - # users: replicator - # #address: all - # address: '{{ standby_host }}' - # method: trust # seems to hang with md5, how to make auth work? - # become: true - - # # create replication slot - # - name: Create physical replication slot if doesn't exist - # become_user: postgres - # community.postgresql.postgresql_slot: - # slot_name: replication_slot - # #db: lard - # become: true - - # make sure these changes take effect? 
- - name: Restart service postgres - ansible.builtin.systemd_service: - name: postgresql - state: restarted - become: true - delegate_to: '{{ primary_ip }}' - remote_user: ubuntu - - ### now move back to default of operating from localhost - - name: Attach primary floating ip - block: - - name: Gather information about primary server - openstack.cloud.server_info: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - name: '{{ primary_name }}' - become: false - register: primary_server - - - name: Print out the ipalias port information for the server - ansible.builtin.debug: - msg: "Server {{ primary_server.servers[0].addresses.ipalias }}" - - # give the primary a particular floating ip - - name: Attach floating ip address that we keep connected to the primary - openstack.cloud.floating_ip: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - server: '{{ primary_server.servers[0].id }}' - reuse: true - network: public - fixed_address: '{{ primary_server.servers[0].addresses.ipalias[0].addr }}' - floating_ip_address: '{{ primary_floating_ip }}' - wait: true - timeout: 60 - when: primary_server.servers[0].addresses.ipalias | length <=1 - # unfortunately it seems that attaching the floating ip results in a timeout - # even though it actually succeeds - ignore_errors: true - - - name: Check floating ip is attached - openstack.cloud.floating_ip_info: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - floating_ip_address: '{{ primary_floating_ip }}' - register: fip - - # this will not run if the ip is not now on the vm - - name: Print out the floating ip information to confirm its ok - ansible.builtin.debug: - msg: "Floating ip {{ fip }}" - when: fip.floating_ips[0].port_details.device_id == primary_server.servers[0].id \ No newline at end of file diff --git a/ansible/roles/primarystandbysetup/tasks/create-standby.yml b/ansible/roles/primarystandbysetup/tasks/create-standby.yml deleted file mode 100644 index d565243e..00000000 --- a/ansible/roles/primarystandbysetup/tasks/create-standby.yml +++ /dev/null @@ -1,149 +0,0 @@ -# create standby.signal file in data directory - -# configure streaming WAL -# primary_conninfo needs a libpq connection string (ip address + other details needed to connect to primary server) - -# since we want the standby to be able to operate as the primary, we need to configure the WAL archiving, connections, and auth like the primary -# example: -#primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass options=''-c wal_sender_timeout=5000''' -#restore_command = 'cp /path/to/archive/%f %p' -#archive_cleanup_command = 'pg_archivecleanup /path/to/archive %r' - -# add the following line to the postgresql.conf file on the standby -# The standby connects to the primary that is running on host 192.168.1.50 -# and port 5432 as the user "foo" whose password is "foopass". 
-#primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass' - -# use the replication slot on the primary (in file after the primary_conninfo) -#primary_slot_name = 'node_a_slot' ---- - # repmgr - # https://www.repmgr.org/docs/current/quickstart-standby-clone.html - # must be done before the standby is put into read only mode (therefore not idempotent) - - name: Create a repmgr.conf if it does not exist - ansible.builtin.file: - path: /etc/repmgr.conf - state: touch - mode: '0755' - become: true - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - name: Set contents of repmgr.conf - ansible.builtin.copy: - dest: "/etc/repmgr.conf" - content: | - node_id=2 - node_name='{{ standby_name }}' - conninfo='host={{ standby_ip }} user=repmgr dbname=repmgr connect_timeout=2' - data_directory='/mnt/ssd-b/16/main' - service_start_command='sudo /bin/systemctl start postgresql.service' - service_stop_command='sudo /bin/systemctl stop postgresql.service' - service_restart_command='sudo /bin/systemctl restart postgresql.service' - service_reload_command='sudo /bin/systemctl reload postgresql.service' - mode: '0755' - become: true - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - - name: Stop service postgres, if running - ansible.builtin.systemd_service: - name: postgresql - state: stopped - become: true - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - # https://www.repmgr.org/docs/current/quickstart-standby-clone.html - - name: Run repmgr to dry run clone - ansible.builtin.command: repmgr -h '{{ primary_ip }}' -U repmgr -d repmgr -f /etc/repmgr.conf standby clone --dry-run - become: true - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - register: dry_run_clone_results - - name: Print out the dry_run_clone_results - ansible.builtin.debug: - msg: "repmgr {{ dry_run_clone_results }}" - - - name: Run repmgr to clone standby from primary - ansible.builtin.command: repmgr -h '{{ primary_ip }}' -U repmgr -d repmgr -f /etc/repmgr.conf standby clone -F - become: true - register: clone_results - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - name: Print out the clone_results - ansible.builtin.debug: - msg: "repmgr {{ clone_results }}" - - # try to clean up so can run standby clone ? - # - name: Recursively remove directory - # ansible.builtin.file: - # path: /mnt/ssd-b/16/main - # state: absent - # become: true - # - name: Create a main directory if it does not exist - # ansible.builtin.file: - # path: /mnt/ssd-b/16/main - # state: directory - # mode: '0700' - # become: true - # become_user: postgres - - # https://www.postgresql.org/docs/current/app-pgbasebackup.html - # NOTE: this part is not idempotent, so if a db is already in the dir, it will fail - # hence the stuff above that means this should not be run on a database with data!!! 
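For reference, the repmgr clone performed by the tasks above maps to these manual steps on the standby (a sketch; `<primary_ip>` is a placeholder, and `-F` forces the clone over an existing data directory, so this must never be run against a database whose data you want to keep):

```terminal
sudo systemctl stop postgresql
sudo -u postgres repmgr -h <primary_ip> -U repmgr -d repmgr -f /etc/repmgr.conf standby clone --dry-run
sudo -u postgres repmgr -h <primary_ip> -U repmgr -d repmgr -f /etc/repmgr.conf standby clone -F
sudo systemctl start postgresql
```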
- # not needed if using repmgr, since clone calls this - # - name: Run pg_basebackup to initialize the replica / standby - # ansible.builtin.shell: export PGPASSWORD="{{ replicator_password }}" && pg_basebackup --pgdata=/mnt/ssd-b/16/main -R --slot=replication_slot --user=replicator --host={{ primary_host }} --port=5432 - # args: - # executable: /bin/bash - # become: true - # become_user: postgres - # register: basebackup_results - - # - name: Print out the basebackup_results - # debug: msg="backup {{ basebackup_results }}" - - - name: Restart service postgres - ansible.builtin.systemd_service: - name: postgresql - state: restarted - become: true - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - - name: Waits for port 5432 to be available, don't check for initial 10 seconds - ansible.builtin.wait_for: - host: 0.0.0.0 - port: 5432 - delay: 10 - state: started - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - # https://www.repmgr.org/docs/current/quickstart-register-standby.html - - name: Run repmgr to register the standby - ansible.builtin.command: repmgr -f /etc/repmgr.conf standby register - become: true - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - register: register_standby_results - - name: Print out the register_standby_results - ansible.builtin.debug: - msg: "repmgr {{ register_standby_results }}" - - # run some sql... to confirm clone? - - name: Do some sql to test for the existence of lard...? - community.postgresql.postgresql_query: - db: lard - query: select count(*) from timeseries - register: query_results - become: true - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - - name: Print out the query - ansible.builtin.debug: - msg: "Query {{ query_results }}" diff --git a/ansible/roles/primarystandbysetup/tasks/main.yml b/ansible/roles/primarystandbysetup/tasks/main.yml deleted file mode 100644 index d1d6f048..00000000 --- a/ansible/roles/primarystandbysetup/tasks/main.yml +++ /dev/null @@ -1,7 +0,0 @@ -# roles/primarystandbysetup/tasks/main.yml -- name: Turn a vm into the primary - import_tasks: create-primary.yml - -# note, may in the future want to make multiple standbys -- name: Turn a vm into the standby - import_tasks: create-standby.yml diff --git a/ansible/roles/rejoin/default/main.yml b/ansible/roles/rejoin/default/main.yml new file mode 100644 index 00000000..9374b296 --- /dev/null +++ b/ansible/roles/rejoin/default/main.yml @@ -0,0 +1,2 @@ +--- +rejoin_ip: diff --git a/ansible/roles/rejoin/tasks/main.yml b/ansible/roles/rejoin/tasks/main.yml deleted file mode 100644 index 82fad6c4..00000000 --- a/ansible/roles/rejoin/tasks/main.yml +++ /dev/null @@ -1,3 +0,0 @@ -# roles/rejoin/tasks/main.yml -- name: Rejoin an old primary to cluster as standby - import_tasks: rejoin_old_primary.yml diff --git a/ansible/roles/rejoin/tasks/promote.yml b/ansible/roles/rejoin/tasks/promote.yml new file mode 100644 index 00000000..85a183cc --- /dev/null +++ b/ansible/roles/rejoin/tasks/promote.yml @@ -0,0 +1,27 @@ +--- +# TODO: check that primary says "unreachable"? +# - name: Check cluster +# ansible.builtin.command: repmgr -f /etc/repmgr.conf cluster show +# become: true +# become_user: postgres +# register: cluster_status +# changed_when: false + +# TODO: should postgres service be restarted? +# TODO: add dry run? +# TODO: check that primary says "failed"? 
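Run by hand during an unplanned failover, the promotion task below corresponds roughly to the following on the surviving standby (a sketch; check the cluster state first to confirm the old primary is reported failed or unreachable):

```terminal
sudo -u postgres repmgr -f /etc/repmgr.conf cluster show
sudo -u postgres repmgr -f /etc/repmgr.conf standby promote
```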
+- name: Promote standby
+  ansible.builtin.command: repmgr -f /etc/repmgr.conf standby promote
+  become: true
+  become_user: postgres
+  changed_when: true
+
+# TODO: this will keep crashing until the IP alias is moved to the standby
+# So probably best to restart after the IP switch
+# - name: Start LARD ingestion service
+#   ansible.builtin.systemd_service:
+#     daemon_reload: true
+#     name: lard_ingestion
+#     state: restarted
+#     enabled: true
+#   become: true
diff --git a/ansible/roles/rejoin/tasks/rejoin.yml b/ansible/roles/rejoin/tasks/rejoin.yml
new file mode 100644
index 00000000..b33017f0
--- /dev/null
+++ b/ansible/roles/rejoin/tasks/rejoin.yml
@@ -0,0 +1,53 @@
+---
+- name: Stop postgres service
+  ansible.builtin.systemd_service:
+    name: postgresql
+    state: stopped
+  become: true
+
+- name: Dry run of rejoin
+  ansible.builtin.command: >
+    repmgr node rejoin
+    -f /etc/repmgr.conf -d 'host={{ rejoin_ip }} user=repmgr dbname=repmgr connect_timeout=2'
+    --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose --dry-run
+  become: true
+  become_user: postgres
+  register: dry_run_results
+  changed_when: false # dry run does not change status
+
+- name: Print out the rejoin_dry_run_results
+  ansible.builtin.debug:
+    msg: "repmgr {{ dry_run_results }}"
+
+# TODO: add changed_when to fix lint? Need to figure out what the output of the command looks like
+# Or is it always changed_when: true?
+- name: Rejoin old primary as standby
+  ansible.builtin.command: >
+    repmgr node rejoin
+    -f /etc/repmgr.conf -d 'host={{ rejoin_ip }} user=repmgr dbname=repmgr connect_timeout=2'
+    --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose
+  become: true
+  become_user: postgres
+  register: node_rejoin_results
+  changed_when: true
+
+- name: Print out the rejoin_results
+  ansible.builtin.debug:
+    msg: "repmgr {{ node_rejoin_results }}"
+
+- name: Start service postgres
+  ansible.builtin.systemd_service:
+    name: postgresql
+    state: started
+  become: true
+
+- name: Check cluster
+  ansible.builtin.command: repmgr -f /etc/repmgr.conf cluster show
+  become: true
+  become_user: postgres
+  register: status_results
+  changed_when: false # cluster show does not modify status of the host
+
+- name: Print out the status_results
+  ansible.builtin.debug:
+    msg: "repmgr {{ status_results }}"
diff --git a/ansible/roles/rejoin/tasks/rejoin_old_primary.yml b/ansible/roles/rejoin/tasks/rejoin_old_primary.yml
deleted file mode 100644
index e28d92ba..00000000
--- a/ansible/roles/rejoin/tasks/rejoin_old_primary.yml
+++ /dev/null
@@ -1,39 +0,0 @@
----
-  - name: stop service postgres
-    ansible.builtin.systemd_service:
-      name: postgresql
-      state: stopped
-    become: true
-
-  - name: Dry run of rejoin
-    ansible.builtin.command: repmgr node rejoin -f /etc/repmgr.conf -d 'host='{{ primary_ip }}' user=repmgr dbname=repmgr connect_timeout=2' --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose --dry-run
-    become: true
-    become_user: postgres
-    register: rejoin_dry_run_results
-  - name: Print out the rejoin_dry_run_results
-    ansible.builtin.debug:
-      msg: "repmgr {{ rejoin_dry_run_results }}"
-
-  - name: Rejoin old primary as standby
-    ansible.builtin.command: repmgr node rejoin -f /etc/repmgr.conf -d 'host='{{ primary_ip }}' user=repmgr dbname=repmgr connect_timeout=2' --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind --verbose
-    become: true
-    become_user: postgres
-    register: rejoin_results
-  - name: Print out the rejoin_results
-    ansible.builtin.debug:
-      msg: "repmgr {{ rejoin_results }}"
-
-  - name: start service postgres
-    ansible.builtin.systemd_service:
-      name: postgresql
-      state: started
-    become: true
-
-  - name: Check cluster
-    ansible.builtin.command: repmgr -f /etc/repmgr.conf cluster show
-    become: true
-    become_user: postgres
-    register: status_results
-  - name: Print out the status_results
-    ansible.builtin.debug:
-      msg: "repmgr {{ status_results }}"
\ No newline at end of file
diff --git a/ansible/roles/ssh/default/main.yml b/ansible/roles/ssh/default/main.yml
new file mode 100644
index 00000000..78126382
--- /dev/null
+++ b/ansible/roles/ssh/default/main.yml
@@ -0,0 +1,3 @@
+ssh_user_key_list:
+  - name:
+    key:
diff --git a/ansible/roles/ssh/tasks/localhost.yml b/ansible/roles/ssh/tasks/localhost.yml
new file mode 100644
index 00000000..ff377ea1
--- /dev/null
+++ b/ansible/roles/ssh/tasks/localhost.yml
@@ -0,0 +1,12 @@
+---
+- name: Add hostkey to localhost `known_hosts`
+  ansible.builtin.known_hosts:
+    name: "{{ ansible_host }}"
+    state: present
+    # NOTE: requires `gather_facts: true`
+    key: >
+      {{ ansible_host }}
+      ecdsa-sha2-nistp256
+      {{ ansible_ssh_host_key_ecdsa_public }}
+    hash_host: true
+  delegate_to: localhost
diff --git a/ansible/roles/ssh/tasks/main.yml b/ansible/roles/ssh/tasks/main.yml
deleted file mode 100644
index 1f968d65..00000000
--- a/ansible/roles/ssh/tasks/main.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-# roles/ssh/tasks/main.yml
-- name: Share the ssh keys one way between 2 particular VMs
-  import_tasks: share-ssh-keys.yml
diff --git a/ansible/roles/ssh/tasks/postgres.yml b/ansible/roles/ssh/tasks/postgres.yml
new file mode 100644
index 00000000..8fd33b19
--- /dev/null
+++ b/ansible/roles/ssh/tasks/postgres.yml
@@ -0,0 +1,65 @@
+---
+# find the other vms, that are not currently being iterated over
+- name: Set host fact other_vms
+  ansible.builtin.set_fact:
+    other_vms: "{{ (ansible_play_hosts_all | difference([inventory_hostname])) }}"
+
+- name: List other VMs
+  ansible.builtin.debug:
+    msg: "{{ item }}"
+  loop: "{{ other_vms }}"
+
+- name: Create user postgres
+  ansible.builtin.user:
+    name: postgres
+    generate_ssh_key: true
+    ssh_key_bits: 2048
+    ssh_key_file: .ssh/id_rsa
+    force: true
+  register: ssh_keys
+  become: true
+
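The `ssh_user_key_list` default added earlier in this patch is intentionally empty and is meant to be overridden, for instance in group vars. A hypothetical filled-in example (the name and key below are made up):

```yaml
ssh_user_key_list:
  - name: jane
    key: ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAA... jane@example.com
```

+# Another way to generate a ssh key...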
+# - name: Force regenerate an OpenSSH keypair if it already exists +# community.crypto.openssh_keypair: +# path: .ssh/id_rsa +# force: true +# owner: postgres # should be this user's key +# register: ssh_keys +# become: true + +- name: List generated SSH key + ansible.builtin.debug: + msg: "{{ ssh_keys.ssh_public_key }}" + +- name: Add the key to authorized_key on the other vm + ansible.posix.authorized_key: + user: postgres + state: present + key: "{{ ssh_keys.ssh_public_key }}" + become: true + delegate_to: "{{ item }}" + loop: "{{ other_vms }}" + +- name: Get the host key + ansible.builtin.set_fact: + # NOTE: requires `gather_facts: true` + hostkey: "{{ ansible_ssh_host_key_ecdsa_public }}" + +- name: List host key + ansible.builtin.debug: + msg: "{{ hostkey }}" + +- name: List vm ip + ansible.builtin.debug: + msg: "{{ ansible_host }}" + +- name: Add the vm to known_hosts on the other vm + ansible.builtin.known_hosts: + path: ~postgres/.ssh/known_hosts # need this for the postgres user + name: "{{ ansible_host }}" + key: "{{ ansible_host }} ecdsa-sha2-nistp256 {{ hostkey }}" + state: present + become: true + delegate_to: "{{ item }}" + loop: "{{ other_vms }}" diff --git a/ansible/roles/ssh/tasks/share-ssh-keys.yml b/ansible/roles/ssh/tasks/share-ssh-keys.yml deleted file mode 100644 index 389f4b15..00000000 --- a/ansible/roles/ssh/tasks/share-ssh-keys.yml +++ /dev/null @@ -1,60 +0,0 @@ ---- - # find the other vm, that is not currently being iterated over (this will need to be changed if more than 2) - - name: Setting host facts for other_vm - ansible.builtin.set_fact: - other_vm: '{{ (ansible_play_hosts_all | difference([inventory_hostname])) | first }}' - - - name: List other vm - ansible.builtin.debug: - msg: "{{ other_vm }}" - - - name: Create user postgres - ansible.builtin.user: - name: postgres - generate_ssh_key: true - ssh_key_bits: 2048 - ssh_key_file: .ssh/id_rsa - force: true - register: ssh_keys - become: true - # Another way to generate a ssh key... 
- # - name: Force regenerate an OpenSSH keypair if it already exists - # community.crypto.openssh_keypair: - # path: .ssh/id_rsa - # force: true - # owner: postgres # should be this user's key - # register: ssh_keys - # become: true - - - name: List generated SSH key - ansible.builtin.debug: - msg: "{{ ssh_keys.ssh_public_key }}" - - - name: Add the key to authorized_key on the other vm - ansible.posix.authorized_key: - user: postgres - state: present - key: '{{ ssh_keys.ssh_public_key }}' - become: true - delegate_to: '{{ other_vm }}' - - - name: Get the host key - ansible.builtin.set_fact: - hostkey: '{{ ansible_ssh_host_key_ecdsa_public }}' - - - name: List host key - ansible.builtin.debug: - msg: "{{ hostkey }}" - - - name: List vm ip - ansible.builtin.debug: - msg: "{{ vm_ip }}" - - - name: Add the vm to known_hosts on the other vm - ansible.builtin.known_hosts: - path: ~postgres/.ssh/known_hosts # need this for the postgres user - name: '{{ vm_ip }}' - key: '{{ vm_ip }} ecdsa-sha2-nistp256 {{ hostkey }}' - state: present - become: true - delegate_to: '{{ other_vm }}' \ No newline at end of file diff --git a/ansible/roles/ssh/tasks/users.yml b/ansible/roles/ssh/tasks/users.yml new file mode 100644 index 00000000..a1c46cef --- /dev/null +++ b/ansible/roles/ssh/tasks/users.yml @@ -0,0 +1,10 @@ +--- +- name: Add users keys to authorized_keys + ansible.posix.authorized_key: + # username on the remotehost whose authorized keys are being modified + user: ubuntu + state: present + key: "{{ item.key }}" + loop: "{{ ssh_user_key_list }}" + loop_control: + label: "adding {{ item.name }} key to authorized_keys" diff --git a/ansible/roles/switchover/tasks/main.yml b/ansible/roles/switchover/tasks/main.yml index 0fab67d6..ecee9d3e 100644 --- a/ansible/roles/switchover/tasks/main.yml +++ b/ansible/roles/switchover/tasks/main.yml @@ -1,4 +1,65 @@ -# roles/switchover/tasks/main.yml -- name: Switchover - import_tasks: switchover.yml +--- +# TODO: maybe stop/restart in separate task? So we only have standby code here? +# assume the db is already there and synched, +# so now want to turn the primary into a standby / replica +# and want to turn the current standby into the primary + +# try to avoid issue: https://github.com/EnterpriseDB/repmgr/issues/703 +- name: Restart service postgres (standby) + ansible.builtin.systemd_service: + name: postgresql + state: restarted + become: true + +# can now just do this with repmgr +# https://www.repmgr.org/docs/current/preparing-for-switchover.html +# need the two instances to be able to ssh to each other! +# siblings-follow only really needed if have multiple standbys... +- name: Dry run of switching the standby and primary + ansible.builtin.command: repmgr standby switchover -f /etc/repmgr.conf --siblings-follow --dry-run + become: true + become_user: postgres + register: switchover_dry_run_results + changed_when: false + +- name: Print out the switchover_dry_run_results + ansible.builtin.debug: + msg: "repmgr {{ switchover_dry_run_results }}" + +# see preparing for switchover if things go wrong despite dry run, there is +# mention of --force-rewind which would use pg_rewind to try to fix divergent timelines... 
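On the standby, the dry run above corresponds to the first command below; if it warns about divergent timelines, the repmgr docs referenced here mention a `--force-rewind` option (a sketch, reusing the PostgreSQL 16 `pg_rewind` path that the rejoin tasks use):

```terminal
sudo -u postgres repmgr standby switchover -f /etc/repmgr.conf --siblings-follow --dry-run
sudo -u postgres repmgr standby switchover -f /etc/repmgr.conf --siblings-follow --force-rewind=/usr/lib/postgresql/16/bin/pg_rewind
```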
+
+## https://www.repmgr.org/docs/current/switchover-execution.html
+## https://www.repmgr.org/docs/current/switchover-troubleshooting.html
+- name: Switch the standby and primary
+  ansible.builtin.command: repmgr standby switchover -f /etc/repmgr.conf --siblings-follow
+  become: true
+  become_user: postgres
+  register: switchover_results
+  changed_when: true
+
+- name: Print out the switchover_results
+  ansible.builtin.debug:
+    msg: "repmgr {{ switchover_results }}"
+
+- name: Check cluster
+  ansible.builtin.command: repmgr -f /etc/repmgr.conf cluster show
+  become: true
+  become_user: postgres
+  register: status_results
+  changed_when: false
+
+- name: Print out the status_results
+  ansible.builtin.debug:
+    msg: "repmgr {{ status_results }}"
+
+# TODO: this will keep crashing until the IP alias is moved to the standby
+# So probably best to restart after the IP switch
+# - name: Start LARD ingestion service (standby)
+#   ansible.builtin.systemd_service:
+#     daemon_reload: true
+#     name: lard_ingestion
+#     state: restarted
+#     enabled: true
+#   become: true
diff --git a/ansible/roles/switchover/tasks/switchover.yml b/ansible/roles/switchover/tasks/switchover.yml
deleted file mode 100644
index 1573d7a1..00000000
--- a/ansible/roles/switchover/tasks/switchover.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-# assume the db is already there and synched, so now want to turn into a standby / replica
-# and want to turn the current standby into the primary
----
-  - name: Restart service postgres (primary)
-    ansible.builtin.systemd_service:
-      name: postgresql
-      state: restarted
-    become: true
-    delegate_to: '{{ primary_ip }}'
-    remote_user: ubuntu
-
-  # try to avoid issue: https://github.com/EnterpriseDB/repmgr/issues/703
-  - name: Restart service postgres (standby)
-    ansible.builtin.systemd_service:
-      name: postgresql
-      state: restarted
-    become: true
-    delegate_to: '{{ standby_ip }}'
-    remote_user: ubuntu
-
-  # can now just do this with repmgr
-  # https://www.repmgr.org/docs/current/preparing-for-switchover.html
-  # need the two instances to be able to ssh to each other!
-  # siblings-follow only really needed if have multiple standbys...
-  - name: Dry run of switching the standby and primary
-    ansible.builtin.command: repmgr standby switchover -f /etc/repmgr.conf --siblings-follow --dry-run
-    become: true
-    become_user: postgres
-    delegate_to: '{{ standby_ip }}'
-    remote_user: ubuntu
-    register: switchover_dry_run_results
-  - name: Print out the switchover_dry_run_results
-    ansible.builtin.debug:
-      msg: "repmgr {{ switchover_dry_run_results }}"
-  ## see preparing for switchover if things go wrong despite dry run, there is mention of --force-rewind
-  ## which would use pg_rewind to try to fix divergent timelines...
- - ## https://www.repmgr.org/docs/current/switchover-execution.html - ## https://www.repmgr.org/docs/current/switchover-troubleshooting.html - - name: Switch the standby and primary - ansible.builtin.command: repmgr standby switchover -f /etc/repmgr.conf --siblings-follow - become: true - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - register: switchover_results - - name: Print out the switchover_results - ansible.builtin.debug: - msg: "repmgr {{ switchover_results }}" - - - name: Check cluster - ansible.builtin.command: repmgr -f /etc/repmgr.conf cluster show - become: true - become_user: postgres - delegate_to: '{{ standby_ip }}' - remote_user: ubuntu - register: status_results - - name: Print out the status_results - ansible.builtin.debug: - msg: "repmgr {{ status_results }}" diff --git a/ansible/roles/vm/tasks/create-project-vm.yml b/ansible/roles/vm/tasks/create-project-vm.yml deleted file mode 100644 index 408d14cf..00000000 --- a/ansible/roles/vm/tasks/create-project-vm.yml +++ /dev/null @@ -1,89 +0,0 @@ ---- - - name: Create VM - openstack.cloud.server: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - availability_zone: '{{ availability_zone }}' - name: '{{ name_stuff }}' - image: '{{ ostack_image }}' - flavor: '{{ ostack_flavor }}' - network: '{{ ostack_network_name }}' - key_name: '{{ ostack_key_name }}' - security_groups: '{{ security_groups_list }}' - state: '{{ ostack_state }}' - # do not give ip, since want to assign a specific one in next step (so as to reuse the ones we have) - auto_ip: false - register: server - - - name: Print out the server information - ansible.builtin.debug: - msg: "Server {{ lookup('ansible.builtin.dict', server) }}" - - - name: Attach floating ip address - openstack.cloud.floating_ip: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - server: '{{ server.server.id }}' - reuse: true - network: public - floating_ip_address: '{{ vm_ip }}' - wait: true - timeout: 60 - - - name: Create Volume - openstack.cloud.volume: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - availability_zone: '{{ availability_zone }}' - name: '{{ name_stuff }}' - volume_type: '{{ volume_type }}' - size: '{{ volume_size }}' - register: volume - - - name: Print out the volume information - ansible.builtin.debug: - msg: "Volume {{ lookup('ansible.builtin.dict', volume) }}" - - - name: Attach a volume to a compute host - openstack.cloud.server_volume: - state: present - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - volume: '{{ volume.volume.id }}' - server: '{{ server.server.id }}' - device: /dev/vdb - - - name: Create port for ipalias and set security groups - openstack.cloud.port: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - state: present - name: 'ipalias-{{ name_stuff }}' - network: '{{ ipalias_network_name }}' - security_groups: '{{ security_groups_list }}' - - - name: Get port info - openstack.cloud.port_info: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - port: 'ipalias-{{ name_stuff }}' - register: ipalias_port - - - name: Print out the port information - ansible.builtin.debug: - msg: "Port {{ lookup('ansible.builtin.dict', ipalias_port) }}" - - - name: Add the ipalias network to server - ansible.builtin.command: # noqa no-changed-when - argv: - - openstack - - --os-cloud - - '{{ ostack_cloud }}' - - --os-region-name - - '{{ ostack_region }}' - - server - - add - - port - - '{{ server.server.id }}' - - 'ipalias-{{ 
name_stuff }}' - when: ipalias_port.ports.0.device_id | length <=0 diff --git a/ansible/roles/vm/tasks/main.yml b/ansible/roles/vm/tasks/main.yml deleted file mode 100644 index 589fc0ef..00000000 --- a/ansible/roles/vm/tasks/main.yml +++ /dev/null @@ -1,4 +0,0 @@ -# roles/vms/tasks/main.yml -- name: Create a VM - import_tasks: create-project-vm.yml - diff --git a/ansible/roles/vm/vars/main.yml b/ansible/roles/vm/vars/main.yml deleted file mode 100644 index d079f5bd..00000000 --- a/ansible/roles/vm/vars/main.yml +++ /dev/null @@ -1,12 +0,0 @@ - # VM config - ostack_flavor: m1.xxlarge - ostack_image: met-jammy-latest - ostack_state: present - ostack_network_name: lard - security_groups_list: - - default - - ssh_usernet - - postgres - - ping - volume_type: __DEFAULT__ - volume_size: 900 \ No newline at end of file diff --git a/ansible/roles/vm_format/tasks/format-mount-disk.yml b/ansible/roles/vm_format/tasks/format-mount-disk.yml deleted file mode 100644 index 5917fa77..00000000 --- a/ansible/roles/vm_format/tasks/format-mount-disk.yml +++ /dev/null @@ -1,45 +0,0 @@ ---- - - name: Create /mnt/ssd-data - ansible.builtin.file: - path: /mnt/ssd-data - state: directory - owner: ubuntu # change to postgres? - group: ubuntu # change to postgres? - mode: 'u=rw,g=rws,o=r' - become: true - - - name: Create ext4 filesystem on {{ mount_point }} - community.general.filesystem: - dev: '{{ mount_point }}' - fstype: ext4 - become: true - - - name: Read device information (always use unit when probing) - community.general.parted: - device: '{{ mount_point }}' - unit: MiB - register: sdb_info - become: true - - - name: Print out the device information - ansible.builtin.debug: - msg: "Partitions {{ sdb_info.partitions }}" - - # this also changes the fstab so its still there when rebooted! - - name: Mount the disk from {{ mount_point }} - ansible.posix.mount: - path: /mnt/ssd-data - src: '{{ mount_point }}' - fstype: ext4 - state: mounted - become: true - - - name: Fetch the UUID of {{ mount_point }} - ansible.builtin.command: blkid --match-tag UUID --output value '{{ mount_point }}' - changed_when: false - register: blkid_cmd - become: true - - - name: Print out the UUID - ansible.builtin.debug: - msg: "UUID {{ blkid_cmd.stdout }}" diff --git a/ansible/roles/vm_format/tasks/install-postgres.yml b/ansible/roles/vm_format/tasks/install-postgres.yml deleted file mode 100644 index 58642405..00000000 --- a/ansible/roles/vm_format/tasks/install-postgres.yml +++ /dev/null @@ -1,136 +0,0 @@ ---- - - name: Add postgres apt key by id from a keyserver - ansible.builtin.apt_key: - url: https://www.postgresql.org/media/keys/ACCC4CF8.asc - state: present - become: true - - - name: Add postgres repository into sources list - ansible.builtin.apt_repository: - repo: deb https://apt.postgresql.org/pub/repos/apt jammy-pgdg main - state: present - become: true - - - name: Install a list of packages - ansible.builtin.apt: - pkg: - - nano - - postgresql-16 - - postgresql-16-repmgr # https://www.repmgr.org/docs/current/install-requirements.html - - pip # needed for installing psycopg2 - - acl # needed for becoming unpriveleged user (such as postgres) - update_cache: true - become: true - - - name: Install psycopg2 python package # dependency for postgres ansible stuff? - ansible.builtin.pip: - name: psycopg2-binary - become: true - - - name: Install openstacksdk python package - ansible.builtin.pip: - name: openstacksdk - become: true - - # make is so the data is actually kept on the ssd mount... 
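The removed `vm_format` role formatted the attached volume as ext4 and mounted it at `/mnt/ssd-data` via fstab. A manual spot check on an existing VM might look like this (a sketch; it assumes the data volume is attached as `/dev/vdb`, as in the deleted vm role):

```terminal
lsblk -f /dev/vdb
findmnt /mnt/ssd-data
grep ssd-data /etc/fstab
```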
- - ### synch the postgres stuff over to new directory, but stop postgres first - - name: Stop service postgres, if running - ansible.builtin.systemd_service: - name: postgresql - state: stopped - become: true - - - name: Run rsync - ansible.builtin.command: rsync -av /var/lib/postgresql/ /mnt/ssd-b/ - become: true - - ## change where data is stored and open up network wise - - name: Comment out original data_directory - ansible.builtin.replace: - dest: /etc/postgresql/16/main/postgresql.conf - regexp: '^data_directory' - replace: '#data_directory' - become: true - - - name: Modify postgresql config - ansible.builtin.blockinfile: - dest: /etc/postgresql/16/main/postgresql.conf - block: | - data_directory = '/mnt/ssd-b/16/main' - listen_addresses = '*' - become: true - - # probably want to restrict this once we know what will connect? - # but the security group rules should take care of limiting to met ranges - - name: Change hba conf to allow connections - community.postgresql.postgresql_pg_hba: - dest: /etc/postgresql/16/main/pg_hba.conf - contype: host - address: all # can we put met internal ip range(s)? - method: md5 - # users and database default to all - become: true - - # make sure these changes take effect - - name: Start up postgres service again - ansible.builtin.systemd_service: - name: postgresql - state: started - become: true - - # REPMGR - - name: Create repmgr user # https://www.repmgr.org/docs/current/quickstart-repmgr-user-database.html - community.postgresql.postgresql_user: - name: repmgr - password: '{{ repmgr_password }}' - role_attr_flags: SUPERUSER - become: true - become_user: postgres - - - name: Create a repmgr database, with owner repmgr - community.postgresql.postgresql_db: - name: repmgr - owner: repmgr - become: true - become_user: postgres - - - name: Change hba conf to allow repmgr to connect for replication - community.postgresql.postgresql_pg_hba: - dest: /etc/postgresql/16/main/pg_hba.conf - databases: replication - contype: host - users: repmgr - address: all - # address: '{{ standby_host }}' - method: trust - become: true - - - name: Change hba conf to allow repmgr to connect to the repmgr db - community.postgresql.postgresql_pg_hba: - dest: /etc/postgresql/16/main/pg_hba.conf - databases: repmgr - contype: host - users: repmgr - address: all - # address: '{{ standby_host }}' - method: trust - become: true - - - name: Restart postgres - ansible.builtin.systemd_service: - name: postgresql - state: restarted - become: true - - - name: Allow the postgres user to run /bin/systemctl restart, stop, start postgres - community.general.sudoers: - name: postgresql - user: postgres - commands: - - /bin/systemctl restart postgresql.service - - /bin/systemctl stop postgresql.service - - /bin/systemctl start postgresql.service - - /bin/systemctl reload postgresql.service - nopassword: true - become: true diff --git a/ansible/roles/vm_format/tasks/main.yml b/ansible/roles/vm_format/tasks/main.yml deleted file mode 100644 index 36d09ebb..00000000 --- a/ansible/roles/vm_format/tasks/main.yml +++ /dev/null @@ -1,9 +0,0 @@ -# roles/vm_format/tasks/main.yml -- name: netplan - import_tasks: netplan.yml - -- name: Format and mount the disk - import_tasks: format-mount-disk.yml - -- name: Install postgres - import_tasks: install-postgres.yml \ No newline at end of file diff --git a/ansible/roles/vm_format/tasks/netplan.yml b/ansible/roles/vm_format/tasks/netplan.yml deleted file mode 100644 index 118cb065..00000000 --- a/ansible/roles/vm_format/tasks/netplan.yml +++ 
/dev/null @@ -1,61 +0,0 @@ ---- - - name: Get port info - openstack.cloud.port_info: - cloud: '{{ ostack_cloud }}' - region_name: '{{ ostack_region }}' - port: 'ipalias-{{ name_stuff }}' - register: ipalias_port - delegate_to: localhost - - - name: Print out the port information - ansible.builtin.debug: - msg: "Port {{ lookup('ansible.builtin.dict', ipalias_port) }}" - delegate_to: localhost - - - name: IP alias netplan configuration - ansible.builtin.set_fact: - netplan_config: - network: - version: 2 - ethernets: - ens6: - dhcp4: true - dhcp4-overrides: - use-routes: false - match: - macaddress: '{{ ipalias_port.ports.0.mac_address }}' - set-name: ens6 - routes: - - to: 0.0.0.0/0 - via: '{{ ipalias_ostack_network_cidr | ansible.utils.ipaddr("net") | ansible.utils.ipaddr("1") | ansible.utils.ipaddr("address") }}' - table: 102 - routing-policy: - - from: '{{ ipalias_ostack_network_cidr }}' - table: 102 - become: true - - - name: Copy out ipalias netplan config - ansible.builtin.copy: - content: '{{ netplan_config | to_nice_yaml }}' - dest: /etc/netplan/90-ansible-ipalias.yaml - mode: '0644' - register: netplan_config - become: true - - - name: Print out netplan config - ansible.builtin.debug: - msg: "Netplan {{ netplan_config }}" - - - name: Apply netplan - ansible.builtin.command: sudo netplan apply - async: 45 - poll: 0 - -# https://gitlab.met.no/ansible-roles/ipalias/-/blob/master/tasks/netplan.yml?ref_type=heads -# this times out and then the servers are uncreachable? -# - name: Reboot server to apply new netplan config, without hitting netplan bug -# ansible.builtin.reboot: # noqa no-handler - # reboot_timeout: 3600 -# when: netplan_config is changed -# become: true - \ No newline at end of file diff --git a/ansible/roles/vm_format/vars/main.yml b/ansible/roles/vm_format/vars/main.yml deleted file mode 100644 index b998a5ae..00000000 --- a/ansible/roles/vm_format/vars/main.yml +++ /dev/null @@ -1,18 +0,0 @@ -$ANSIBLE_VAULT;1.1;AES256 -62373161613862343930306664363533666462303234343834336261636564626236633939393335 -3432396263316238336231346531643965306361386265620a636633646437306565303839333733 -37373533366266313034653465323365396230616136653362313435366264383532373735613436 -6639336335363766370a386536616365653437643865623238353338666666323132646565393439 -39626633353230373562343932363236393834623530663836363732653366633838613738646238 -32363330356337626638373335613466383132396164323334313335666234646130316662626438 -32346665386365363064633335316265643332663331656661613262353330633036656334393436 -66373332376239626666653666333637663337303637353162336530633637303037666631343961 -66383438643832653831303563623232643034616663303336613263653037376363653765386638 -37616332383163376536393732323439613963353339613737653433383562383038626337306563 -66366232346433636330353435306664613537663630646434303635346365346462336662336334 -65383233343634373235383236303434623138633966663864633463363432376635356339363961 -37363263633534633866343937386635366661613939373832653466303635653063343839306466 -33393966373739616133346432343332383330353332653938396433303364376331326334643236 -35646566313563303765303764366663326639323264383831383862653731313031633739313036 -33316664313061393934663763663435646138303930386335393832373230386338363538326263 -65333663396132396535346338393765366331663238396538633163383263616161 diff --git a/ansible/switchover.yml b/ansible/switchover.yml index 48c7ec6b..fa68e546 100644 --- a/ansible/switchover.yml +++ b/ansible/switchover.yml @@ -1,19 +1,40 @@ +--- - name: Switch the 
primary and standby / replica
   hosts: localhost
-  vars:
-    ostack_cloud: lard
-    ostack_region: Ostack2-EXT
   gather_facts: false
-  pre_tasks:
-    - name: find primary ip from inventory
-      ansible.builtin.set_fact:
-        primary_ip: '{{ item }}'
-      with_inventory_hostnames: '{{ name_primary }}'
-    - name: find standby ip from inventory
-      ansible.builtin.set_fact:
-        standby_ip: '{{ item }}'
-      with_inventory_hostnames: '{{ name_standby }}'
-  roles:
-    # ensure the names are passed in the right way around for the current state!
-    - role: switchover
-    - role: movefloatingip
\ No newline at end of file
+  remote_user: ubuntu
+  # TODO: this can probably be done automatically if there are only two VMs?
+  vars:
+    primary: # provide via cmd
+    standby: # provide via cmd
+  # pre_tasks:
+  #   - name: Find server role
+
+  tasks:
+    - name: Restart service postgres (primary)
+      ansible.builtin.systemd_service:
+        name: postgresql
+        state: restarted
+      become: true
+      delegate_to: "{{ primary }}"
+
+    - name: Perform Postgres switchover
+      ansible.builtin.include_role:
+        name: switchover
+        apply:
+          delegate_to: "{{ standby }}"
+
+    - name: Perform IP switchover
+      ansible.builtin.include_role:
+        name: ostack
+        tasks_from: move_floating_ip.yml
+      vars:
+        ostack_primary: "{{ primary }}"
+        ostack_standby: "{{ standby }}"
+
+    - name: Restart LARD ingestion service
+      ansible.builtin.systemd_service:
+        name: lard_ingestion
+        state: restarted
+      become: true
+      delegate_to: "{{ standby }}"
diff --git a/deploy.sh b/deploy.sh
deleted file mode 100644
index 5d0a8c2c..00000000
--- a/deploy.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-if ! cargo build --workspace --release; then
-  exit 1
-fi
-
-pushd ansible || exit
-
-ansible-playbook -i inventory.yml deploy.yml --ask-vault-pass
-
-popd || exit
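With `deploy.sh` removed, the equivalent manual steps (taken from the deleted script) are:

```terminal
cargo build --workspace --release
cd ansible
ansible-playbook -i inventory.yml deploy.yml --ask-vault-pass
```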