Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add maintenance node playbook and vars file #614

Draft
wants to merge 14 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
526 changes: 526 additions & 0 deletions group_vars/maintenance.yml

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions hosts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ plausible.galaxyproject.eu
[celerycluster]
celery-0.galaxyproject.eu

[maintenance]
maintenance.galaxyproject.eu ansible_ssh_user=rocky

# Baremetal
[galaxyservers]
sn06.galaxyproject.eu
Expand Down
178 changes: 178 additions & 0 deletions maintenance.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
---
# Play that configures the hosts in the `maintenance` inventory group.
# It prepares the OS, installs conda, deploys Galaxy through two passes of the
# galaxyproject.galaxy role (user/paths first, full install later) and then
# applies the site-specific extras listed under `roles`.
- name: UseGalaxy EU maintenance server
  hosts: maintenance
  become: true
  become_user: root
  vars:
    # The full internal name.
    hostname: maintenance.galaxyproject.eu
  vars_files:
    - group_vars/gxconfig.yml  # The base galaxy configuration
    - group_vars/toolbox.yml  # User controlled toolbox
    - secret_group_vars/sentry.yml  # Sentry SDK init url
    - secret_group_vars/aws.yml  # AWS creds
    - secret_group_vars/pulsar.yml  # Pulsar + MQ Connections
    - secret_group_vars/oidc.yml  # OIDC credentials (ELIXIR, keycloak)
    - secret_group_vars/object_store.yml  # Object Store credentials (S3 etc ...)
    - secret_group_vars/db-main.yml  # DB URL + some postgres stuff
    - secret_group_vars/file_sources.yml  # file_sources_conf.yml creds
    - secret_group_vars/all.yml  # All of the other assorted secrets...
    - secret_group_vars/keys.yml  # SSH keys
    - templates/galaxy/config/job_conf.yml
  handlers:
    # Though this handler doesn't do anything (for now), galaxyproject.galaxy role
    # will fail if it's not defined
    - name: Restart Galaxy
      shell: |
        echo 'Manual web handler restart required' && cd /opt/galaxy/ && source /opt/galaxy/.bashrc
      args:
        # `source` is a bash builtin; do not rely on /bin/sh providing it.
        executable: /bin/bash
  collections:
    - devsec.hardening
  pre_tasks:
    - name: Install Dependencies
      package:
        name:
          - git
          - postgresql
          - python3-psycopg2
          - python3-virtualenv
          - bc
          - python3
          - python3-devel
      become: true
  post_tasks:
    - name: Append some users to the systemd-journal group
      user:
        name: "{{ item }}"
        groups: systemd-journal
        append: true
      loop:
        - "{{ galaxy_user.name }}"
        - "telegraf"
        - "stats"  # special account to retrieve statistics from the server in read-only mode
    - name: Set authorized SSH key (galaxy user)
      ansible.posix.authorized_key:
        user: "{{ galaxy_user.name }}"
        state: present
        key: "{{ item }}"
      loop:
        - "ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBOBINXdjILF6x3WuppXyq6J2a2oSLR6waZ6txgjYJogHdIKPbI0TdReCv4EVxxYRY/NqGpHbjkqfRTsf2VgoU3U= mk@galaxy-mira"
        - "ecdsa-sha2-nistp521 AAAAE2VjZHNhLXNoYTItbmlzdHA1MjEAAAAIbmlzdHA1MjEAAACFBACB5Q5blymkTIRSzVzXITOGvBuI7W0L9Ykwfz8LJGPraaGVPiezzFGvjhqwX+EyCqQPt7JprR5mimJRw/JN3nBXWAHjekvmB5FuILkk6m5fOiQJ5QhRMyQ5GfxODAvGbHpTuWHbYJLWD5fhcboKPxlXOWy4xY9kDZVuQvEKisNKYBsFLA== sanjay"
    - name: Set authorized SSH key (stats user)
      ansible.posix.authorized_key:
        user: "stats"
        state: present
        key: "{{ item }}"
      loop:
        - "ecdsa-sha2-nistp521 AAAAE2VjZHNhLXNoYTItbmlzdHA1MjEAAAAIbmlzdHA1MjEAAACFBAA6oD5Ps9h6pKokzaAcCI6R08CKm2aSVv86h/O2HTEQkzeJq/Uvu4gbrONAM0FK5R693mHggIwaROKf1Z1+q4YNtACtYjV1c+6a9lcrGMM31y5RzO6mAW+rHNEXgZ1n3wqDcBQhSXhSekSen0R2QKwvmB7xeP4XX9qE10azZuafIFU9hQ== sanjay"
        - "ecdsa-sha2-nistp521 AAAAE2VjZHNhLXNoYTItbmlzdHA1MjEAAAAIbmlzdHA1MjEAAACFBADKblzzPBc3+dEfFvhJQHsHGkFFN6ORjfXo71P1OutwcKEMCIcNkZKJHhYkLLrfTDN5JJ5tK2L5AaSxdwETofwm4AG1xv3LuoYsXC6e3sjKi09BVmzef520pIMW+rvL+hESwSazZaUAC0wDcH4aNDTonZYcAY87rpMX7pNMkNPJvWilUA== mira"
  roles:
    ## Starting configuration of the operating system
    - role: usegalaxy_eu.handy.os_setup
      vars:
        enable_hostname: true
        enable_powertools: true  # geerlingguy.repo-epel role doesn't enable PowerTools repository
        enable_remap_user: true
        enable_exclude_packages: true
        enable_pam_limits: true  # Prevent out of control processes
        enable_install_software: true  # Some extra admin tools (*top, vim, etc)
    - geerlingguy.repo-epel  # Install EPEL repository

    ## Install miniconda, create a _galaxy_ environment and install packages.
    ## Galaxy will use the virtualenv command from this conda environment (see
    ## galaxy_virtualenv_command) in the group_vars/maintenance.yml
    - role: galaxyproject.miniconda
      vars:
        miniconda_prefix: "{{ conda_prefix }}"
        galaxy_conda_create_env: true
        galaxy_conda_env_packages:
          - python=3.8.8
          - pip
          - virtualenv

    - usegalaxy-eu.autoupdates  # keep all of our packages up to date
    - influxdata.chrony  # Keep our time in sync.

    ## Filesystems
    - usegalaxy-eu.autofs  # Setup the mount points which will be needed later

    ## Monitoring (Uncomment when in production)
    # - hxr.monitor-cluster
    # - hxr.monitor-email
    # - usegalaxy-eu.monitoring

    ## Setup Galaxy user
    - role: galaxyproject.galaxy
      vars:
        galaxy_create_user: true
        galaxy_manage_clone: false
        galaxy_manage_download: false
        galaxy_manage_existing: true
        galaxy_manage_paths: true
        galaxy_manage_static_setup: false
        galaxy_manage_mutable_setup: false
        galaxy_manage_database: false
        galaxy_fetch_dependencies: false
        galaxy_build_client: false

    ## The bashrc needs to be created for several later features.
    - role: usegalaxy-eu.bashrc

    # HTCondor (Uncomment when in production and latest version is installed)
    # - usegalaxy_eu.htcondor

    # Misc.
    - role: hxr.galaxy-nonreproducible-tools
      become: true
      become_user: galaxy
    - usegalaxy-eu.dynmotd  # nicer MOTD/welcome message
    - usegalaxy-eu.rsync-galaxy-sync  # syncs Galaxy codebase to NFS and to the headnodes
    - role: usegalaxy-eu.webhooks  # Clone webhook repository
      become: true
      become_user: galaxy
    - role: usegalaxy-eu.tours  # Clone tour repository
      become: true
      become_user: galaxy

    ## SSL / Security
    # https://en.wikipedia.org/wiki/Trust_on_first_use
    - ssh-host-sign  # Sign the server host key to prevent TOFU for SSH

    ## GALAXY
    - hxr.postgres-connection
    - galaxyproject.gxadmin
    # TODO move under monitoring + telegraf.
    # Role that creates a cron to gather galaxy stats, needs to run only on one node (uncomment when in production)
    # - usegalaxy-eu.galaxy-slurp

    # The REAL galaxy role
    - role: galaxyproject.galaxy
      vars:
        galaxy_create_user: true
        galaxy_manage_clone: true
        galaxy_manage_download: false
        galaxy_manage_existing: false
        galaxy_manage_static_setup: true
        galaxy_manage_mutable_setup: true
        galaxy_manage_database: true
        galaxy_fetch_dependencies: true
        galaxy_build_client: true

    ## Extras!
    - usegalaxy-eu.fix-galaxy-server-dir  # Fix details into the galaxy server dirs
    - hxr.install-to-venv  # Some extra packages our site needs.
    - usegalaxy_eu.gie_proxy  # Setup the NodeJS proxy for GxIT (not IE anymore)
    # - usegalaxy_eu.fs_maintenance # (Uncomment when in production)
    # - usegalaxy-eu.htcondor_release # (Uncomment when in production and condor is installed)
    # Various ugly fixes
    # - usegalaxy-eu.fix-unscheduled-workflows (Uncomment when in production)
    # - usegalaxy-eu.fix-ancient-ftp-data # Remove FTP data older than 3 months, create FTP user directories (Uncomment when in production)
    # - usegalaxy-eu.fix-missing-api-keys # Workaround for IE users not have a key set. (Uncomment when in production)
    # - usegalaxy-eu.fix-user-quotas # Automatically recalculate user quotas and attribute ELIXIR quota to ELIXIR AAI user on a regular basis (Uncomment when in production)
    - usegalaxy_eu.tpv_auto_lint
    - ssh_hardening  # from the devsec.hardening collection declared above
    - dj-wasabi.telegraf  # Telegraf metrics agent
    - usegalaxy-eu.logrotate  # Rotate logs
    - usegalaxy-eu.fix-stop-ITs
    - usegalaxy-eu.vgcn-monitoring
4 changes: 2 additions & 2 deletions requirements.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ collections:
source: https://galaxy.ansible.com
type: galaxy
- name: usegalaxy_eu.handy
version: 2.10.0
version: 2.11.1
source: https://galaxy.ansible.com

roles:
Expand Down Expand Up @@ -89,7 +89,7 @@ roles:
src: https://github.com/usegalaxy-eu/ansible-update-hosts
version: 0.2.0
- name: usegalaxy_eu.gie_proxy
version: 0.0.2
version: 0.0.3
- name: usegalaxy-eu.autofs
src: https://github.com/usegalaxy-eu/ansible-autofs
version: 1.0.0
Expand Down
2 changes: 1 addition & 1 deletion roles/dj-wasabi.telegraf/tasks/RedHat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
description: InfluxDB Repository - RHEL $releasever
baseurl: "https://repos.influxdata.com/rhel/{{ telegraf_redhat_releasever }}/$basearch/stable"
gpgcheck: yes
gpgkey: https://repos.influxdata.com/influxdb.key
gpgkey: https://repos.influxdata.com/influxdata-archive_compat.key

- name: "Install telegraf package | RedHat"
yum:
Expand Down
30 changes: 30 additions & 0 deletions roles/usegalaxy-eu.rsync-galaxy-sync/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
usegalaxy-eu.rsync-galaxy-sync
=========

Adds an rsync script that performs a full sync of a Galaxy codebase to an NFS share and to the head nodes.

Role Variables
--------------

- `execute_galaxy_sync`: Whether to execute the sync script after deploying it. Defaults to `false`.
- `galaxy_rsync_user_private_key_file`: Path to the private key file used to rsync the codebase. If this file does not exist, it is created from the vault variable `galaxy_user_private_key`.
- `headnodes`: The head node(s) to rsync the codebase to, given as a single host string. Defaults to `sn07.galaxyproject.eu` (as of 24/03/2023 this is still in a testing phase, hence the `sn07` default).
- `headnodes_sync_location`: The location on the head nodes to rsync the codebase to. Defaults to the variable `galaxy_root` (which is defined in the group_vars files).
- `galaxy_nfs_location`: The directory on the NFS share to rsync the codebase to. Defaults to an empty string, so it should be set explicitly.

Dependencies
------------

- `prsync` command (if not installed, it will be installed. The command is available in the `pssh` package)

Example Playbook
----------------

    - hosts: maintenance
      roles:
        - role: usegalaxy-eu.rsync-galaxy-sync
          vars:
            execute_galaxy_sync: false
            galaxy_rsync_user_private_key_file: "/opt/galaxy/.ssh/galaxy_rsync_key"
            headnodes: "sn07.galaxyproject.eu"
            headnodes_sync_location: "/opt/galaxy"
            galaxy_nfs_location: "/data/galaxy-sync"
9 changes: 9 additions & 0 deletions roles/usegalaxy-eu.rsync-galaxy-sync/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Default variables for the usegalaxy-eu.rsync-galaxy-sync role.

# When true, the role runs /usr/bin/galaxy-rsync right after deploying it.
execute_galaxy_sync: false
# Private key file passed to prsync through `--ssh-args -i`. The role writes it
# from `galaxy_user_private_key` when the file does not exist yet.
galaxy_rsync_user_private_key_file: "{{ galaxy_user.home }}/.ssh/galaxy_rsync_key"

# Uses the hostnames of the inventory group 'galaxyservers'
# headnodes: "{{ groups['galaxyservers'] | join(', ') }}"
# NOTE(review): the value is handed to `prsync -H` as one string; a
# comma-separated join may not be accepted there — confirm before enabling.
headnodes: "sn07.galaxyproject.eu"
# Destination prefix on the head nodes; each synced directory is appended to it.
headnodes_sync_location: "{{ galaxy_root }}"
# Destination prefix for the rsync-to-NFS step; each synced directory is
# appended to it. Empty by default, so set it explicitly to get a usable target.
galaxy_nfs_location: ""
14 changes: 14 additions & 0 deletions roles/usegalaxy-eu.rsync-galaxy-sync/meta/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
# Ansible Galaxy metadata for the usegalaxy-eu.rsync-galaxy-sync role.
galaxy_info:
  author: The Galaxy Project
  description: Installs a Galaxy rsync script
  company: The Galaxy Project
  license: AFL v3.0
  # Quoted so YAML keeps it a version string instead of parsing a float.
  min_ansible_version: "2.5"
  platforms:
    - name: EL
      # Release identifiers are strings, not integers.
      versions:
        - "8"
        - "9"
  galaxy_tags: []
dependencies: []
80 changes: 80 additions & 0 deletions roles/usegalaxy-eu.rsync-galaxy-sync/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
---
# Probe for the prsync binary. The result is registered for the install task
# that follows; ignore_errors keeps the play going when the binary is missing.
- name: Check if prsync is installed
  command: prsync --version
  register: prsync_installed
  # A version probe never modifies the host, so never report "changed".
  changed_when: false
  ignore_errors: true

# Runs only when the preceding probe produced no stdout, i.e. `prsync --version`
# printed nothing. prsync is shipped in the pssh package.
- name: Install prsync (part of pssh)
  when: not prsync_installed.stdout
  become: true
  dnf:
    state: present
    name: pssh

# Record whether the rsync private key file is already present; the two
# following tasks are skipped when it is.
- name: Check if SSH key exists
  register: ssh_key
  stat:
    path: "{{ galaxy_rsync_user_private_key_file }}"

# Make sure the galaxy user's ~/.ssh exists before the key is written into it.
# Skipped when the key file is already there.
- name: Create SSH directory
  file:
    path: "{{ galaxy_user.home }}/.ssh"
    state: directory
    owner: "{{ galaxy_user.name }}"
    group: "{{ galaxy_user.name }}"
    # Quoted so the module receives the octal string, not a YAML-converted integer.
    mode: "0700"
  when: not ssh_key.stat.exists

# Write the private key from `galaxy_user_private_key` to the configured path.
# Skipped when the key file already exists, so an existing key is never replaced.
- name: Add SSH key
  copy:
    content: "{{ galaxy_user_private_key }}"
    dest: "{{ galaxy_rsync_user_private_key_file }}"
    owner: "{{ galaxy_user.name }}"
    group: "{{ galaxy_user.name }}"
    # Quoted so the module receives the octal string, not a YAML-converted integer.
    mode: "0600"
  # Keep the private key material out of task output, diffs and logs.
  no_log: true
  when: not ssh_key.stat.exists

# Render /usr/bin/galaxy-rsync. For every Galaxy sub-directory that exists
# under galaxy_root, the script mirrors it to the NFS location (background
# rsync) and to the head nodes (foreground prsync); shed_tools-local is
# mirrored to a directory named shed_tools on both targets.
- name: "Deploy galaxy-rsync script"
  copy:
    content: |
      #!/bin/bash
      headnodes="{{ headnodes }}"
      nfs_location="{{ galaxy_nfs_location }}"

      # Never run `rsync --delete` from an unexpected working directory.
      cd "{{ galaxy_root }}" || exit 1

      for dir in config custom-tools dynamic_rules gie-proxy mutable-config mutable-data server venv tool-data; do
        if [ -d "$dir" ]; then
          echo "Syncing $dir"
          # Sync to NFS server in background, only if a target location is set
          # (an empty location would make rsync target "/$dir/").
          if [ -n "$nfs_location" ]; then
            rsync -avr --delete --exclude node_modules/ --exclude .git --exclude __pycache__ "$dir/" "$nfs_location/$dir/" &
          fi

          # Sync to headnodes only if the variable is set
          if [ -n "$headnodes" ]; then
            # Sync to head nodes in foreground (so we can see progress and wait until the sync is done before continuing with the next directory)
            prsync -avr -H "$headnodes" --extra-arg='--delete' --extra-arg='--exclude=node_modules/' --extra-arg='--exclude=.git' --extra-arg='--exclude=__pycache__' --user "{{ galaxy_user.name }}" --ssh-args='-i "{{ galaxy_rsync_user_private_key_file }}"' "$dir/" "{{ headnodes_sync_location }}/$dir/"
          fi
        else
          echo "Skipping $dir"
        fi
      done

      if [ -d shed_tools-local ]; then
        echo "Syncing shed_tools-local"
        # Sync to NFS server in background, only if a target location is set
        if [ -n "$nfs_location" ]; then
          rsync -avr --delete --exclude .hg shed_tools-local/ "$nfs_location/shed_tools/" &
        fi

        # Sync to headnodes only if the variable is set
        if [ -n "$headnodes" ]; then
          # Sync to head nodes in foreground (so we can see progress and wait until the sync is done before continuing with the next directory)
          prsync -avr -H "$headnodes" --extra-arg='--delete' --extra-arg='--exclude=.hg' --user "{{ galaxy_user.name }}" --ssh-args='-i "{{ galaxy_rsync_user_private_key_file }}"' shed_tools-local/ "{{ headnodes_sync_location }}/shed_tools/"
        fi
      else
        echo "Skipping shed_tools-local"
      fi

      # Do not exit while background NFS syncs are still running.
      wait
    dest: /usr/bin/galaxy-rsync
    owner: root
    group: root
    # Quoted so the module receives the octal string, not a YAML-converted integer.
    mode: "0755"

# Optionally run the deployed sync script as the Galaxy user. That is the same
# account the script passes to prsync via `--user` and whose key it uses.
- name: "Execute the script. Syncing in progress."
  command: /usr/bin/galaxy-rsync
  become: true
  become_user: "{{ galaxy_user.name }}"
  # `| bool` keeps string values such as "false" (e.g. from --extra-vars) from being truthy.
  when: execute_galaxy_sync | bool