diff --git a/.gitignore b/.gitignore index 5bdf5d8..9fb88f4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ data/** config.rb +manager.log +.idea \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock index bf05037..47c0722 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -7,8 +7,8 @@ GEM multipart-post (>= 1.2, < 3) json (2.3.0) multipart-post (2.1.1) - nio4r (2.5.8) - puma (4.3.9) + nio4r (2.5.9) + puma (6.4.0) nio4r (~> 2.0) PLATFORMS diff --git a/README b/README deleted file mode 100644 index 1de680e..0000000 --- a/README +++ /dev/null @@ -1,15 +0,0 @@ -This tool does management of LetsEncrypt certificates. - -* Run bundle (or bundle --deployment for production) -* Copy config.rb.example to config.rb and configure as needed -* Make bin/setup.rb to generate master keys, create directories, and accept the LetsEncrypt TOS -* Run with procodile -* Run bin/renew.rb from time to time - -There are 3 endpoints: - -* /~acmemanager/list - lists all currently valid certificates with their expiry date -* /~acmemanager/issue/example.com - issues a certificate for example.com -* /~acmemanager/purge/example.com - purges a certificate for example.com - -Requests must be authenticated by passing an API key in the X-API-KEY header. diff --git a/README.md b/README.md new file mode 100644 index 0000000..2015e28 --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +# Acme Manager +This tool manages LetsEncrypt certificates on our load balancer hosts. It does two main things: + +### Web Server + +It runs a web server that lets certain apps control certificates for domains externally. There are 3 API endpoints: + +* `/~acmemanager/list` - lists all currently valid certificates with their expiry date +* `/~acmemanager/issue/example.com` - issues a certificate for example.com +* `/~acmemanager/purge/example.com` - purges a certificate for example.com + +Requests must be authenticated by passing an API key in the X-API-KEY header. 
+ +### Bulk Certificate Renewals (CRON) + +There are cron jobs set on the Load Balancer hosts (under the `haproxy` user) to run renewals daily at `02:00 AM`; the job looks like this: +```shell +0 2 * * * cd /opt/acme-manager; bundle exec ruby bin/renew.rb +``` + +The `misc` directory contains some scripts required for the High Availability setup on the Load Balancer hosts. + +## Instructions +* Run bundle (or bundle --deployment for production) +* Copy config.rb.example to config.rb and configure as needed +* Run bin/setup.rb to generate master keys, create directories, and accept the LetsEncrypt TOS +* Run the web server with procodile (`procodile start`) +* Run bin/renew.rb from time to time diff --git a/config.rb.example b/config.rb.example index 70db4dd..548f8c9 100644 --- a/config.rb.example +++ b/config.rb.example @@ -1,6 +1,11 @@ AcmeManager.directory = 'https://acme-staging-v02.api.letsencrypt.org/directory' AcmeManager.email_address = 'domains@example.com' AcmeManager.api_key = 'xxxxxxxxxxxxxx' +AcmeManager.pre_renewal_check = proc { + lock_file_path = "/var/run/renewals_cron.lock" + node = File.read(lock_file_path).strip rescue nil + node == "MASTER" +} AcmeManager.post_commands = [ 'sudo /etc/init.d/haproxy reload' ] diff --git a/lib/acme_manager.rb b/lib/acme_manager.rb index 2454cd1..6c3c633 100644 --- a/lib/acme_manager.rb +++ b/lib/acme_manager.rb @@ -1,3 +1,4 @@ +require 'logger' require 'acme-client' require 'fileutils' require 'acme_manager/certificate' @@ -28,6 +29,7 @@ def self.renew_all new_issues = false self.certificates_due_for_renewal.each do |certificate| status = certificate.renew + AcmeManager.log_status(status, {:domain => certificate.name}) new_issues = true if status[:result] == :issued end new_issues @@ -90,6 +92,14 @@ def self.api_key @api_key || raise("API Key not set") end + def self.pre_renewal_check=(proc) + @pre_renewal_check = proc + end + + def self.pre_renewal_check + @pre_renewal_check || proc { true } + end + def 
self.post_commands=(post_commands) @post_commands = post_commands end @@ -105,6 +115,19 @@ def self.run_post_commands end end + def self.can_run_renewals? + return AcmeManager.pre_renewal_check.call + end + + def self.logger + @logger ||= Logger.new(File.join(File.dirname(__FILE__), '..', 'manager.log')) + end + + def self.log_status(status, *args) + method = status[:result] == :failed ? :error : :info + AcmeManager.logger.send(method, {:args => [*args]}.merge(status).to_json) + end + end config_file = File.join(File.dirname(__FILE__), '..', 'config.rb') diff --git a/lib/acme_manager/certificate.rb b/lib/acme_manager/certificate.rb index 48bcd41..a3d40f5 100644 --- a/lib/acme_manager/certificate.rb +++ b/lib/acme_manager/certificate.rb @@ -25,10 +25,15 @@ def purge end def renew + unless AcmeManager.can_run_renewals? + return {:result => :failed, :reason => {:type => :internal, :detail => "Load Balancer host transitioned"}} # keyed by :result, the key renew_all and log_status inspect + end + status = Certificate.issue(@name) - if status == :failed && expired? + if status[:result] == :failed && expired? return purge end + status end diff --git a/misc/renewals_cron_control.sh b/misc/renewals_cron_control.sh new file mode 100644 index 0000000..743cd2d --- /dev/null +++ b/misc/renewals_cron_control.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# This script is used by Keepalived to notify all Load Balancer nodes +# in the cluster of their current STATE, i.e. whether current host is +# a "MASTER" or a "BACKUP". When the STATE file changes to "BACKUP" in +# a given host all renewals commands are interrupted, thus preventing +# multiple hosts from running renewals (via the CRON) +# +# This script should be located at `/usr/local/bin/` in each of the +# Load Balancer hosts. 
+# +# Example Keepalived config in the Load Balancers would look like this: +# +# vrrp_instance CRON { +# state MASTER +# interface ens10 +# virtual_router_id +# priority 100 +# advert_int 1 +# notify /usr/local/bin/renewals_cron_control.sh +# unicast_peer { +# +# } +# } + +# The path to the lock file +LOCK_FILE="/var/run/renewals_cron.lock" +STATE="" # Keepalived's generic "notify" hook appends TYPE, NAME, STATE and PRIORITY, so the state is $3; $1 is the fallback for direct invocation + +if [[ "${3:-$1}" == "MASTER" ]]; then + STATE="MASTER" +elif [[ "${3:-$1}" == "BACKUP" ]]; then + STATE="BACKUP" +elif [[ "${3:-$1}" == "FAULT" ]]; then + STATE="FAULT" +fi + +echo "$STATE" > "$LOCK_FILE" + +# Change the owner of the lock file to 'haproxy' and make it readable +chown haproxy "$LOCK_FILE" +chmod 644 "$LOCK_FILE"