diff --git a/README.md b/README.md
index 3c16184..97c7877 100644
--- a/README.md
+++ b/README.md
@@ -175,6 +175,13 @@ rsync:
   #override_global_excluded: true
   #override_global_args: true
 
+# Snapshot retention policy (FIXME: needs more details)
+retention:
+  daily: int # number of daily backups to keep
+  weekly: int # number of weekly backups to keep
+  monthly: int # number of monthly backups to keep
+  yearly: int # number of yearly backups to keep
+
 # Inline scripts executed on the remote host before and after rsyncing,
 # and before any `pre.*.sh` and/or `post.*.sh` scripts for this host.
 pre_script: string
@@ -222,6 +229,11 @@ rsync:
     - "--hard-links"
     - "--block-size=2048"
     - "--recursive"
+retention:
+  daily: 14
+  weekly: 4
+  monthly: 6
+  yearly: 5
 ```
 
 # Copyright
diff --git a/app/prune.go b/app/prune.go
new file mode 100644
index 0000000..ee15896
--- /dev/null
+++ b/app/prune.go
@@ -0,0 +1,163 @@
+package app
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/digineo/zackup/config"
+	"github.com/sirupsen/logrus"
+)
+
+// defaultRetention is the number of snapshots kept per period when the
+// configuration leaves that period unset (zero).
+const defaultRetention = 100000
+
+var (
+	// patterns maps a retention period to the time.Format layout that
+	// identifies the period a snapshot belongs to.
+	patterns = map[string]string{
+		"daily":   "2006-01-02",
+		"weekly":  "", // See special case in listKeepers()
+		"monthly": "2006-01",
+		"yearly":  "2006",
+	}
+)
+
+// snapshot is a single ZFS snapshot of a host dataset.
+type snapshot struct {
+	Ds   string    // Snapshot dataset name "backups/foo@RFC3339"
+	Time time.Time // Parsed timestamp from the dataset name
+}
+
+// PruneSnapshots determines, for each retention period of the given job,
+// which snapshots of the job's host are kept and logs them at debug level.
+// Periods without a configured count (nil Retention or a zero value) fall
+// back to defaultRetention. The job's configuration is not modified.
+//
+// Deleting the remaining snapshots is not implemented yet.
+func PruneSnapshots(job *config.JobConfig) {
+	host := job.Host()
+
+	// Work on a copy so that applying defaults does not alter the job config.
+	var retention config.RetentionConfig
+	if job.Retention != nil {
+		retention = *job.Retention
+	}
+
+	periods := []struct {
+		name string
+		keep uint
+	}{
+		{"daily", retention.Daily},
+		{"weekly", retention.Weekly},
+		{"monthly", retention.Monthly},
+		{"yearly", retention.Yearly},
+	}
+
+	for _, p := range periods {
+		if p.keep == 0 {
+			p.keep = defaultRetention
+		}
+
+		for _, s := range listKeepers(host, p.name, p.keep) {
+			log.WithFields(logrus.Fields{
+				"snapshot": s,
+				"period":   p.name,
+			}).Debug("keeping snapshot")
+		}
+	}
+
+	// TODO subtract keepers from the list of snapshots and rm -rf them
+}
+
+// listKeepers returns the snapshots of host that are not subject to deletion
+// for the given period pattern ("daily", "weekly", "monthly" or "yearly").
+//
+// The most recent snapshot of each distinct period is kept, newest first,
+// until keepCount snapshots have been collected; a keepCount of 0 places no
+// limit. An unknown pattern is logged and yields no keepers.
+func listKeepers(host string, pattern string, keepCount uint) []snapshot {
+	layout, ok := patterns[pattern]
+	if !ok {
+		log.WithField("pattern", pattern).Error("unknown retention pattern")
+		return nil
+	}
+
+	var keepers []snapshot
+	var last string
+
+	for _, s := range listSnapshots(host) {
+		var period string
+
+		// Weekly is special because golang doesn't have support for "week number in year"
+		// in Time.Format strings.
+		if pattern == "weekly" {
+			year, week := s.Time.Local().ISOWeek()
+			period = fmt.Sprintf("%d-%d", year, week)
+		} else {
+			period = s.Time.Local().Format(layout)
+		}
+
+		// Snapshots are sorted newest first, so only the first one seen for
+		// a period is kept.
+		if period == last {
+			continue
+		}
+		last = period
+		keepers = append(keepers, s)
+
+		if uint(len(keepers)) == keepCount {
+			break
+		}
+	}
+
+	return keepers
+}
+
+// listSnapshots calls out to ZFS for a list of snapshots for a given host.
+// Returned data will be sorted by time, most recent first.
+// On zfs failure nil is returned; names without a parsable timestamp are skipped.
+func listSnapshots(host string) []snapshot {
+	ds := newDataset(host)
+	args := []string{
+		"list",
+		"-H", // no field headers in output
+		"-o", "name", // only name field
+		"-t", "snapshot", // type snapshot
+		ds.Name,
+	}
+	o, e, err := execProgram("zfs", args...)
+	if err != nil {
+		f := appendStdlogs(logrus.Fields{
+			logrus.ErrorKey: err,
+			"prefix":        "zfs",
+			"command":       append([]string{"zfs"}, args...),
+		}, o, e)
+		log.WithFields(f).Errorf("executing zfs list failed")
+		// Do not parse the output of a failed command.
+		return nil
+	}
+
+	var snapshots []snapshot
+	for _, ss := range strings.Fields(o.String()) {
+		// Names look like "dataset@timestamp"; skip entries without an "@".
+		at := strings.Index(ss, "@")
+		if at < 0 {
+			log.WithField("snapshot", ss).Error("Unable to parse timestamp from snapshot")
+			continue
+		}
+		ts, err := time.Parse(time.RFC3339, ss[at+1:])
+		if err != nil {
+			log.WithField("snapshot", ss).Error("Unable to parse timestamp from snapshot")
+			continue
+		}
+		snapshots = append(snapshots, snapshot{Ds: ss, Time: ts})
+	}
+	// ZFS list _should_ be in chronological order but just in case ...
+	sort.Slice(snapshots, func(i, j int) bool {
+		return snapshots[i].Time.After(snapshots[j].Time)
+	})
+	return snapshots
+}
diff --git a/cmd/prune.go b/cmd/prune.go
new file mode 100644
index 0000000..11cdf6c
--- /dev/null
+++ b/cmd/prune.go
@@ -0,0 +1,31 @@
+package cmd
+
+import (
+	"github.com/digineo/zackup/app"
+	"github.com/spf13/cobra"
+)
+
+// pruneCmd represents the prune command
+var pruneCmd = &cobra.Command{
+	Use:   "prune [host [...]]",
+	Short: "Prunes backups per-host ZFS dataset",
+	Run: func(cmd *cobra.Command, args []string) {
+		if len(args) == 0 {
+			args = tree.Hosts()
+		}
+
+		for _, host := range args {
+			job := tree.Host(host)
+			if job == nil {
+				log.WithField("prune", host).Warn("unknown host, ignoring")
+				continue
+			}
+
+			app.PruneSnapshots(job)
+		}
+	},
+}
+
+func init() {
+	rootCmd.AddCommand(pruneCmd)
+}
diff --git a/config/job.go b/config/job.go
index 219d20d..32aea12 100644
--- a/config/job.go
+++ b/config/job.go
@@ -4,8 +4,9 @@ package config
 type JobConfig struct {
 	host string
 
-	SSH   *SSHConfig   `yaml:"ssh"`
-	RSync *RsyncConfig `yaml:"rsync"`
+	SSH 
*SSHConfig `yaml:"ssh"` + RSync *RsyncConfig `yaml:"rsync"` + Retention *RetentionConfig `yaml:"retention"` PreScript Script `yaml:"pre_script"` // from yaml file PostScript Script `yaml:"post_script"` // from yaml file @@ -18,6 +19,14 @@ type SSHConfig struct { Timeout *uint `yaml:"timeout"` // number of seconds, defaults to 15 } +// RetentionConfig holds the number of backups to keep per retention period +type RetentionConfig struct { + Daily uint `yaml:"daily"` // defaults to 100000 + Weekly uint `yaml:"weekly"` // defaults to 100000 + Monthly uint `yaml:"monthly"` // defaults to 100000 + Yearly uint `yaml:"yearly"` // defaults to 100000 +} + // Host returns the hostname for this job. func (j *JobConfig) Host() string { return j.host @@ -59,6 +68,26 @@ func (j *JobConfig) mergeGlobals(globals *JobConfig) { } } + if globals.Retention != nil { + if j.Retention == nil { + dup := *globals.Retention + j.Retention = &dup + } else { + if j.Retention.Daily == 0 { + j.Retention.Daily = globals.Retention.Daily + } + if j.Retention.Weekly == 0 { + j.Retention.Weekly = globals.Retention.Weekly + } + if j.Retention.Monthly == 0 { + j.Retention.Monthly = globals.Retention.Monthly + } + if j.Retention.Yearly == 0 { + j.Retention.Yearly = globals.Retention.Yearly + } + } + } + // globals.PreScript j.PreScript.inline = append(globals.PreScript.inline, j.PreScript.inline...) j.PreScript.scripts = append(globals.PreScript.scripts, j.PreScript.scripts...) diff --git a/go.mod b/go.mod index 19d91bf..5843b26 100644 --- a/go.mod +++ b/go.mod @@ -35,3 +35,5 @@ require ( gopkg.in/gemnasium/logrus-graylog-hook.v2 v2.0.7 gopkg.in/yaml.v2 v2.2.2 ) + +go 1.13 diff --git a/testdata/globals.yml b/testdata/globals.yml index 317c712..114aa19 100644 --- a/testdata/globals.yml +++ b/testdata/globals.yml @@ -4,6 +4,12 @@ ssh: port: 22 identity_file: /etc/zackup/id_rsa.pub +retention: + daily: 14 + weekly: 4 + monthly: 6 + yearly: 5 + rsync: included: - /etc