Skip to content

Commit

Permalink
New job that prunes old files from the database
Browse files Browse the repository at this point in the history
Also fixed a bug with job intervals in taskrunner.go.

Closes #8.
  • Loading branch information
oyvindhagberg committed Mar 2, 2018
1 parent 19f78fc commit 509cd4a
Show file tree
Hide file tree
Showing 3 changed files with 299 additions and 2 deletions.
202 changes: 202 additions & 0 deletions server/service/pruneOldFiles.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
package main

import (
"database/sql"
"log"
"time"

"github.com/lib/pq"
)

type pruneOldFilesJob struct{}

var pruneTimeTable []time.Duration

func init() {
pruneTimeTable = generateTimeTable()
RegisterJob(pruneOldFilesJob{})
}

func (p pruneOldFilesJob) HowOften() time.Duration {
return time.Hour
}

func (p pruneOldFilesJob) Run(db *sql.DB) {
// Find all machines
machineList := make([]string, 0, 100)
rows, err := db.Query("SELECT DISTINCT certfp FROM files")
if err != nil {
log.Fatal(err)
}
defer rows.Close()
for rows.Next() {
var certfp sql.NullString
rows.Scan(&certfp)
if err != nil {
log.Fatal(err)
}
if certfp.Valid {
machineList = append(machineList, certfp.String)
}
}
if err = rows.Err(); err != nil {
log.Fatal(err)
}

// For every machine
for _, certfp := range machineList {
// Finn all unique filenames on the machine
rows, err := db.Query("SELECT DISTINCT filename FROM files "+
"WHERE certfp=$1", certfp)
if err != nil {
log.Fatal(err)
}
defer rows.Close()
filenames := make([]string, 0)
for rows.Next() {
var filename sql.NullString
err = rows.Scan(&filename)
if err != nil {
log.Fatal(err)
}
if filename.Valid {
filenames = append(filenames, filename.String)
}
}
if err = rows.Err(); err != nil {
log.Fatal(err)
}
// Can't wait with rows.Close() until the function ends;
// If many machines, it would cause too many open connections.
rows.Close()

// For every file
for _, filename := range filenames {
timeMap := make(map[int]time.Time)
// Find all versions of that file
rows, err = db.Query("SELECT fileid,mtime FROM files "+
"WHERE certfp=$1 AND filename=$2", certfp, filename)
if err != nil {
log.Fatal(err)
}
defer rows.Close()
for rows.Next() {
var fileID sql.NullInt64
var mtime pq.NullTime
err = rows.Scan(&fileID, &mtime)
if err != nil {
log.Fatal(err)
}
if fileID.Valid && mtime.Valid {
timeMap[int(fileID.Int64)] = mtime.Time
}
}
if err = rows.Err(); err != nil {
log.Fatal(err)
}
rows.Close()

// Find what to delete
var count int
for _, deleteID := range whatToDelete(&timeMap) {
_, err = db.Exec("DELETE FROM files WHERE fileid=$1", deleteID)
if err != nil {
log.Fatal(err)
}
count++
}
if count > 0 {
//log.Printf("Pruned %d files from the database.\n", count)
}
}
}
}

func whatToDelete(m *map[int]time.Time) []int {
type record struct {
fileID int
mtime time.Time
}

slots := make([]*record, len(pruneTimeTable))
for id, timestamp := range *m {
// find the right slot for this record
age := time.Since(timestamp)
slot := 0
for slot < len(pruneTimeTable) && age > pruneTimeTable[slot] {
slot++
}
// if it is older than the record currently occupying the slot,
// it can have it.
if slots[slot] == nil || timestamp.Before(slots[slot].mtime) {
slots[slot] = &record{
fileID: id,
mtime: timestamp,
}
}
}

// what to keep
keep := make(map[int]bool)
for _, r := range slots {
if r != nil {
keep[r.fileID] = true
}
}

// keep the newest(latest) and the oldest(earliest) version forever
var oldest, newest record
now := time.Now()
for i, t := range *m {
if oldest.fileID == 0 || oldest.mtime.After(t) {
oldest.fileID = i
oldest.mtime = t
}
if newest.fileID == 0 || newest.mtime.Before(t) {
newest.fileID = i
newest.mtime = t
}
// keep every version from the last 24 hours
if now.Sub(t) < time.Duration(24)*time.Hour {
keep[i] = true
}
}
keep[oldest.fileID] = true
keep[newest.fileID] = true

// Ok, now we have a list of what to keep.
// Let's find what to delete.
del := make([]int, 0, len(*m)-len(keep))
for i := range *m {
if !keep[i] {
del = append(del, i)
}
}
return del
}

// generateTimeTable returns a slice of time.Time.
// The intention is that each entry becomes a slot
// where one record can be kept, as long as it is
// younger than the age given by the entry.
// The entries are given in ascending order.
func generateTimeTable() []time.Duration {
result := make([]time.Duration, 0)
// keep one version per day for the last 30 days
t := time.Duration(0)
for days := 1; days <= 30; days++ {
t += time.Hour * 24
result = append(result, t)
}
// keep one version per week for 52 weeks after that
for weeks := 1; weeks <= 52; weeks++ {
t += time.Hour * 24 * 7
result = append(result, t)
}
// keep one version per month (30 days) for 10 years after that
for months := 1; months <= 12*10; months++ {
t += time.Hour * 24 * 30
result = append(result, t)
}
return result
}
95 changes: 95 additions & 0 deletions server/service/pruneOldFiles_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package main

import (
"math/rand"
"testing"
"time"
)

func TestGenerateTimeTable(t *testing.T) {
table := pruneTimeTable
// how many slots <= 30 days
count := 0
for i, slot := range table {
if slot <= time.Duration(30*24)*time.Hour {
count++
if i > 0 {
diff := table[i] - table[i-1]
if diff != time.Duration(24)*time.Hour {
t.Errorf("Difference between slots is %v", diff)
}
}
}
}
if count != 30 {
t.Errorf("Wrong number of 30 day slots, got %d", count)
}
// how many slots <= (52 weeks and 30 days)
count = 0
for _, slot := range table {
if slot <= time.Duration((52*7+30)*24)*time.Hour {
count++
}
}
if count != 30+52 {
t.Errorf("Wrong number of one year slots, got %d", count)
}
}

func TestWhatToDelete(t *testing.T) {
data := make(map[int]time.Time)
// add a bunch of stuff <24h
const dayItems = 100
var fileID = 1
var maxSeconds = 24 * 60 * 60
for i := 0; i < dayItems; i++ {
data[fileID] = time.Now().Add(-time.Duration(rand.Intn(maxSeconds)) * time.Second)
fileID++
}
del := whatToDelete(&data)
if len(del) > 0 {
t.Errorf("Shouldn't delete anything younger than 24 hours.")
}

// now try things between 24 hours and 30 days
data = make(map[int]time.Time)
minSeconds := 60 * 60 * 24
maxSeconds = minSeconds + 60*60*24*29 - 1
for i := 0; i < 1000; i++ {
data[fileID] = time.Now().Add(-time.Duration(minSeconds+
rand.Intn(maxSeconds-minSeconds)) * time.Second)
fileID++
}

// the newest(latest) and the oldest(earliest) version
// should be kept regardless
type record struct {
fileID int
mtime time.Time
}
var oldest, newest record
for i, t := range data {
if oldest.fileID == 0 || oldest.mtime.After(t) {
oldest.fileID = i
oldest.mtime = t
}
if newest.fileID == 0 || newest.mtime.Before(t) {
newest.fileID = i
newest.mtime = t
}
}

del = whatToDelete(&data)
for _, id := range del {
if id == newest.fileID {
t.Errorf("The newest record got deleted")
} else if id == oldest.fileID {
t.Errorf("The oldest record got deleted")
}
delete(data, id)
}
if len(data) != 30 {
t.Errorf("Should have 30 items for last month, had %d", len(data))
t.Errorf("Newest: %v\nOldest: %v", newest.mtime, oldest.mtime)
}
}
4 changes: 2 additions & 2 deletions server/service/taskrunner.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,10 @@ func main() {

for !quit {
// Run jobs
for _, j := range jobs {
for i, j := range jobs {
if time.Since(j.lastrun) > j.job.HowOften() {
j.job.Run(db)
j.lastrun = time.Now()
jobs[i].lastrun = time.Now()
}
}

Expand Down

0 comments on commit 509cd4a

Please sign in to comment.