-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New job that prunes old files from the database
Also fixed a bug with job intervals in taskrunner.go. Closes #8.
- Loading branch information
1 parent
19f78fc
commit 509cd4a
Showing
3 changed files
with
299 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
package main | ||
|
||
import ( | ||
"database/sql" | ||
"log" | ||
"time" | ||
|
||
"github.com/lib/pq" | ||
) | ||
|
||
type pruneOldFilesJob struct{} | ||
|
||
var pruneTimeTable []time.Duration | ||
|
||
func init() { | ||
pruneTimeTable = generateTimeTable() | ||
RegisterJob(pruneOldFilesJob{}) | ||
} | ||
|
||
func (p pruneOldFilesJob) HowOften() time.Duration { | ||
return time.Hour | ||
} | ||
|
||
func (p pruneOldFilesJob) Run(db *sql.DB) { | ||
// Find all machines | ||
machineList := make([]string, 0, 100) | ||
rows, err := db.Query("SELECT DISTINCT certfp FROM files") | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
defer rows.Close() | ||
for rows.Next() { | ||
var certfp sql.NullString | ||
rows.Scan(&certfp) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
if certfp.Valid { | ||
machineList = append(machineList, certfp.String) | ||
} | ||
} | ||
if err = rows.Err(); err != nil { | ||
log.Fatal(err) | ||
} | ||
|
||
// For every machine | ||
for _, certfp := range machineList { | ||
// Finn all unique filenames on the machine | ||
rows, err := db.Query("SELECT DISTINCT filename FROM files "+ | ||
"WHERE certfp=$1", certfp) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
defer rows.Close() | ||
filenames := make([]string, 0) | ||
for rows.Next() { | ||
var filename sql.NullString | ||
err = rows.Scan(&filename) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
if filename.Valid { | ||
filenames = append(filenames, filename.String) | ||
} | ||
} | ||
if err = rows.Err(); err != nil { | ||
log.Fatal(err) | ||
} | ||
// Can't wait with rows.Close() until the function ends; | ||
// If many machines, it would cause too many open connections. | ||
rows.Close() | ||
|
||
// For every file | ||
for _, filename := range filenames { | ||
timeMap := make(map[int]time.Time) | ||
// Find all versions of that file | ||
rows, err = db.Query("SELECT fileid,mtime FROM files "+ | ||
"WHERE certfp=$1 AND filename=$2", certfp, filename) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
defer rows.Close() | ||
for rows.Next() { | ||
var fileID sql.NullInt64 | ||
var mtime pq.NullTime | ||
err = rows.Scan(&fileID, &mtime) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
if fileID.Valid && mtime.Valid { | ||
timeMap[int(fileID.Int64)] = mtime.Time | ||
} | ||
} | ||
if err = rows.Err(); err != nil { | ||
log.Fatal(err) | ||
} | ||
rows.Close() | ||
|
||
// Find what to delete | ||
var count int | ||
for _, deleteID := range whatToDelete(&timeMap) { | ||
_, err = db.Exec("DELETE FROM files WHERE fileid=$1", deleteID) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
count++ | ||
} | ||
if count > 0 { | ||
//log.Printf("Pruned %d files from the database.\n", count) | ||
} | ||
} | ||
} | ||
} | ||
|
||
func whatToDelete(m *map[int]time.Time) []int { | ||
type record struct { | ||
fileID int | ||
mtime time.Time | ||
} | ||
|
||
slots := make([]*record, len(pruneTimeTable)) | ||
for id, timestamp := range *m { | ||
// find the right slot for this record | ||
age := time.Since(timestamp) | ||
slot := 0 | ||
for slot < len(pruneTimeTable) && age > pruneTimeTable[slot] { | ||
slot++ | ||
} | ||
// if it is older than the record currently occupying the slot, | ||
// it can have it. | ||
if slots[slot] == nil || timestamp.Before(slots[slot].mtime) { | ||
slots[slot] = &record{ | ||
fileID: id, | ||
mtime: timestamp, | ||
} | ||
} | ||
} | ||
|
||
// what to keep | ||
keep := make(map[int]bool) | ||
for _, r := range slots { | ||
if r != nil { | ||
keep[r.fileID] = true | ||
} | ||
} | ||
|
||
// keep the newest(latest) and the oldest(earliest) version forever | ||
var oldest, newest record | ||
now := time.Now() | ||
for i, t := range *m { | ||
if oldest.fileID == 0 || oldest.mtime.After(t) { | ||
oldest.fileID = i | ||
oldest.mtime = t | ||
} | ||
if newest.fileID == 0 || newest.mtime.Before(t) { | ||
newest.fileID = i | ||
newest.mtime = t | ||
} | ||
// keep every version from the last 24 hours | ||
if now.Sub(t) < time.Duration(24)*time.Hour { | ||
keep[i] = true | ||
} | ||
} | ||
keep[oldest.fileID] = true | ||
keep[newest.fileID] = true | ||
|
||
// Ok, now we have a list of what to keep. | ||
// Let's find what to delete. | ||
del := make([]int, 0, len(*m)-len(keep)) | ||
for i := range *m { | ||
if !keep[i] { | ||
del = append(del, i) | ||
} | ||
} | ||
return del | ||
} | ||
|
||
// generateTimeTable returns a slice of time.Time. | ||
// The intention is that each entry becomes a slot | ||
// where one record can be kept, as long as it is | ||
// younger than the age given by the entry. | ||
// The entries are given in ascending order. | ||
func generateTimeTable() []time.Duration { | ||
result := make([]time.Duration, 0) | ||
// keep one version per day for the last 30 days | ||
t := time.Duration(0) | ||
for days := 1; days <= 30; days++ { | ||
t += time.Hour * 24 | ||
result = append(result, t) | ||
} | ||
// keep one version per week for 52 weeks after that | ||
for weeks := 1; weeks <= 52; weeks++ { | ||
t += time.Hour * 24 * 7 | ||
result = append(result, t) | ||
} | ||
// keep one version per month (30 days) for 10 years after that | ||
for months := 1; months <= 12*10; months++ { | ||
t += time.Hour * 24 * 30 | ||
result = append(result, t) | ||
} | ||
return result | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
package main | ||
|
||
import ( | ||
"math/rand" | ||
"testing" | ||
"time" | ||
) | ||
|
||
func TestGenerateTimeTable(t *testing.T) { | ||
table := pruneTimeTable | ||
// how many slots <= 30 days | ||
count := 0 | ||
for i, slot := range table { | ||
if slot <= time.Duration(30*24)*time.Hour { | ||
count++ | ||
if i > 0 { | ||
diff := table[i] - table[i-1] | ||
if diff != time.Duration(24)*time.Hour { | ||
t.Errorf("Difference between slots is %v", diff) | ||
} | ||
} | ||
} | ||
} | ||
if count != 30 { | ||
t.Errorf("Wrong number of 30 day slots, got %d", count) | ||
} | ||
// how many slots <= (52 weeks and 30 days) | ||
count = 0 | ||
for _, slot := range table { | ||
if slot <= time.Duration((52*7+30)*24)*time.Hour { | ||
count++ | ||
} | ||
} | ||
if count != 30+52 { | ||
t.Errorf("Wrong number of one year slots, got %d", count) | ||
} | ||
} | ||
|
||
func TestWhatToDelete(t *testing.T) { | ||
data := make(map[int]time.Time) | ||
// add a bunch of stuff <24h | ||
const dayItems = 100 | ||
var fileID = 1 | ||
var maxSeconds = 24 * 60 * 60 | ||
for i := 0; i < dayItems; i++ { | ||
data[fileID] = time.Now().Add(-time.Duration(rand.Intn(maxSeconds)) * time.Second) | ||
fileID++ | ||
} | ||
del := whatToDelete(&data) | ||
if len(del) > 0 { | ||
t.Errorf("Shouldn't delete anything younger than 24 hours.") | ||
} | ||
|
||
// now try things between 24 hours and 30 days | ||
data = make(map[int]time.Time) | ||
minSeconds := 60 * 60 * 24 | ||
maxSeconds = minSeconds + 60*60*24*29 - 1 | ||
for i := 0; i < 1000; i++ { | ||
data[fileID] = time.Now().Add(-time.Duration(minSeconds+ | ||
rand.Intn(maxSeconds-minSeconds)) * time.Second) | ||
fileID++ | ||
} | ||
|
||
// the newest(latest) and the oldest(earliest) version | ||
// should be kept regardless | ||
type record struct { | ||
fileID int | ||
mtime time.Time | ||
} | ||
var oldest, newest record | ||
for i, t := range data { | ||
if oldest.fileID == 0 || oldest.mtime.After(t) { | ||
oldest.fileID = i | ||
oldest.mtime = t | ||
} | ||
if newest.fileID == 0 || newest.mtime.Before(t) { | ||
newest.fileID = i | ||
newest.mtime = t | ||
} | ||
} | ||
|
||
del = whatToDelete(&data) | ||
for _, id := range del { | ||
if id == newest.fileID { | ||
t.Errorf("The newest record got deleted") | ||
} else if id == oldest.fileID { | ||
t.Errorf("The oldest record got deleted") | ||
} | ||
delete(data, id) | ||
} | ||
if len(data) != 30 { | ||
t.Errorf("Should have 30 items for last month, had %d", len(data)) | ||
t.Errorf("Newest: %v\nOldest: %v", newest.mtime, oldest.mtime) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters