diff --git a/go.mod b/go.mod index 9b3f6700..6ca8a5f2 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ module github.com/moov-io/ach-web-viewer go 1.20 require ( + cloud.google.com/go/storage v1.39.1 github.com/gorilla/mux v1.8.1 github.com/markbates/pkger v0.17.1 github.com/moov-io/ach v1.36.1 @@ -12,6 +13,7 @@ require ( github.com/moov-io/cryptfs v0.7.1 github.com/stretchr/testify v1.9.0 gocloud.dev v0.37.0 + golang.org/x/sync v0.6.0 ) require ( @@ -19,7 +21,6 @@ require ( cloud.google.com/go/compute v1.25.0 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect cloud.google.com/go/iam v1.1.6 // indirect - cloud.google.com/go/storage v1.39.1 // indirect github.com/ProtonMail/go-crypto v0.0.0-20230828082145-3c4c8a2d2371 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v3 v3.2.2 // indirect @@ -83,7 +84,6 @@ require ( golang.org/x/exp v0.0.0-20231206192017-f3f8817b8deb // indirect golang.org/x/net v0.22.0 // indirect golang.org/x/oauth2 v0.18.0 // indirect - golang.org/x/sync v0.6.0 // indirect golang.org/x/sys v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/time v0.5.0 // indirect diff --git a/pkg/filelist/bucket.go b/pkg/filelist/bucket.go index a7097308..966d2861 100644 --- a/pkg/filelist/bucket.go +++ b/pkg/filelist/bucket.go @@ -9,10 +9,13 @@ import ( "path/filepath" "github.com/moov-io/ach-web-viewer/pkg/service" + "github.com/moov-io/ach-web-viewer/pkg/yyyymmdd" "github.com/moov-io/cryptfs" + "cloud.google.com/go/storage" "gocloud.dev/blob" _ "gocloud.dev/blob/gcsblob" + "golang.org/x/sync/errgroup" ) type bucketLister struct { @@ -71,9 +74,7 @@ func (ls *bucketLister) GetFiles(opts ListOpts) (Files, error) { SourceType: "Bucket", } for i := range ls.paths { - files, err := ls.listFiles(opts, ls.buck.List(&blob.ListOptions{ - Prefix: ls.paths[i], - })) + files, err := ls.listFiles(opts, ls.paths[i]) if err != nil { return out, fmt.Errorf("error reading %s bucket path: %v", ls.paths[i], 
err) } @@ -122,7 +123,74 @@ func (ls *bucketLister) maybeDecrypt(r io.Reader) ([]byte, error) { return initial, err } -func (ls *bucketLister) listFiles(opts ListOpts, cur *blob.ListIterator) ([]File, error) { +func (ls *bucketLister) listFiles(opts ListOpts, pathPrefix string) ([]File, error) { + // Different underlying storage engines will let us scan/glob parts of the bucket differently. + var gcsBucket *storage.Client + if ls.buck.As(&gcsBucket) { + return ls.listFilesFromGCSBucket(opts, pathPrefix) + } + return ls.listFilesFromCDKBucket(opts, pathPrefix) +} + +func (ls *bucketLister) listFilesFromGCSBucket(opts ListOpts, pathPrefix string) ([]File, error) { + var g errgroup.Group + datePrefixes := yyyymmdd.Prefixes(opts.StartDate, opts.EndDate) + + discoveredFiles := make(chan []File) + + for _, datePrefix := range datePrefixes { + g.Go(func() error { + beforeList := func(as func(interface{}) bool) error { + var q *storage.Query + if as(&q) { + q.MatchGlob = fmt.Sprintf("%s/*/%s*/*", pathPrefix, datePrefix) + } + return nil + } + + listOptions := &blob.ListOptions{ + Prefix: pathPrefix, // + "/", + BeforeList: beforeList, + } + + files, err := ls.listFilesFromCursor(opts, ls.buck.List(listOptions)) + if len(files) > 0 { + go func() { + discoveredFiles <- files + }() + } + return err + }) + } + + err := g.Wait() + go func() { + discoveredFiles <- nil + }() + if err != nil { + return nil, err + } + + var out []File + for { + files := <-discoveredFiles + if len(files) == 0 { + break + } + out = append(out, files...) 
// Prefixes returns the sorted, de-duplicated date prefixes that together cover
// every calendar day from start through end, inclusive.
//
// Each day is formatted as yyyy-mm-dd and its last digit is dropped, so a single
// prefix matches a whole block of days sharing the same tens digit (for example
// "2023-12-2" matches the 20th through the 29th). The one exception is the end
// day itself when it falls on the 10th, 20th or 30th: it is emitted in full so
// the prefix does not also match the days after it.
//
// Only the calendar date of each argument (as reported by its own Date method)
// is used; the time of day is ignored. If start's date is after end's date the
// result is empty.
//
// Examples:
//
//	2023-12-23 to 2023-12-31 produces 2023-12-2, 2023-12-3
//	2023-12-23 to 2024-01-10 produces 2023-12-2, 2023-12-3, 2024-01-0, 2024-01-10
func Prefixes(start, end time.Time) []string {
	// Reduce both bounds to midnight-UTC calendar dates. Comparing raw instants
	// would drop the final day whenever start's time of day is later than end's.
	sy, sm, sd := start.Date()
	ey, em, ed := end.Date()
	day := time.Date(sy, sm, sd, 0, 0, 0, 0, time.UTC)
	last := time.Date(ey, em, ed, 0, 0, 0, 0, time.UTC)

	var out []string

	// AddDate steps by calendar day; adding 24h is not the same thing in
	// locations that observe daylight saving time.
	for ; !day.After(last); day = day.AddDate(0, 0, 1) {
		ts := day.Format("2006-01-02")

		// Keep the full date only for the final day of the range (year included
		// in the comparison) and only when it is the 10th, 20th or 30th.
		// Every other day collapses to its ten-day prefix.
		if !(day.Equal(last) && last.Day()%10 == 0) {
			ts = ts[:len(ts)-1] // chop off the last digit
		}

		out = append(out, ts)
	}

	slices.Sort(out)
	return slices.Compact(out)
}
time.UTC) + expected := []string{"2023-12-2", "2023-12-3"} + require.ElementsMatch(t, expected, Prefixes(start, end)) + + end = time.Date(2024, time.January, 4, 0, 0, 0, 0, time.UTC) + expected = append(expected, "2024-01-0") + require.ElementsMatch(t, expected, Prefixes(start, end)) + + end = time.Date(2024, time.January, 10, 0, 0, 0, 0, time.UTC) + expected = append(expected, "2024-01-10") + require.ElementsMatch(t, expected, Prefixes(start, end)) + + end = time.Date(2024, time.January, 11, 0, 0, 0, 0, time.UTC) + expected = []string{"2023-12-2", "2023-12-3", "2024-01-0", "2024-01-1"} + require.ElementsMatch(t, expected, Prefixes(start, end)) + + end = time.Date(2024, time.January, 20, 0, 0, 0, 0, time.UTC) + expected = []string{"2023-12-2", "2023-12-3", "2024-01-0", "2024-01-1", "2024-01-20"} + require.ElementsMatch(t, expected, Prefixes(start, end)) + + end = time.Date(2024, time.January, 25, 0, 0, 0, 0, time.UTC) + expected = []string{"2023-12-2", "2023-12-3", "2024-01-0", "2024-01-1", "2024-01-2"} + require.ElementsMatch(t, expected, Prefixes(start, end)) + + end = time.Date(2024, time.January, 30, 0, 0, 0, 0, time.UTC) + expected = []string{"2023-12-2", "2023-12-3", "2024-01-0", "2024-01-1", "2024-01-2", "2024-01-30"} + require.ElementsMatch(t, expected, Prefixes(start, end)) +}