Skip to content

Commit

Permalink
Merge pull request #46 from nlnwa/hostname-indexing
Browse files Browse the repository at this point in the history
Major refactor
  • Loading branch information
Avokadoen authored Jan 7, 2022
2 parents 8319800 + 8d0b6b7 commit 69ac143
Show file tree
Hide file tree
Showing 84 changed files with 2,229 additions and 2,140 deletions.
43 changes: 0 additions & 43 deletions .github/workflows/dockerimage-latest.yml

This file was deleted.

40 changes: 0 additions & 40 deletions .github/workflows/golangci-lint.yml

This file was deleted.

28 changes: 0 additions & 28 deletions .github/workflows/golangci-test.yml

This file was deleted.

39 changes: 39 additions & 0 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Release workflow: builds the Docker image and pushes it to the container
# registry whenever a git tag matching "v*" is pushed.
name: release

on:
push:
tags:
- v*

# Registry host and image name shared by the steps below.
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}

# NOTE(review): no `permissions:` block is declared here; pushing to ghcr.io
# with GITHUB_TOKEN presumably needs `packages: write` — confirm that the
# repository/organisation default token permissions grant it.
jobs:
build:
runs-on: ubuntu-latest

# NOTE(review): there is no checkout step; the build step presumably relies on
# the build-push-action default Git context — confirm.
steps:
# Derive image tags and labels; consumed further down via steps.meta.outputs.
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v3
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
# NOTE(review): this workflow only triggers on tag pushes, so the branch and
# pr rules below presumably never match — confirm whether they are needed.
tags: |
type=semver,pattern={{version}}
type=ref,event=branch
type=ref,event=pr
- name: Log in to the container registry
uses: docker/login-action@v1
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

# Build the image and push it with the tags and labels produced by the meta step.
- name: Build and push Docker image
uses: docker/build-push-action@v2
with:
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
43 changes: 43 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Test workflow: runs the Go unit tests and golangci-lint on every pull request.
name: test

on:
pull_request: { }

# The workflow token only gets read access to the repository contents.
permissions:
contents: read

jobs:
# Job 1: run `go test ./...`.
unit_test:
name: Golang unit tests
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v2

- uses: actions/setup-go@v2
with:
go-version: '^1.16'

# Cache the Go build cache and module cache, keyed on the hash of all go.sum files.
- name: Cache go modules
uses: actions/cache@v2
with:
path: |
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-
- name: Run tests
run: go test ./...
# Job 2: run golangci-lint with a set of additional linters enabled.
lint:
name: Linting
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: golangci/golangci-lint-action@v2
with:
# NOTE(review): `latest` is not pinned, so lint results may change between
# runs when a new golangci-lint version is released — confirm this is intended.
version: latest
# Enable additional linters (see: https://golangci-lint.run/usage/linters/)
args: -E "bodyclose" -E "dogsled" -E "durationcheck" -E "errorlint" -E "forcetypeassert" -E "noctx" -E "exhaustive" -E "exportloopref" --timeout 3m0s
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ __debug_bin
*.out

# Compiled binary
/warcserver
/gowarcserver

# Gowarc cache
/warcdb*
Expand All @@ -24,5 +24,4 @@ __debug_bin
/config*.yaml

.idea
/proto/tools/
/.vscode
14 changes: 8 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@ RUN go mod download

COPY . .

# Compile the binary statically, so it can be run without dynamic libraries.
RUN go test ./... && \
CGO_ENABLED=0 GOOS=linux go install -a -ldflags '-extldflags "-s -w -static"' ./cmd/warcserver
# -trimpath removes file system paths from the executable
# -ldflags arguments passed to go tool link:
# -s disable symbol table
# -w disable DWARF generation
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -trimpath -ldflags "-s -w"


# Now copy it into our base image.
FROM gcr.io/distroless/base
COPY --from=build /go/bin/warcserver /
COPY --from=build /build/gowarcserver /
EXPOSE 9999

ENTRYPOINT ["/warcserver"]
ENTRYPOINT ["/gowarcserver"]
CMD ["serve"]
107 changes: 107 additions & 0 deletions cmd/index/index.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* Copyright 2021 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package index

import (
"fmt"
log "github.com/sirupsen/logrus"

"github.com/nlnwa/gowarcserver/internal/config"
"github.com/nlnwa/gowarcserver/internal/database"
"github.com/nlnwa/gowarcserver/internal/index"

"github.com/dgraph-io/badger/v3/options"
"github.com/spf13/cobra"
"github.com/spf13/viper"
)

// NewCommand builds the "index" cobra command, registers its flags together
// with their default values and binds the flag set to viper.
//
// If the flags cannot be bound to viper the failure is logged with
// log.Fatalf.
func NewCommand() *cobra.Command {
	command := &cobra.Command{
		Use:   "index [dir] ...",
		Short: "Index warc file(s)",
		RunE:  indexCmd,
	}

	// All flags live on the command's own flag set; the literal values
	// passed below are the defaults.
	flags := command.Flags()
	flags.StringP("format", "f", "cdxj", `index format: "cdxj", "cdxpb", "cdxdb" or "toc"`)
	flags.StringSlice("include", []string{""}, "only include filenames matching these suffixes")
	flags.IntP("max-depth", "d", 4, "maximum directory recursion")
	flags.Int("workers", 8, "number of index workers")
	flags.StringSlice("dirs", []string{"."}, "directories to search for warc files in")
	flags.String("db-dir", ".", "path to index database")
	flags.String("compression", config.SnappyCompression, `badger compression type: "none", "snappy" or "zstd"`)

	// indexCmd reads every setting through viper, so the flags must be bound.
	err := viper.BindPFlags(flags)
	if err != nil {
		log.Fatalf("Failed to bind index flags, err: %v", err)
	}

	return command
}

// indexCmd is the RunE handler of the index command.
//
// It selects an index.Indexer implementation based on the "format" setting,
// wraps it in an index worker and creates an auto indexer over the
// directories given by the "dirs" setting plus any positional arguments.
//
// An error is returned when the format is unknown, when the compression
// setting cannot be decoded, or when the index database or the auto indexer
// cannot be created.
//
// Close calls are deferred, so on return they run in reverse order of
// creation: auto indexer, index worker and, for the "cdxdb" format, the
// database.
func indexCmd(_ *cobra.Command, args []string) error {
	var writer index.Indexer

	switch format := viper.GetString("format"); format {
	case "toc":
		writer = new(index.Toc)
	case "cdxpb":
		writer = new(index.CdxPb)
	case "cdxj":
		writer = new(index.CdxJ)
	case "cdxdb":
		// The compression setting is decoded into badger's compression type
		// using the project's decode hook.
		var compression options.CompressionType
		hook := viper.DecodeHook(config.CompressionDecodeHookFunc())
		if err := viper.UnmarshalKey("compression", &compression, hook); err != nil {
			return err
		}

		cdxDb, err := database.NewCdxIndexDb(
			database.WithCompression(compression),
			database.WithDir(viper.GetString("db-dir")),
		)
		if err != nil {
			return err
		}
		defer cdxDb.Close()

		writer = &index.CdxDb{CdxDbIndex: cdxDb}
	default:
		return fmt.Errorf("unsupported format %s", format)
	}

	// Directories come from the --dirs flag followed by positional arguments.
	targets := append(viper.GetStringSlice("dirs"), args...)

	worker := index.NewIndexWorker(writer, viper.GetInt("workers"))
	defer worker.Close()

	autoIndexer, err := index.NewAutoIndexer(
		worker.Accept,
		targets,
		index.WithMaxDepth(viper.GetInt("max-depth")),
		index.WithSuffixes(viper.GetStringSlice("include")...),
	)
	if err != nil {
		return err
	}
	defer autoIndexer.Close()

	return nil
}
13 changes: 13 additions & 0 deletions cmd/index/index_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package index

import (
"testing"
)

// TestIndexCmd verifies that the index command executes without error when
// run with no arguments, i.e. with the default value of every flag.
func TestIndexCmd(t *testing.T) {
	cmd := NewCommand()

	// When no arguments have been set, cobra falls back to os.Args[1:]. Under
	// `go test` that holds the test binary's own -test.* flags, which the
	// command would then try to parse as its own. Setting an explicit empty
	// argument list keeps the test independent of how it is invoked.
	cmd.SetArgs([]string{})

	if err := cmd.Execute(); err != nil {
		t.Errorf("%v", err)
	}
}
Loading

0 comments on commit 69ac143

Please sign in to comment.