Skip to content

Commit

Permalink
Merge pull request #32 from nlnwa/config-files
Browse files Browse the repository at this point in the history
Avoid naming conflicts for environment variables
  • Loading branch information
johnerikhalse authored Apr 28, 2023
2 parents a7d6421 + 64839a2 commit ba85389
Show file tree
Hide file tree
Showing 15 changed files with 233 additions and 104 deletions.
19 changes: 13 additions & 6 deletions cmd/cat/cat.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ warc cat -n4 -P file1.warc.gz | feh -`,
return errors.New("missing file name")
}
c.fileName = args[0]
c.offset = viper.GetInt64(flag.Offset)
c.recordCount = viper.GetInt(flag.RecordCount)
c.recordNum = viper.GetInt(flag.RecordNum)
c.showWarcHeader = viper.GetBool(flag.ShowWarcHeader)
c.showProtocolHeader = viper.GetBool(flag.ShowProtocolHeader)
c.showPayload = viper.GetBool(flag.ShowPayload)

if (c.offset >= 0 || c.recordNum >= 0) && c.recordCount == 0 {
c.recordCount = 1
}
Expand All @@ -75,12 +82,12 @@ warc cat -n4 -P file1.warc.gz | feh -`,
},
}

cmd.Flags().Int64VarP(&c.offset, "offset", "o", -1, "print record at offset bytes")
cmd.Flags().IntVarP(&c.recordNum, "num", "n", -1, "print the n'th record. This is applied after records are filtered out by other options")
cmd.Flags().IntVarP(&c.recordCount, "record-count", "c", 0, "The maximum number of records to show. Defaults to show all records except if -o or -n option is set, then default is one.")
cmd.Flags().BoolVarP(&c.showWarcHeader, "header", "w", false, "show WARC header")
cmd.Flags().BoolVarP(&c.showProtocolHeader, "protocol-header", "p", false, "show protocol header")
cmd.Flags().BoolVarP(&c.showPayload, "payload", "P", false, "show payload")
cmd.Flags().Int64P(flag.Offset, "o", -1, flag.OffsetHelp)
cmd.Flags().IntP(flag.RecordNum, "n", -1, flag.RecordNumHelp)
cmd.Flags().IntP(flag.RecordCount, "c", 0, flag.RecordCountHelp+" Defaults to show all records except if -o or -n option is set, then default is one.")
cmd.Flags().BoolP(flag.ShowWarcHeader, "w", false, flag.ShowWarcHeaderHelp)
cmd.Flags().BoolP(flag.ShowProtocolHeader, "p", false, flag.ShowProtocolHeaderHelp)
cmd.Flags().BoolP(flag.ShowPayload, "P", false, flag.ShowPayloadHelp)
cmd.Flags().StringArray(flag.RecordId, []string{}, flag.RecordIdHelp)
cmd.Flags().StringSliceP(flag.RecordType, "t", []string{}, flag.RecordTypeHelp)
cmd.Flags().StringP(flag.ResponseCode, "S", "", flag.ResponseCodeHelp)
Expand Down
2 changes: 1 addition & 1 deletion cmd/completion.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ PowerShell:
`,
DisableFlagsInUseLine: true,
ValidArgs: []string{"bash", "zsh", "fish", "powershell"},
Args: cobra.ExactValidArgs(1),
Args: cobra.MatchAll(cobra.ExactArgs(1), cobra.OnlyValidArgs),
Run: func(cmd *cobra.Command, args []string) {
switch args[0] {
case "bash":
Expand Down
6 changes: 0 additions & 6 deletions cmd/convert/nedlib/nedlib.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,6 @@ func NewCommand() *cobra.Command {
payload := &gowarc.WarcFields{}
payload.Set("software", cmdversion.SoftwareVersion())
payload.Set("format", fmt.Sprintf("WARC File Format %d.%d", wc.WarcVersion.Minor(), wc.WarcVersion.Minor()))
//payload.Set("collection", ww.collectionConfig.GetMeta().GetName())
//payload.Set("description", ww.collectionConfig.GetMeta().GetDescription())
//if ww.subCollection != config.Collection_UNDEFINED {
// payload.Set("subCollection", ww.subCollection.String())
//}
//payload.Set("isPartOf", ww.CollectionName())
h, e := os.Hostname()
if e != nil {
return e
Expand Down
19 changes: 13 additions & 6 deletions cmd/ls/ls.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@ Output options:
return errors.New("missing file or directory")
}
c.files = args
c.delimiter = viper.GetString(flag.Delimiter)
c.concurrency = viper.GetInt(flag.Concurrency)
c.offset = viper.GetInt64(flag.Offset)
c.recordCount = viper.GetInt(flag.RecordCount)
c.strict = viper.GetBool(flag.Strict)
c.fields = viper.GetString(flag.Fields)

if c.offset >= 0 && c.recordCount == 0 {
c.recordCount = 1
// TODO: check that input is exactly one file when using offset
Expand All @@ -101,12 +108,12 @@ Output options:
cmd.Flags().BoolP(flag.Recursive, "r", false, flag.RecursiveHelp)
cmd.Flags().BoolP(flag.FollowSymlinks, "s", false, flag.FollowSymlinksHelp)
cmd.Flags().StringSlice(flag.Suffixes, []string{".warc", ".warc.gz"}, flag.SuffixesHelp)
cmd.Flags().IntVarP(&c.concurrency, flag.Concurrency, "c", 1, flag.ConcurrencyHelp)
cmd.Flags().Int64VarP(&c.offset, "offset", "o", -1, "record offset")
cmd.Flags().IntVarP(&c.recordCount, "record-count", "n", 0, "The maximum number of records to show")
cmd.Flags().BoolVar(&c.strict, "strict", false, "strict parsing")
cmd.Flags().StringVarP(&c.delimiter, "delimiter", "d", " ", "use string instead of SPACE for field delimiter")
cmd.Flags().StringVarP(&c.fields, "fields", "f", "", "which fields to include. See 'warc help ls' for a description")
cmd.Flags().IntP(flag.Concurrency, "c", 1, flag.ConcurrencyHelp)
cmd.Flags().Int64P(flag.Offset, "o", -1, flag.OffsetHelp)
cmd.Flags().IntP(flag.RecordCount, "n", 0, flag.RecordCountHelp)
cmd.Flags().Bool(flag.Strict, false, flag.StrictHelp)
cmd.Flags().StringP(flag.Delimiter, "d", " ", flag.DelimiterHelp)
cmd.Flags().StringP(flag.Fields, "f", "", flag.FieldsHelp)
cmd.Flags().StringArray(flag.RecordId, []string{}, flag.RecordIdHelp)
cmd.Flags().StringSliceP(flag.RecordType, "t", []string{}, flag.RecordTypeHelp)
cmd.Flags().StringP(flag.ResponseCode, "S", "", flag.ResponseCodeHelp)
Expand Down
17 changes: 0 additions & 17 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"github.com/nlnwa/warchaeology/internal/config"
"github.com/nlnwa/warchaeology/internal/flag"
"github.com/spf13/cobra"
"github.com/spf13/viper"
"os"
)

Expand All @@ -36,22 +35,6 @@ func NewCommand() *cobra.Command {
Use: "warc",
Short: "A tool for handling warc files",
Long: ``,

PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
// Overwrite config values if set in command specific key
cv := viper.Sub(cmd.Name())
if cv != nil {
for _, k := range cv.AllKeys() {
viper.Set(k, cv.Get(k))
}
}

if err := viper.BindPFlags(cmd.Flags()); err != nil {
panic(err)
}

return nil
},
}

// Flags
Expand Down
14 changes: 5 additions & 9 deletions cmd/validate/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,7 @@ import (
)

type conf struct {
files []string
recursive bool
followSymlinks bool
suffixes []string
concurrency int
files []string
}

func NewCommand() *cobra.Command {
Expand All @@ -56,10 +52,10 @@ func NewCommand() *cobra.Command {
ValidArgsFunction: flag.SuffixCompletionFn,
}

cmd.Flags().BoolVarP(&c.recursive, flag.Recursive, "r", false, flag.RecursiveHelp)
cmd.Flags().BoolVarP(&c.followSymlinks, flag.FollowSymlinks, "s", false, flag.FollowSymlinksHelp)
cmd.Flags().StringSliceVar(&c.suffixes, flag.Suffixes, []string{".warc", ".warc.gz"}, flag.SuffixesHelp)
cmd.Flags().IntVarP(&c.concurrency, flag.Concurrency, "c", int(float32(runtime.NumCPU())*float32(1.5)), flag.ConcurrencyHelp)
cmd.Flags().BoolP(flag.Recursive, "r", false, flag.RecursiveHelp)
cmd.Flags().BoolP(flag.FollowSymlinks, "s", false, flag.FollowSymlinksHelp)
cmd.Flags().StringSlice(flag.Suffixes, []string{".warc", ".warc.gz"}, flag.SuffixesHelp)
cmd.Flags().IntP(flag.Concurrency, "c", int(float32(runtime.NumCPU())*float32(1.5)), flag.ConcurrencyHelp)

return cmd
}
Expand Down
4 changes: 2 additions & 2 deletions docs/content/_index.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: Warchaeology
weight: 1
weight: 10
---

{{< lead >}}
Expand All @@ -13,7 +13,7 @@ Warchaeology is a collection of tools for inspecting, manipulating and validatin
Validate that WARC-files conform to the specification.
{{< /featureitem >}}
{{< featureitem icon="fas fa-search" title="Inspection" >}}
Navigate WARC-files with terminal console.
Navigate WARC-files in a terminal console or extract content from them.
{{< /featureitem >}}
{{< featureitem icon="fas fa-magic" title="Conversion" >}}
Convert webarchives into WARC-format.
Expand Down
21 changes: 19 additions & 2 deletions docs/content/cmd/_index.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
---
title: Usage
weight: 2
weight: 20
---

The command line tool is named `warc`. The different functions are executed through sub-commands.
The Warchaeology tool is named `warc`.

## Syntax
```
warc [command [subcommand]] [flags]
```
Where:
* **command** and, optionally, **subcommand** specify the operation that you want to perform.
* **flags** specify optional flags. For example, you can use the `--config` flag to specify
the location of a configuration file.



## Commands
The following pages are generated from the built-in documentation and can also be viewed on the
command line with `warc <command> -h`.

{{< childpages >}}
92 changes: 92 additions & 0 deletions docs/content/config/_index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
---
title: Configuration
weight: 30
---

## Configuration parameters

Warchaeology commands can be configured by specifying parameters.
There are several ways to specify parameters; using command line flags
is the easiest. If you find yourself always setting a specific flag, it might be better
to add a configuration file or environment variable.

Flags set on the command line take precedence over configuration files and environment variables.

Parameter documentation can be found in the *options* section for each [command](/cmd).
The parameter name is the long flag name with the leading dashes removed (for example, the flag `--record-count` becomes the parameter `record-count`).

## Environment variables

Environment variables can be used to set parameters. Use the following steps to convert
a parameter name to an environment variable name:
* convert the parameter name to upper case
* replace '-' with '_'
* prefix the result with `WARC_`

> Setting the environment variable **WARC_RECORD_COUNT=2** is equivalent to specifying the flag `--record-count=2`.
Environment variables take precedence over parameters in config files.

## Configuration File

Parameters can also be set in configuration files. The configuration file format is YAML.

#### File structure

To set a configuration parameter use the parameter name as key and then the value:

```yaml
delimiter: "\t"
record-count: 2
```
If you want to have a global default, but override the parameter for a specific command
you can do so by adding a section with the command as key.
```yaml
delimiter: "\t"
record-count: 2
ls:
record-count: 5
convert:
tmpdir: mydir
arc:
tmpdir: anotherdir
```
This config file gives the following values
{{< table style="table-striped" >}}
| Command | parameter name | parameter value |
|-------------------|----------------|-----------------|
| warc cat | record-count | 2 |
| warc ls | record-count | 5 |
| warc ls | tmpdir | /tmp (default) |
| warc convert warc | tmpdir | mydir |
| warc convert arc | tmpdir | anotherdir |
{{< /table >}}
#### Config file location
The standard configuration files are named `config.yaml` and are searched for in
system default directories.

The directories are looked up in the following order:

1. Standard Global Configuration Paths
* _Linux_: $XDG_CONFIG_DIRS or "/etc/xdg/warc"
* _Windows_: %PROGRAMDATA% or "C:\\ProgramData\\warc"
* _macOS_: /Library/Application Support/warc

2. Standard User-Specific Configuration Paths
* _Linux_: $XDG_CONFIG_HOME or "$HOME/.config/warc"
* _Windows_: %APPDATA% or "C:\\Users\\%USER%\\AppData\\Roaming\\warc"
* _macOS_: $HOME/Library/Application Support/warc

3. Working directory
* The directory warc was started from

Each of these locations is searched for a file named `config.yaml`. When more than one file is found,
values in a later file override values from the files before it.

By setting the command line flag `--config` to a file name, the user can override the default
config with a user specified config file.
2 changes: 1 addition & 1 deletion docs/content/contributing/_index.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: Contributing
weight: 3
weight: 50
---

## Getting Started
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
{{- $cpage := (.Scratch.Get "current") }}


<ul class="docs-children list-unstyled">
<ul class="docs-children">
{{- .Scratch.Set "pages" $cpage.Pages }}
{{- if $cpage.Sections}}
{{- .Scratch.Set "pages" ($cpage.Pages | union $cpage.Sections) }}
Expand All @@ -27,7 +27,7 @@

{{ define "childs" }}
{{- range .menu }}
<li class="h2 py-1">
<li class="py-1">
<a href="{{.RelPermalink}}"> {{ .Title }} </a>
</li>
{{ end }}
Expand Down
2 changes: 2 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ func loadConfig(cmd *cobra.Command) {
viper.SetDefault(flag.CompressionLevel, gzip.DefaultCompression)
viper.SetDefault(flag.DefaultDate, time.Now().Format(warcwriterconfig.DefaultDateFormat))

viper.SetEnvPrefix("WARC")
viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
viper.AutomaticEnv() // read in environment variables that match

if viper.IsSet("config") {
Expand Down
27 changes: 15 additions & 12 deletions internal/filewalker/filewalker.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"github.com/nlnwa/warchaeology/internal/flag"
"github.com/nlnwa/warchaeology/internal/utils"
"github.com/nlnwa/warchaeology/internal/workerpool"
"github.com/spf13/viper"
"io/fs"
Expand Down Expand Up @@ -51,18 +52,20 @@ func New(paths []string, recursive, followSymlinks bool, suffixes []string, conc
func NewFromViper(cmd string, paths []string, fn func(path string) Result) FileWalker {
var consoleType logType
var fileType logType
for _, t := range viper.GetStringSlice(flag.LogConsole) {
switch strings.ToLower(t) {
case "info":
consoleType = consoleType | info
case "error":
consoleType = consoleType | err
case "summary":
consoleType = consoleType | summary
case "progress":
consoleType = consoleType | progress
default:
panic("Illegal config value '" + t + "' for " + flag.LogConsole)
if utils.StdoutIsTerminal() {
for _, t := range viper.GetStringSlice(flag.LogConsole) {
switch strings.ToLower(t) {
case "info":
consoleType = consoleType | info
case "error":
consoleType = consoleType | err
case "summary":
consoleType = consoleType | summary
case "progress":
consoleType = consoleType | progress
default:
panic("Illegal config value '" + t + "' for " + flag.LogConsole)
}
}
}
for _, t := range viper.GetStringSlice(flag.LogFile) {
Expand Down
Loading

0 comments on commit ba85389

Please sign in to comment.