Skip to content

Commit

Permalink
v0.3.6
Browse files Browse the repository at this point in the history
  • Loading branch information
shenwei356 committed Aug 18, 2016
1 parent 4eb407a commit bfa84d3
Show file tree
Hide file tree
Showing 7 changed files with 181 additions and 11 deletions.
34 changes: 33 additions & 1 deletion csvtk/cmd/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@ import (
"strings"

"github.com/brentp/xopen"
"github.com/shenwei356/breader"
"github.com/shenwei356/util/stringutil"
"github.com/spf13/cobra"
)

// VERSION of csvtk
const VERSION = "0.3.5.2"
const VERSION = "0.3.6"

func checkError(err error) {
if err != nil {
Expand Down Expand Up @@ -468,3 +469,34 @@ func removeComma(s string) string {
}
return string(newSlice)
}

// readKVs loads a tab-delimited key-value file into a map.
//
// Every line has trailing "\r" and "\n" characters removed and is then split
// on tabs: the first field becomes the key and the second the value. Empty
// lines and lines with fewer than two fields are skipped, and fields after
// the second are ignored. A key that appears several times keeps whichever
// value is stored last.
//
// If the file cannot be opened, or if the reader reports an error for any
// chunk, a nil map is returned together with that error.
func readKVs(file string) (map[string]string, error) {
	type KV [2]string

	// parse converts one raw line into a KV pair; the boolean result tells
	// the reader whether the line produced a usable record.
	parse := func(line string) (interface{}, bool, error) {
		if len(line) == 0 {
			return nil, false, nil
		}
		items := strings.Split(strings.TrimRight(line, "\r\n"), "\t")
		if len(items) < 2 {
			return nil, false, nil
		}
		return KV{items[0], items[1]}, true, nil
	}

	reader, err := breader.NewBufferedReader(file, 2, 10, parse)
	if err != nil {
		return nil, err
	}

	kvs := make(map[string]string)
	for chunk := range reader.Ch {
		if chunk.Err != nil {
			// Report the chunk's own error. The outer err is nil here, so
			// returning it would silently turn a read failure into success.
			return nil, chunk.Err
		}
		for _, data := range chunk.Data {
			kv := data.(KV)
			kvs[kv[0]] = kv[1]
		}
	}
	return kvs, nil
}
97 changes: 94 additions & 3 deletions csvtk/cmd/replace.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ import (
"fmt"
"regexp"
"runtime"
"strconv"
"strings"

"github.com/brentp/xopen"
"github.com/spf13/cobra"
Expand All @@ -36,6 +38,26 @@ var replaceCmd = &cobra.Command{
Short: "replace data of selected fields by regular expression",
Long: `replace data of selected fields by regular expression
Note that the replacement supports capture variables.
e.g. $1 represents the text of the first submatch.
ATTENTION: use SINGLE quote NOT double quotes in *nix OS.
Examples: Adding space to all bases.
csvtk replace -p "(.)" -r '$1 ' -s
Or use the \ escape character.
csvtk replace -p "(.)" -r "\$1 " -s
more on: http://shenwei356.github.io/csvtk/usage/#replace
Special repalcement symbols:
{nr} Record number, starting from 1
{kv} Corresponding value of the key ($1) by key-value file
`,
Run: func(cmd *cobra.Command, args []string) {
config := getConfigs(cmd)
Expand All @@ -51,13 +73,52 @@ var replaceCmd = &cobra.Command{
if pattern == "" {
checkError(fmt.Errorf("flags -p (--pattern) needed"))
}

p := pattern
if ignoreCase {
p = "(?i)" + p
}
patternRegexp, err := regexp.Compile(p)
checkError(err)

kvFile := getFlagString(cmd, "kv-file")

var replaceWithNR bool
if reNR.MatchString(replacement) {
replaceWithNR = true
}

var replaceWithKV bool
var kvs map[string]string
if reKV.MatchString(replacement) {
replaceWithKV = true
if !regexp.MustCompile(`\(.+\)`).MatchString(pattern) {
checkError(fmt.Errorf(`value of -p (--pattern) must contains "(" and ")" to capture data which is used specify the KEY`))
}
if kvFile == "" {
checkError(fmt.Errorf(`since repalcement symbol "{kv}"/"{KV}" found in value of flag -r (--replacement), tab-delimited key-value file should be given by flag -k (--kv-file)`))
}
log.Infof("read key-value file: %s", kvFile)
var err error
kvs, err = readKVs(kvFile)
if err != nil {
checkError(fmt.Errorf("read key-value file: %s", err))
}
if len(kvs) == 0 {
checkError(fmt.Errorf("no valid data in key-value file: %s", kvFile))
}

if ignoreCase {
kvs2 := make(map[string]string, len(kvs))
for k, v := range kvs {
kvs2[strings.ToLower(k)] = v
}
kvs = kvs2
}

log.Infof("%d pairs of key-value loaded", len(kvs))
}

fieldStr := getFlagString(cmd, "fields")
fields, colnames, negativeFields, needParseHeaderRow := parseFields(cmd, fieldStr, config.NoHeaderRow)
var fieldsMap map[int]struct{}
Expand Down Expand Up @@ -102,7 +163,10 @@ var replaceCmd = &cobra.Command{
checkFields := true

var record2 []string // for output

var r string
var found []string
var k string
nr := 0
for chunk := range csvReader.Ch {
checkError(chunk.Err)

Expand Down Expand Up @@ -181,11 +245,33 @@ var replaceCmd = &cobra.Command{
parseHeaderRow2 = false
continue
}

nr++
for f := range record {
record2[f] = record[f]
if _, ok := fieldsMap[f+1]; ok {
record2[f] = patternRegexp.ReplaceAllString(record2[f], replacement)

r = replacement

if replaceWithNR {
r = reNR.ReplaceAllString(r, strconv.Itoa(nr))
}

if replaceWithKV {
found = patternRegexp.FindStringSubmatch(record2[f])
if len(found) > 0 {
k = string(found[1])
if ignoreCase {
k = strings.ToLower(k)
}
if _, ok = kvs[k]; ok {
r = reKV.ReplaceAllString(r, kvs[k])
} else {
r = reKV.ReplaceAllString(r, found[1])
}
}
}

record2[f] = patternRegexp.ReplaceAllString(record2[f], r)
}
}
checkError(writer.Write(record2))
Expand All @@ -208,4 +294,9 @@ func init() {
"ATTENTION: use SINGLE quote NOT double quotes in *nix OS or "+
"use the \\ escape character.")
replaceCmd.Flags().BoolP("ignore-case", "i", false, "ignore case")
replaceCmd.Flags().StringP("kv-file", "k", "",
`tab-delimited key-value file for replacing key with value when using "{kv}" in -r (--replacement)`)
}

// Special replacement symbols recognised in the value of -r (--replacement).
var (
	// reNR matches the record-number symbol, written "{nr}" or "{NR}".
	reNR = regexp.MustCompile(`\{(NR|nr)\}`)

	// reKV matches the key-value symbol, written "{kv}" or "{KV}".
	reKV = regexp.MustCompile(`\{(KV|kv)\}`)
)
1 change: 1 addition & 0 deletions doc/docs/bioinf.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<meta http-equiv="refresh" content="0; url=http://bioinf.shenwei.me" />
9 changes: 6 additions & 3 deletions doc/docs/download.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

## Current Version

- [csvtk v0.3.5.2](https://github.com/shenwei356/csvtk/releases/tag/v0.3.6)
- add flag `--fill` for `csvtk join`, so we can fill the unmatched data
- fix typo
- [csvtk v0.3.6](https://github.com/shenwei356/csvtk/releases/tag/v0.3.6)
- `csvtk replace` support replacement symbols `{nr}` (record number)
and `{kv}` (corresponding value of the key ($1) by key-value file)

## Installation

Expand Down Expand Up @@ -42,6 +42,9 @@ For Go developer, just one command:

## Previous Versions

- [csvtk v0.3.5.2](https://github.com/shenwei356/csvtk/releases/tag/v0.3.5.2)
- add flag `--fill` for `csvtk join`, so we can fill the unmatched data
- fix typo
- [csvtk v0.3.5.1](https://github.com/shenwei356/csvtk/releases/tag/v0.3.5.1)
- fix minor bug of reading lines ending with `\r\n` from a dependency package
- [csvtk v0.3.5](https://github.com/shenwei356/csvtk/releases/tag/v0.3.5)
Expand Down
42 changes: 41 additions & 1 deletion doc/docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Usage
```
Another cross-platform, efficient and practical CSV/TSV toolkit
Version: 0.3.5.2
Version: 0.3.6
Author: Wei Shen <[email protected]>
Expand Down Expand Up @@ -536,13 +536,33 @@ Usage
```
replace data of selected fields by regular expression
Note that the replacement supports capture variables.
e.g. $1 represents the text of the first submatch.
ATTENTION: use SINGLE quote NOT double quotes in *nix OS.
Examples: Adding space to all bases.
csvtk replace -p "(.)" -r '$1 ' -s
Or use the \ escape character.
csvtk replace -p "(.)" -r "\$1 " -s
more on: http://shenwei356.github.io/csvtk/usage/#replace
Special replacement symbols:
{nr} Record number, starting from 1
{kv} Corresponding value of the key ($1) by key-value file
Usage:
csvtk replace [flags]
Flags:
-f, --fields string select only these fields. e.g -f 1,2 or -f columnA,columnB (default "1")
-F, --fuzzy-fields using fuzzy fields, e.g. *name or id123*
-i, --ignore-case ignore case
-k, --kv-file string tab-delimited key-value file for replacing key with value when using "{kv}" in -r (--replacement)
-p, --pattern string search regular expression
-r, --replacement string replacement. supporting capture variables. e.g. $1 represents the text of the first submatch. ATTENTION: use SINGLE quote NOT double quotes in *nix OS or use the \ escape character.
Expand All @@ -551,6 +571,26 @@ Flags:
Examples

- remove Chinese characters: `csvtk replace -F -f "*_name" -p "\p{Han}+" -r ""`
- replace by key-value files: `csvtk replace -f 1 -p "(.+)" -r 'value of $1 is {kv}' -k kv.tsv`

$ cat data.tsv
name id
A ID001
B ID002
C ID004

$ cat alias.tsv
001 Tom
002 Bob
003 Jim

$ csvtk replace -t -f 2 -p "ID(.+)" -r "N: {nr}, alias: {kv}" -k alias.tsv data.tsv
[INFO] read key-value file: alias.tsv
[INFO] 3 pairs of key-value loaded
name id
A N: 1, alias: Tom
B N: 2, alias: Bob
C N: 3, alias: 004

## mutate

Expand Down
7 changes: 5 additions & 2 deletions doc/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ site_name: csvtk - CSV/TSV Toolkit
pages:
- Home: index.md
- Download: download.md
- Usage: usage.md
- Tutorial: tutorial.md
- Documents:
- Usage: usage.md
- Tutorial: tutorial.md
- Links:
- Wei Shen's Bioinformatic tools: bioinf.md
theme: mkdocs

repo_url: https://github.com/shenwei356/csvtk
Expand Down
2 changes: 1 addition & 1 deletion doc/site
Submodule site updated from 2bd716 to 4057a2

0 comments on commit bfa84d3

Please sign in to comment.