Skip to content

Commit

Permalink
v0.3.8 adding new command plot
Browse files Browse the repository at this point in the history
  • Loading branch information
shenwei356 committed Oct 5, 2016
1 parent adaaabb commit d108b8f
Show file tree
Hide file tree
Showing 17 changed files with 633 additions and 16 deletions.
26 changes: 25 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ and also easy to integrated into analysis pipelines**.
- Most of the subcommands support ***unselecting fields*** and ***fuzzy fields***,
e.g. `-f "-id,-name"` for all fields except "id" and "name",
`-F -f "a.*"` for all fields with prefix "a.".
- **Support common plots** (see [usage](http://bioinf.shenwei.me/csvtk/usage/#plot))


## Subcommands

19 in total.
20 in total.

**Information**

Expand Down Expand Up @@ -60,6 +61,13 @@ and also easy to integrated into analysis pipelines**.

- `sort` sort by selected fields

**Plotting**

- `plot` see [usage](http://bioinf.shenwei.me/csvtk/usage/#plot)
- `plot hist` histogram
- `plot box` boxplot
- `plot line` line plot

## Installation

[Download Page](https://github.com/shenwei356/csvtk/releases)
Expand Down Expand Up @@ -203,6 +211,22 @@ Examples
- using `--any` to print a record if any of the fields satisfies the condition: `csvtk filter -f "1-3>0" --any`
- fuzzy fields: `csvtk filter -F -f "A*!=0"`

1. plotting
- plot histogram with data of the second column:
`csvtk -t plot hist testdata/grouped_data.tsv.gz -f 2`
![histogram.png](testdata/figures/histogram.png)
- plot boxplot with data of the "GC Content" (third) column,
group information is the "Group" column.
`csvtk -t plot box testdata/grouped_data.tsv.gz -g "Group" -f "GC Content" --width 3`
![boxplot.png](testdata/figures/boxplot.png)
- plot horizontal boxplot with data of the "Length" (second) column,
group information is the "Group" column.
`csvtk -t plot box testdata/grouped_data.tsv.gz -g "Group" -f "Length" --height 3 --width 5 --horiz --title "Horiz box plot"`
![boxplot2.png](testdata/figures/boxplot2.png)
- plot line plot with X-Y data
`csvtk -t plot line testdata/xy.tsv -x X -y Y -g Group`
![lineplot.png](testdata/figures/lineplot.png)

## Contact

Email me for any problem when using `csvtk`. shenwei356(at)gmail.com
Expand Down
204 changes: 204 additions & 0 deletions csvtk/cmd/box.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
// Copyright © 2016 Wei Shen <[email protected]>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package cmd

import (
"fmt"
"runtime"
"sort"
"strconv"

"github.com/gonum/plot"
"github.com/gonum/plot/plotter"
"github.com/gonum/plot/vg"
"github.com/shenwei356/util/stringutil"
"github.com/spf13/cobra"
)

// boxCmd represents the box command.
//
// It reads at most one input file, parses the first selected column as
// float64 values and, when a second column is present, uses it as the group
// name of each value. One box is drawn per group and the figure is saved to
// config.OutFile ("boxplot.png" when the output file is "-").
var boxCmd = &cobra.Command{
	Use:   "box",
	Short: "plot boxplot",
	Long: `plot boxplot
`,
	Run: func(cmd *cobra.Command, args []string) {
		config := getConfigs(cmd)
		plotConfig := getPlotConfigs(cmd)

		files := getFileList(args)
		if len(files) > 1 {
			checkError(fmt.Errorf("no more than one file should be given"))
		}
		runtime.GOMAXPROCS(config.NumCPUs)

		file := files[0]
		headerRow, data, fields := parseCSVfile(cmd, config, file, plotConfig.fieldStr, false)

		// =======================================

		horiz := getFlagBool(cmd, "horiz")
		// A box width of 0 means "choose automatically" (see below).
		w := vg.Length(getFlagNonNegativeFloat64(cmd, "box-width"))

		if config.OutFile == "-" {
			config.OutFile = "boxplot.png"
		}
		if plotConfig.title == "" {
			plotConfig.title = "Box plot"
		}

		// groups collects the values of every group; groupOrderMap remembers
		// the index at which each group name was first seen.
		groups := make(map[string]plotter.Values)
		groupOrderMap := make(map[string]int)
		var order int
		for _, d := range data {
			f, err := strconv.ParseFloat(d[0], 64)
			if err != nil {
				if len(headerRow) > 0 {
					checkError(fmt.Errorf("fail to parse data: %s at column: %s. please choose the right column by flag -f (--data-field)", d[0], headerRow[0]))
				} else {
					checkError(fmt.Errorf("fail to parse data: %s at column: %d. please choose the right column by flag -f (--data-field)", d[0], fields[0]))
				}
			}

			// Without a group column all values fall into a single group
			// named after the data column (or "" when there is no header).
			groupName := ""
			switch {
			case len(d) > 1:
				groupName = d[1]
			case len(headerRow) > 0:
				groupName = headerRow[0]
			}

			if _, seen := groupOrderMap[groupName]; !seen {
				groupOrderMap[groupName] = order
				order++
			}
			// append allocates the slice on first use of a group name.
			groups[groupName] = append(groups[groupName], f)
		}

		p, err := plot.New()
		checkError(err)

		// Order the groups using their first-seen index as the sort count.
		groupOrders := make([]stringutil.StringCount, 0, len(groupOrderMap))
		for g, idx := range groupOrderMap {
			groupOrders = append(groupOrders, stringutil.StringCount{Key: g, Count: idx})
		}
		sort.Sort(stringutil.StringCountList(groupOrders))

		if w == 0 {
			// Derive the box width from the extent of the axis the boxes
			// are laid out along, divided among the groups.
			span := plotConfig.width
			if horiz {
				span = plotConfig.height
			}
			w = vg.Points(float64(span*vg.Inch) / float64(len(groupOrders)) / 2.5)
		}

		groupNames := make([]string, len(groupOrders))
		for i, group := range groupOrders {
			groupNames[i] = group.Key
			b, err := plotter.NewBoxPlot(w, float64(i), groups[group.Key])
			checkError(err)
			b.Horizontal = horiz
			p.Add(b)
		}

		if horiz {
			p.NominalY(groupNames...)
			// p.HideY()
		} else {
			p.NominalX(groupNames...)
			// p.HideX()
		}

		// Default axis labels. headerRow[0] is the data column and
		// headerRow[1], when present, the group column.
		if !horiz {
			if plotConfig.ylab == "" {
				if len(headerRow) > 0 {
					plotConfig.ylab = headerRow[0]
				} else {
					plotConfig.ylab = "Values"
				}
			}
			if plotConfig.xlab == "" {
				// The header has a second entry only when a group column
				// was selected; indexing it unconditionally would panic.
				if len(headerRow) > 1 {
					plotConfig.xlab = headerRow[1]
				} else {
					plotConfig.xlab = "Groups"
				}
			}
		} else {
			if plotConfig.xlab == "" {
				plotConfig.xlab = "Values"
			}
			// The Y axis carries the groups in a horizontal plot, so it is
			// labelled with the group column, not the data column.
			if plotConfig.ylab == "" && plotConfig.groupFieldStr != "" && len(headerRow) > 1 {
				plotConfig.ylab = headerRow[1]
			}
		}

		p.Title.Text = plotConfig.title
		p.Title.TextStyle.Font.Size = plotConfig.titleSize
		p.X.Label.Text = plotConfig.xlab
		p.Y.Label.Text = plotConfig.ylab
		p.X.Label.TextStyle.Font.Size = plotConfig.labelSize
		p.Y.Label.TextStyle.Font.Size = plotConfig.labelSize
		p.X.Width = plotConfig.axisWidth
		p.Y.Width = plotConfig.axisWidth
		p.X.Tick.Width = plotConfig.tickWidth
		p.Y.Tick.Width = plotConfig.tickWidth

		// Axis range flags are accepted by the parent command but have no
		// effect here; tell the user instead of silently dropping them.
		if plotConfig.xminStr != "" {
			log.Warning("flag --x-min ignored for command box")
		}
		if plotConfig.xmaxStr != "" {
			log.Warning("flag --x-max ignored for command box")
		}
		if plotConfig.yminStr != "" {
			log.Warning("flag --y-min ignored for command box")
		}
		if plotConfig.ymaxStr != "" {
			log.Warning("flag --y-max ignored for command box")
		}

		// Save the plot to the output file (a PNG file by default).
		checkError(p.Save(plotConfig.width*vg.Inch, plotConfig.height*vg.Inch, config.OutFile))
	},
}

// init registers the box subcommand under plotCmd and declares its flags.
func init() {
	plotCmd.AddCommand(boxCmd)

	// A box width of 0 (the default) lets the command pick a width itself.
	boxCmd.Flags().Float64("box-width", 0, "box width")
	boxCmd.Flags().Bool("horiz", false, "horizontal box plot")
}
20 changes: 19 additions & 1 deletion csvtk/cmd/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,16 @@ func getFlagNonNegativeInt(cmd *cobra.Command, flag string) int {
value, err := cmd.Flags().GetInt(flag)
checkError(err)
if value < 0 {
checkError(fmt.Errorf("value of flag --%s should be greater than 0", flag))
checkError(fmt.Errorf("value of flag --%s should be greater than or equal to 0", flag))
}
return value
}

// getFlagNonNegativeFloat64 returns the float64 value of the named flag.
// A failure to read the flag, or a negative value, is reported through
// checkError.
func getFlagNonNegativeFloat64(cmd *cobra.Command, flag string) float64 {
	value, err := cmd.Flags().GetFloat64(flag)
	checkError(err)
	if value < 0 {
		// The lower bound was missing from this message.
		checkError(fmt.Errorf("value of flag --%s should be greater than or equal to 0", flag))
	}
	return value
}
Expand Down Expand Up @@ -457,6 +466,15 @@ func parseCSVfile(cmd *cobra.Command, config Config, file string,

parseHeaderRow = false

orderedFieldss := make([]orderedField, len(fields))
for i, f := range fields {
orderedFieldss[i] = orderedField{field: f, order: fieldsOrder[f]}
}
sort.Sort(orderedFields(orderedFieldss))
for i, of := range orderedFieldss {
fields[i] = of.field
}

items := make([]string, len(fields))
for i, f := range fields {
items[i] = record[f-1]
Expand Down
6 changes: 3 additions & 3 deletions csvtk/cmd/hist.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ var histCmd = &cobra.Command{
plotConfig.ylab = "Count"
}
if config.OutFile == "-" {
config.OutFile = "hist.png"
config.OutFile = "histogram.png"
}
if plotConfig.title == "" {
plotConfig.title = "Histogram"
Expand All @@ -83,9 +83,9 @@ var histCmd = &cobra.Command{
f, err = strconv.ParseFloat(d[0], 64)
if err != nil {
if len(headerRow) > 0 {
checkError(fmt.Errorf("fail to parse float: %s at column %s", d[0], headerRow[0]))
checkError(fmt.Errorf("fail to parse data: %s at column: %s. please choose the right column by flag -f (--data-field)", d[0], headerRow[0]))
} else {
checkError(fmt.Errorf("fail to parse float: %s at column %d", d[0], fields[0]))
checkError(fmt.Errorf("fail to parse data: %s at column: %d. please choose the right column by flag -f (--data-field)", d[0], fields[0]))
}
}
v[i] = f
Expand Down
Loading

0 comments on commit d108b8f

Please sign in to comment.