diff --git a/CHANGELOG.md b/CHANGELOG.md index aad6416..136db38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ This project adheres to [Semantic Versioning](http://semver.org/). - Combining filters with AND - User-defined filters - Concatination of Dataframes +- Math for vector operations on multiple columns +- FindElem for content-based location of an element ### Changed - Make fixColnames faster diff --git a/README.md b/README.md index 49e5742..deea6f4 100644 --- a/README.md +++ b/README.md @@ -330,6 +330,32 @@ df.Capply(mean) df.Rapply(mean) ``` +#### Math + +Element-wise arithmetic vector operations are available on `int` and +`float64` values through the `Math` method: + +```go +df := dataframe.New( + series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), + series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), + series.New([]int{1, 3, 5, 7, 11}, series.Int, "Primes"), + series.New([]int{1, 2, 3, 4, 5}, series.Int, "Naturals"), +) + +// Returns a new DataFrame with a column named "Diff" = Floats - Primes +withNewDiffColumn := df.Math("Diff", "-", "Floats", "Primes") +``` + +It is also possible for the second argument (the operator) to be a +function (unary, binary, or trinary) on `int` or `float64` (especially +useful with Go's `math` package): + +```go +import "math" +withNewFMACol := df.Math("FMA", math.FMA, "Floats", "Primes", "Naturals") +``` + #### Chaining operations DataFrames support a number of methods for wrangling the data, diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index df95d55..f654de8 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -100,7 +100,7 @@ func (df DataFrame) Copy() DataFrame { // String implements the Stringer interface for DataFrame func (df DataFrame) String() (str string) { - return df.print(true, true, true, true, 10, 70, "DataFrame") + return df.print(true, false, true, true, 10, 70, "DataFrame") } func (df DataFrame) print( 
@@ -815,6 +815,34 @@ func (df DataFrame) Capply(f func(series.Series) series.Series) DataFrame { return New(columns...) } +func detectType(types []series.Type) series.Type { + var hasStrings, hasFloats, hasInts, hasBools bool + for _, t := range types { + switch t { + case series.String: + hasStrings = true + case series.Float: + hasFloats = true + case series.Int: + hasInts = true + case series.Bool: + hasBools = true + } + } + switch { + case hasStrings: + return series.String + case hasBools: + return series.Bool + case hasFloats: + return series.Float + case hasInts: + return series.Int + default: + panic("type not supported") + } +} + // Rapply applies the given function to the rows of a DataFrame. Prior to applying // the function the elements of each row are cast to a Series of a specific // type. In order of priority: String -> Float -> Int -> Bool. This casting also @@ -824,34 +852,6 @@ func (df DataFrame) Rapply(f func(series.Series) series.Series) DataFrame { return df } - detectType := func(types []series.Type) series.Type { - var hasStrings, hasFloats, hasInts, hasBools bool - for _, t := range types { - switch t { - case series.String: - hasStrings = true - case series.Float: - hasFloats = true - case series.Int: - hasInts = true - case series.Bool: - hasBools = true - } - } - switch { - case hasStrings: - return series.String - case hasBools: - return series.Bool - case hasFloats: - return series.Float - case hasInts: - return series.Int - default: - panic("type not supported") - } - } - // Detect row type prior to function application types := df.Types() rowType := detectType(types) @@ -2333,3 +2333,243 @@ func (df DataFrame) Describe() DataFrame { ddf := New(ss...) return ddf } + +// Finds a specific element (like `dataframe.Elem`), but using a column and value to get the row, +// and a column within that row to pinpoint an element. If multiple rows match the +// `column` x `keyInColumn` coordinate, a value is only returned for the first match. 
+// If no match is found or columns don't exist, `ok` is set to false. +// Note that this function is slow for many rows. In the future this will be corrected by indexing. +func (df DataFrame) FindElem(colname string, keyInColumn interface{}, columnInRow string) (value series.Element, ok bool) { + // find column index for given `columnInRow` coordinate + cidx := findInStringSlice(columnInRow, df.Names()) + if cidx < 0 { + return value, false + } + // find row index for given `colname` and `keyInColumn` coordinates + c1idx := findInStringSlice(colname, df.Names()) + if c1idx < 0 { + return value, false + } + s := df.columns[c1idx] + ridx := -1 + for i := 0; i < s.Len(); i++ { + if s.Val(i) == keyInColumn { + ridx = i + break + } + } + if ridx < 0 { + return value, false + } + + return df.Elem(ridx, cidx), true +} + +// Element-wise arithmetic vector operations on `int` and `float64` values. +// Applies `op` to the columns specified in operandcols, and stores the result +// in a new column named `resultcolnm`. +// `op` may be a string representing an arithmetic operator ("+", "-", "*", "/". also "%" on ints) +// or a unary, binary, or trinary function on `int` or `float`. +// Automatically coerces `int` to `float64` if necessary. 
+func (df DataFrame) Math(resultcolnm string, op interface{}, operandcols ...string) DataFrame { + if df.Err != nil { + return df + } + if len(operandcols) == 0 { + df.Err = fmt.Errorf("must supply at least one operand column name") + return df + } + cols := make([]series.Series, len(operandcols)) + types := make([]series.Type, len(operandcols)) + for i, colnm := range operandcols { + cols[i] = df.Col(colnm) + types[i] = cols[i].Type() + } + nrows := cols[0].Len() + ncols := len(cols) + + // detect result column type (as well as pre-op coercion target) + // If `op` is a float func, need to coerce ints to floats + var resultType series.Type + switch op.(type) { + case func(float64) float64, func(float64, float64) float64, func(float64, float64, float64) float64: + resultType = series.Float + default: + resultType = detectType(types) // float if there are any floats, int otherwise + } + // confirm colTypes are all numeric + if resultType == series.String || resultType == series.Bool { + df.Err = fmt.Errorf("cannot perform arithmetic with column of type %s", resultType) + return df + } + + switch resultType { + case series.Int: + results := make([]int, nrows) + for ridx := 0; ridx < nrows; ridx++ { + operands := make([]int, ncols) + for cidx, column := range cols { + operand, err := column.Elem(ridx).Int() + if err != nil { + // it's possible this error just can't happen anymore at this point + df.Err = fmt.Errorf("unable to convert element %d of column %s to int: %w", ridx, operandcols[cidx], err) + return df + } + operands[cidx] = operand + } + result, err := intOp(op, operands) + if err != nil { + df.Err = fmt.Errorf("error while performing integer op: %w", err) + return df + } + results[ridx] = result + } + df = df.Mutate( + series.New(results, resultType, resultcolnm), + ) + case series.Float: + results := make([]float64, nrows) + for ridx := 0; ridx < nrows; ridx++ { + operands := make([]float64, ncols) + for cidx, column := range cols { + operand := 
column.Elem(ridx).Float() + operands[cidx] = operand + } + results[ridx] = floatOp(op, operands) + } + df = df.Mutate( + series.New(results, resultType, resultcolnm), + ) + default: + df.Err = fmt.Errorf("series type %s is not a type on which we can perform arithmetic", resultType) + } + + return df +} + +func floatOp(op interface{}, operands []float64) float64 { + var acc float64 // accumulator for n-ary operators + if len(operands) == 0 { + return 0 + } + + switch op := op.(type) { // takes care of support for things in `math` + case func(float64) float64: + return op(operands[0]) + case func(float64, float64) float64: + return op(operands[0], operands[1]) + case func(float64, float64, float64) float64: + return op(operands[0], operands[1], operands[2]) + + } + // for the most basic operations, support variadic operands + switch op { + case "+": + // add all operands + for _, operand := range operands { + acc += operand + } + case "-": + // with only one operand, return its negative. + // with more, subtract the rest from the first. 
// tooFewOperands describes a function operator that was handed fewer operands
// than its signature requires.
func tooFewOperands(want, got int) error {
	return fmt.Errorf("operator function takes %d operands, but only %d were supplied", want, got)
}

// floatOp applies op to operands and returns the result.
//
// op is either a unary, binary or ternary function on float64 (extra operands
// beyond the function's arity are ignored) or one of the string operators:
//
//	"+"  sum of all operands
//	"-"  negation of a single operand, otherwise first minus all the rest
//	"*"  product of all operands
//	"/"  reciprocal of a single operand, otherwise first divided by each of
//	     the rest; division by zero follows float64 rules (±Inf or NaN)
//
// With no operands the result is 0. Because the signature has no error
// result, misuse is reported by panicking with a descriptive string: an
// unknown operator, an unsupported operator type, or a function that needs
// more operands than were supplied.
func floatOp(op interface{}, operands []float64) float64 {
	if len(operands) == 0 {
		return 0
	}

	var operator string
	switch fn := op.(type) { // functions, e.g. from package math
	case func(float64) float64:
		return fn(operands[0])
	case func(float64, float64) float64:
		if len(operands) < 2 {
			panic(tooFewOperands(2, len(operands)).Error())
		}
		return fn(operands[0], operands[1])
	case func(float64, float64, float64) float64:
		if len(operands) < 3 {
			panic(tooFewOperands(3, len(operands)).Error())
		}
		return fn(operands[0], operands[1], operands[2])
	case string:
		operator = fn
	default:
		panic(fmt.Sprintf("Unsupported operator type for float operands: %T", op))
	}

	var acc float64 // accumulator for the variadic operators
	switch operator {
	case "+":
		for _, operand := range operands {
			acc += operand
		}
	case "-":
		if len(operands) == 1 {
			return -operands[0]
		}
		acc = operands[0]
		for _, operand := range operands[1:] {
			acc -= operand
		}
	case "*":
		acc = 1
		for _, operand := range operands {
			acc *= operand
		}
	case "/":
		if len(operands) == 1 {
			return 1 / operands[0]
		}
		acc = operands[0]
		for _, operand := range operands[1:] {
			acc /= operand
		}
	default:
		panic(fmt.Sprintf("Unknown arithmetic operator: %s", operator))
	}

	return acc
}

// intOp applies op to operands and returns the result.
//
// op is either a unary, binary or ternary function on int (extra operands
// beyond the function's arity are ignored) or one of the string operators:
//
//	"+"  sum of all operands
//	"-"  negation of a single operand, otherwise first minus all the rest
//	"*"  product of all operands
//	"/"  integer reciprocal of a single operand, otherwise first divided by
//	     each of the rest
//	"%"  remainder of the first operand divided by the second (further
//	     operands are ignored); a single operand is returned unchanged
//
// With no operands the result is 0 and no error. An error is returned for a
// division or remainder by zero, an unknown operator, an unsupported operator
// type, or a function that needs more operands than were supplied.
func intOp(op interface{}, operands []int) (int, error) {
	if len(operands) == 0 {
		return 0, nil
	}

	var operator string
	switch fn := op.(type) { // users can specify functions for op, or a string
	case func(int) int:
		return fn(operands[0]), nil
	case func(int, int) int:
		if len(operands) < 2 {
			return 0, tooFewOperands(2, len(operands))
		}
		return fn(operands[0], operands[1]), nil
	case func(int, int, int) int:
		if len(operands) < 3 {
			return 0, tooFewOperands(3, len(operands))
		}
		return fn(operands[0], operands[1], operands[2]), nil
	case string:
		operator = fn
	default:
		return 0, fmt.Errorf("unsupported operator type for int operands: %T", op)
	}

	var acc int // accumulator for the variadic operators
	switch operator {
	case "+":
		for _, operand := range operands {
			acc += operand
		}
	case "-":
		if len(operands) == 1 {
			return -operands[0], nil
		}
		acc = operands[0]
		for _, operand := range operands[1:] {
			acc -= operand
		}
	case "*":
		acc = 1
		for _, operand := range operands {
			acc *= operand
		}
	case "/":
		if len(operands) == 1 { // reciprocal: 1, -1 or 0 under integer division
			if operands[0] == 0 {
				return 0, fmt.Errorf("integer divide by zero")
			}
			return 1 / operands[0], nil
		}
		acc = operands[0]
		for _, operand := range operands[1:] {
			if operand == 0 {
				return 0, fmt.Errorf("integer divide by zero")
			}
			acc /= operand
		}
	case "%":
		if len(operands) < 2 {
			return operands[0], nil
		}
		if operands[1] == 0 {
			return 0, fmt.Errorf("integer divide by zero")
		}
		return operands[0] % operands[1], nil
	default:
		return 0, fmt.Errorf("unknown arithmetic operator: %s", operator)
	}

	return acc, nil
}
f.Float()) + } +} + +func ExampleDataFrame_Math() { + /* `input` is a 5x4 DataFrame: + + Strings Floats Primes Naturals + 0: e 2.718000 1 1 + 1: Pi 3.142000 3 2 + 2: Phi 1.618000 5 3 + 3: Sqrt2 1.414000 7 4 + 4: Ln2 0.693000 11 5 + + */ + df := dataframe.New( + series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), + series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), + series.New([]int{1, 3, 5, 7, 11}, series.Int, "Primes"), + series.New([]int{1, 2, 3, 4, 5}, series.Int, "Naturals"), + ) + + // `Math` takes a new column name, an operator (string or func) and at least one column name + withNewDiffColumn := df.Math("Diff", "-", "Floats", "Primes") + + // New `DataFrame` now has a column named "Diff" which is + // the result of subtracting Primes from Floats. + fmt.Println(withNewDiffColumn) + + /* + Strings Floats Primes Naturals Diff + 0: e 2.718000 1 1 1.718000 + 1: Pi 3.142000 3 2 0.142000 + 2: Phi 1.618000 5 3 -3.382000 + 3: Sqrt2 1.414000 7 4 -5.586000 + 4: Ln2 0.693000 11 5 -10.307000 + + */ + + // Also supports passing unary, binary, or trinary functions of + // int or float64, e.g., for functions from Go's `math` package. + // (Note here that `dataframe.Math` supports specifying many + // column names depending on the given operator, and also that + // it automatically coerces int to float64 when `op` is a + // function on float64.) 
+ withNewFMACol := df.Math("FMA", math.FMA, "Floats", "Primes", "Naturals") + + fmt.Println(withNewFMACol) +} diff --git a/dataframe/find_elem_test.go b/dataframe/find_elem_test.go new file mode 100644 index 0000000..4ebc2a2 --- /dev/null +++ b/dataframe/find_elem_test.go @@ -0,0 +1,107 @@ +package dataframe + +import ( + "testing" + + "github.com/go-gota/gota/series" +) + +func TestFindElem(t *testing.T) { + /* Input is a 5x4 DataFrame + + Strings Floats Primes Naturals + 0: e 2.718000 1 1 + 1: Pi 3.142000 3 2 + 2: Phi 1.618000 5 3 + 3: Sqrt2 1.414000 7 4 + 4: Ln2 0.693000 11 5 + + */ + df := New( + series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), + series.New([]int{1, 3, 5, 7, 11}, series.Int, "Ints"), + series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), + series.New([]bool{false, true, false, false, false}, series.Bool, "Bools"), + ) + + t.Run("String lookup of float value", func(t *testing.T) { + e, ok := df.FindElem("Strings", "Pi", "Floats") + if !ok { + t.Fatal("failed to find value") + } + observed := e.Float() + expected := 3.142 + if observed != expected { + t.Fatalf("values did not match - expected %f but got %f", expected, observed) + } + }) + + t.Run("Float lookup of string value", func(t *testing.T) { + e, ok := df.FindElem("Floats", 3.142, "Strings") + if !ok { + t.Fatal("failed to find value") + } + observed := e.String() + expected := "Pi" + if observed != expected { + t.Fatalf("values did not match - expected %s but got %s", expected, observed) + } + }) + + t.Run("Int lookup of bool value", func(t *testing.T) { + e, ok := df.FindElem("Ints", 3, "Bools") + if !ok { + t.Fatal("failed to find value") + } + observed, _ := e.Bool() + expected := true + if observed != expected { + t.Fatalf("values did not match - expected %t but got %t", expected, observed) + } + }) + + t.Run("Bool lookup of int value", func(t *testing.T) { + e, ok := df.FindElem("Bools", true, "Ints") + if !ok { + 
t.Fatal("failed to find value") + } + observed, _ := e.Int() + expected := 3 + if observed != expected { + t.Fatalf("values did not match - expected %d but got %d", expected, observed) + } + }) + + t.Run("Multiple matches returns first", func(t *testing.T) { + e, ok := df.FindElem("Bools", false, "Ints") + if !ok { + t.Fatal("failed to find value") + } + observed, _ := e.Int() + expected := 1 + if observed != expected { + t.Fatalf("values did not match - expected %d but got %d", expected, observed) + } + }) + + t.Run("First column not found sets ok to false", func(t *testing.T) { + _, ok := df.FindElem("Eentz", 11, "Strings") + if ok { + t.Fatal("expected ok false") + } + }) + + t.Run("Key not found sets ok to false", func(t *testing.T) { + _, ok := df.FindElem("Ints", 12, "Strings") + if ok { + t.Fatal("expected ok false") + } + }) + + t.Run("Second column not found sets ok to false", func(t *testing.T) { + _, ok := df.FindElem("Ints", 11, "Ropes") + if ok { + t.Fatal("expected ok false") + } + }) +} diff --git a/dataframe/math_test.go b/dataframe/math_test.go new file mode 100644 index 0000000..67c9618 --- /dev/null +++ b/dataframe/math_test.go @@ -0,0 +1,322 @@ +package dataframe + +import ( + "math" + "reflect" + "strings" + "testing" + + "github.com/go-gota/gota/series" +) + +func TestMath(t *testing.T) { + /* Input is a 5x4 DataFrame + + Strings Floats Primes Naturals + 0: e 2.718000 1 1 + 1: Pi 3.142000 3 2 + 2: Phi 1.618000 5 3 + 3: Sqrt2 1.414000 7 4 + 4: Ln2 0.693000 11 5 + + */ + input := New( + series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), + series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), + series.New([]int{1, 3, 5, 7, 11}, series.Int, "Primes"), + series.New([]int{1, 2, 3, 4, 5}, series.Int, "Naturals"), + ) + + table := testTable{ + // Sums + { + fut: func(df DataFrame) DataFrame { + df = df.Math("Sum", "+", "Floats", "Primes") + return df + }, + selection: []string{"Sum"}, + 
expected: New( + series.New([]float64{3.718, 6.142, 6.618, 8.414, 11.693}, series.Float, "Sum"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("IntSum", "+", "Primes", "Naturals") + return df + }, + selection: []string{"IntSum"}, + expected: New( + series.New([]int{2, 5, 8, 11, 16}, series.Int, "IntSum"), + ), + }, + + // Differences + { + fut: func(df DataFrame) DataFrame { + df = df.Math("Difference", "-", "Floats", "Primes") + return df + }, + selection: []string{"Difference"}, + expected: New( + series.New([]float64{1.718000, 0.142000, -3.382000, -5.586000, -10.307000}, series.Float, "Difference"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("IntDifference", "-", "Primes", "Naturals") + return df + }, + selection: []string{"IntDifference"}, + expected: New( + series.New([]int{0, 1, 2, 3, 6}, series.Int, "IntDifference"), + ), + }, + + // Products + { + fut: func(df DataFrame) DataFrame { + df = df.Math("Product", "*", "Floats", "Primes") + return df + }, + selection: []string{"Product"}, + expected: New( + series.New([]float64{2.718000, 9.426000, 8.090000, 9.898000, 7.623000}, series.Float, "Product"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("IntProduct", "*", "Primes", "Naturals") + return df + }, + selection: []string{"IntProduct"}, + expected: New( + series.New([]int{1, 6, 15, 28, 55}, series.Int, "IntProduct"), + ), + }, + + // Quotients + { + fut: func(df DataFrame) DataFrame { + df = df.Math("Quotient", "/", "Floats", "Primes") + return df + }, + selection: []string{"Quotient"}, + expected: New( + series.New([]float64{2.718000, 1.047333, 0.323600, 0.202000, 0.063000}, series.Float, "Quotient"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("IntQuotient", "/", "Primes", "Naturals") + return df + }, + selection: []string{"IntQuotient"}, + expected: New( + series.New([]int{1, 1, 1, 1, 2}, series.Int, "IntQuotient"), + ), + }, + { + fut: func(df DataFrame) DataFrame { 
+ df = df.Math("Modulo", "%", "Primes", "Naturals") + return df + }, + selection: []string{"Modulo"}, + expected: New( + series.New([]int{0, 1, 2, 3, 1}, series.Int, "Modulo"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("ModuloSelf", "%", "Primes", "Primes") + return df + }, + selection: []string{"ModuloSelf"}, + expected: New( + series.New([]int{0, 0, 0, 0, 0}, series.Int, "ModuloSelf"), + ), + }, + + // >2 operands + { + fut: func(df DataFrame) DataFrame { + df = df.Math("MultiSum", "+", "Floats", "Floats", "Primes", "Primes") + return df + }, + selection: []string{"MultiSum"}, + expected: New( + series.New([]float64{7.436000, 12.284000, 13.236000, 16.828000, 23.386000}, series.Float, "MultiSum"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("MultiDifference", "-", "Floats", "Floats", "Primes", "Primes") + return df + }, + selection: []string{"MultiDifference"}, + expected: New( + series.New([]float64{-2.000000, -6.000000, -10.000000, -14.000000, -22.000000}, series.Float, "MultiDifference"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("MultiProduct", "*", "Floats", "Floats", "Primes", "Primes") + return df + }, + selection: []string{"MultiProduct"}, + expected: New( + series.New([]float64{7.387524, 88.849476, 65.448100, 97.970404, 58.110129}, series.Float, "MultiProduct"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("MultiQuotient", "/", "Floats", "Floats", "Primes", "Primes") + return df + }, + selection: []string{"MultiQuotient"}, + expected: New( + series.New([]float64{1.000000, 0.111111, 0.040000, 0.020408, 0.008264}, series.Float, "MultiQuotient"), + ), + }, + + // Arbitrary float functions + { + fut: func(df DataFrame) DataFrame { + df = df.Math("UnaryFloatFunc", math.Cos, "Floats") + return df + }, + selection: []string{"UnaryFloatFunc"}, + expected: New( + series.New([]float64{-0.911618, -1.000000, -0.047186, 0.156155, 0.769333}, series.Float, "UnaryFloatFunc"), + 
), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("BinaryFloatFunc", math.Hypot, "Floats", "Floats") + return df + }, + selection: []string{"BinaryFloatFunc"}, + expected: New( + series.New([]float64{3.843832, 4.443459, 2.288198, 1.999698, 0.980050}, series.Float, "BinaryFloatFunc"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("TrinaryFloatFunc", math.FMA, "Floats", "Floats", "Floats") + return df + }, + selection: []string{"TrinaryFloatFunc"}, + expected: New( + series.New([]float64{10.105524, 13.014164, 4.235924, 3.413396, 1.173249}, series.Float, "TrinaryFloatFunc"), + ), + }, + + // Arbitrary int functions + { + fut: func(df DataFrame) DataFrame { + df = df.Math("UnaryIntFunc", func(i int) int { return i*2 + 1 }, "Primes") + return df + }, + selection: []string{"UnaryIntFunc"}, + expected: New( + series.New([]int{3, 7, 11, 15, 23}, series.Int, "UnaryIntFunc"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("BinaryIntFunc", func(x, y int) int { return x * y }, "Naturals", "Primes") + return df + }, + selection: []string{"BinaryIntFunc"}, + expected: New( + series.New([]int{1, 6, 15, 28, 55}, series.Int, "BinaryIntFunc"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math( + "TrinaryIntFunc", + func(x, y, z int) int { return x * y * z }, + "Naturals", "Naturals", "Primes") + return df + }, + selection: []string{"TrinaryIntFunc"}, + expected: New( + series.New([]int{1, 12, 45, 112, 275}, series.Int, "TrinaryIntFunc"), + ), + }, + } + + runTestTable(table, input, t) + +} + +func TestMathErrors(t *testing.T) { + expectError("at least one operand", func(df DataFrame) DataFrame { + return df.Math("Empty operands", "+") + }, t) + + expectError("cannot perform arithmetic with column of type string", func(df DataFrame) DataFrame { + return df.Math("Non-numeric type", "+", "Strings") + }, t) + + expectError("unknown arithmetic operator", func(df DataFrame) DataFrame { + return df.Math("unknown 
operator", "!", "Primes") + }, t) + + expectError("integer divide by zero", func(df DataFrame) DataFrame { + return df.Math("Divide by zero", "/", "Primes", "Naturals0") + }, t) + + // reciprocal + expectError("integer divide by zero", func(df DataFrame) DataFrame { + return df.Math("Divide by zero", "/", "Naturals0") + }, t) + + // modulo 0 + expectError("integer divide by zero", func(df DataFrame) DataFrame { + return df.Math("Divide by zero", "%", "Primes", "Naturals0") + }, t) + + // catch panic on unknown op +} + +// Test helpers + +type testTable []struct { + fut func(DataFrame) DataFrame + selection interface{} + expected DataFrame +} + +func runTestTable(table testTable, input DataFrame, t *testing.T) { + + for tidx, test := range table { + observed := test.fut(input).Select(test.selection) + + // Check that the types are the same between both DataFrames + if !reflect.DeepEqual(test.expected.Types(), observed.Types()) { + t.Errorf("Test: %d\nDifferent types:\nA:%v\nB:%v", tidx, test.expected.Types(), observed.Types()) + } + // Check that the colnames are the same between both DataFrames + if !reflect.DeepEqual(test.expected.Names(), observed.Names()) { + t.Errorf("Test: %d\nDifferent colnames:\nA:%v\nB:%v", tidx, test.expected.Names(), observed.Names()) + } + // Check that the values are the same between both DataFrames + if !reflect.DeepEqual(test.expected.Records(), observed.Records()) { + t.Fatalf("Test: %d\nDifferent values:\nExpected:%v\nObserved:%v", tidx, test.expected.Records(), observed.Records()) + } + } +} + +func expectError(message string, fut func(DataFrame) DataFrame, t *testing.T) { + df := New( + series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), + series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), + series.New([]int{1, 3, 5, 7, 11}, series.Int, "Primes"), + series.New([]int{0, 1, 2, 3, 4}, series.Int, "Naturals0"), + ) + df = fut(df) + if !strings.Contains(df.Err.Error(), 
message) { + t.Fatalf("expected error to contain '%s', but got %v", message, df.Err) + } +}