From 4514eb0f08d20f33d474754467ee13ba9a3c5a89 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 24 Jun 2021 16:53:39 -0400
Subject: [PATCH 01/13] initial commit
---
src/DataFramesMeta.jl | 9 ++-
src/macros.jl | 127 +++++++++++++++++++++---------------------
src/parsing.jl | 49 ++++------------
test/dataframes.jl | 4 +-
4 files changed, 84 insertions(+), 105 deletions(-)
diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl
index c1f4ad20..f2974d5b 100644
--- a/src/DataFramesMeta.jl
+++ b/src/DataFramesMeta.jl
@@ -7,11 +7,14 @@ using MacroTools
@reexport using DataFrames
# Basics:
-export @with, @where, @orderby, @transform, @by, @combine, @select,
- @transform!, @select!,
+export @with,
+ @subset, @subset!,
+ @orderby,
+ @by, @combine,
+ @transform, @select, @transform!, @select!,
@eachrow, @eachrow!,
@byrow,
- @based_on # deprecated
+ @based_on, @where # deprecated
include("parsing.jl")
include("macros.jl")
diff --git a/src/macros.jl b/src/macros.jl
index 83e6b3e0..b40cf52f 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -415,50 +415,31 @@ macro with(d, body)
end
-
##############################################################################
##
-## @where - select row subsets
+## @subset - select row subsets
##
##############################################################################
-function where_helper(x, args...)
- exprs, wrap_byrow = create_args_vector(args...)
- t = (fun_to_vec(ex; gensym_names = true, wrap_byrow = wrap_byrow) for ex in exprs)
+function subset_helper(x, args...)
+ exprs, outer_flags = create_args_vector(args...)
+ t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs)
+ skipmissing = outer_flags[Symbol("@skipmissing")][]
quote
- $where($x, $(t...))
+ $subset($x, $(t...); skipmissing=$skipmissing)
end
end
-function df_to_bool(res::AbstractDataFrame)
- if any(t -> !(t isa AbstractVector{<:Union{Missing, Bool}}), eachcol(res))
- throw(ArgumentError("All arguments in @where must return an " *
- "AbstractVector{<:Union{Missing, Bool}}"))
+function where_helper(x, args...)
+ exprs, outer_flags = create_args_vector(args...)
+ t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs)
+ quote
+ $subset($x, $(t...); skipmissing=true)
end
-
- return reduce((x, y) -> x .& y, eachcol(res)) .=== true
-end
-
-function where(df::AbstractDataFrame, @nospecialize(args...))
- res = DataFrames.select(df, args...; copycols = false)
- tokeep = df_to_bool(res)
- df[tokeep, :]
-end
-
-function where(gd::GroupedDataFrame, @nospecialize(args...))
- res = DataFrames.select(gd, args...; copycols = false, keepkeys = false)
- tokeep = df_to_bool(res)
- parent(gd)[tokeep, :]
-end
-
-function where(df::SubDataFrame, @nospecialize(args...))
- res = DataFrames.select(df, args...)
- tokeep = df_to_bool(res)
- df[tokeep, :]
end
"""
- @where(d, i...)
+ @subset(d, i...)
Select row subsets in `AbstractDataFrame`s and `GroupedDataFrame`s.
@@ -469,16 +450,16 @@ Select row subsets in `AbstractDataFrame`s and `GroupedDataFrame`s.
Multiple `i` expressions are "and-ed" together.
-If given a `GroupedDataFrame`, `@where` applies transformations by
+If given a `GroupedDataFrame`, `@subset` applies transformations by
group, and returns a fresh `DataFrame` containing the rows
for which the generated values are all `true`.
-Inputs to `@where` can come in two formats: a `begin ... end` block, in which case each
+Inputs to `@subset` can come in two formats: a `begin ... end` block, in which case each
line is a separate selector, or as multiple arguments.
For example the following two statements are equivalent:
```julia
-@where df begin
+@subset df begin
:x .> 1
:y .< 2
end
@@ -487,22 +468,32 @@ end
and
```
-@where(df, :x .> 1, :y .< 2)
+@subset(df, :x .> 1, :y .< 2)
```
!!! note
- `@where` treats `missing` values as `false` when filtering rows.
- Unlike `DataFrames.filter` and other boolean operations with
- `missing`, `@where` will *not* error on missing values, and
- will only keep `true` values.
-
-If an expression provided to `@where` begins with `@byrow`, operations
+ `@subset` will error on `missing`, unlike `@where`. To
+ recover the old behavior of `@where`, use the macro-flag
+ `@skipmissing`
+
+ ```julia
+ julia> df = DataFrame(a = [1, missing], b = [3, 4]);
+
+ julia> @subset df @skipmissing :a .== 1
+ 1×2 DataFrame
+ Row │ a b
+ │ Int64? Int64
+ ─────┼───────────────
+ 1 │ 1 3
+ ```
+
+If an expression provided to `@subset` begins with `@byrow`, operations
are applied "by row" along the data frame. To avoid writing `@byrow` multiple
times, `@orderby` also allows `@byrow`to be placed at the beginning of a block of
operations. For example, the following two statements are equivalent.
```
-@where df @byrow begin
+@subset df @byrow begin
:x > 1
:y < 2
end
@@ -511,7 +502,7 @@ end
and
```
-@orderby df
+@subset df begin
@byrow :x > 1
@byrow :y < 2
end
@@ -526,7 +517,7 @@ julia> df = DataFrame(x = 1:3, y = [2, 1, 2]);
julia> globalvar = [2, 1, 0];
-julia> @where(df, :x .> 1)
+julia> @subset(df, :x .> 1)
2×2 DataFrame
Row │ x y
│ Int64 Int64
@@ -534,7 +525,7 @@ julia> @where(df, :x .> 1)
1 │ 2 1
2 │ 3 2
-julia> @where(df, :x .> globalvar)
+julia> @subset(df, :x .> globalvar)
2×2 DataFrame
Row │ x y
│ Int64 Int64
@@ -542,7 +533,7 @@ julia> @where(df, :x .> globalvar)
1 │ 2 1
2 │ 3 2
-julia> @where df begin
+julia> @subset df begin
:x .> globalvar
:y .== 3
end
@@ -553,7 +544,7 @@ julia> d = DataFrame(n = 1:20, x = [3, 3, 3, 3, 1, 1, 1, 2, 1, 1,
julia> g = groupby(d, :x);
-julia> @where(g, :n .> mean(:n))
+julia> @subset(g, :n .> mean(:n))
8×2 DataFrame
Row │ n x
│ Int64 Int64
@@ -567,7 +558,7 @@ julia> @where(g, :n .> mean(:n))
7 │ 19 1
8 │ 20 2
-julia> @where g begin
+julia> @subset g begin
:n .> mean(:n)
:n .< 20
end
@@ -585,7 +576,7 @@ julia> @where g begin
julia> d = DataFrame(a = [1, 2, missing], b = ["x", "y", missing]);
-julia> @where(d, :a .== 1)
+julia> @subset(d, :a .== 1)
1×2 DataFrame
│ Row │ a │ b │
│ │ Int64? │ String? │
@@ -593,7 +584,17 @@ julia> @where(d, :a .== 1)
│ 1 │ 1 │ x │
```
"""
+macro subset(x, args...)
+ esc(subset_helper(x, args...))
+end
+
+"""
+ @where(x, args...)
+
+Deprecated version of `@subset`, see `?@subset` for details.
+"""
macro where(x, args...)
+ @warn "`@where` is deprecated, use `@subset` with `@skipmissing` instead."
esc(where_helper(x, args...))
end
@@ -604,8 +605,8 @@ end
##############################################################################
function orderby_helper(x, args...)
- exprs, wrap_byrow = create_args_vector(args...)
- t = (fun_to_vec(ex; gensym_names = true, wrap_byrow = wrap_byrow) for ex in exprs)
+ exprs, outer_flags = create_args_vector(args...)
+ t = (fun_to_vec(ex; gensym_names = true, outer_flags = outer_flags) for ex in exprs)
quote
$DataFramesMeta.orderby($x, $(t...))
end
@@ -768,8 +769,8 @@ end
function transform_helper(x, args...)
- exprs, wrap_byrow = create_args_vector(args...)
- t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs)
+ exprs, outer_flags = create_args_vector(args...)
+ t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
quote
$DataFrames.transform($x, $(t...))
end
@@ -886,8 +887,8 @@ end
function transform!_helper(x, args...)
- exprs, wrap_byrow = create_args_vector(args...)
- t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs)
+ exprs, outer_flags = create_args_vector(args...)
+ t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
quote
$DataFrames.transform!($x, $(t...))
end
@@ -981,8 +982,8 @@ end
##############################################################################
function select_helper(x, args...)
- exprs, wrap_byrow = create_args_vector(args...)
- t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs)
+ exprs, outer_flags = create_args_vector(args...)
+ t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
quote
$DataFrames.select($x, $(t...))
end
@@ -1095,8 +1096,8 @@ end
##############################################################################
function select!_helper(x, args...)
- exprs, wrap_byrow = create_args_vector(args...)
- t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs)
+ exprs, outer_flags = create_args_vector(args...)
+ t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
quote
$DataFrames.select!($x, $(t...))
end
@@ -1206,7 +1207,7 @@ end
function combine_helper(x, args...; deprecation_warning = false)
deprecation_warning && @warn "`@based_on` is deprecated. Use `@combine` instead."
- exprs, wrap_byrow = create_args_vector(args...)
+ exprs, outer_flags = create_args_vector(args...)
fe = first(exprs)
if length(exprs) == 1 &&
@@ -1218,7 +1219,7 @@ function combine_helper(x, args...; deprecation_warning = false)
exprs = ((:(cols(AsTable) = $fe)),)
end
- t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs)
+ t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
quote
$DataFrames.combine($x, $(t...))
@@ -1327,7 +1328,7 @@ end
function by_helper(x, what, args...)
# Only allow one argument when returning a Table object
# Only allow one argument when returning a Table object
- exprs, wrap_byrow = create_args_vector(args...)
+ exprs, outer_flags = create_args_vector(args...)
fe = first(exprs)
if length(exprs) == 1 &&
!(fe isa QuoteNode || onearg(fe, :cols)) &&
@@ -1338,7 +1339,7 @@ function by_helper(x, what, args...)
exprs = ((:(cols(AsTable) = $fe)),)
end
- t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs)
+ t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
quote
$DataFrames.combine($groupby($x, $what), $(t...))
diff --git a/src/parsing.jl b/src/parsing.jl
index cd77f41c..10835bd5 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -64,8 +64,8 @@ end
is_macro_head(ex, name) = false
is_macro_head(ex::Expr, name) = ex.head == :macrocall && ex.args[1] == Symbol(name)
-extract_macro_flags(ex, exprflags = (;Symbol("@byrow") => Ref(false),)) = (ex, exprflags)
-function extract_macro_flags(ex::Expr, exprflags = (;Symbol("@byrow") => Ref(false),))
+extract_macro_flags(ex, exprflags = (;Symbol("@byrow") => Ref(false), Symbol("@skipmissing") => Ref(false))) = (ex, exprflags)
+function extract_macro_flags(ex::Expr, exprflags = (;Symbol("@byrow") => Ref(false), Symbol("@skipmissing") => Ref(false)))
if ex.head == :macrocall
macroname = ex.args[1]
if macroname in keys(exprflags)
@@ -183,7 +183,7 @@ end
# We need wrap_byrow as a keyword argument here in case someone
# uses `@transform df @byrow begin ... end`, which we
# deal with outside of this function.
-function fun_to_vec(ex::Expr; gensym_names::Bool=false, no_dest::Bool=false, wrap_byrow::Bool=false)
+function fun_to_vec(ex::Expr; gensym_names::Bool=false, outer_flags=nothing, no_dest::Bool=false)
# classify the type of expression
# :x # handled via dispatch
# cols(:x) # handled as though above
@@ -201,15 +201,16 @@ function fun_to_vec(ex::Expr; gensym_names::Bool=false, no_dest::Bool=false, wra
# cols(y) = :x + 1 # re-write as complicated col, but RHS is :block
# cols(:y) = cols(:x) + 1 # re-write as complicated call, RHS is block, use cols
# `@byrow` before any of the above
- ex, flags = extract_macro_flags(MacroTools.unblock(ex))
+ ex, inner_flags = extract_macro_flags(MacroTools.unblock(ex))
# Use tuple syntax in future when we add more flags
- wrap_byrow_t = flags[Symbol("@byrow")][]
+ inner_wrap_byrow = inner_flags[Symbol("@byrow")][]
+ outer_wrap_byrow = outer_flags === nothing ? false : outer_flags[Symbol("@byrow")][]
- if wrap_byrow_t && wrap_byrow
+ if inner_wrap_byrow && outer_wrap_byrow
throw(ArgumentError("Redundant @byrow calls."))
else
- wrap_byrow = wrap_byrow || wrap_byrow_t
+ wrap_byrow = inner_wrap_byrow || outer_wrap_byrow
end
if gensym_names
@@ -304,7 +305,7 @@ function fun_to_vec(ex::Expr; gensym_names::Bool=false, no_dest::Bool=false, wra
throw(ArgumentError("This path should not be reached"))
end
-fun_to_vec(ex::QuoteNode; no_dest::Bool=false, gensym_names::Bool=false, wrap_byrow::Bool=false) = ex
+fun_to_vec(ex::QuoteNode; no_dest::Bool=false, gensym_names::Bool=false, outer_flags=nothing) = ex
function make_source_concrete(x::AbstractVector)
if isempty(x) || isconcretetype(eltype(x))
@@ -344,33 +345,10 @@ function create_args_vector(args...)
end
"""
- create_args_vector(arg) -> vec, wrap_byrow
-
-Normalize a single input to a vector of expressions,
-with a `wrap_byrow` flag indicating that the
-expressions should operate by row.
-
-If `arg` is a single `:block`, it is unnested.
-Otherwise, return a single-element array.
-Also removes line numbers.
-
-If `arg` is of the form `@byrow ...`, then
-`wrap_byrow` is returned as `true`.
+ create_args_vector(arg) -> vec, outer_flags
"""
function create_args_vector(arg)
- if arg isa Expr && is_macro_head(arg, "@byrow")
- wrap_byrow = true
- largs = length(arg.args)
- if largs == 2
- throw(ArgumentError("No transformations supplied with `@byrow`"))
- elseif largs == 3
- arg = arg.args[3]
- else
- arg = Expr(:block, arg.args[3:end]...)
- end
- else
- wrap_byrow = false
- end
+ arg, outer_flags = extract_macro_flags(MacroTools.unblock(arg))
if arg isa Expr && arg.head == :block
x = MacroTools.rmlines(arg).args
@@ -378,8 +356,5 @@ function create_args_vector(arg)
x = Any[arg]
end
- if wrap_byrow && any(t -> is_macro_head(t, "@byrow"), x)
- throw(ArgumentError("Redundant `@byrow` calls."))
- end
- return x, wrap_byrow
+ return x, outer_flags
end
diff --git a/test/dataframes.jl b/test/dataframes.jl
index b8767d6d..b56114d8 100644
--- a/test/dataframes.jl
+++ b/test/dataframes.jl
@@ -734,9 +734,9 @@ end
macro linenums_macro(arg)
if arg isa Expr && arg.head == :block && length(arg.args) == 1 && arg.args[1] isa LineNumberNode
- esc(:(true))
+ esc(:([true]))
else
- esc(:(false))
+ esc(:([false]))
end
end
From 2e5de90463308220f1ad6a7203800718027cb82a Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 24 Jun 2021 17:09:39 -0400
Subject: [PATCH 02/13] fix tests
---
test/dataframes.jl | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/test/dataframes.jl b/test/dataframes.jl
index b56114d8..162ec30f 100644
--- a/test/dataframes.jl
+++ b/test/dataframes.jl
@@ -740,6 +740,14 @@ macro linenums_macro(arg)
end
end
+macro linenums_macro_byrow(arg)
+ if arg isa Expr && arg.head == :block && length(arg.args) == 1 && arg.args[1] isa LineNumberNode
+ esc(:(true))
+ else
+ esc(:(false))
+ end
+end
+
@testset "removing lines" begin
df = DataFrame(a = [1], b = [2])
# Can't use @test because @test remove line numbers
@@ -753,7 +761,7 @@ end
@test d.y == [true]
d = @transform df @byrow begin
- y = @linenums_macro begin end
+ y = @linenums_macro_byrow begin end
end
@test d.y == [true]
@@ -763,13 +771,13 @@ end
@test nrow(d) == 1
d = @where df begin
- @byrow @linenums_macro begin end
+ @byrow @linenums_macro_byrow begin end
end
@test nrow(d) == 1
d = @where df @byrow begin
- @linenums_macro begin end
+ @linenums_macro_byrow begin end
end
@test nrow(d) == 1
From eca50d475530affe1ec0c5bf26a8e2423c86dcb4 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Sat, 26 Jun 2021 11:40:46 -0400
Subject: [PATCH 03/13] no more skipmissing
---
src/macros.jl | 173 ++++++++++++++++++++++++++++++++++++++++++++-----
src/parsing.jl | 4 +-
2 files changed, 158 insertions(+), 19 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index b40cf52f..035b9c77 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -417,16 +417,15 @@ end
##############################################################################
##
-## @subset - select row subsets
+## @subset and @subset! - select row subsets
##
##############################################################################
function subset_helper(x, args...)
exprs, outer_flags = create_args_vector(args...)
t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs)
- skipmissing = outer_flags[Symbol("@skipmissing")][]
quote
- $subset($x, $(t...); skipmissing=$skipmissing)
+ $subset($x, $(t...); skipmissing=true)
end
end
@@ -472,20 +471,10 @@ and
```
!!! note
- `@subset` will error on `missing`, unlike `@where`. To
- recover the old behavior of `@where`, use the macro-flag
- `@skipmissing`
-
- ```julia
- julia> df = DataFrame(a = [1, missing], b = [3, 4]);
-
- julia> @subset df @skipmissing :a .== 1
- 1×2 DataFrame
- Row │ a b
- │ Int64? Int64
- ─────┼───────────────
- 1 │ 1 3
- ```
+ `@subset` treats `missing` values as `false` when filtering rows.
+ Unlike `DataFrames.subset` and other boolean operations with
+ `missing`, `@subset` will *not* error on missing values, and
+ will only keep `true` values.
If an expression provided to `@subset` begins with `@byrow`, operations
are applied "by row" along the data frame. To avoid writing `@byrow` multiple
@@ -598,6 +587,156 @@ macro where(x, args...)
esc(where_helper(x, args...))
end
+function subset!_helper(x, args...)
+ exprs, outer_flags = create_args_vector(args...)
+ t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs)
+ quote
+ $subset!($x, $(t...); skipmissing=true)
+ end
+end
+
+"""
+ @subset!(d, i...)
+
+Select row subsets in `AbstractDataFrame`s and `GroupedDataFrame`s,
+mutating the underlying data-frame in-place.
+
+### Arguments
+
+* `d` : an AbstractDataFrame or GroupedDataFrame
+* `i...` : expression for selecting rows
+
+Multiple `i` expressions are "and-ed" together.
+
+If given a `GroupedDataFrame`, `@subset!` applies transformations by
+group, and updates the parent data frame in place so that it keeps only
+the rows for which the generated values are all `true`.
+
+Inputs to `@subset!` can come in two formats: a `begin ... end` block, in which case each
+line is a separate selector, or as multiple arguments.
+For example the following two statements are equivalent:
+
+```julia
+@subset! df begin
+ :x .> 1
+ :y .< 2
+end
+```
+
+and
+
+```
+@subset!(df, :x .> 1, :y .< 2)
+```
+
+!!! note
+ `@subset!` treats `missing` values as `false` when filtering rows.
+ Unlike `DataFrames.subset!` and other boolean operations with
+ `missing`, `@subset!` will *not* error on missing values, and
+ will only keep `true` values.
+
+If an expression provided to `@subset!` begins with `@byrow`, operations
+are applied "by row" along the data frame. To avoid writing `@byrow` multiple
+times, `@subset!` also allows `@byrow` to be placed at the beginning of a block of
+operations. For example, the following two statements are equivalent.
+
+```
+@subset! df @byrow begin
+ :x > 1
+ :y < 2
+end
+```
+
+and
+
+```
+@subset! df begin
+ @byrow :x > 1
+ @byrow :y < 2
+end
+```
+
+### Examples
+
+```jldoctest
+julia> using DataFramesMeta, Statistics
+
+julia> df = DataFrame(x = 1:3, y = [2, 1, 2]);
+
+julia> globalvar = [2, 1, 0];
+
+julia> @subset!(copy(df), :x .> 1)
+2×2 DataFrame
+ Row │ x y
+ │ Int64 Int64
+─────┼──────────────
+ 1 │ 2 1
+ 2 │ 3 2
+
+julia> @subset!(copy(df), :x .> globalvar)
+2×2 DataFrame
+ Row │ x y
+ │ Int64 Int64
+─────┼──────────────
+ 1 │ 2 1
+ 2 │ 3 2
+
+julia> @subset! copy(df) begin
+ :x .> globalvar
+ :y .== 3
+end
+0×2 DataFrame
+
+julia> d = DataFrame(n = 1:20, x = [3, 3, 3, 3, 1, 1, 1, 2, 1, 1,
+ 2, 1, 1, 2, 2, 2, 3, 1, 1, 2]);
+
+julia> g = groupby(d, :x);
+
+julia> @subset!(groupby(copy(d), :x), :n .> mean(:n))
+8×2 DataFrame
+ Row │ n x
+ │ Int64 Int64
+─────┼──────────────
+ 1 │ 12 1
+ 2 │ 13 1
+ 3 │ 15 2
+ 4 │ 16 2
+ 5 │ 17 3
+ 6 │ 18 1
+ 7 │ 19 1
+ 8 │ 20 2
+
+julia> @subset! groupby(copy(d), :x) begin
+ :n .> mean(:n)
+ :n .< 20
+ end
+7×2 DataFrame
+ Row │ n x
+ │ Int64 Int64
+─────┼──────────────
+ 1 │ 12 1
+ 2 │ 13 1
+ 3 │ 15 2
+ 4 │ 16 2
+ 5 │ 17 3
+ 6 │ 18 1
+ 7 │ 19 1
+
+julia> d = DataFrame(a = [1, 2, missing], b = ["x", "y", missing]);
+
+julia> @subset!(d, :a .== 1)
+1×2 DataFrame
+│ Row │ a │ b │
+│ │ Int64? │ String? │
+├─────┼────────┼─────────┤
+│ 1 │ 1 │ x │
+```
+"""
+macro subset!(x, args...)
+ esc(subset!_helper(x, args...))
+end
+
+
##############################################################################
##
## @orderby
diff --git a/src/parsing.jl b/src/parsing.jl
index 10835bd5..d196a824 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -64,8 +64,8 @@ end
is_macro_head(ex, name) = false
is_macro_head(ex::Expr, name) = ex.head == :macrocall && ex.args[1] == Symbol(name)
-extract_macro_flags(ex, exprflags = (;Symbol("@byrow") => Ref(false), Symbol("@skipmissing") => Ref(false))) = (ex, exprflags)
-function extract_macro_flags(ex::Expr, exprflags = (;Symbol("@byrow") => Ref(false), Symbol("@skipmissing") => Ref(false)))
+extract_macro_flags(ex, exprflags = (;Symbol("@byrow") => Ref(false),)) = (ex, exprflags)
+function extract_macro_flags(ex::Expr, exprflags = (;Symbol("@byrow") => Ref(false),))
if ex.head == :macrocall
macroname = ex.args[1]
if macroname in keys(exprflags)
From 2ddee3856b8fa63bb9a41f4e642bd9886159ac86 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Sat, 26 Jun 2021 11:47:52 -0400
Subject: [PATCH 04/13] tests
---
test/dataframes.jl | 66 ---------------------
test/deprecated.jl | 67 +++++++++++++++++++++
test/runtests.jl | 1 +
test/subset.jl | 143 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 211 insertions(+), 66 deletions(-)
create mode 100644 test/subset.jl
diff --git a/test/dataframes.jl b/test/dataframes.jl
index 162ec30f..9f045eda 100644
--- a/test/dataframes.jl
+++ b/test/dataframes.jl
@@ -595,72 +595,6 @@ end
@test @with(df, cols("A")) === df.A
end
-@testset "where" begin
- df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
-
- x = [2, 1, 0, 0]
-
- @test @where(df, :A .> 1) == df[(df.A .> 1) .=== true,:]
- @test @where(df, :B .> 1) == df[df.B .> 1,:]
- @test @where(df, :A .> x) == df[(df.A .> x) .=== true,:]
- @test @where(df, :B .> x) ≅ df[df.B .> x,:]
- @test @where(df, :A .> :B, :B .> mean(:B)) == DataFrame(A = 3, B = 2)
- @test @where(df, :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
- @test @where(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
-
- @test @where(df, :A .> 1).A isa Vector{Union{Missing, Int}}
-
- @test @where(df, cols(:A) .> 1) == df[(df.A .> 1) .=== true,:]
- @test @where(df, cols(:B) .> 1) == df[df.B .> 1,:]
- @test @where(df, cols(:A) .> x) == df[(df.A .> x) .=== true,:]
- @test @where(df, cols(:B) .> x) ≅ df[df.B .> x,:]
- @test @where(df, cols(:A) .> :B, cols(:B) .> mean(:B)) == DataFrame(A = 3, B = 2)
- @test @where(df, cols(:A) .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
- @test @where(df, cols(:A) .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
-
- @test @where(df, :A .> 1, :A .<= 2) == DataFrame(A = 2, B = 1)
-
- subdf = @view df[df.B .== 2, :]
-
- @test @where(subdf, :A .== 3) == DataFrame(A = 3, B = 2)
-end
-
-@testset "where with :block" begin
- df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
-
- d = @where df begin
- :A .> 1
- :B .> 1
- end
- @test d ≅ @where(df, :A .> 1, :B .> 1)
-
- d = @where df begin
- cols(:A) .> 1
- :B .> 1
- end
- @test d ≅ @where(df, :A .> 1, :B .> 1)
-
- d = @where df begin
- :A .> 1
- cols(:B) .> 1
- end
- @test d ≅ @where(df, :A .> 1, :B .> 1)
-
- d = @where df begin
- begin
- :A .> 1
- end
- :B .> 1
- end
- @test d ≅ @where(df, :A .> 1, :B .> 1)
-
- d = @where df begin
- :A .> 1
- @. :B > 1
- end
- @test d ≅ @where(df, :A .> 1, :B .> 1)
-end
-
@testset "orderby" begin
df = DataFrame(
g = [1, 1, 1, 2, 2],
diff --git a/test/deprecated.jl b/test/deprecated.jl
index e0f68b35..968fc589 100644
--- a/test/deprecated.jl
+++ b/test/deprecated.jl
@@ -77,4 +77,71 @@ const ≅ = isequal
@test @based_on(gd, cols("new" * "_" * "column") = 2)."new_column" == [2, 2]
end
+@testset "where" begin
+ df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
+
+ x = [2, 1, 0, 0]
+
+ @test @where(df, :A .> 1) == df[(df.A .> 1) .=== true,:]
+ @test @where(df, :B .> 1) == df[df.B .> 1,:]
+ @test @where(df, :A .> x) == df[(df.A .> x) .=== true,:]
+ @test @where(df, :B .> x) ≅ df[df.B .> x,:]
+ @test @where(df, :A .> :B, :B .> mean(:B)) == DataFrame(A = 3, B = 2)
+ @test @where(df, :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
+ @test @where(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
+
+ @test @where(df, :A .> 1).A isa Vector{Union{Missing, Int}}
+
+ @test @where(df, cols(:A) .> 1) == df[(df.A .> 1) .=== true,:]
+ @test @where(df, cols(:B) .> 1) == df[df.B .> 1,:]
+ @test @where(df, cols(:A) .> x) == df[(df.A .> x) .=== true,:]
+ @test @where(df, cols(:B) .> x) ≅ df[df.B .> x,:]
+ @test @where(df, cols(:A) .> :B, cols(:B) .> mean(:B)) == DataFrame(A = 3, B = 2)
+ @test @where(df, cols(:A) .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
+ @test @where(df, cols(:A) .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
+
+ @test @where(df, :A .> 1, :A .<= 2) == DataFrame(A = 2, B = 1)
+
+ subdf = @view df[df.B .== 2, :]
+
+ @test @where(subdf, :A .== 3) == DataFrame(A = 3, B = 2)
+end
+
+@testset "where with :block" begin
+ df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
+
+ d = @where df begin
+ :A .> 1
+ :B .> 1
+ end
+ @test d ≅ @where(df, :A .> 1, :B .> 1)
+
+ d = @where df begin
+ cols(:A) .> 1
+ :B .> 1
+ end
+ @test d ≅ @where(df, :A .> 1, :B .> 1)
+
+ d = @where df begin
+ :A .> 1
+ cols(:B) .> 1
+ end
+ @test d ≅ @where(df, :A .> 1, :B .> 1)
+
+ d = @where df begin
+ begin
+ :A .> 1
+ end
+ :B .> 1
+ end
+ @test d ≅ @where(df, :A .> 1, :B .> 1)
+
+ d = @where df begin
+ :A .> 1
+ @. :B > 1
+ end
+ @test d ≅ @where(df, :A .> 1, :B .> 1)
+end
+
+
end # module
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 36e12417..3fb003b3 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -5,6 +5,7 @@ quiet = false
my_tests = ["dataframes.jl",
"eachrow.jl",
"grouping.jl",
+ "subset.jl",
"function_compilation.jl",
"chaining.jl",
"linqmacro.jl",
diff --git a/test/subset.jl b/test/subset.jl
new file mode 100644
index 00000000..241f6182
--- /dev/null
+++ b/test/subset.jl
@@ -0,0 +1,143 @@
+module TestSubset
+
+using Test
+using DataFrames
+using DataFramesMeta
+using Statistics
+
+const ≅ = isequal
+
+@testset "subset" begin
+ df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
+
+ x = [2, 1, 0, 0]
+
+ @test @subset(df, :A .> 1) == df[(df.A .> 1) .=== true,:]
+ @test @subset(df, :B .> 1) == df[df.B .> 1,:]
+ @test @subset(df, :A .> x) == df[(df.A .> x) .=== true,:]
+ @test @subset(df, :B .> x) ≅ df[df.B .> x,:]
+ @test @subset(df, :A .> :B, :B .> mean(:B)) == DataFrame(A = 3, B = 2)
+ @test @subset(df, :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
+ @test @subset(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
+
+ @test @subset(df, :A .> 1).A isa Vector{Union{Missing, Int}}
+
+ @test @subset(df, cols(:A) .> 1) == df[(df.A .> 1) .=== true,:]
+ @test @subset(df, cols(:B) .> 1) == df[df.B .> 1,:]
+ @test @subset(df, cols(:A) .> x) == df[(df.A .> x) .=== true,:]
+ @test @subset(df, cols(:B) .> x) ≅ df[df.B .> x,:]
+ @test @subset(df, cols(:A) .> :B, cols(:B) .> mean(:B)) == DataFrame(A = 3, B = 2)
+ @test @subset(df, cols(:A) .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
+ @test @subset(df, cols(:A) .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
+
+ @test @subset(df, :A .> 1, :A .<= 2) == DataFrame(A = 2, B = 1)
+
+ subdf = @view df[df.B .== 2, :]
+
+ @test @subset(subdf, :A .== 3) == DataFrame(A = 3, B = 2)
+end
+
+@testset "subset with :block" begin
+ df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
+
+ d = @subset df begin
+ :A .> 1
+ :B .> 1
+ end
+ @test d ≅ @subset(df, :A .> 1, :B .> 1)
+
+ d = @subset df begin
+ cols(:A) .> 1
+ :B .> 1
+ end
+ @test d ≅ @subset(df, :A .> 1, :B .> 1)
+
+ d = @subset df begin
+ :A .> 1
+ cols(:B) .> 1
+ end
+ @test d ≅ @subset(df, :A .> 1, :B .> 1)
+
+ d = @subset df begin
+ begin
+ :A .> 1
+ end
+ :B .> 1
+ end
+ @test d ≅ @subset(df, :A .> 1, :B .> 1)
+
+ d = @subset df begin
+ :A .> 1
+ @. :B > 1
+ end
+ @test d ≅ @subset(df, :A .> 1, :B .> 1)
+end
+
+
+@testset "subset!" begin
+ df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
+
+ x = [2, 1, 0, 0]
+
+ @test @subset!(copy(df), :A .> 1) == df[(df.A .> 1) .=== true,:]
+ @test @subset!(copy(df), :B .> 1) == df[df.B .> 1,:]
+ @test @subset!(copy(df), :A .> x) == df[(df.A .> x) .=== true,:]
+ @test @subset!(copy(df), :B .> x) ≅ df[df.B .> x,:]
+ @test @subset!(copy(df), :A .> :B, :B .> mean(:B)) == DataFrame(A = 3, B = 2)
+ @test @subset!(copy(df), :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
+ @test @subset!(copy(df), :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
+
+ @test @subset!(copy(df), :A .> 1).A isa Vector{Union{Missing, Int}}
+
+ @test @subset!(copy(df), cols(:A) .> 1) == df[(df.A .> 1) .=== true,:]
+ @test @subset!(copy(df), cols(:B) .> 1) == df[df.B .> 1,:]
+ @test @subset!(copy(df), cols(:A) .> x) == df[(df.A .> x) .=== true,:]
+ @test @subset!(copy(df), cols(:B) .> x) ≅ df[df.B .> x,:]
+ @test @subset!(copy(df), cols(:A) .> :B, cols(:B) .> mean(:B)) == DataFrame(A = 3, B = 2)
+ @test @subset!(copy(df), cols(:A) .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
+ @test @subset!(copy(df), cols(:A) .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
+
+ @test @subset!(copy(df), :A .> 1, :A .<= 2) == DataFrame(A = 2, B = 1)
+
+ subdf = @view df[df.B .== 2, :]
+
+ @test @subset!(copy(subdf), :A .== 3) == DataFrame(A = 3, B = 2)
+end
+
+@testset "subset! with :block" begin
+ df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
+
+ d = @subset! copy(df) begin
+ :A .> 1
+ :B .> 1
+ end
+ @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1)
+
+ d = @subset! copy(df) begin
+ cols(:A) .> 1
+ :B .> 1
+ end
+ @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1)
+
+ d = @subset! copy(df) begin
+ :A .> 1
+ cols(:B) .> 1
+ end
+ @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1)
+
+ d = @subset! copy(df) begin
+ begin
+ :A .> 1
+ end
+ :B .> 1
+ end
+ @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1)
+
+ d = @subset! copy(df) begin
+ :A .> 1
+ @. :B > 1
+ end
+ @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1)
+end
+
+end # module
\ No newline at end of file
From c815d1a2b660b5da9e17c298e38e57c89dfe21af Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Sat, 26 Jun 2021 11:58:49 -0400
Subject: [PATCH 05/13] update index.md
---
docs/src/index.md | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/docs/src/index.md b/docs/src/index.md
index bf315786..d496f7ec 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -12,7 +12,7 @@ in C#.
In addition, DataFramesMeta provides
* `@orderby`, for sorting data frames
-* `@where`, for keeping rows of a DataFrame matching a given condition
+* `@subset` and `@subset!`, for keeping rows of a DataFrame matching a given condition
* `@by`, for grouping and combining a data frame in a single step
* `@with`, for working with the columns of a data frame with high performance and
convenient syntax
@@ -96,18 +96,21 @@ gd = groupby(df, :x);
@transform!(gd, y = 2 .* :y .* first(:y))
```
-## `@where`
+## `@subset` and `@subset!`
Select row subsets. Operates on both a `DataFrame` and a `GroupedDataFrame`.
+`@subset` always returns a freshly-allocated data frame whereas
+`@subset!` modifies the data frame in-place.
```julia
+using Statistics
df = DataFrame(x = [1, 1, 2, 2], y = [1, 2, 101, 102]);
gd = groupby(df, :x);
outside_var = 1;
-@where(df, :x .> 1)
-@where(df, :x .> outside_var)
-@where(df, :x .> outside_var, :y .< 102) # the two expressions are "and-ed"
-@where(gd, :x .> mean(:x))
+@subset(df, :x .> 1)
+@subset(df, :x .> outside_var)
+@subset(df, :x .> outside_var, :y .< 102) # the two expressions are "and-ed"
+@subset(gd, :x .> mean(:x))
```
## `@combine`
@@ -300,7 +303,7 @@ The following macros accept `@byrow`:
* `@transform` and `@transform!`, `@select`, `@select!`, and `@combine`.
`@byrow` can be used in the left hand side of expressions, e.g.
`@select(df, @byrow z = :x * :y)`.
-* `@where` and `@orderby`, with syntax of the form `@where(df, @byrow :x > :y)`
+* `@subset`, `@subset!` and `@orderby`, with syntax of the form `@subset(df, @byrow :x > :y)`
* `@with`, where the anonymous function created by `@with` is wrapped in
`ByRow`, as in `@with(df, @byrow :x * :y)`.
From f2dde1ef968c3dae582f0e6557e8348ec758d0f2 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Sun, 27 Jun 2021 15:32:46 -0400
Subject: [PATCH 06/13] add docstring
---
src/parsing.jl | 15 ++++++++-------
test/dataframes.jl | 6 +++---
test/deprecated.jl | 15 +++++++++++++++
test/grouping.jl | 15 ---------------
test/subset.jl | 16 ++++++++++++++++
5 files changed, 42 insertions(+), 25 deletions(-)
diff --git a/src/parsing.jl b/src/parsing.jl
index 32ca666e..07cdf452 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -336,19 +336,20 @@ function replace_dotted!(e, membernames)
Expr(:., x_new, y_new)
end
-"""
- create_args_vector(args...) -> vec, wrap_byrow
-
-Given multiple arguments which can be any type
-of expression-like object (`Expr`, `QuoteNode`, etc.),
-puts them into a single array, removing line numbers.
-"""
function create_args_vector(args...)
create_args_vector(Expr(:block, args...))
end
"""
create_args_vector(arg) -> vec, outer_flags
+
+Given an expression, return a vector of operations
+and a `NamedTuple` of the macro-flags that appear
+in the expression.
+
+If a `:block` expression, returns the `args` of
+the block as an array. If a simple expression,
+wrap the expression in a one-element vector.
"""
function create_args_vector(arg)
arg, outer_flags = extract_macro_flags(MacroTools.unblock(arg))
diff --git a/test/dataframes.jl b/test/dataframes.jl
index 9f045eda..84e3c770 100644
--- a/test/dataframes.jl
+++ b/test/dataframes.jl
@@ -700,17 +700,17 @@ end
@test d.y == [true]
- d = @where(df, @linenums_macro begin end)
+ d = @subset(df, @linenums_macro begin end)
@test nrow(d) == 1
- d = @where df begin
+ d = @subset df begin
@byrow @linenums_macro_byrow begin end
end
@test nrow(d) == 1
- d = @where df @byrow begin
+ d = @subset df @byrow begin
@linenums_macro_byrow begin end
end
diff --git a/test/deprecated.jl b/test/deprecated.jl
index 968fc589..0412d57a 100644
--- a/test/deprecated.jl
+++ b/test/deprecated.jl
@@ -143,5 +143,20 @@ end
@test d ≅ @where(df, :A .> 1, :B .> 1)
end
+@testset "@where with a grouped data frame" begin
+ df = DataFrame(
+ g = [1, 1, 1, 2, 2],
+ i = 1:5,
+ t = ["a", "b", "c", "c", "e"],
+ y = [:v, :w, :x, :y, :z],
+ c = [:g, :quote, :body, :transform, missing]
+ )
+
+ gd = groupby(df, :g)
+
+ @test @where(gd, :i .== first(:i)) ≅ df[[1, 4], :]
+ @test @where(gd, cols(:i) .> mean(cols(:i)), :t .== "c") ≅ df[[3], :]
+ @test @where(gd, :c .== :g) ≅ df[[], :]
+end
end # module
\ No newline at end of file
diff --git a/test/grouping.jl b/test/grouping.jl
index ca7957cb..c603c2a5 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -357,19 +357,4 @@ end
@test @select(g, :a, @byrow t = :a ^ 2).t ≅ d.a .^ 2
end
-@testset "@where with a grouped data frame" begin
- df = DataFrame(
- g = [1, 1, 1, 2, 2],
- i = 1:5,
- t = ["a", "b", "c", "c", "e"],
- y = [:v, :w, :x, :y, :z],
- c = [:g, :quote, :body, :transform, missing]
- )
-
- gd = groupby(df, :g)
-
- @test @where(gd, :i .== first(:i)) ≅ df[[1, 4], :]
- @test @where(gd, cols(:i) .> mean(cols(:i)), :t .== "c") ≅ df[[3], :]
- @test @where(gd, :c .== :g) ≅ df[[], :]
-end
end # module
diff --git a/test/subset.jl b/test/subset.jl
index 241f6182..00959dd7 100644
--- a/test/subset.jl
+++ b/test/subset.jl
@@ -140,4 +140,20 @@ end
@test d ≅ @subset!(copy(df), :A .> 1, :B .> 1)
end
+@testset "@subset with a grouped data frame" begin
+ df = DataFrame(
+ g = [1, 1, 1, 2, 2],
+ i = 1:5,
+ t = ["a", "b", "c", "c", "e"],
+ y = [:v, :w, :x, :y, :z],
+ c = [:g, :quote, :body, :transform, missing]
+ )
+
+ gd = groupby(df, :g)
+
+ @test @subset(gd, :i .== first(:i)) ≅ df[[1, 4], :]
+ @test @subset(gd, cols(:i) .> mean(cols(:i)), :t .== "c") ≅ df[[3], :]
+ @test @subset(gd, :c .== :g) ≅ df[[], :]
+end
+
end # module
\ No newline at end of file
From 7c14a205b54ef1de00deaba452da4043888a7a77 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Sun, 27 Jun 2021 19:22:13 -0400
Subject: [PATCH 07/13] Apply suggestions from code review
Co-authored-by: Milan Bouchet-Valat
---
src/macros.jl | 4 ++--
src/parsing.jl | 5 ++++-
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index 035b9c77..6326c287 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -472,7 +472,7 @@ and
!!! note
`@subset` treats `missing` values as `false` when filtering rows.
- Unlike `DataFrames.subset` and other boolean operations with
+ Unlike `DataFrames.subset` and other Boolean operations with
`missing`, `@subset` will *not* error on missing values, and
will only keep `true` values.
@@ -631,7 +631,7 @@ and
!!! note
`@subset!` treats `missing` values as `false` when filtering rows.
- Unlike `DataFrames.subset!` and other boolean operations with
+ Unlike `DataFrames.subset!` and other Boolean operations with
`missing`, `@subset!` will *not* error on missing values, and
will only keep `true` values.
diff --git a/src/parsing.jl b/src/parsing.jl
index d196a824..32ca666e 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -183,7 +183,10 @@ end
# We need wrap_byrow as a keyword argument here in case someone
# uses `@transform df @byrow begin ... end`, which we
# deal with outside of this function.
-function fun_to_vec(ex::Expr; gensym_names::Bool=false, outer_flags=nothing, no_dest::Bool=false)
+function fun_to_vec(ex::Expr;
+ gensym_names::Bool=false,
+ outer_flags::Union{NamedTuple, Nothing}=nothing,
+ no_dest::Bool=false)
# classify the type of expression
# :x # handled via dispatch
# cols(:x) # handled as though above
From f83161f0e9b6c488c356daa4425f51dc73582ed5 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Sun, 27 Jun 2021 19:28:33 -0400
Subject: [PATCH 08/13] Apply suggestions from code review
Co-authored-by: Milan Bouchet-Valat
---
docs/src/index.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/src/index.md b/docs/src/index.md
index d496f7ec..f1139d68 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -12,7 +12,7 @@ in C#.
In addition, DataFramesMeta provides
* `@orderby`, for sorting data frames
-* `@subset` and `@subset!`, for keeping rows of a DataFrame matching a given condition
+* `@subset` and `@subset!`, for keeping rows of a data frame matching a given condition
* `@by`, for grouping and combining a data frame in a single step
* `@with`, for working with the columns of a data frame with high performance and
convenient syntax
From 6f956dc32736f58f028519c7c4e5b1d0f76ed31f Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Sun, 27 Jun 2021 19:34:09 -0400
Subject: [PATCH 09/13] Update test/subset.jl
Co-authored-by: Milan Bouchet-Valat
---
test/subset.jl | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/test/subset.jl b/test/subset.jl
index 00959dd7..1f758f56 100644
--- a/test/subset.jl
+++ b/test/subset.jl
@@ -79,7 +79,9 @@ end
x = [2, 1, 0, 0]
- @test @subset!(copy(df), :A .> 1) == df[(df.A .> 1) .=== true,:]
+ df2 = copy(df)
+ @test @subset!(df2, :A .> 1) === df2
+ @test df2 == df[(df.A .> 1) .=== true,:]
@test @subset!(copy(df), :B .> 1) == df[df.B .> 1,:]
@test @subset!(copy(df), :A .> x) == df[(df.A .> x) .=== true,:]
@test @subset!(copy(df), :B .> x) ≅ df[df.B .> x,:]
@@ -156,4 +158,4 @@ end
@test @subset(gd, :c .== :g) ≅ df[[], :]
end
-end # module
\ No newline at end of file
+end # module
From a75d81770aa03936abaa4ef602731a7f91bca841 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Mon, 28 Jun 2021 14:36:27 -0400
Subject: [PATCH 10/13] switching
---
test/subset.jl | 1 +
1 file changed, 1 insertion(+)
diff --git a/test/subset.jl b/test/subset.jl
index 1f758f56..03ed5ebe 100644
--- a/test/subset.jl
+++ b/test/subset.jl
@@ -82,6 +82,7 @@ end
df2 = copy(df)
@test @subset!(df2, :A .> 1) === df2
@test df2 == df[(df.A .> 1) .=== true,:]
+
@test @subset!(copy(df), :B .> 1) == df[df.B .> 1,:]
@test @subset!(copy(df), :A .> x) == df[(df.A .> x) .=== true,:]
@test @subset!(copy(df), :B .> x) ≅ df[df.B .> x,:]
From 7991b8c1f0329d3920fb8913fb011f81c94ed0f7 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Mon, 28 Jun 2021 14:51:15 -0400
Subject: [PATCH 11/13] @subset! with gd
---
test/subset.jl | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/test/subset.jl b/test/subset.jl
index 03ed5ebe..d501be18 100644
--- a/test/subset.jl
+++ b/test/subset.jl
@@ -159,4 +159,18 @@ end
@test @subset(gd, :c .== :g) ≅ df[[], :]
end
+@testset "@subset! with a grouped data frame" begin
+ df = DataFrame(
+ g = [1, 1, 1, 2, 2],
+ i = 1:5,
+ t = ["a", "b", "c", "c", "e"],
+ y = [:v, :w, :x, :y, :z],
+ c = [:g, :quote, :body, :transform, missing]
+ )
+
+ @test @subset!(groupby(copy(df), :g), :i .== first(:i)) ≅ df[[1, 4], :]
+ @test @subset!(groupby(copy(df), :g), cols(:i) .> mean(cols(:i)), :t .== "c") ≅ df[[3], :]
+ @test @subset!(groupby(copy(df), :g), :c .== :g) ≅ df[[], :]
+end
+
end # module
From fa09626e5ed6345b194b1d81746961f3a049af45 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Mon, 28 Jun 2021 18:47:53 -0400
Subject: [PATCH 12/13] Update src/parsing.jl
Co-authored-by: Milan Bouchet-Valat
---
src/parsing.jl | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/parsing.jl b/src/parsing.jl
index 07cdf452..ab6bdf6f 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -308,7 +308,10 @@ function fun_to_vec(ex::Expr;
throw(ArgumentError("This path should not be reached"))
end
-fun_to_vec(ex::QuoteNode; no_dest::Bool=false, gensym_names::Bool=false, outer_flags=nothing) = ex
+fun_to_vec(ex::QuoteNode;
+ no_dest::Bool=false,
+ gensym_names::Bool=false,
+ outer_flags::Union{NamedTuple, Nothing}=nothing) = ex
function make_source_concrete(x::AbstractVector)
if isempty(x) || isconcretetype(eltype(x))
From 1c485c8c25351f7846c91cf4fbfe35afdded2a69 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Mon, 28 Jun 2021 18:48:00 -0400
Subject: [PATCH 13/13] Update src/parsing.jl
Co-authored-by: Milan Bouchet-Valat
---
src/parsing.jl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/parsing.jl b/src/parsing.jl
index ab6bdf6f..e5a4427d 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -350,7 +350,7 @@ Given an expression return a vector of operations
and a `NamedTuple` of the macro-flags that appear
in the expression.
-If a `:block` expression, returns the `args` of
+If a `:block` expression, return the `args` of
the block as an array. If a simple expression,
wrap the expression in a one-element vector.
"""