From 4514eb0f08d20f33d474754467ee13ba9a3c5a89 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 24 Jun 2021 16:53:39 -0400 Subject: [PATCH 01/13] inital commit --- src/DataFramesMeta.jl | 9 ++- src/macros.jl | 127 +++++++++++++++++++++--------------------- src/parsing.jl | 49 ++++------------ test/dataframes.jl | 4 +- 4 files changed, 84 insertions(+), 105 deletions(-) diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index c1f4ad20..f2974d5b 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -7,11 +7,14 @@ using MacroTools @reexport using DataFrames # Basics: -export @with, @where, @orderby, @transform, @by, @combine, @select, - @transform!, @select!, +export @with, + @subset, @subset!, + @orderby, + @by, @combine, + @transform, @select, @transform!, @select!, @eachrow, @eachrow!, @byrow, - @based_on # deprecated + @based_on, @where # deprecated include("parsing.jl") include("macros.jl") diff --git a/src/macros.jl b/src/macros.jl index 83e6b3e0..b40cf52f 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -415,50 +415,31 @@ macro with(d, body) end - ############################################################################## ## -## @where - select row subsets +## @subset - select row subsets ## ############################################################################## -function where_helper(x, args...) - exprs, wrap_byrow = create_args_vector(args...) - t = (fun_to_vec(ex; gensym_names = true, wrap_byrow = wrap_byrow) for ex in exprs) +function subset_helper(x, args...) + exprs, outer_flags = create_args_vector(args...) 
+ t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs) + skipmissing = outer_flags[Symbol("@skipmissing")][] quote - $where($x, $(t...)) + $subset($x, $(t...); skipmissing=$skipmissing) end end -function df_to_bool(res::AbstractDataFrame) - if any(t -> !(t isa AbstractVector{<:Union{Missing, Bool}}), eachcol(res)) - throw(ArgumentError("All arguments in @where must return an " * - "AbstractVector{<:Union{Missing, Bool}}")) +function where_helper(x, args...) + exprs, outer_flags = create_args_vector(args...) + t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs) + quote + $subset($x, $(t...); skipmissing=true) end - - return reduce((x, y) -> x .& y, eachcol(res)) .=== true -end - -function where(df::AbstractDataFrame, @nospecialize(args...)) - res = DataFrames.select(df, args...; copycols = false) - tokeep = df_to_bool(res) - df[tokeep, :] -end - -function where(gd::GroupedDataFrame, @nospecialize(args...)) - res = DataFrames.select(gd, args...; copycols = false, keepkeys = false) - tokeep = df_to_bool(res) - parent(gd)[tokeep, :] -end - -function where(df::SubDataFrame, @nospecialize(args...)) - res = DataFrames.select(df, args...) - tokeep = df_to_bool(res) - df[tokeep, :] end """ - @where(d, i...) + @subset(d, i...) Select row subsets in `AbstractDataFrame`s and `GroupedDataFrame`s. @@ -469,16 +450,16 @@ Select row subsets in `AbstractDataFrame`s and `GroupedDataFrame`s. Multiple `i` expressions are "and-ed" together. -If given a `GroupedDataFrame`, `@where` applies transformations by +If given a `GroupedDataFrame`, `@subset` applies transformations by group, and returns a fresh `DataFrame` containing the rows for which the generated values are all `true`. -Inputs to `@where` can come in two formats: a `begin ... end` block, in which case each +Inputs to `@subset` can come in two formats: a `begin ... end` block, in which case each line is a separate selector, or as multiple arguments. 
For example the following two statements are equivalent: ```julia -@where df begin +@subset df begin :x .> 1 :y .< 2 end @@ -487,22 +468,32 @@ end and ``` -@where(df, :x .> 1, :y .< 2) +@subset(df, :x .> 1, :y .< 2) ``` !!! note - `@where` treats `missing` values as `false` when filtering rows. - Unlike `DataFrames.filter` and other boolean operations with - `missing`, `@where` will *not* error on missing values, and - will only keep `true` values. - -If an expression provided to `@where` begins with `@byrow`, operations + `@subset` will error on `missing`, unlike `@where`. To + recover the old behavior of `@where`, use the macro-flag + `@skipmissing` + + ```julia + julia> df = DataFrame(a = [1, missing], b = [3, 4]); + + julia> @subset df @skipmissing :a .== 1 + 1×2 DataFrame + Row │ a b + │ Int64? Int64 + ─────┼─────────────── + 1 │ 1 3 + ``` + +If an expression provided to `@subset` begins with `@byrow`, operations are applied "by row" along the data frame. To avoid writing `@byrow` multiple times, `@orderby` also allows `@byrow`to be placed at the beginning of a block of operations. For example, the following two statements are equivalent. 
``` -@where df @byrow begin +@subset df @byrow begin :x > 1 :y < 2 end @@ -511,7 +502,7 @@ end and ``` -@orderby df +@subset df begin @byrow :x > 1 @byrow :y < 2 end @@ -526,7 +517,7 @@ julia> df = DataFrame(x = 1:3, y = [2, 1, 2]); julia> globalvar = [2, 1, 0]; -julia> @where(df, :x .> 1) +julia> @subset(df, :x .> 1) 2×2 DataFrame Row │ x y │ Int64 Int64 @@ -534,7 +525,7 @@ julia> @where(df, :x .> 1) 1 │ 2 1 2 │ 3 2 -julia> @where(df, :x .> globalvar) +julia> @subset(df, :x .> globalvar) 2×2 DataFrame Row │ x y │ Int64 Int64 @@ -542,7 +533,7 @@ julia> @where(df, :x .> globalvar) 1 │ 2 1 2 │ 3 2 -julia> @where df begin +julia> @subset df begin :x .> globalvar :y .== 3 end @@ -553,7 +544,7 @@ julia> d = DataFrame(n = 1:20, x = [3, 3, 3, 3, 1, 1, 1, 2, 1, 1, julia> g = groupby(d, :x); -julia> @where(g, :n .> mean(:n)) +julia> @subset(g, :n .> mean(:n)) 8×2 DataFrame Row │ n x │ Int64 Int64 @@ -567,7 +558,7 @@ julia> @where(g, :n .> mean(:n)) 7 │ 19 1 8 │ 20 2 -julia> @where g begin +julia> @subset g begin :n .> mean(:n) :n .< 20 end @@ -585,7 +576,7 @@ julia> @where g begin julia> d = DataFrame(a = [1, 2, missing], b = ["x", "y", missing]); -julia> @where(d, :a .== 1) +julia> @subset(d, :a .== 1) 1×2 DataFrame │ Row │ a │ b │ │ │ Int64? │ String? │ @@ -593,7 +584,17 @@ julia> @where(d, :a .== 1) │ 1 │ 1 │ x │ ``` """ +macro subset(x, args...) + esc(subset_helper(x, args...)) +end + +""" + @where(x, args...) + +Deprecated version of `@subset`, see `?@subset` for details. +""" macro where(x, args...) + @warn "`@where is deprecated, use `@subset` with `@skipmissing` instead." esc(where_helper(x, args...)) end @@ -604,8 +605,8 @@ end ############################################################################## function orderby_helper(x, args...) - exprs, wrap_byrow = create_args_vector(args...) - t = (fun_to_vec(ex; gensym_names = true, wrap_byrow = wrap_byrow) for ex in exprs) + exprs, outer_flags = create_args_vector(args...) 
+ t = (fun_to_vec(ex; gensym_names = true, outer_flags = outer_flags) for ex in exprs) quote $DataFramesMeta.orderby($x, $(t...)) end @@ -768,8 +769,8 @@ end function transform_helper(x, args...) - exprs, wrap_byrow = create_args_vector(args...) - t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs) + exprs, outer_flags = create_args_vector(args...) + t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote $DataFrames.transform($x, $(t...)) end @@ -886,8 +887,8 @@ end function transform!_helper(x, args...) - exprs, wrap_byrow = create_args_vector(args...) - t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs) + exprs, outer_flags = create_args_vector(args...) + t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote $DataFrames.transform!($x, $(t...)) end @@ -981,8 +982,8 @@ end ############################################################################## function select_helper(x, args...) - exprs, wrap_byrow = create_args_vector(args...) - t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs) + exprs, outer_flags = create_args_vector(args...) + t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote $DataFrames.select($x, $(t...)) end @@ -1095,8 +1096,8 @@ end ############################################################################## function select!_helper(x, args...) - exprs, wrap_byrow = create_args_vector(args...) - t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs) + exprs, outer_flags = create_args_vector(args...) + t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote $DataFrames.select!($x, $(t...)) end @@ -1206,7 +1207,7 @@ end function combine_helper(x, args...; deprecation_warning = false) deprecation_warning && @warn "`@based_on` is deprecated. Use `@combine` instead." 
- exprs, wrap_byrow = create_args_vector(args...) + exprs, outer_flags = create_args_vector(args...) fe = first(exprs) if length(exprs) == 1 && @@ -1218,7 +1219,7 @@ function combine_helper(x, args...; deprecation_warning = false) exprs = ((:(cols(AsTable) = $fe)),) end - t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs) + t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote $DataFrames.combine($x, $(t...)) @@ -1327,7 +1328,7 @@ end function by_helper(x, what, args...) # Only allow one argument when returning a Table object # Only allow one argument when returning a Table object - exprs, wrap_byrow = create_args_vector(args...) + exprs, outer_flags = create_args_vector(args...) fe = first(exprs) if length(exprs) == 1 && !(fe isa QuoteNode || onearg(fe, :cols)) && @@ -1338,7 +1339,7 @@ function by_helper(x, what, args...) exprs = ((:(cols(AsTable) = $fe)),) end - t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs) + t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote $DataFrames.combine($groupby($x, $what), $(t...)) diff --git a/src/parsing.jl b/src/parsing.jl index cd77f41c..10835bd5 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -64,8 +64,8 @@ end is_macro_head(ex, name) = false is_macro_head(ex::Expr, name) = ex.head == :macrocall && ex.args[1] == Symbol(name) -extract_macro_flags(ex, exprflags = (;Symbol("@byrow") => Ref(false),)) = (ex, exprflags) -function extract_macro_flags(ex::Expr, exprflags = (;Symbol("@byrow") => Ref(false),)) +extract_macro_flags(ex, exprflags = (;Symbol("@byrow") => Ref(false), Symbol("@skipmissing") => Ref(false))) = (ex, exprflags) +function extract_macro_flags(ex::Expr, exprflags = (;Symbol("@byrow") => Ref(false), Symbol("@skipmissing") => Ref(false))) if ex.head == :macrocall macroname = ex.args[1] if macroname in keys(exprflags) @@ -183,7 +183,7 @@ end # We need wrap_byrow as a 
keyword argument here in case someone # uses `@transform df @byrow begin ... end`, which we # deal with outside of this function. -function fun_to_vec(ex::Expr; gensym_names::Bool=false, no_dest::Bool=false, wrap_byrow::Bool=false) +function fun_to_vec(ex::Expr; gensym_names::Bool=false, outer_flags=nothing, no_dest::Bool=false) # classify the type of expression # :x # handled via dispatch # cols(:x) # handled as though above @@ -201,15 +201,16 @@ function fun_to_vec(ex::Expr; gensym_names::Bool=false, no_dest::Bool=false, wra # cols(y) = :x + 1 # re-write as complicated col, but RHS is :block # cols(:y) = cols(:x) + 1 # re-write as complicated call, RHS is block, use cols # `@byrow` before any of the above - ex, flags = extract_macro_flags(MacroTools.unblock(ex)) + ex, inner_flags = extract_macro_flags(MacroTools.unblock(ex)) # Use tuple syntax in future when we add more flags - wrap_byrow_t = flags[Symbol("@byrow")][] + inner_wrap_byrow = inner_flags[Symbol("@byrow")][] + outer_wrap_byrow = outer_flags === nothing ? false : outer_flags[Symbol("@byrow")][] - if wrap_byrow_t && wrap_byrow + if inner_wrap_byrow && outer_wrap_byrow throw(ArgumentError("Redundant @byrow calls.")) else - wrap_byrow = wrap_byrow || wrap_byrow_t + wrap_byrow = inner_wrap_byrow || outer_wrap_byrow end if gensym_names @@ -304,7 +305,7 @@ function fun_to_vec(ex::Expr; gensym_names::Bool=false, no_dest::Bool=false, wra throw(ArgumentError("This path should not be reached")) end -fun_to_vec(ex::QuoteNode; no_dest::Bool=false, gensym_names::Bool=false, wrap_byrow::Bool=false) = ex +fun_to_vec(ex::QuoteNode; no_dest::Bool=false, gensym_names::Bool=false, outer_flags=nothing) = ex function make_source_concrete(x::AbstractVector) if isempty(x) || isconcretetype(eltype(x)) @@ -344,33 +345,10 @@ function create_args_vector(args...) 
end """ - create_args_vector(arg) -> vec, wrap_byrow - -Normalize a single input to a vector of expressions, -with a `wrap_byrow` flag indicating that the -expressions should operate by row. - -If `arg` is a single `:block`, it is unnested. -Otherwise, return a single-element array. -Also removes line numbers. - -If `arg` is of the form `@byrow ...`, then -`wrap_byrow` is returned as `true`. + create_args_vector(arg) -> vec, outer_flags """ function create_args_vector(arg) - if arg isa Expr && is_macro_head(arg, "@byrow") - wrap_byrow = true - largs = length(arg.args) - if largs == 2 - throw(ArgumentError("No transformations supplied with `@byrow`")) - elseif largs == 3 - arg = arg.args[3] - else - arg = Expr(:block, arg.args[3:end]...) - end - else - wrap_byrow = false - end + arg, outer_flags = extract_macro_flags(MacroTools.unblock(arg)) if arg isa Expr && arg.head == :block x = MacroTools.rmlines(arg).args @@ -378,8 +356,5 @@ function create_args_vector(arg) x = Any[arg] end - if wrap_byrow && any(t -> is_macro_head(t, "@byrow"), x) - throw(ArgumentError("Redundant `@byrow` calls.")) - end - return x, wrap_byrow + return x, outer_flags end diff --git a/test/dataframes.jl b/test/dataframes.jl index b8767d6d..b56114d8 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -734,9 +734,9 @@ end macro linenums_macro(arg) if arg isa Expr && arg.head == :block && length(arg.args) == 1 && arg.args[1] isa LineNumberNode - esc(:(true)) + esc(:([true])) else - esc(:(false)) + esc(:([false])) end end From 2e5de90463308220f1ad6a7203800718027cb82a Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 24 Jun 2021 17:09:39 -0400 Subject: [PATCH 02/13] fix tests --- test/dataframes.jl | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/test/dataframes.jl b/test/dataframes.jl index b56114d8..162ec30f 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -740,6 +740,14 @@ macro linenums_macro(arg) end end +macro 
linenums_macro_byrow(arg) + if arg isa Expr && arg.head == :block && length(arg.args) == 1 && arg.args[1] isa LineNumberNode + esc(:(true)) + else + esc(:(false)) + end +end + @testset "removing lines" begin df = DataFrame(a = [1], b = [2]) # Can't use @test because @test remove line numbers @@ -753,7 +761,7 @@ end @test d.y == [true] d = @transform df @byrow begin - y = @linenums_macro begin end + y = @linenums_macro_byrow begin end end @test d.y == [true] @@ -763,13 +771,13 @@ end @test nrow(d) == 1 d = @where df begin - @byrow @linenums_macro begin end + @byrow @linenums_macro_byrow begin end end @test nrow(d) == 1 d = @where df @byrow begin - @linenums_macro begin end + @linenums_macro_byrow begin end end @test nrow(d) == 1 From eca50d475530affe1ec0c5bf26a8e2423c86dcb4 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Sat, 26 Jun 2021 11:40:46 -0400 Subject: [PATCH 03/13] no more skipmissing --- src/macros.jl | 173 ++++++++++++++++++++++++++++++++++++++++++++----- src/parsing.jl | 4 +- 2 files changed, 158 insertions(+), 19 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index b40cf52f..035b9c77 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -417,16 +417,15 @@ end ############################################################################## ## -## @subset - select row subsets +## @subset and subset! - select row subsets ## ############################################################################## function subset_helper(x, args...) exprs, outer_flags = create_args_vector(args...) t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs) - skipmissing = outer_flags[Symbol("@skipmissing")][] quote - $subset($x, $(t...); skipmissing=$skipmissing) + $subset($x, $(t...); skipmissing=true) end end @@ -472,20 +471,10 @@ and ``` !!! note - `@subset` will error on `missing`, unlike `@where`. 
To - recover the old behavior of `@where`, use the macro-flag - `@skipmissing` - - ```julia - julia> df = DataFrame(a = [1, missing], b = [3, 4]); - - julia> @subset df @skipmissing :a .== 1 - 1×2 DataFrame - Row │ a b - │ Int64? Int64 - ─────┼─────────────── - 1 │ 1 3 - ``` + `@subset` treats `missing` values as `false` when filtering rows. + Unlike `DataFrames.subset` and other boolean operations with + `missing`, `@subset` will *not* error on missing values, and + will only keep `true` values. If an expression provided to `@subset` begins with `@byrow`, operations are applied "by row" along the data frame. To avoid writing `@byrow` multiple @@ -598,6 +587,156 @@ macro where(x, args...) esc(where_helper(x, args...)) end +function subset!_helper(x, args...) + exprs, outer_flags = create_args_vector(args...) + t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs) + quote + $subset!($x, $(t...); skipmissing=true) + end +end + +""" + @subset!(d, i...) + +Select row subsets in `AbstractDataFrame`s and `GroupedDataFrame`s, +mutating the underlying data-frame in-place. + +### Arguments + +* `d` : an AbstractDataFrame or GroupedDataFrame +* `i...` : expression for selecting rows + +Multiple `i` expressions are "and-ed" together. + +If given a `GroupedDataFrame`, `@subset!` applies transformations by +group, and returns a fresh `DataFrame` containing the rows +for which the generated values are all `true`. + +Inputs to `@subset!` can come in two formats: a `begin ... end` block, in which case each +line is a separate selector, or as multiple arguments. +For example the following two statements are equivalent: + +```julia +@subset! df begin + :x .> 1 + :y .< 2 +end +``` + +and + +``` +@subset!(df, :x .> 1, :y .< 2) +``` + +!!! note + `@subset!` treats `missing` values as `false` when filtering rows. 
+ Unlike `DataFrames.subset!` and other boolean operations with + `missing`, `@subset!` will *not* error on missing values, and + will only keep `true` values. + +If an expression provided to `@subset!` begins with `@byrow`, operations +are applied "by row" along the data frame. To avoid writing `@byrow` multiple +times, `@subset!` also allows `@byrow` to be placed at the beginning of a block of +operations. For example, the following two statements are equivalent. + +``` +@subset! df @byrow begin + :x > 1 + :y < 2 +end +``` + +and + +``` +@subset! df begin + @byrow :x > 1 + @byrow :y < 2 +end +``` + +### Examples + +```jldoctest +julia> using DataFramesMeta, Statistics + +julia> df = DataFrame(x = 1:3, y = [2, 1, 2]); + +julia> globalvar = [2, 1, 0]; + +julia> @subset!(df, :x .> 1) +2×2 DataFrame + Row │ x y + │ Int64 Int64 +─────┼────────────── + 1 │ 2 1 + 2 │ 3 2 + +julia> @subset!(df, :x .> globalvar) +2×2 DataFrame + Row │ x y + │ Int64 Int64 +─────┼────────────── + 1 │ 2 1 + 2 │ 3 2 + +julia> @subset! df begin + :x .> globalvar + :y .== 3 +end +0×2 DataFrame + +julia> d = DataFrame(n = 1:20, x = [3, 3, 3, 3, 1, 1, 1, 2, 1, 1, + 2, 1, 1, 2, 2, 2, 3, 1, 1, 2]); + +julia> g = groupby(d, :x); + +julia> @subset!(g, :n .> mean(:n)) +8×2 DataFrame + Row │ n x + │ Int64 Int64 +─────┼────────────── + 1 │ 12 1 + 2 │ 13 1 + 3 │ 15 2 + 4 │ 16 2 + 5 │ 17 3 + 6 │ 18 1 + 7 │ 19 1 + 8 │ 20 2 + +julia> @subset! g begin + :n .> mean(:n) + :n .< 20 + end +7×2 DataFrame + Row │ n x + │ Int64 Int64 +─────┼────────────── + 1 │ 12 1 + 2 │ 13 1 + 3 │ 15 2 + 4 │ 16 2 + 5 │ 17 3 + 6 │ 18 1 + 7 │ 19 1 + +julia> d = DataFrame(a = [1, 2, missing], b = ["x", "y", missing]); + +julia> @subset!(d, :a .== 1) +1×2 DataFrame +│ Row │ a │ b │ +│ │ Int64? │ String? │ +├─────┼────────┼─────────┤ +│ 1 │ 1 │ x │ +``` +""" +macro subset!(x, args...) 
+ esc(subset!_helper(x, args...)) +end + + ############################################################################## ## ## @orderby diff --git a/src/parsing.jl b/src/parsing.jl index 10835bd5..d196a824 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -64,8 +64,8 @@ end is_macro_head(ex, name) = false is_macro_head(ex::Expr, name) = ex.head == :macrocall && ex.args[1] == Symbol(name) -extract_macro_flags(ex, exprflags = (;Symbol("@byrow") => Ref(false), Symbol("@skipmissing") => Ref(false))) = (ex, exprflags) -function extract_macro_flags(ex::Expr, exprflags = (;Symbol("@byrow") => Ref(false), Symbol("@skipmissing") => Ref(false))) +extract_macro_flags(ex, exprflags = (;Symbol("@byrow") => Ref(false),)) = (ex, exprflags) +function extract_macro_flags(ex::Expr, exprflags = (;Symbol("@byrow") => Ref(false),)) if ex.head == :macrocall macroname = ex.args[1] if macroname in keys(exprflags) From 2ddee3856b8fa63bb9a41f4e642bd9886159ac86 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Sat, 26 Jun 2021 11:47:52 -0400 Subject: [PATCH 04/13] tests --- test/dataframes.jl | 66 --------------------- test/deprecated.jl | 67 +++++++++++++++++++++ test/runtests.jl | 1 + test/subset.jl | 143 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 211 insertions(+), 66 deletions(-) create mode 100644 test/subset.jl diff --git a/test/dataframes.jl b/test/dataframes.jl index 162ec30f..9f045eda 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -595,72 +595,6 @@ end @test @with(df, cols("A")) === df.A end -@testset "where" begin - df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1]) - - x = [2, 1, 0, 0] - - @test @where(df, :A .> 1) == df[(df.A .> 1) .=== true,:] - @test @where(df, :B .> 1) == df[df.B .> 1,:] - @test @where(df, :A .> x) == df[(df.A .> x) .=== true,:] - @test @where(df, :B .> x) ≅ df[df.B .> x,:] - @test @where(df, :A .> :B, :B .> mean(:B)) == DataFrame(A = 3, B = 2) - @test @where(df, :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 
1),:] - @test @where(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:] - - @test @where(df, :A .> 1).A isa Vector{Union{Missing, Int}} - - @test @where(df, cols(:A) .> 1) == df[(df.A .> 1) .=== true,:] - @test @where(df, cols(:B) .> 1) == df[df.B .> 1,:] - @test @where(df, cols(:A) .> x) == df[(df.A .> x) .=== true,:] - @test @where(df, cols(:B) .> x) ≅ df[df.B .> x,:] - @test @where(df, cols(:A) .> :B, cols(:B) .> mean(:B)) == DataFrame(A = 3, B = 2) - @test @where(df, cols(:A) .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:] - @test @where(df, cols(:A) .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:] - - @test @where(df, :A .> 1, :A .<= 2) == DataFrame(A = 2, B = 1) - - subdf = @view df[df.B .== 2, :] - - @test @where(subdf, :A .== 3) == DataFrame(A = 3, B = 2) -end - -@testset "where with :block" begin - df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1]) - - d = @where df begin - :A .> 1 - :B .> 1 - end - @test d ≅ @where(df, :A .> 1, :B .> 1) - - d = @where df begin - cols(:A) .> 1 - :B .> 1 - end - @test d ≅ @where(df, :A .> 1, :B .> 1) - - d = @where df begin - :A .> 1 - cols(:B) .> 1 - end - @test d ≅ @where(df, :A .> 1, :B .> 1) - - d = @where df begin - begin - :A .> 1 - end - :B .> 1 - end - @test d ≅ @where(df, :A .> 1, :B .> 1) - - d = @where df begin - :A .> 1 - @. 
:B > 1 - end - @test d ≅ @where(df, :A .> 1, :B .> 1) -end - @testset "orderby" begin df = DataFrame( g = [1, 1, 1, 2, 2], diff --git a/test/deprecated.jl b/test/deprecated.jl index e0f68b35..968fc589 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -77,4 +77,71 @@ const ≅ = isequal @test @based_on(gd, cols("new" * "_" * "column") = 2)."new_column" == [2, 2] end +@testset "where" begin + df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1]) + + x = [2, 1, 0, 0] + + @test @where(df, :A .> 1) == df[(df.A .> 1) .=== true,:] + @test @where(df, :B .> 1) == df[df.B .> 1,:] + @test @where(df, :A .> x) == df[(df.A .> x) .=== true,:] + @test @where(df, :B .> x) ≅ df[df.B .> x,:] + @test @where(df, :A .> :B, :B .> mean(:B)) == DataFrame(A = 3, B = 2) + @test @where(df, :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:] + @test @where(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:] + + @test @where(df, :A .> 1).A isa Vector{Union{Missing, Int}} + + @test @where(df, cols(:A) .> 1) == df[(df.A .> 1) .=== true,:] + @test @where(df, cols(:B) .> 1) == df[df.B .> 1,:] + @test @where(df, cols(:A) .> x) == df[(df.A .> x) .=== true,:] + @test @where(df, cols(:B) .> x) ≅ df[df.B .> x,:] + @test @where(df, cols(:A) .> :B, cols(:B) .> mean(:B)) == DataFrame(A = 3, B = 2) + @test @where(df, cols(:A) .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:] + @test @where(df, cols(:A) .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:] + + @test @where(df, :A .> 1, :A .<= 2) == DataFrame(A = 2, B = 1) + + subdf = @view df[df.B .== 2, :] + + @test @where(subdf, :A .== 3) == DataFrame(A = 3, B = 2) +end + +@testset "where with :block" begin + df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1]) + + d = @where df begin + :A .> 1 + :B .> 1 + end + @test d ≅ @where(df, :A .> 1, :B .> 1) + + d = @where df begin + cols(:A) .> 1 + :B .> 1 + end + @test d ≅ @where(df, :A .> 1, :B .> 1) + + d = @where df begin + :A .> 1 + 
cols(:B) .> 1 + end + @test d ≅ @where(df, :A .> 1, :B .> 1) + + d = @where df begin + begin + :A .> 1 + end + :B .> 1 + end + @test d ≅ @where(df, :A .> 1, :B .> 1) + + d = @where df begin + :A .> 1 + @. :B > 1 + end + @test d ≅ @where(df, :A .> 1, :B .> 1) +end + + end # module \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 36e12417..3fb003b3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,6 +5,7 @@ quiet = false my_tests = ["dataframes.jl", "eachrow.jl", "grouping.jl", + "subset.jl", "function_compilation.jl", "chaining.jl", "linqmacro.jl", diff --git a/test/subset.jl b/test/subset.jl new file mode 100644 index 00000000..241f6182 --- /dev/null +++ b/test/subset.jl @@ -0,0 +1,143 @@ +module TestSubset + +using Test +using DataFrames +using DataFramesMeta +using Statistics + +const ≅ = isequal + +@testset "subset" begin + df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1]) + + x = [2, 1, 0, 0] + + @test @subset(df, :A .> 1) == df[(df.A .> 1) .=== true,:] + @test @subset(df, :B .> 1) == df[df.B .> 1,:] + @test @subset(df, :A .> x) == df[(df.A .> x) .=== true,:] + @test @subset(df, :B .> x) ≅ df[df.B .> x,:] + @test @subset(df, :A .> :B, :B .> mean(:B)) == DataFrame(A = 3, B = 2) + @test @subset(df, :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:] + @test @subset(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:] + + @test @subset(df, :A .> 1).A isa Vector{Union{Missing, Int}} + + @test @subset(df, cols(:A) .> 1) == df[(df.A .> 1) .=== true,:] + @test @subset(df, cols(:B) .> 1) == df[df.B .> 1,:] + @test @subset(df, cols(:A) .> x) == df[(df.A .> x) .=== true,:] + @test @subset(df, cols(:B) .> x) ≅ df[df.B .> x,:] + @test @subset(df, cols(:A) .> :B, cols(:B) .> mean(:B)) == DataFrame(A = 3, B = 2) + @test @subset(df, cols(:A) .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:] + @test @subset(df, cols(:A) .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:] + 
+ @test @subset(df, :A .> 1, :A .<= 2) == DataFrame(A = 2, B = 1) + + subdf = @view df[df.B .== 2, :] + + @test @subset(subdf, :A .== 3) == DataFrame(A = 3, B = 2) +end + +@testset "subset with :block" begin + df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1]) + + d = @subset df begin + :A .> 1 + :B .> 1 + end + @test d ≅ @subset(df, :A .> 1, :B .> 1) + + d = @subset df begin + cols(:A) .> 1 + :B .> 1 + end + @test d ≅ @subset(df, :A .> 1, :B .> 1) + + d = @subset df begin + :A .> 1 + cols(:B) .> 1 + end + @test d ≅ @subset(df, :A .> 1, :B .> 1) + + d = @subset df begin + begin + :A .> 1 + end + :B .> 1 + end + @test d ≅ @subset(df, :A .> 1, :B .> 1) + + d = @subset df begin + :A .> 1 + @. :B > 1 + end + @test d ≅ @subset(df, :A .> 1, :B .> 1) +end + + +@testset "subset!" begin + df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1]) + + x = [2, 1, 0, 0] + + @test @subset!(copy(df), :A .> 1) == df[(df.A .> 1) .=== true,:] + @test @subset!(copy(df), :B .> 1) == df[df.B .> 1,:] + @test @subset!(copy(df), :A .> x) == df[(df.A .> x) .=== true,:] + @test @subset!(copy(df), :B .> x) ≅ df[df.B .> x,:] + @test @subset!(copy(df), :A .> :B, :B .> mean(:B)) == DataFrame(A = 3, B = 2) + @test @subset!(copy(df), :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:] + @test @subset!(copy(df), :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:] + + @test @subset!(copy(df), :A .> 1).A isa Vector{Union{Missing, Int}} + + @test @subset!(copy(df), cols(:A) .> 1) == df[(df.A .> 1) .=== true,:] + @test @subset!(copy(df), cols(:B) .> 1) == df[df.B .> 1,:] + @test @subset!(copy(df), cols(:A) .> x) == df[(df.A .> x) .=== true,:] + @test @subset!(copy(df), cols(:B) .> x) ≅ df[df.B .> x,:] + @test @subset!(copy(df), cols(:A) .> :B, cols(:B) .> mean(:B)) == DataFrame(A = 3, B = 2) + @test @subset!(copy(df), cols(:A) .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:] + @test @subset!(copy(df), cols(:A) .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 
4, df.B .> 1),:] + + @test @subset!(copy(df), :A .> 1, :A .<= 2) == DataFrame(A = 2, B = 1) + + subdf = @view df[df.B .== 2, :] + + @test @subset!(copy(subdf), :A .== 3) == DataFrame(A = 3, B = 2) +end + +@testset "subset! with :block" begin + df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1]) + + d = @subset! copy(df) begin + :A .> 1 + :B .> 1 + end + @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1) + + d = @subset! copy(df) begin + cols(:A) .> 1 + :B .> 1 + end + @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1) + + d = @subset! copy(df) begin + :A .> 1 + cols(:B) .> 1 + end + @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1) + + d = @subset! copy(df) begin + begin + :A .> 1 + end + :B .> 1 + end + @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1) + + d = @subset! copy(df) begin + :A .> 1 + @. :B > 1 + end + @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1) +end + +end # module \ No newline at end of file From c815d1a2b660b5da9e17c298e38e57c89dfe21af Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Sat, 26 Jun 2021 11:58:49 -0400 Subject: [PATCH 05/13] update index.md --- docs/src/index.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index bf315786..d496f7ec 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -12,7 +12,7 @@ in C#. In addition, DataFramesMeta provides * `@orderby`, for sorting data frames -* `@where`, for keeping rows of a DataFrame matching a given condition +* `@subset` and `@subset!`, for keeping rows of a DataFrame matching a given condition * `@by`, for grouping and combining a data frame in a single step * `@with`, for working with the columns of a data frame with high performance and convenient syntax @@ -96,18 +96,21 @@ gd = groupby(df, :x); @transform!(gd, y = 2 .* :y .* first(:y)) ``` -## `@where` +## `@subset` and `@subset!` Select row subsets. Operates on both a `DataFrame` and a `GroupedDataFrame`. 
+`@subset` always returns a freshly-allocated data frame whereas +`@subset!` modifies the data frame in-place. ```julia +using Statistics df = DataFrame(x = [1, 1, 2, 2], y = [1, 2, 101, 102]); gd = groupby(df, :x); outside_var = 1; -@where(df, :x .> 1) -@where(df, :x .> outside_var) -@where(df, :x .> outside_var, :y .< 102) # the two expressions are "and-ed" -@where(gd, :x .> mean(:x)) +@subset(df, :x .> 1) +@subset(df, :x .> outside_var) +@subset(df, :x .> outside_var, :y .< 102) # the two expressions are "and-ed" +@subset(gd, :x .> mean(:x)) ``` ## `@combine` @@ -300,7 +303,7 @@ The following macros accept `@byrow`: * `@transform` and `@transform!`, `@select`, `@select!`, and `@combine`. `@byrow` can be used in the left hand side of expressions, e.g. `@select(df, @byrow z = :x * :y)`. -* `@where` and `@orderby`, with syntax of the form `@where(df, @byrow :x > :y)` +* `@subset`, `@subset!` and `@orderby`, with syntax of the form `@subset(df, @byrow :x > :y)` * `@with`, where the anonymous function created by `@with` is wrapped in `ByRow`, as in `@with(df, @byrow :x * :y)`. From f2dde1ef968c3dae582f0e6557e8348ec758d0f2 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Sun, 27 Jun 2021 15:32:46 -0400 Subject: [PATCH 06/13] add docstring --- src/parsing.jl | 15 ++++++++------- test/dataframes.jl | 6 +++--- test/deprecated.jl | 15 +++++++++++++++ test/grouping.jl | 15 --------------- test/subset.jl | 16 ++++++++++++++++ 5 files changed, 42 insertions(+), 25 deletions(-) diff --git a/src/parsing.jl b/src/parsing.jl index 32ca666e..07cdf452 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -336,19 +336,20 @@ function replace_dotted!(e, membernames) Expr(:., x_new, y_new) end -""" - create_args_vector(args...) -> vec, wrap_byrow - -Given multiple arguments which can be any type -of expression-like object (`Expr`, `QuoteNode`, etc.), -puts them into a single array, removing line numbers. -""" function create_args_vector(args...) 
create_args_vector(Expr(:block, args...)) end """ create_args_vector(arg) -> vec, outer_flags + +Given an expression return a vector of operations +and a `NamedTuple` of the macro-flags that appear +in the expression. + +If a `:block` expression, returns the `args` of +the block as an array. If a simple expression, +wrap the expression in a one-element vector. """ function create_args_vector(arg) arg, outer_flags = extract_macro_flags(MacroTools.unblock(arg)) diff --git a/test/dataframes.jl b/test/dataframes.jl index 9f045eda..84e3c770 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -700,17 +700,17 @@ end @test d.y == [true] - d = @where(df, @linenums_macro begin end) + d = @subset(df, @linenums_macro begin end) @test nrow(d) == 1 - d = @where df begin + d = @subset df begin @byrow @linenums_macro_byrow begin end end @test nrow(d) == 1 - d = @where df @byrow begin + d = @subset df @byrow begin @linenums_macro_byrow begin end end diff --git a/test/deprecated.jl b/test/deprecated.jl index 968fc589..0412d57a 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -143,5 +143,20 @@ end @test d ≅ @where(df, :A .> 1, :B .> 1) end +@testset "@where with a grouped data frame" begin + df = DataFrame( + g = [1, 1, 1, 2, 2], + i = 1:5, + t = ["a", "b", "c", "c", "e"], + y = [:v, :w, :x, :y, :z], + c = [:g, :quote, :body, :transform, missing] + ) + + gd = groupby(df, :g) + + @test @where(gd, :i .== first(:i)) ≅ df[[1, 4], :] + @test @where(gd, cols(:i) .> mean(cols(:i)), :t .== "c") ≅ df[[3], :] + @test @where(gd, :c .== :g) ≅ df[[], :] +end end # module \ No newline at end of file diff --git a/test/grouping.jl b/test/grouping.jl index ca7957cb..c603c2a5 100644 --- a/test/grouping.jl +++ b/test/grouping.jl @@ -357,19 +357,4 @@ end @test @select(g, :a, @byrow t = :a ^ 2).t ≅ d.a .^ 2 end -@testset "@where with a grouped data frame" begin - df = DataFrame( - g = [1, 1, 1, 2, 2], - i = 1:5, - t = ["a", "b", "c", "c", "e"], - y = [:v, :w, :x, :y, :z], - c = [:g, 
:quote, :body, :transform, missing] - ) - - gd = groupby(df, :g) - - @test @where(gd, :i .== first(:i)) ≅ df[[1, 4], :] - @test @where(gd, cols(:i) .> mean(cols(:i)), :t .== "c") ≅ df[[3], :] - @test @where(gd, :c .== :g) ≅ df[[], :] -end end # module diff --git a/test/subset.jl b/test/subset.jl index 241f6182..00959dd7 100644 --- a/test/subset.jl +++ b/test/subset.jl @@ -140,4 +140,20 @@ end @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1) end +@testset "@subset with a grouped data frame" begin + df = DataFrame( + g = [1, 1, 1, 2, 2], + i = 1:5, + t = ["a", "b", "c", "c", "e"], + y = [:v, :w, :x, :y, :z], + c = [:g, :quote, :body, :transform, missing] + ) + + gd = groupby(df, :g) + + @test @subset(gd, :i .== first(:i)) ≅ df[[1, 4], :] + @test @subset(gd, cols(:i) .> mean(cols(:i)), :t .== "c") ≅ df[[3], :] + @test @subset(gd, :c .== :g) ≅ df[[], :] +end + end # module \ No newline at end of file From 7c14a205b54ef1de00deaba452da4043888a7a77 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Sun, 27 Jun 2021 19:22:13 -0400 Subject: [PATCH 07/13] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/macros.jl | 4 ++-- src/parsing.jl | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index 035b9c77..6326c287 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -472,7 +472,7 @@ and !!! note `@subset` treats `missing` values as `false` when filtering rows. - Unlike `DataFrames.subset` and other boolean operations with + Unlike `DataFrames.subset` and other Boolean operations with `missing`, `@subset` will *not* error on missing values, and will only keep `true` values. @@ -631,7 +631,7 @@ and !!! note `@subset!` treats `missing` values as `false` when filtering rows. 
- Unlike `DataFrames.subset!` and other boolean operations with + Unlike `DataFrames.subset!` and other Boolean operations with `missing`, `@subset!` will *not* error on missing values, and will only keep `true` values. diff --git a/src/parsing.jl b/src/parsing.jl index d196a824..32ca666e 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -183,7 +183,10 @@ end # We need wrap_byrow as a keyword argument here in case someone # uses `@transform df @byrow begin ... end`, which we # deal with outside of this function. -function fun_to_vec(ex::Expr; gensym_names::Bool=false, outer_flags=nothing, no_dest::Bool=false) +function fun_to_vec(ex::Expr; + gensym_names::Bool=false, + outer_flags::Union{NamedTuple, Nothing}=nothing, + no_dest::Bool=false) # classify the type of expression # :x # handled via dispatch # cols(:x) # handled as though above From f83161f0e9b6c488c356daa4425f51dc73582ed5 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Sun, 27 Jun 2021 19:28:33 -0400 Subject: [PATCH 08/13] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- docs/src/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/index.md b/docs/src/index.md index d496f7ec..f1139d68 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -12,7 +12,7 @@ in C#. 
In addition, DataFramesMeta provides * `@orderby`, for sorting data frames -* `@subset` and `@subset!`, for keeping rows of a DataFrame matching a given condition +* `@subset` and `@subset!`, for keeping rows of a data frame matching a given condition * `@by`, for grouping and combining a data frame in a single step * `@with`, for working with the columns of a data frame with high performance and convenient syntax From 6f956dc32736f58f028519c7c4e5b1d0f76ed31f Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Sun, 27 Jun 2021 19:34:09 -0400 Subject: [PATCH 09/13] Update test/subset.jl Co-authored-by: Milan Bouchet-Valat --- test/subset.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/subset.jl b/test/subset.jl index 00959dd7..1f758f56 100644 --- a/test/subset.jl +++ b/test/subset.jl @@ -79,7 +79,9 @@ end x = [2, 1, 0, 0] - @test @subset!(copy(df), :A .> 1) == df[(df.A .> 1) .=== true,:] + df2 = copy(df) + @test @subset!(df2, :A .> 1) === df2 + @test df2 == df[(df.A .> 1) .=== true,:] @test @subset!(copy(df), :B .> 1) == df[df.B .> 1,:] @test @subset!(copy(df), :A .> x) == df[(df.A .> x) .=== true,:] @test @subset!(copy(df), :B .> x) ≅ df[df.B .> x,:] @@ -156,4 +158,4 @@ end @test @subset(gd, :c .== :g) ≅ df[[], :] end -end # module \ No newline at end of file +end # module From a75d81770aa03936abaa4ef602731a7f91bca841 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Mon, 28 Jun 2021 14:36:27 -0400 Subject: [PATCH 10/13] switching --- test/subset.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/subset.jl b/test/subset.jl index 1f758f56..03ed5ebe 100644 --- a/test/subset.jl +++ b/test/subset.jl @@ -82,6 +82,7 @@ end df2 = copy(df) @test @subset!(df2, :A .> 1) === df2 @test df2 == df[(df.A .> 1) .=== true,:] + @test @subset!(copy(df), :B .> 1) == df[df.B .> 1,:] @test @subset!(copy(df), :A .> x) == df[(df.A .> x) .=== true,:] @test @subset!(copy(df), :B .> x) ≅ df[df.B .> x,:] From 
7991b8c1f0329d3920fb8913fb011f81c94ed0f7 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Mon, 28 Jun 2021 14:51:15 -0400 Subject: [PATCH 11/13] @subset! with gd --- test/subset.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/subset.jl b/test/subset.jl index 03ed5ebe..d501be18 100644 --- a/test/subset.jl +++ b/test/subset.jl @@ -159,4 +159,18 @@ end @test @subset(gd, :c .== :g) ≅ df[[], :] end +@testset "@subset! with a grouped data frame" begin + df = DataFrame( + g = [1, 1, 1, 2, 2], + i = 1:5, + t = ["a", "b", "c", "c", "e"], + y = [:v, :w, :x, :y, :z], + c = [:g, :quote, :body, :transform, missing] + ) + + @test @subset!(groupby(copy(df), :g), :i .== first(:i)) ≅ df[[1, 4], :] + @test @subset!(groupby(copy(df), :g), cols(:i) .> mean(cols(:i)), :t .== "c") ≅ df[[3], :] + @test @subset!(groupby(copy(df), :g), :c .== :g) ≅ df[[], :] +end + end # module From fa09626e5ed6345b194b1d81746961f3a049af45 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Mon, 28 Jun 2021 18:47:53 -0400 Subject: [PATCH 12/13] Update src/parsing.jl Co-authored-by: Milan Bouchet-Valat --- src/parsing.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parsing.jl b/src/parsing.jl index 07cdf452..ab6bdf6f 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -308,7 +308,10 @@ function fun_to_vec(ex::Expr; throw(ArgumentError("This path should not be reached")) end -fun_to_vec(ex::QuoteNode; no_dest::Bool=false, gensym_names::Bool=false, outer_flags=nothing) = ex +fun_to_vec(ex::QuoteNode; + no_dest::Bool=false, + gensym_names::Bool=false, + outer_flags::Union{NamedTuple, Nothing}=nothing) = ex function make_source_concrete(x::AbstractVector) if isempty(x) || isconcretetype(eltype(x)) From 1c485c8c25351f7846c91cf4fbfe35afdded2a69 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Mon, 28 Jun 2021 18:48:00 -0400 Subject: [PATCH 13/13] Update src/parsing.jl 
Co-authored-by: Milan Bouchet-Valat --- src/parsing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parsing.jl b/src/parsing.jl index ab6bdf6f..e5a4427d 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -350,7 +350,7 @@ Given an expression return a vector of operations and a `NamedTuple` of the macro-flags that appear in the expression. -If a `:block` expression, returns the `args` of +If a `:block` expression, return the `args` of the block as an array. If a simple expression, wrap the expression in a one-element vector. """