From 4514eb0f08d20f33d474754467ee13ba9a3c5a89 Mon Sep 17 00:00:00 2001
From: Peter Deffebach <p.deffebach@gmail.com>
Date: Thu, 24 Jun 2021 16:53:39 -0400
Subject: [PATCH 01/13] initial commit

---
 src/DataFramesMeta.jl |   9 ++-
 src/macros.jl         | 127 +++++++++++++++++++++---------------------
 src/parsing.jl        |  49 ++++------------
 test/dataframes.jl    |   4 +-
 4 files changed, 84 insertions(+), 105 deletions(-)

diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl
index c1f4ad20..f2974d5b 100644
--- a/src/DataFramesMeta.jl
+++ b/src/DataFramesMeta.jl
@@ -7,11 +7,14 @@ using MacroTools
 @reexport using DataFrames
 
 # Basics:
-export @with, @where, @orderby, @transform, @by, @combine, @select,
-       @transform!, @select!,
+export @with,
+       @subset, @subset!,
+       @orderby,
+       @by, @combine,
+       @transform, @select, @transform!, @select!,
        @eachrow, @eachrow!,
        @byrow,
-       @based_on # deprecated
+       @based_on, @where # deprecated
 
 include("parsing.jl")
 include("macros.jl")
diff --git a/src/macros.jl b/src/macros.jl
index 83e6b3e0..b40cf52f 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -415,50 +415,31 @@ macro with(d, body)
 end
 
 
-
 ##############################################################################
 ##
-## @where - select row subsets
+## @subset - select row subsets
 ##
 ##############################################################################
 
-function where_helper(x, args...)
-    exprs, wrap_byrow = create_args_vector(args...)
-    t = (fun_to_vec(ex; gensym_names = true, wrap_byrow = wrap_byrow) for ex in exprs)
+function subset_helper(x, args...)
+    exprs, outer_flags = create_args_vector(args...)
+    t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs)
+    skipmissing = outer_flags[Symbol("@skipmissing")][]
     quote
-        $where($x, $(t...))
+        $subset($x, $(t...); skipmissing=$skipmissing)
     end
 end
 
-function df_to_bool(res::AbstractDataFrame)
-    if any(t -> !(t isa AbstractVector{<:Union{Missing, Bool}}), eachcol(res))
-        throw(ArgumentError("All arguments in @where must return an " *
-                            "AbstractVector{<:Union{Missing, Bool}}"))
+function where_helper(x, args...)
+    exprs, outer_flags = create_args_vector(args...)
+    t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs)
+    quote
+        $subset($x, $(t...); skipmissing=true)
     end
-
-    return reduce((x, y) -> x .& y, eachcol(res)) .=== true
-end
-
-function where(df::AbstractDataFrame, @nospecialize(args...))
-    res = DataFrames.select(df, args...; copycols = false)
-    tokeep = df_to_bool(res)
-    df[tokeep, :]
-end
-
-function where(gd::GroupedDataFrame, @nospecialize(args...))
-    res = DataFrames.select(gd, args...; copycols = false, keepkeys = false)
-    tokeep = df_to_bool(res)
-    parent(gd)[tokeep, :]
-end
-
-function where(df::SubDataFrame, @nospecialize(args...))
-    res = DataFrames.select(df, args...)
-    tokeep = df_to_bool(res)
-    df[tokeep, :]
 end
 
 """
-    @where(d, i...)
+    @subset(d, i...)
 
 Select row subsets in `AbstractDataFrame`s and `GroupedDataFrame`s.
 
@@ -469,16 +450,16 @@ Select row subsets in `AbstractDataFrame`s and `GroupedDataFrame`s.
 
 Multiple `i` expressions are "and-ed" together.
 
-If given a `GroupedDataFrame`, `@where` applies transformations by
+If given a `GroupedDataFrame`, `@subset` applies transformations by
 group, and returns a fresh `DataFrame` containing the rows
 for which the generated values are all `true`.
 
-Inputs to `@where` can come in two formats: a `begin ... end` block, in which case each
+Inputs to `@subset` can come in two formats: a `begin ... end` block, in which case each
 line is a separate selector, or as multiple arguments.
 For example the following two statements are equivalent:
 
 ```julia
-@where df begin
+@subset df begin
     :x .> 1
     :y .< 2
 end
@@ -487,22 +468,32 @@ end
 and
 
 ```
-@where(df, :x .> 1, :y .< 2)
+@subset(df, :x .> 1, :y .< 2)
 ```
 
 !!! note
-    `@where` treats `missing` values as `false` when filtering rows.
-    Unlike `DataFrames.filter` and other boolean operations with
-    `missing`, `@where` will *not* error on missing values, and
-    will only keep `true` values.
-
-If an expression provided to `@where` begins with `@byrow`, operations
+    `@subset` will error on `missing`, unlike `@where`. To
+    recover the old behavior of `@where`, use the macro-flag
+    `@skipmissing`
+
+    ```julia
+    julia> df = DataFrame(a = [1, missing], b = [3, 4]);
+
+    julia> @subset df @skipmissing :a .== 1
+    1×2 DataFrame
+     Row │ a       b
+         │ Int64?  Int64
+    ─────┼───────────────
+       1 │      1      3
+    ```
+
+If an expression provided to `@subset` begins with `@byrow`, operations
 are applied "by row" along the data frame. To avoid writing `@byrow` multiple
 times, `@orderby` also allows `@byrow`to be placed at the beginning of a block of
 operations. For example, the following two statements are equivalent.
 
 ```
-@where df @byrow begin
+@subset df @byrow begin
     :x > 1
     :y < 2
 end
@@ -511,7 +502,7 @@ end
 and
 
 ```
-@orderby df
+@subset df begin
     @byrow :x > 1
     @byrow :y < 2
 end
@@ -526,7 +517,7 @@ julia> df = DataFrame(x = 1:3, y = [2, 1, 2]);
 
 julia> globalvar = [2, 1, 0];
 
-julia> @where(df, :x .> 1)
+julia> @subset(df, :x .> 1)
 2×2 DataFrame
  Row │ x      y
      │ Int64  Int64
@@ -534,7 +525,7 @@ julia> @where(df, :x .> 1)
    1 │     2      1
    2 │     3      2
 
-julia> @where(df, :x .> globalvar)
+julia> @subset(df, :x .> globalvar)
 2×2 DataFrame
  Row │ x      y
      │ Int64  Int64
@@ -542,7 +533,7 @@ julia> @where(df, :x .> globalvar)
    1 │     2      1
    2 │     3      2
 
-julia> @where df begin
+julia> @subset df begin
     :x .> globalvar
     :y .== 3
 end
@@ -553,7 +544,7 @@ julia> d = DataFrame(n = 1:20, x = [3, 3, 3, 3, 1, 1, 1, 2, 1, 1,
 
 julia> g = groupby(d, :x);
 
-julia> @where(g, :n .> mean(:n))
+julia> @subset(g, :n .> mean(:n))
 8×2 DataFrame
  Row │ n      x
      │ Int64  Int64
@@ -567,7 +558,7 @@ julia> @where(g, :n .> mean(:n))
    7 │    19      1
    8 │    20      2
 
-julia> @where g begin
+julia> @subset g begin
            :n .> mean(:n)
            :n .< 20
        end
@@ -585,7 +576,7 @@ julia> @where g begin
 
 julia> d = DataFrame(a = [1, 2, missing], b = ["x", "y", missing]);
 
-julia> @where(d, :a .== 1)
+julia> @subset(d, :a .== 1)
 1×2 DataFrame
 │ Row │ a      │ b       │
 │     │ Int64? │ String? │
@@ -593,7 +584,17 @@ julia> @where(d, :a .== 1)
 │ 1   │ 1      │ x       │
 ```
 """
+macro subset(x, args...)
+    esc(subset_helper(x, args...))
+end
+
+"""
+    @where(x, args...)
+
+Deprecated version of `@subset`, see `?@subset` for details.
+"""
 macro where(x, args...)
+    @warn "`@where is deprecated, use `@subset`  with `@skipmissing` instead."
     esc(where_helper(x, args...))
 end
 
@@ -604,8 +605,8 @@ end
 ##############################################################################
 
 function orderby_helper(x, args...)
-    exprs, wrap_byrow = create_args_vector(args...)
-    t = (fun_to_vec(ex; gensym_names = true, wrap_byrow = wrap_byrow) for ex in exprs)
+    exprs, outer_flags = create_args_vector(args...)
+    t = (fun_to_vec(ex; gensym_names = true, outer_flags = outer_flags) for ex in exprs)
     quote
         $DataFramesMeta.orderby($x, $(t...))
     end
@@ -768,8 +769,8 @@ end
 
 
 function transform_helper(x, args...)
-    exprs, wrap_byrow = create_args_vector(args...)
-    t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs)
+    exprs, outer_flags = create_args_vector(args...)
+    t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
     quote
         $DataFrames.transform($x, $(t...))
     end
@@ -886,8 +887,8 @@ end
 
 
 function transform!_helper(x, args...)
-    exprs, wrap_byrow = create_args_vector(args...)
-    t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs)
+    exprs, outer_flags = create_args_vector(args...)
+    t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
     quote
         $DataFrames.transform!($x, $(t...))
     end
@@ -981,8 +982,8 @@ end
 ##############################################################################
 
 function select_helper(x, args...)
-    exprs, wrap_byrow = create_args_vector(args...)
-    t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs)
+    exprs, outer_flags = create_args_vector(args...)
+    t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
     quote
         $DataFrames.select($x, $(t...))
     end
@@ -1095,8 +1096,8 @@ end
 ##############################################################################
 
 function select!_helper(x, args...)
-    exprs, wrap_byrow = create_args_vector(args...)
-    t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs)
+    exprs, outer_flags = create_args_vector(args...)
+    t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
     quote
         $DataFrames.select!($x, $(t...))
     end
@@ -1206,7 +1207,7 @@ end
 function combine_helper(x, args...; deprecation_warning = false)
     deprecation_warning && @warn "`@based_on` is deprecated. Use `@combine` instead."
 
-    exprs, wrap_byrow = create_args_vector(args...)
+    exprs, outer_flags = create_args_vector(args...)
 
     fe = first(exprs)
     if length(exprs) == 1 &&
@@ -1218,7 +1219,7 @@ function combine_helper(x, args...; deprecation_warning = false)
         exprs = ((:(cols(AsTable) = $fe)),)
     end
 
-    t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs)
+    t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
 
     quote
         $DataFrames.combine($x, $(t...))
@@ -1327,7 +1328,7 @@ end
 function by_helper(x, what, args...)
     # Only allow one argument when returning a Table object
     # Only allow one argument when returning a Table object
-    exprs, wrap_byrow = create_args_vector(args...)
+    exprs, outer_flags = create_args_vector(args...)
     fe = first(exprs)
     if length(exprs) == 1 &&
         !(fe isa QuoteNode || onearg(fe, :cols)) &&
@@ -1338,7 +1339,7 @@ function by_helper(x, what, args...)
         exprs = ((:(cols(AsTable) = $fe)),)
     end
 
-    t = (fun_to_vec(ex; gensym_names = false, wrap_byrow = wrap_byrow) for ex in exprs)
+    t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
 
     quote
         $DataFrames.combine($groupby($x, $what), $(t...))
diff --git a/src/parsing.jl b/src/parsing.jl
index cd77f41c..10835bd5 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -64,8 +64,8 @@ end
 is_macro_head(ex, name) = false
 is_macro_head(ex::Expr, name) = ex.head == :macrocall && ex.args[1] == Symbol(name)
 
-extract_macro_flags(ex, exprflags = (;Symbol("@byrow") => Ref(false),)) = (ex, exprflags)
-function extract_macro_flags(ex::Expr, exprflags = (;Symbol("@byrow") => Ref(false),))
+extract_macro_flags(ex, exprflags = (;Symbol("@byrow") => Ref(false), Symbol("@skipmissing") => Ref(false))) = (ex, exprflags)
+function extract_macro_flags(ex::Expr, exprflags = (;Symbol("@byrow") => Ref(false), Symbol("@skipmissing") => Ref(false)))
     if ex.head == :macrocall
         macroname = ex.args[1]
         if macroname in keys(exprflags)
@@ -183,7 +183,7 @@ end
 # We need wrap_byrow as a keyword argument here in case someone
 # uses `@transform df @byrow begin ... end`, which we
 # deal with outside of this function.
-function fun_to_vec(ex::Expr; gensym_names::Bool=false, no_dest::Bool=false, wrap_byrow::Bool=false)
+function fun_to_vec(ex::Expr; gensym_names::Bool=false, outer_flags=nothing, no_dest::Bool=false)
     # classify the type of expression
     # :x # handled via dispatch
     # cols(:x) # handled as though above
@@ -201,15 +201,16 @@ function fun_to_vec(ex::Expr; gensym_names::Bool=false, no_dest::Bool=false, wra
     # cols(y) = :x + 1 # re-write as complicated col, but RHS is :block
     # cols(:y) = cols(:x) + 1 # re-write as complicated call, RHS is block, use cols
     # `@byrow` before any of the above
-    ex, flags = extract_macro_flags(MacroTools.unblock(ex))
+    ex, inner_flags = extract_macro_flags(MacroTools.unblock(ex))
 
     # Use tuple syntax in future when we add more flags
-    wrap_byrow_t = flags[Symbol("@byrow")][]
+    inner_wrap_byrow = inner_flags[Symbol("@byrow")][]
+    outer_wrap_byrow = outer_flags === nothing ? false : outer_flags[Symbol("@byrow")][]
 
-    if wrap_byrow_t && wrap_byrow
+    if inner_wrap_byrow && outer_wrap_byrow
         throw(ArgumentError("Redundant @byrow calls."))
     else
-        wrap_byrow = wrap_byrow || wrap_byrow_t
+        wrap_byrow = inner_wrap_byrow || outer_wrap_byrow
     end
 
     if gensym_names
@@ -304,7 +305,7 @@ function fun_to_vec(ex::Expr; gensym_names::Bool=false, no_dest::Bool=false, wra
 
     throw(ArgumentError("This path should not be reached"))
 end
-fun_to_vec(ex::QuoteNode; no_dest::Bool=false, gensym_names::Bool=false, wrap_byrow::Bool=false) = ex
+fun_to_vec(ex::QuoteNode; no_dest::Bool=false, gensym_names::Bool=false, outer_flags=nothing) = ex
 
 function make_source_concrete(x::AbstractVector)
     if isempty(x) || isconcretetype(eltype(x))
@@ -344,33 +345,10 @@ function create_args_vector(args...)
 end
 
 """
-   create_args_vector(arg) -> vec, wrap_byrow
-
-Normalize a single input to a vector of expressions,
-with a `wrap_byrow` flag indicating that the
-expressions should operate by row.
-
-If `arg` is a single `:block`, it is unnested.
-Otherwise, return a single-element array.
-Also removes line numbers.
-
-If `arg` is of the form `@byrow ...`, then
-`wrap_byrow` is returned as `true`.
+   create_args_vector(arg) -> vec, outer_flags
 """
 function create_args_vector(arg)
-    if arg isa Expr && is_macro_head(arg, "@byrow")
-        wrap_byrow = true
-        largs = length(arg.args)
-        if largs == 2
-            throw(ArgumentError("No transformations supplied with `@byrow`"))
-        elseif largs == 3
-            arg = arg.args[3]
-        else
-            arg = Expr(:block, arg.args[3:end]...)
-        end
-    else
-        wrap_byrow = false
-    end
+    arg, outer_flags = extract_macro_flags(MacroTools.unblock(arg))
 
     if arg isa Expr && arg.head == :block
         x = MacroTools.rmlines(arg).args
@@ -378,8 +356,5 @@ function create_args_vector(arg)
         x = Any[arg]
     end
 
-    if wrap_byrow && any(t -> is_macro_head(t, "@byrow"), x)
-        throw(ArgumentError("Redundant `@byrow` calls."))
-    end
-    return x, wrap_byrow
+    return x, outer_flags
 end
diff --git a/test/dataframes.jl b/test/dataframes.jl
index b8767d6d..b56114d8 100644
--- a/test/dataframes.jl
+++ b/test/dataframes.jl
@@ -734,9 +734,9 @@ end
 
 macro linenums_macro(arg)
     if arg isa Expr && arg.head == :block && length(arg.args) == 1 && arg.args[1] isa LineNumberNode
-        esc(:(true))
+        esc(:([true]))
     else
-        esc(:(false))
+        esc(:([false]))
     end
 end
 

From 2e5de90463308220f1ad6a7203800718027cb82a Mon Sep 17 00:00:00 2001
From: Peter Deffebach <p.deffebach@gmail.com>
Date: Thu, 24 Jun 2021 17:09:39 -0400
Subject: [PATCH 02/13] fix tests

---
 test/dataframes.jl | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/test/dataframes.jl b/test/dataframes.jl
index b56114d8..162ec30f 100644
--- a/test/dataframes.jl
+++ b/test/dataframes.jl
@@ -740,6 +740,14 @@ macro linenums_macro(arg)
     end
 end
 
+macro linenums_macro_byrow(arg)
+    if arg isa Expr && arg.head == :block && length(arg.args) == 1 && arg.args[1] isa LineNumberNode
+        esc(:(true))
+    else
+        esc(:(false))
+    end
+end
+
 @testset "removing lines" begin
     df = DataFrame(a = [1], b = [2])
     # Can't use @test because @test remove line numbers
@@ -753,7 +761,7 @@ end
     @test d.y == [true]
 
     d = @transform df @byrow begin
-        y = @linenums_macro begin end
+        y = @linenums_macro_byrow begin end
     end
 
     @test d.y == [true]
@@ -763,13 +771,13 @@ end
     @test nrow(d) == 1
 
     d = @where df begin
-        @byrow @linenums_macro begin end
+        @byrow @linenums_macro_byrow begin end
     end
 
     @test nrow(d) == 1
 
     d = @where df @byrow begin
-        @linenums_macro begin end
+        @linenums_macro_byrow begin end
     end
 
     @test nrow(d) == 1

From eca50d475530affe1ec0c5bf26a8e2423c86dcb4 Mon Sep 17 00:00:00 2001
From: Peter Deffebach <p.deffebach@gmail.com>
Date: Sat, 26 Jun 2021 11:40:46 -0400
Subject: [PATCH 03/13] no more skipmissing

---
 src/macros.jl  | 173 ++++++++++++++++++++++++++++++++++++++++++++-----
 src/parsing.jl |   4 +-
 2 files changed, 158 insertions(+), 19 deletions(-)

diff --git a/src/macros.jl b/src/macros.jl
index b40cf52f..035b9c77 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -417,16 +417,15 @@ end
 
 ##############################################################################
 ##
-## @subset - select row subsets
+## @subset and @subset! - select row subsets
 ##
 ##############################################################################
 
 function subset_helper(x, args...)
     exprs, outer_flags = create_args_vector(args...)
     t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs)
-    skipmissing = outer_flags[Symbol("@skipmissing")][]
     quote
-        $subset($x, $(t...); skipmissing=$skipmissing)
+        $subset($x, $(t...); skipmissing=true)
     end
 end
 
@@ -472,20 +471,10 @@ and
 ```
 
 !!! note
-    `@subset` will error on `missing`, unlike `@where`. To
-    recover the old behavior of `@where`, use the macro-flag
-    `@skipmissing`
-
-    ```julia
-    julia> df = DataFrame(a = [1, missing], b = [3, 4]);
-
-    julia> @subset df @skipmissing :a .== 1
-    1×2 DataFrame
-     Row │ a       b
-         │ Int64?  Int64
-    ─────┼───────────────
-       1 │      1      3
-    ```
+    `@subset` treats `missing` values as `false` when filtering rows.
+    Unlike `DataFrames.subset` and other boolean operations with
+    `missing`, `@subset` will *not* error on missing values, and
+    will only keep `true` values.
 
 If an expression provided to `@subset` begins with `@byrow`, operations
 are applied "by row" along the data frame. To avoid writing `@byrow` multiple
@@ -598,6 +587,156 @@ macro where(x, args...)
     esc(where_helper(x, args...))
 end
 
+function subset!_helper(x, args...)
+    exprs, outer_flags = create_args_vector(args...)
+    t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs)
+    quote
+        $subset!($x, $(t...); skipmissing=true)
+    end
+end
+
+"""
+    @subset!(d, i...)
+
+Select row subsets in `AbstractDataFrame`s and `GroupedDataFrame`s,
+mutating the underlying data-frame in-place.
+
+### Arguments
+
+* `d` : an AbstractDataFrame or GroupedDataFrame
+* `i...` : expression for selecting rows
+
+Multiple `i` expressions are "and-ed" together.
+
+If given a `GroupedDataFrame`, `@subset!` applies transformations by
+group, and returns the parent `DataFrame`, modified in place to keep only
+the rows for which the generated values are all `true`.
+
+Inputs to `@subset!` can come in two formats: a `begin ... end` block, in which case each
+line is a separate selector, or as multiple arguments.
+For example the following two statements are equivalent:
+
+```julia
+@subset! df begin
+    :x .> 1
+    :y .< 2
+end
+```
+
+and
+
+```
+@subset!(df, :x .> 1, :y .< 2)
+```
+
+!!! note
+    `@subset!` treats `missing` values as `false` when filtering rows.
+    Unlike `DataFrames.subset!` and other boolean operations with
+    `missing`, `@subset!` will *not* error on missing values, and
+    will only keep `true` values.
+
+If an expression provided to `@subset!` begins with `@byrow`, operations
+are applied "by row" along the data frame. To avoid writing `@byrow` multiple
+times, `@subset!` also allows `@byrow` to be placed at the beginning of a block of
+operations. For example, the following two statements are equivalent.
+
+```
+@subset! df @byrow begin
+    :x > 1
+    :y < 2
+end
+```
+
+and
+
+```
+@subset! df begin
+    @byrow :x > 1
+    @byrow :y < 2
+end
+```
+
+### Examples
+
+```jldoctest
+julia> using DataFramesMeta, Statistics
+
+julia> df = DataFrame(x = 1:3, y = [2, 1, 2]);
+
+julia> globalvar = [2, 1, 0];
+
+julia> @subset!(copy(df), :x .> 1)
+2×2 DataFrame
+ Row │ x      y
+     │ Int64  Int64
+─────┼──────────────
+   1 │     2      1
+   2 │     3      2
+
+julia> @subset!(copy(df), :x .> globalvar)
+2×2 DataFrame
+ Row │ x      y
+     │ Int64  Int64
+─────┼──────────────
+   1 │     2      1
+   2 │     3      2
+
+julia> @subset! copy(df) begin
+    :x .> globalvar
+    :y .== 3
+end
+0×2 DataFrame
+
+julia> d = DataFrame(n = 1:20, x = [3, 3, 3, 3, 1, 1, 1, 2, 1, 1,
+                                    2, 1, 1, 2, 2, 2, 3, 1, 1, 2]);
+
+julia> g = groupby(d, :x);
+
+julia> @subset!(groupby(copy(d), :x), :n .> mean(:n))
+8×2 DataFrame
+ Row │ n      x
+     │ Int64  Int64
+─────┼──────────────
+   1 │    12      1
+   2 │    13      1
+   3 │    15      2
+   4 │    16      2
+   5 │    17      3
+   6 │    18      1
+   7 │    19      1
+   8 │    20      2
+
+julia> @subset! g begin
+           :n .> mean(:n)
+           :n .< 20
+       end
+7×2 DataFrame
+ Row │ n      x
+     │ Int64  Int64
+─────┼──────────────
+   1 │    12      1
+   2 │    13      1
+   3 │    15      2
+   4 │    16      2
+   5 │    17      3
+   6 │    18      1
+   7 │    19      1
+
+julia> d = DataFrame(a = [1, 2, missing], b = ["x", "y", missing]);
+
+julia> @subset!(d, :a .== 1)
+1×2 DataFrame
+│ Row │ a      │ b       │
+│     │ Int64? │ String? │
+├─────┼────────┼─────────┤
+│ 1   │ 1      │ x       │
+```
+"""
+macro subset!(x, args...)
+    esc(subset!_helper(x, args...))
+end
+
+
 ##############################################################################
 ##
 ## @orderby
diff --git a/src/parsing.jl b/src/parsing.jl
index 10835bd5..d196a824 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -64,8 +64,8 @@ end
 is_macro_head(ex, name) = false
 is_macro_head(ex::Expr, name) = ex.head == :macrocall && ex.args[1] == Symbol(name)
 
-extract_macro_flags(ex, exprflags = (;Symbol("@byrow") => Ref(false), Symbol("@skipmissing") => Ref(false))) = (ex, exprflags)
-function extract_macro_flags(ex::Expr, exprflags = (;Symbol("@byrow") => Ref(false), Symbol("@skipmissing") => Ref(false)))
+extract_macro_flags(ex, exprflags = (;Symbol("@byrow") => Ref(false),)) = (ex, exprflags)
+function extract_macro_flags(ex::Expr, exprflags = (;Symbol("@byrow") => Ref(false),))
     if ex.head == :macrocall
         macroname = ex.args[1]
         if macroname in keys(exprflags)

From 2ddee3856b8fa63bb9a41f4e642bd9886159ac86 Mon Sep 17 00:00:00 2001
From: Peter Deffebach <p.deffebach@gmail.com>
Date: Sat, 26 Jun 2021 11:47:52 -0400
Subject: [PATCH 04/13] tests

---
 test/dataframes.jl |  66 ---------------------
 test/deprecated.jl |  67 +++++++++++++++++++++
 test/runtests.jl   |   1 +
 test/subset.jl     | 143 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 211 insertions(+), 66 deletions(-)
 create mode 100644 test/subset.jl

diff --git a/test/dataframes.jl b/test/dataframes.jl
index 162ec30f..9f045eda 100644
--- a/test/dataframes.jl
+++ b/test/dataframes.jl
@@ -595,72 +595,6 @@ end
     @test @with(df, cols("A")) === df.A
 end
 
-@testset "where" begin
-    df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
-
-    x = [2, 1, 0, 0]
-
-    @test @where(df, :A .> 1) == df[(df.A .> 1) .=== true,:]
-    @test @where(df, :B .> 1) == df[df.B .> 1,:]
-    @test @where(df, :A .> x) == df[(df.A .> x) .=== true,:]
-    @test @where(df, :B .> x) ≅ df[df.B .> x,:]
-    @test @where(df, :A .> :B, :B .> mean(:B)) == DataFrame(A = 3, B = 2)
-    @test @where(df, :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
-    @test @where(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
-
-    @test @where(df, :A .> 1).A isa Vector{Union{Missing, Int}}
-
-    @test @where(df, cols(:A) .> 1) == df[(df.A .> 1) .=== true,:]
-    @test @where(df, cols(:B) .> 1) == df[df.B .> 1,:]
-    @test @where(df, cols(:A) .> x) == df[(df.A .> x) .=== true,:]
-    @test @where(df, cols(:B) .> x) ≅ df[df.B .> x,:]
-    @test @where(df, cols(:A) .> :B, cols(:B) .> mean(:B)) == DataFrame(A = 3, B = 2)
-    @test @where(df, cols(:A) .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
-    @test @where(df, cols(:A) .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
-
-    @test @where(df, :A .> 1, :A .<= 2) == DataFrame(A = 2, B = 1)
-
-    subdf = @view df[df.B .== 2, :]
-
-    @test @where(subdf, :A .== 3) == DataFrame(A = 3, B = 2)
-end
-
-@testset "where with :block" begin
-    df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
-
-    d = @where df begin
-        :A .> 1
-        :B .> 1
-    end
-    @test d ≅ @where(df, :A .> 1, :B .> 1)
-
-    d = @where df begin
-        cols(:A) .> 1
-        :B .> 1
-    end
-    @test d ≅ @where(df, :A .> 1, :B .> 1)
-
-    d = @where df begin
-        :A .> 1
-        cols(:B) .> 1
-    end
-    @test d ≅ @where(df, :A .> 1, :B .> 1)
-
-    d = @where df begin
-        begin
-            :A .> 1
-        end
-        :B .> 1
-    end
-    @test d ≅ @where(df, :A .> 1, :B .> 1)
-
-    d = @where df begin
-        :A .> 1
-        @. :B > 1
-    end
-    @test d ≅ @where(df, :A .> 1, :B .> 1)
-end
-
 @testset "orderby" begin
     df = DataFrame(
         g = [1, 1, 1, 2, 2],
diff --git a/test/deprecated.jl b/test/deprecated.jl
index e0f68b35..968fc589 100644
--- a/test/deprecated.jl
+++ b/test/deprecated.jl
@@ -77,4 +77,71 @@ const ≅ = isequal
     @test @based_on(gd, cols("new" * "_" * "column") = 2)."new_column" == [2, 2]
 end
 
+@testset "where" begin
+    df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
+
+    x = [2, 1, 0, 0]
+
+    @test @where(df, :A .> 1) == df[(df.A .> 1) .=== true,:]
+    @test @where(df, :B .> 1) == df[df.B .> 1,:]
+    @test @where(df, :A .> x) == df[(df.A .> x) .=== true,:]
+    @test @where(df, :B .> x) ≅ df[df.B .> x,:]
+    @test @where(df, :A .> :B, :B .> mean(:B)) == DataFrame(A = 3, B = 2)
+    @test @where(df, :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
+    @test @where(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
+
+    @test @where(df, :A .> 1).A isa Vector{Union{Missing, Int}}
+
+    @test @where(df, cols(:A) .> 1) == df[(df.A .> 1) .=== true,:]
+    @test @where(df, cols(:B) .> 1) == df[df.B .> 1,:]
+    @test @where(df, cols(:A) .> x) == df[(df.A .> x) .=== true,:]
+    @test @where(df, cols(:B) .> x) ≅ df[df.B .> x,:]
+    @test @where(df, cols(:A) .> :B, cols(:B) .> mean(:B)) == DataFrame(A = 3, B = 2)
+    @test @where(df, cols(:A) .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
+    @test @where(df, cols(:A) .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
+
+    @test @where(df, :A .> 1, :A .<= 2) == DataFrame(A = 2, B = 1)
+
+    subdf = @view df[df.B .== 2, :]
+
+    @test @where(subdf, :A .== 3) == DataFrame(A = 3, B = 2)
+end
+
+@testset "where with :block" begin
+    df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
+
+    d = @where df begin
+        :A .> 1
+        :B .> 1
+    end
+    @test d ≅ @where(df, :A .> 1, :B .> 1)
+
+    d = @where df begin
+        cols(:A) .> 1
+        :B .> 1
+    end
+    @test d ≅ @where(df, :A .> 1, :B .> 1)
+
+    d = @where df begin
+        :A .> 1
+        cols(:B) .> 1
+    end
+    @test d ≅ @where(df, :A .> 1, :B .> 1)
+
+    d = @where df begin
+        begin
+            :A .> 1
+        end
+        :B .> 1
+    end
+    @test d ≅ @where(df, :A .> 1, :B .> 1)
+
+    d = @where df begin
+        :A .> 1
+        @. :B > 1
+    end
+    @test d ≅ @where(df, :A .> 1, :B .> 1)
+end
+
+
 end # module
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 36e12417..3fb003b3 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -5,6 +5,7 @@ quiet = false
 my_tests = ["dataframes.jl",
             "eachrow.jl",
             "grouping.jl",
+            "subset.jl",
             "function_compilation.jl",
             "chaining.jl",
             "linqmacro.jl",
diff --git a/test/subset.jl b/test/subset.jl
new file mode 100644
index 00000000..241f6182
--- /dev/null
+++ b/test/subset.jl
@@ -0,0 +1,143 @@
+module TestSubset
+
+using Test
+using DataFrames
+using DataFramesMeta
+using Statistics
+
+const ≅ = isequal
+
+@testset "subset" begin
+    df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
+
+    x = [2, 1, 0, 0]
+
+    @test @subset(df, :A .> 1) == df[(df.A .> 1) .=== true,:]
+    @test @subset(df, :B .> 1) == df[df.B .> 1,:]
+    @test @subset(df, :A .> x) == df[(df.A .> x) .=== true,:]
+    @test @subset(df, :B .> x) ≅ df[df.B .> x,:]
+    @test @subset(df, :A .> :B, :B .> mean(:B)) == DataFrame(A = 3, B = 2)
+    @test @subset(df, :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
+    @test @subset(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
+
+    @test @subset(df, :A .> 1).A isa Vector{Union{Missing, Int}}
+
+    @test @subset(df, cols(:A) .> 1) == df[(df.A .> 1) .=== true,:]
+    @test @subset(df, cols(:B) .> 1) == df[df.B .> 1,:]
+    @test @subset(df, cols(:A) .> x) == df[(df.A .> x) .=== true,:]
+    @test @subset(df, cols(:B) .> x) ≅ df[df.B .> x,:]
+    @test @subset(df, cols(:A) .> :B, cols(:B) .> mean(:B)) == DataFrame(A = 3, B = 2)
+    @test @subset(df, cols(:A) .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
+    @test @subset(df, cols(:A) .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
+
+    @test @subset(df, :A .> 1, :A .<= 2) == DataFrame(A = 2, B = 1)
+
+    subdf = @view df[df.B .== 2, :]
+
+    @test @subset(subdf, :A .== 3) == DataFrame(A = 3, B = 2)
+end
+
+@testset "subset with :block" begin
+    df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
+
+    d = @subset df begin
+        :A .> 1
+        :B .> 1
+    end
+    @test d ≅ @subset(df, :A .> 1, :B .> 1)
+
+    d = @subset df begin
+        cols(:A) .> 1
+        :B .> 1
+    end
+    @test d ≅ @subset(df, :A .> 1, :B .> 1)
+
+    d = @subset df begin
+        :A .> 1
+        cols(:B) .> 1
+    end
+    @test d ≅ @subset(df, :A .> 1, :B .> 1)
+
+    d = @subset df begin
+        begin
+            :A .> 1
+        end
+        :B .> 1
+    end
+    @test d ≅ @subset(df, :A .> 1, :B .> 1)
+
+    d = @subset df begin
+        :A .> 1
+        @. :B > 1
+    end
+    @test d ≅ @subset(df, :A .> 1, :B .> 1)
+end
+
+
+@testset "subset!" begin
+    df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
+
+    x = [2, 1, 0, 0]
+
+    @test @subset!(copy(df), :A .> 1) == df[(df.A .> 1) .=== true,:]
+    @test @subset!(copy(df), :B .> 1) == df[df.B .> 1,:]
+    @test @subset!(copy(df), :A .> x) == df[(df.A .> x) .=== true,:]
+    @test @subset!(copy(df), :B .> x) ≅ df[df.B .> x,:]
+    @test @subset!(copy(df), :A .> :B, :B .> mean(:B)) == DataFrame(A = 3, B = 2)
+    @test @subset!(copy(df), :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
+    @test @subset!(copy(df), :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
+
+    @test @subset!(copy(df), :A .> 1).A isa Vector{Union{Missing, Int}}
+
+    @test @subset!(copy(df), cols(:A) .> 1) == df[(df.A .> 1) .=== true,:]
+    @test @subset!(copy(df), cols(:B) .> 1) == df[df.B .> 1,:]
+    @test @subset!(copy(df), cols(:A) .> x) == df[(df.A .> x) .=== true,:]
+    @test @subset!(copy(df), cols(:B) .> x) ≅ df[df.B .> x,:]
+    @test @subset!(copy(df), cols(:A) .> :B, cols(:B) .> mean(:B)) == DataFrame(A = 3, B = 2)
+    @test @subset!(copy(df), cols(:A) .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
+    @test @subset!(copy(df), cols(:A) .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
+
+    @test @subset!(copy(df), :A .> 1, :A .<= 2) == DataFrame(A = 2, B = 1)
+
+    subdf = @view df[df.B .== 2, :]
+
+    @test @subset!(copy(subdf), :A .== 3) == DataFrame(A = 3, B = 2)
+end
+
+@testset "subset! with :block" begin
+    df = DataFrame(A = [1, 2, 3, missing], B = [2, 1, 2, 1])
+
+    d = @subset! copy(df) begin
+        :A .> 1
+        :B .> 1
+    end
+    @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1)
+
+    d = @subset! copy(df) begin
+        cols(:A) .> 1
+        :B .> 1
+    end
+    @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1)
+
+    d = @subset! copy(df) begin
+        :A .> 1
+        cols(:B) .> 1
+    end
+    @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1)
+
+    d = @subset! copy(df) begin
+        begin
+            :A .> 1
+        end
+        :B .> 1
+    end
+    @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1)
+
+    d = @subset! copy(df) begin
+        :A .> 1
+        @. :B > 1
+    end
+    @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1)
+end
+
+end # module
\ No newline at end of file

From c815d1a2b660b5da9e17c298e38e57c89dfe21af Mon Sep 17 00:00:00 2001
From: Peter Deffebach <p.deffebach@gmail.com>
Date: Sat, 26 Jun 2021 11:58:49 -0400
Subject: [PATCH 05/13] update index.md

---
 docs/src/index.md | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index bf315786..d496f7ec 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -12,7 +12,7 @@ in C#.
 In addition, DataFramesMeta provides 
 
 * `@orderby`, for sorting data frames
-* `@where`, for keeping rows of a DataFrame matching a given condition
+* `@subset` and `@subset!`, for keeping rows of a DataFrame matching a given condition
 * `@by`, for grouping and combining a data frame in a single step
 * `@with`, for working with the columns of a data frame with high performance and 
   convenient syntax
@@ -96,18 +96,21 @@ gd = groupby(df, :x);
 @transform!(gd, y = 2 .* :y .* first(:y))
 ```
 
-## `@where`
+## `@subset` and `@subset!`
 
 Select row subsets. Operates on both a `DataFrame` and a `GroupedDataFrame`. 
+`@subset` always returns a freshly-allocated data frame, whereas
+`@subset!` modifies the data frame in place.
 
 ```julia
+using Statistics
 df = DataFrame(x = [1, 1, 2, 2], y = [1, 2, 101, 102]);
 gd = groupby(df, :x);
 outside_var = 1;
-@where(df, :x .> 1)
-@where(df, :x .> outside_var)
-@where(df, :x .> outside_var, :y .< 102)  # the two expressions are "and-ed"
-@where(gd, :x .> mean(:x))
+@subset(df, :x .> 1)
+@subset(df, :x .> outside_var)
+@subset(df, :x .> outside_var, :y .< 102)  # the two expressions are "and-ed"
+@subset(gd, :x .> mean(:x))
 ```
 
 ## `@combine`
@@ -300,7 +303,7 @@ The following macros accept `@byrow`:
 * `@transform` and `@transform!`, `@select`, `@select!`, and `@combine`. 
   `@byrow` can be used in the left hand side of expressions, e.g.
   `@select(df, @byrow z = :x * :y)`. 
-* `@where` and `@orderby`, with syntax of the form `@where(df, @byrow :x > :y)`
+* `@subset`, `@subset!` and `@orderby`, with syntax of the form `@subset(df, @byrow :x > :y)`
 * `@with`, where the anonymous function created by `@with` is wrapped in
   `ByRow`, as in `@with(df, @byrow :x * :y)`.
 

From f2dde1ef968c3dae582f0e6557e8348ec758d0f2 Mon Sep 17 00:00:00 2001
From: Peter Deffebach <p.deffebach@gmail.com>
Date: Sun, 27 Jun 2021 15:32:46 -0400
Subject: [PATCH 06/13] add docstring

---
 src/parsing.jl     | 15 ++++++++-------
 test/dataframes.jl |  6 +++---
 test/deprecated.jl | 15 +++++++++++++++
 test/grouping.jl   | 15 ---------------
 test/subset.jl     | 16 ++++++++++++++++
 5 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/src/parsing.jl b/src/parsing.jl
index 32ca666e..07cdf452 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -336,19 +336,20 @@ function replace_dotted!(e, membernames)
     Expr(:., x_new, y_new)
 end
 
-"""
-    create_args_vector(args...) -> vec, wrap_byrow
-
-Given multiple arguments which can be any type
-of expression-like object (`Expr`, `QuoteNode`, etc.),
-puts them into a single array, removing line numbers.
-"""
 function create_args_vector(args...)
     create_args_vector(Expr(:block, args...))
 end
 
 """
    create_args_vector(arg) -> vec, outer_flags
+
+Given an expression, return a vector of operations
+and a `NamedTuple` of the macro-flags that appear
+in the expression.
+
+If a `:block` expression, returns the `args` of
+the block as an array. If a simple expression,
+wrap the expression in a one-element vector.
 """
 function create_args_vector(arg)
     arg, outer_flags = extract_macro_flags(MacroTools.unblock(arg))
diff --git a/test/dataframes.jl b/test/dataframes.jl
index 9f045eda..84e3c770 100644
--- a/test/dataframes.jl
+++ b/test/dataframes.jl
@@ -700,17 +700,17 @@ end
 
     @test d.y == [true]
 
-    d = @where(df, @linenums_macro begin end)
+    d = @subset(df, @linenums_macro begin end)
 
     @test nrow(d) == 1
 
-    d = @where df begin
+    d = @subset df begin
         @byrow @linenums_macro_byrow begin end
     end
 
     @test nrow(d) == 1
 
-    d = @where df @byrow begin
+    d = @subset df @byrow begin
         @linenums_macro_byrow begin end
     end
 
diff --git a/test/deprecated.jl b/test/deprecated.jl
index 968fc589..0412d57a 100644
--- a/test/deprecated.jl
+++ b/test/deprecated.jl
@@ -143,5 +143,20 @@ end
     @test d ≅ @where(df, :A .> 1, :B .> 1)
 end
 
+@testset "@where with a grouped data frame" begin
+    df = DataFrame(
+        g = [1, 1, 1, 2, 2],
+        i = 1:5,
+        t = ["a", "b", "c", "c", "e"],
+        y = [:v, :w, :x, :y, :z],
+        c = [:g, :quote, :body, :transform, missing]
+    )
+
+    gd = groupby(df, :g)
+
+    @test @where(gd, :i .== first(:i)) ≅ df[[1, 4], :]
+    @test @where(gd, cols(:i) .> mean(cols(:i)), :t .== "c") ≅ df[[3], :]
+    @test @where(gd, :c .== :g) ≅ df[[], :]
+end
 
 end # module
\ No newline at end of file
diff --git a/test/grouping.jl b/test/grouping.jl
index ca7957cb..c603c2a5 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -357,19 +357,4 @@ end
 	@test @select(g, :a, @byrow t = :a ^ 2).t ≅ d.a .^ 2
 end
 
-@testset "@where with a grouped data frame" begin
-    df = DataFrame(
-        g = [1, 1, 1, 2, 2],
-        i = 1:5,
-        t = ["a", "b", "c", "c", "e"],
-        y = [:v, :w, :x, :y, :z],
-        c = [:g, :quote, :body, :transform, missing]
-    )
-
-    gd = groupby(df, :g)
-
-    @test @where(gd, :i .== first(:i)) ≅ df[[1, 4], :]
-    @test @where(gd, cols(:i) .> mean(cols(:i)), :t .== "c") ≅ df[[3], :]
-    @test @where(gd, :c .== :g) ≅ df[[], :]
-end
 end # module
diff --git a/test/subset.jl b/test/subset.jl
index 241f6182..00959dd7 100644
--- a/test/subset.jl
+++ b/test/subset.jl
@@ -140,4 +140,20 @@ end
     @test d ≅ @subset!(copy(df), :A .> 1, :B .> 1)
 end
 
+@testset "@subset with a grouped data frame" begin
+    df = DataFrame(
+        g = [1, 1, 1, 2, 2],
+        i = 1:5,
+        t = ["a", "b", "c", "c", "e"],
+        y = [:v, :w, :x, :y, :z],
+        c = [:g, :quote, :body, :transform, missing]
+    )
+
+    gd = groupby(df, :g)
+
+    @test @subset(gd, :i .== first(:i)) ≅ df[[1, 4], :]
+    @test @subset(gd, cols(:i) .> mean(cols(:i)), :t .== "c") ≅ df[[3], :]
+    @test @subset(gd, :c .== :g) ≅ df[[], :]
+end
+
 end # module
\ No newline at end of file

From 7c14a205b54ef1de00deaba452da4043888a7a77 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Sun, 27 Jun 2021 19:22:13 -0400
Subject: [PATCH 07/13] Apply suggestions from code review

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 src/macros.jl  | 4 ++--
 src/parsing.jl | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/macros.jl b/src/macros.jl
index 035b9c77..6326c287 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -472,7 +472,7 @@ and
 
 !!! note
     `@subset` treats `missing` values as `false` when filtering rows.
-    Unlike `DataFrames.subset` and other boolean operations with
+    Unlike `DataFrames.subset` and other Boolean operations with
     `missing`, `@subset` will *not* error on missing values, and
     will only keep `true` values.
 
@@ -631,7 +631,7 @@ and
 
 !!! note
     `@subset!` treats `missing` values as `false` when filtering rows.
-    Unlike `DataFrames.subset!` and other boolean operations with
+    Unlike `DataFrames.subset!` and other Boolean operations with
     `missing`, `@subset!` will *not* error on missing values, and
     will only keep `true` values.
 
diff --git a/src/parsing.jl b/src/parsing.jl
index d196a824..32ca666e 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -183,7 +183,10 @@ end
 # We need wrap_byrow as a keyword argument here in case someone
 # uses `@transform df @byrow begin ... end`, which we
 # deal with outside of this function.
-function fun_to_vec(ex::Expr; gensym_names::Bool=false, outer_flags=nothing, no_dest::Bool=false)
+function fun_to_vec(ex::Expr;
+                    gensym_names::Bool=false,
+                    outer_flags::Union{NamedTuple, Nothing}=nothing,
+                    no_dest::Bool=false)
     # classify the type of expression
     # :x # handled via dispatch
     # cols(:x) # handled as though above

From f83161f0e9b6c488c356daa4425f51dc73582ed5 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Sun, 27 Jun 2021 19:28:33 -0400
Subject: [PATCH 08/13] Apply suggestions from code review

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 docs/src/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index d496f7ec..f1139d68 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -12,7 +12,7 @@ in C#.
 In addition, DataFramesMeta provides 
 
 * `@orderby`, for sorting data frames
-* `@subset` and `@subset!`, for keeping rows of a DataFrame matching a given condition
+* `@subset` and `@subset!`, for keeping rows of a data frame matching a given condition
 * `@by`, for grouping and combining a data frame in a single step
 * `@with`, for working with the columns of a data frame with high performance and 
   convenient syntax

From 6f956dc32736f58f028519c7c4e5b1d0f76ed31f Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Sun, 27 Jun 2021 19:34:09 -0400
Subject: [PATCH 09/13] Update test/subset.jl

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 test/subset.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/subset.jl b/test/subset.jl
index 00959dd7..1f758f56 100644
--- a/test/subset.jl
+++ b/test/subset.jl
@@ -79,7 +79,9 @@ end
 
     x = [2, 1, 0, 0]
 
-    @test @subset!(copy(df), :A .> 1) == df[(df.A .> 1) .=== true,:]
+    df2 = copy(df)
+    @test @subset!(df2, :A .> 1) === df2
+    @test df2 == df[(df.A .> 1) .=== true,:]
     @test @subset!(copy(df), :B .> 1) == df[df.B .> 1,:]
     @test @subset!(copy(df), :A .> x) == df[(df.A .> x) .=== true,:]
     @test @subset!(copy(df), :B .> x) ≅ df[df.B .> x,:]
@@ -156,4 +158,4 @@ end
     @test @subset(gd, :c .== :g) ≅ df[[], :]
 end
 
-end # module
\ No newline at end of file
+end # module

From a75d81770aa03936abaa4ef602731a7f91bca841 Mon Sep 17 00:00:00 2001
From: Peter Deffebach <p.deffebach@gmail.com>
Date: Mon, 28 Jun 2021 14:36:27 -0400
Subject: [PATCH 10/13] switching

---
 test/subset.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/subset.jl b/test/subset.jl
index 1f758f56..03ed5ebe 100644
--- a/test/subset.jl
+++ b/test/subset.jl
@@ -82,6 +82,7 @@ end
     df2 = copy(df)
     @test @subset!(df2, :A .> 1) === df2
     @test df2 == df[(df.A .> 1) .=== true,:]
+
     @test @subset!(copy(df), :B .> 1) == df[df.B .> 1,:]
     @test @subset!(copy(df), :A .> x) == df[(df.A .> x) .=== true,:]
     @test @subset!(copy(df), :B .> x) ≅ df[df.B .> x,:]

From 7991b8c1f0329d3920fb8913fb011f81c94ed0f7 Mon Sep 17 00:00:00 2001
From: Peter Deffebach <p.deffebach@gmail.com>
Date: Mon, 28 Jun 2021 14:51:15 -0400
Subject: [PATCH 11/13] @subset! with gd

---
 test/subset.jl | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/test/subset.jl b/test/subset.jl
index 03ed5ebe..d501be18 100644
--- a/test/subset.jl
+++ b/test/subset.jl
@@ -159,4 +159,18 @@ end
     @test @subset(gd, :c .== :g) ≅ df[[], :]
 end
 
+@testset "@subset! with a grouped data frame" begin
+    df = DataFrame(
+        g = [1, 1, 1, 2, 2],
+        i = 1:5,
+        t = ["a", "b", "c", "c", "e"],
+        y = [:v, :w, :x, :y, :z],
+        c = [:g, :quote, :body, :transform, missing]
+    )
+
+    @test @subset!(groupby(copy(df), :g), :i .== first(:i)) ≅ df[[1, 4], :]
+    @test @subset!(groupby(copy(df), :g), cols(:i) .> mean(cols(:i)), :t .== "c") ≅ df[[3], :]
+    @test @subset!(groupby(copy(df), :g), :c .== :g) ≅ df[[], :]
+end
+
 end # module

From fa09626e5ed6345b194b1d81746961f3a049af45 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Mon, 28 Jun 2021 18:47:53 -0400
Subject: [PATCH 12/13] Update src/parsing.jl

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 src/parsing.jl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/parsing.jl b/src/parsing.jl
index 07cdf452..ab6bdf6f 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -308,7 +308,10 @@ function fun_to_vec(ex::Expr;
 
     throw(ArgumentError("This path should not be reached"))
 end
-fun_to_vec(ex::QuoteNode; no_dest::Bool=false, gensym_names::Bool=false, outer_flags=nothing) = ex
+fun_to_vec(ex::QuoteNode;
+           no_dest::Bool=false,
+           gensym_names::Bool=false,
+           outer_flags::Union{NamedTuple, Nothing}=nothing) = ex
 
 function make_source_concrete(x::AbstractVector)
     if isempty(x) || isconcretetype(eltype(x))

From 1c485c8c25351f7846c91cf4fbfe35afdded2a69 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Mon, 28 Jun 2021 18:48:00 -0400
Subject: [PATCH 13/13] Update src/parsing.jl

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 src/parsing.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/parsing.jl b/src/parsing.jl
index ab6bdf6f..e5a4427d 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -350,7 +350,7 @@ Given an expression return a vector of operations
 and a `NamedTuple` of the macro-flags that appear
 in the expression.
 
-If a `:block` expression, returns the `args` of
+If a `:block` expression, return the `args` of
 the block as an array. If a simple expression,
 wrap the expression in a one-element vector.
 """