Skip to content

Commit

Permalink
Fix tests (#245)
Browse files Browse the repository at this point in the history
  • Loading branch information
nilshg authored Oct 22, 2023
1 parent 6d23721 commit 4c5e187
Show file tree
Hide file tree
Showing 2 changed files with 178 additions and 64 deletions.
49 changes: 40 additions & 9 deletions src/FixedEffectModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,31 @@ end

# predict, residuals, modelresponse

# Utility functions for checking whether FE/continuous interactions are in formula
# These are currently not supported in predict
function is_cont_fe_int(x)
x isa InteractionTerm || return false
any(x -> isa(x, Term), x.terms) && any(x -> isa(x, FunctionTerm{typeof(fe), Vector{Term}}), x.terms)
end

# Does the formula have InteractionTerms?
function has_cont_fe_interaction(x::FormulaTerm)
if x.rhs isa Term # only one term
is_cont_fe_int(x)
elseif hasfield(typeof(x.rhs), :lhs) # Is an IV term
false # Is this correct?
else
any(is_cont_fe_int, x.rhs)
end
end

function StatsAPI.predict(m::FixedEffectModel, data)
Tables.istable(data) ||
throw(ArgumentError("expected second argument to be a Table, got $(typeof(data))"))
has_fe(m) &&
throw("To predict for a model with high-dimensional fixed effects, run `reg` with the option save = true, and then access predicted values using `fe().")

has_cont_fe_interaction(m.formula) &&
throw(ArgumentError("Interaction of fixed effect and continuous variable detected in formula; this is currently not supported in `predict`"))

cdata = StatsModels.columntable(data)
cols, nonmissings = StatsModels.missing_omit(cdata, m.formula_schema.rhs)
Xnew = modelmatrix(m.formula_schema, cols)
Expand All @@ -130,13 +149,25 @@ function StatsAPI.predict(m::FixedEffectModel, data)
end

# Join FE estimates onto data and sum row-wise
# This code does not work propertly with missing or with interacted fixed effect, so deleted
#if has_fe(m)
# df = DataFrame(t; copycols = false)
# fes = leftjoin(select(df, m.fekeys), unique(m.fe); on = m.fekeys, makeunique = true, #matchmissing = :equal)
# fes = combine(fes, AsTable(Not(m.fekeys)) => sum)
# out[nonmissings] .+= fes[nonmissings, 1]
#end
# This does not account for FEs interacted with continuous variables - to be implemented
if has_fe(m)
nrow(fe(m)) > 0 || throw(ArgumentError("Model has no estimated fixed effects. To store estimates of fixed effects, run `reg` the option save = :fe"))

df = DataFrame(data; copycols = false)
fes = leftjoin(select(df, m.fekeys), dropmissing(unique(m.fe)); on = m.fekeys,
makeunique = true, matchmissing = :equal, order = :left)
fes = combine(fes, AsTable(Not(m.fekeys)) => sum)

if any(ismissing, Matrix(select(df, m.fekeys))) || any(ismissing, Matrix(fes))
out = allowmissing(out)
end

out[nonmissings] .+= fes[nonmissings, 1]

if any(.!nonmissings)
out[.!nonmissings] .= missing
end
end

return out
end
Expand Down
193 changes: 138 additions & 55 deletions test/predict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ using FixedEffectModels, DataFrames, CategoricalArrays, CSV, Test
residuals(result, df)
@test responsename(result) == "Sales"


model = @formula Sales ~ CPI + (Price ~ Pimin)
result = reg(df, model)
coeftable(result)
Expand All @@ -38,62 +37,147 @@ using FixedEffectModels, DataFrames, CategoricalArrays, CSV, Test
show(result)
end

@testset "predict" begin

df = DataFrame(CSV.File(joinpath(dirname(pathof(FixedEffectModels)), "../dataset/Cigar.csv")))
df.StateC = categorical(df.State)

model = @formula Sales ~ Price + StateC
result = reg(df, model)
@test predict(result, df)[1] 115.9849874

#model = @formula Sales ~ Price + fe(State)
#result = reg(df, model, save = :fe)
#@test predict(result)[1] ≈ 115.9849874

model = @formula Sales ~ Price * Pop + StateC
result = reg(df, model)
@test predict(result, df)[1] 115.643985352

#model = @formula Sales ~ Price * Pop + fe(State)
#result = reg(df, model, save = :fe)
#@test predict(result, df)[1] ≈ 115.643985352

model = @formula Sales ~ Price + Pop + Price & Pop + StateC
result = reg(df, model)
@test predict(result, df)[1] 115.643985352

#model = @formula Sales ~ Price + Pop + Price & Pop + fe(State)
#result = reg(df, model, save = :fe)
#@test predict(result, df)[1] ≈ 115.643985352





# Tests for predict method
# Test that predicting from model without saved FE test throws
model = @formula Sales ~ Price + fe(State)
result = reg(df, model)
@test_throws "No estimates for fixed effects found. Fixed effects need to be estimated using the option save = :fe or :all for prediction to work." predict(result, df)

# Test basic functionality - adding 1 to price should increase prediction by coef
#model = @formula Sales ~ Price + fe(State)
#result = reg(df, model, save = :fe)
#x = predict(result, DataFrame(Price = [1.0, 2.0], State = [1, 1]))
#@test last(x) - first(x) ≈ only(result.coef)
@testset "Predict" begin
# Simple - one binary FE
df = DataFrame(x = rand(100), g = rand(["a", "b"], 100))
df.y = 1.0 .+ 0.5 .* df.x .+ (df.g .== "b")
m = reg(df, @formula(y ~ x + fe(g)); save = :fe)
pred = predict(m, df)
@test pred df.y

# One group only
df = DataFrame(x = rand(100), g = "a")
df.y = 1.0 .+ 0.5 .* df.x
m = reg(df, @formula(y ~ x + fe(g)); save = :fe)
pred = predict(m, df)
@test pred df.y

# Two groups and predict df has a level that's missing from model
df = DataFrame(x = rand(100), g = rand(["a", "b"], 100))
df.y = 1.0 .+ 0.5 .* df.x .+ (df.g .== "b")
m = reg(df, @formula(y ~ x + fe(g)); save = :fe)
pred = predict(m, DataFrame(x = [1.0, 2.0], g = ["a", "c"]))
@test ismissing(pred[2])

# Two groups + missing observation of FE
df = DataFrame(x = rand(100), g = [missing; rand(["a", "b"], 99)])
df.y = 1.0 .+ 0.5 .* df.x .+ isequal.(df.g, "b")
m = reg(df, @formula(y ~ x + fe(g)), save = :fe)
pred = predict(m, df)
@test pred isa Vector{Union{Missing, Float64}}
@test ismissing(pred[1])
@test pred[2:end] df.y[2:end]

# Two groups + missing observation of non-FE
df = DataFrame(x = [missing; rand(99)], g = rand(["a", "b"], 100))
df.y = 1.0 .+ 0.5 .* df.x .+ isequal.(df.g, "b")
m = reg(df, @formula(y ~ x + fe(g)), save = :fe)
pred = predict(m, df)
@test pred isa Vector{Union{Missing, Float64}}
@test ismissing(pred[1])
@test pred[2] df.y[2]

# Two groups + two FEs
df = DataFrame(x = rand(100), g1 = rand(["a", "b"], 100), g2 = rand(["c", "d"], 100))
df.y = 1.0 .+ 0.5 .* df.x .+ isequal.(df.g1, "b") .+ (df.g2 .== "d") * 2
m = reg(df, @formula(y ~ x + fe(g1) + fe(g2)); save = :fe)
pred = predict(m, df)
@test pred isa Vector{Float64}
@test pred df.y

# Two groups + two FEs, missing one FE
df = DataFrame(x = rand(100), g1 = rand(["a", "b"], 100), g2 = [missing; rand(["c", "d"], 99)])
df.y = 1.0 .+ 0.5 .* df.x .+ isequal.(df.g1, "b") .+ isequal.(df.g2,"d") * 2
m = reg(df, @formula(y ~ x + fe(g1) + fe(g2)); save = :fe)
pred = predict(m, df)
@test pred isa Vector{Union{Missing, Float64}}
@test ismissing(pred[1])
@test pred[2:end] df.y[2:end]

# Three FEs, "middle" one missing
df = DataFrame(x = rand(100), g1 = rand(["a", "b"], 100), g2 = [missing; rand(["c", "d"], 99)],
g3 = rand(["e", "f"], 100))
df.y = 1.0 .+ 0.5 .* df.x .+ isequal.(df.g1, "b") .+ isequal.(df.g2, "d") * 2 .+
isequal.(df.g3, "e")
m = reg(df, @formula(y ~ x + fe(g1) + fe(g2) + fe(g3)); save = :fe)
pred = predict(m, df)
@test pred isa Vector{Union{Missing, Float64}}
@test ismissing(pred[1])
@test pred[2:end] df.y[2:end]

# Interactive FE
df = DataFrame(x = rand(100), g1 = rand(["a", "b"], 100), g2 = rand(["c", "d"], 100))
df.y = 1.0 .+ 0.5 .* df.x .+ (df.g1 .== "b") .+ (df.g1 .== "b" .&& df.g2 .== "d")
m = reg(df, @formula(y ~ x + fe(g1)&fe(g2)); save = :fe)
pred = predict(m, df)
@test pred isa Vector{Float64}
@test pred df.y

# Interactive FE + missing
df = DataFrame(x = rand(100), g1 = rand(["a", "b"], 100), g2 = [missing; rand(["c", "d"], 99)])
df.y = 1.0 .+ 0.5 .* df.x .+ (df.g1 .== "b") .+ (isequal.(df.g1, "b") .&& isequal.(df.g2, "d"))
m6 = reg(df, @formula(y ~ x + fe(g1)&fe(g2)); save = :fe)
pred = predict(m, df)
@test pred isa Vector{Union{Missing, Float64}}
@test length(pred) == nrow(df)
@test ismissing(pred[1])
@test pred[2:end] df.y[2:end]

# Interaction with continuous variable
df = DataFrame(x = rand(100), g = rand(["a", "b"], 100), z = rand(100))
df.y = 1.0 .+ 0.5 .* df.x .+ 2.0 .* (df.g .== "b") .* df.z
m = reg(df, @formula(y ~ x + fe(g)&z); save = :fe)
@test_throws ArgumentError pred = predict(m, df)
#@test pred ≈ df.y # Once implemented

# Interaction with continuous variable, FE missing
df = DataFrame(x = rand(100), g = [missing; rand(["a", "b"], 99)], z = rand(100))
df.y = 1.0 .+ 0.5 .* df.x .+ 2.0 .* (df.g .== "b") .* df.z
m = reg(df, @formula(y ~ x + fe(g)&z); save = :fe)
@test_throws ArgumentError pred = predict(m, df)
#@test pred ≈ df.y

# Interaction with continuous variable, cont var missing
df = DataFrame(x = rand(100), g = rand(["a", "b"], 100), z = [missing; rand(99)])
df.y = 1.0 .+ 0.5 .* df.x .+ 2.0 .* (df.g .== "b") .* df.z
m = reg(df, @formula(y ~ x + fe(g)&z); save = :fe)
@test_throws ArgumentError pred = predict(m, df)
#@test pred ≈ df.y

# Regular FE + another FE interacted with continuous variable
df = DataFrame(x = rand(100), g1 = rand(["a", "b"], 100), g2 = rand(["c", "d"], 100), z = rand(100))
df.y = 1.0 .+ 0.5 .* df.x .+ 2.0 .* (df.g2 .== "b") .* df.z .+ 3.0 .* (df.g1 .== "b")
m = reg(df, @formula(y ~ x + fe(g1) + fe(g2)&z); save = :fe)
@test_throws ArgumentError pred = predict(m, df)
#@test pred ≈ df.y

# Two continuous/FE interactions
m = reg(df, @formula(y ~ x + fe(g1) + fe(g2)&z + fe(g1)&x); save = :fe)
@test_throws ArgumentError pred = predict(m, df)

# Regular FE + interacted FE + FE/continuous interaction
df = DataFrame(x = rand(100), g1 = rand(["a", "b"], 100), g2 = rand(["c", "d"], 100),
g3 = rand(["e", "f"], 100), g4 = rand(["g", "h"], 100), z = rand(100))
df.y = 1.0 .+ 0.5 .* df.x .+ 2.0 .* (df.g1 .== "b") .+ 3.0 .* (df.g2 .== "d") .* (df.g3 .== "f") .+
4.0 .* (df.g4 .== "h") .* df.z
m = reg(df, @formula(y ~ x + fe(g1) + fe(g2)&fe(g3) + fe(g4)&z))
@test_throws ArgumentError pred = predict(m, df)
end

# Missing variables in covariates should yield missing prediction
#x = predict(result, DataFrame(Price = [1.0, missing], State = [1, 1]))
#@test ismissing(last(x))
@testset "Continuous/FE detection" begin
# Regular interaction is fine as handled by StatsModels
@test FixedEffectModels.has_cont_fe_interaction(@formula(y ~ x + y&z)) == false

# FE/FE interaction also works
@test FixedEffectModels.has_cont_fe_interaction(@formula(y ~ x + fe(y)&fe(z))) == false

# Missing variables in fixed effects should yield missing prediction
#x = predict(result, DataFrame(Price = [1.0, 2.0], State = [1, missing]))
#@test ismissing(last(x))
# Interaction of FEs with continuous variable requires special handling, currently not implemented
@test FixedEffectModels.has_cont_fe_interaction(@formula(y ~ x + fe(y)&z)) == true
@test FixedEffectModels.has_cont_fe_interaction(@formula(y ~ x + y&fe(z))) == true
@test FixedEffectModels.has_cont_fe_interaction(@formula(y ~ x + fe(y)&fe(z)&a)) == true

# Fixed effect levels not in the estimation data should yield missing prediction
#x = predict(result, DataFrame(Price = [1.0, 2.0], State = [1, 111]))
#@test ismissing(last(x))
# Interaction of continuous with non-FE function term again handled by StatsModels
@test FixedEffectModels.has_cont_fe_interaction(@formula(y ~ x + y^2&z)) == false
end


Expand Down Expand Up @@ -177,7 +261,6 @@ end
result = reg(df, model, save = :fe)
@test "fe_State" names(fe(result))


# iv recategorized
df.Pimin2 = df.Pimin
m = @formula Sales ~ (Pimin2 + Price ~ NDI + Pimin)
Expand Down

0 comments on commit 4c5e187

Please sign in to comment.