Skip to content

Commit

Permalink
other tinkerings
Browse files Browse the repository at this point in the history
  • Loading branch information
palday committed Aug 14, 2024
1 parent 20f7418 commit d0311f7
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 11 deletions.
2 changes: 1 addition & 1 deletion issues/780/Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

julia_version = "1.10.4"
manifest_format = "2.0"
project_hash = "cab816fabd6dc09bb3070438342b0e3a876786b9"
project_hash = "ee9452c19ab51ba0816b484826fde7c9c38fea0f"

[[deps.Adapt]]
deps = ["LinearAlgebra", "Requires"]
Expand Down
2 changes: 2 additions & 0 deletions issues/780/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
Scratch = "6c6a2e73-6563-6170-7368-637461726353"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
70 changes: 60 additions & 10 deletions issues/780/issue.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,30 +28,28 @@ module IssueData
return extract_csv(zipfile, "testdataforjulia_bothcase.csv")
end

transform!(data, :individual_local_identifier => ByRow(string); renamecols=false),
Arrow.write(path, data)
return data
end

clear_scratchspaces!() = Scratch.clear_scratchspaces!(@__MODULE__)
clear_scratchspaces!() = rm.(readdir(CACHE[]))

function __init__()
CACHE[] = @get_scratch!("data")
CACHE[] = get_scratch!(Main, "780")
return nothing
end
end

using DataFrames
using .IssueData
using LinearAlgebra
using MixedModels
using Statistics

data = get_data()
m0form = @formula(case ~ 0 + Analysisclass + (1|cropyear/individual_local_identifier))
# works
model_fast = fit(MixedModel, m0form, data, Bernoulli();
wts=float.(data.weights),
contrasts= Dict(:Analysisclass => DummyCoding(; base="aRice_Wet_day")),
fast=true,
progress=true,
verbose=false)

# fails
model = fit(MixedModel, m0form, data, Bernoulli();
wts=float.(data.weights),
Expand All @@ -60,11 +58,63 @@ model = fit(MixedModel, m0form, data, Bernoulli();
progress=true,
verbose=false)

# works
# works, non singular, FE look okay
model = fit(MixedModel, m0form, data, Bernoulli();
wts=float.(data.weights),
contrasts= Dict(:Analysisclass => DummyCoding(; base="aRice_Wet_day")),
init_from_lmm=[],
fast=false,
progress=true,
verbose=false)

# works, singular and has questionable FE
m0fast = fit(MixedModel, m0form, data, Bernoulli();
wts=float.(data.weights),
contrasts= Dict(:Analysisclass => DummyCoding(; base="aRice_Wet_day")),
fast=true,
progress=true,
verbose=false)

# this model is singular in cropyear, but it looks like there is proper nesting:
groups = select(data, :cropyear, :individual_local_identifier)
unique(groups)
unique(groups, :cropyear)
unique(groups, :individual_local_identifier)

# the estimates for `Nonhabitat_Wet_day` and `Nonhabitat_Wet_night` are identical,
# which seems suspicious, and they have very large standard errors. I think
# this hints at undetected collinearity.
X = modelmatrix(m0fast)
rank(X) # =12
idx = findall(coefnames(m0fast)) do x
return x in ("Analysisclass: Nonhabitat_Wet_day", "Analysisclass: Nonhabitat_Wet_night")
end

cols = X[:, idx]
# AHA 98% of values are identical because these measurements are very sparse
mean(cols[:, 1] .== cols[:, 2])
mean(cols[:, 1])
mean(cols[:, 2])

counts = sort!(combine(groupby(data, :Analysisclass), nrow => :n), :n)
transform!(counts, :n => ByRow(x -> round(100x / sum(counts.n); digits=1)) => "%")

# let's try reparameterizing

transform!(data, :Analysisclass => ByRow(ac -> NamedTuple{(:habitat, :wet, :time)}(split(ac, "_"))) => AsTable)

m1form = @formula(case ~ 0 + habitat * wet * time + (1|cropyear & individual_local_identifier))

# fails really fast with a PosDefException
m1fast = fit(MixedModel, m1form, data, Bernoulli();
wts=float.(data.weights),
fast=true,
progress=true,
verbose=false)

# still fails
m1 = fit(MixedModel, m1form, data, Bernoulli();
wts=float.(data.weights),
fast=false,
progress=true,
verbose=false)

0 comments on commit d0311f7

Please sign in to comment.