Skip to content

Commit

Permalink
[WIP] prelim version of the density geometry revamp
Browse files Browse the repository at this point in the history
working on #1152.

Note: This is a WIP; it currently completely breaks `Geom.density`, and
`Geom.violin` has several regressions.
  • Loading branch information
tlnagy committed May 28, 2018
1 parent ce98466 commit 006c437
Show file tree
Hide file tree
Showing 4 changed files with 265 additions and 113 deletions.
71 changes: 71 additions & 0 deletions src/aesthetics.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using IterTools

const NumericalOrCategoricalAesthetic =
Union{(Void), Vector, DataArray, IndirectArray}

Expand Down Expand Up @@ -413,3 +415,72 @@ function inherit!(a::Aesthetics, b::Aesthetics;
end
nothing
end

"""
Given aesthetics to group with, `by`, and an aesthetic to group `togroupvar`
this function constructs a dictionary that maps each given combination of the
`by` aesthetics to the positions which they apply to. Thus the output is a
dictionary of tuples of each unique combination of `by` mapped to a boolean
array of length `n` where `n` is the length of the aesthetics (they have to all
have the same length). If the provided aesthetics are missing, a placeholder
`nothing` is returned instead of the unique value.
## Examples
```jldoctest
aes = Gadfly.Aesthetics()
aes.x = repeat([1, 2], inner=3)
aes.y = collect(1:6)
groupby(aes, [:x, :color], :y)
# output
Dict((2, nothing)=>Bool[false, false, false, true, true, true],(1, nothing)=>Bool[true, true, true, false, false, false])
```
```jldoctest
aes = Gadfly.Aesthetics()
aes.x = repeat([:a, :b], inner=2)
aes.y = collect(1:4)
aes.color = repeat([colorant"red", colorant"blue"], inner=2)
groupby(aes, [:x, :color], :y)
# output
Dict((:a, RGB{N0f8}(1.0,0.0,0.0))=>Bool[true, true, false, false],(:b, RGB{N0f8}(0.0,0.0,1.0))=>Bool[false, false, true, true])
```
"""
function groupby(aes::Gadfly.Aesthetics, by::Vector{Symbol}, togroupvar::Symbol)
    # The aesthetic being grouped fixes the length of every mask, so it has to
    # be present; fail with a clear message instead of a `length(nothing)`
    # MethodError further down.
    target = getfield(aes, togroupvar)
    target === nothing &&
        throw(ArgumentError("$togroupvar aesthetic must be defined to group it"))
    n = length(target)

    # Fetch each `by` aesthetic once; an undefined aesthetic is `nothing`.
    # Identity (`===`) is used so the check never dispatches to an element-wise
    # or container-specific `==`.
    columns = Any[getfield(aes, b) for b in by]
    isconcrete = Bool[col !== nothing for col in columns]

    # Element types of the key tuple: `typeof(nothing)` for placeholders (valid
    # whatever the type of `nothing` is called in the running Julia version),
    # otherwise the aesthetic's element type. The container is a `Vector{Type}`
    # because an element type may be a `Union` or `UnionAll`, which cannot be
    # stored in the `Vector{DataType}` that `fill(SomeType, n)` would create.
    types = Type[typeof(nothing) for _ in by]
    for i in eachindex(by)
        isconcrete[i] || continue
        types[i] = eltype(columns[i])
        @assert n == length(columns[i]) "$togroupvar and $(by[i]) aesthetics must have same length"
    end

    grouped = Dict{Tuple{types...}, Vector{Bool}}()

    # Candidate values for each `by` aesthetic; a lone `nothing` stands in for
    # an undefined aesthetic so that it still occupies a slot in every key.
    opt = [isconcrete[i] ? unique(columns[i]) : [nothing] for i in eachindex(by)]

    # The approach is to identify positions where multiple `by` aesthetics
    # overlap. Every position is first assumed to belong to the combination and
    # the mask is then whittled down one aesthetic at a time.
    for combo in product(opt...)
        belongs = fill(true, n)
        for i in eachindex(by)
            # placeholder slot: this aesthetic places no constraint on the mask
            combo[i] === nothing && continue
            belongs .&= columns[i] .== combo[i]
        end
        # With several `by` variables a combination may never occur in the
        # data; only combinations that match at least one position are kept.
        any(belongs) && (grouped[combo] = belongs)
    end
    return grouped
end
70 changes: 52 additions & 18 deletions src/geom/density.jl
Original file line number Diff line number Diff line change
@@ -1,36 +1,70 @@
# Geometry element for density plots. It stores the statistic to apply together
# with the `order` and `tag` values supplied at construction.
struct DensityGeometry <: Gadfly.GeometryElement
# Statistic associated with this geometry; the keyword constructor builds it
# from the remaining keyword arguments via `Gadfly.Stat.DensityStatistic`.
stat::Gadfly.StatisticElement
order::Int
tag::Symbol
end

# Keyword constructor: `order` and `tag` are stored on the geometry, every other
# keyword argument is forwarded to `Gadfly.Stat.DensityStatistic`.
function DensityGeometry(; order=1, tag=empty_tag, kwargs...)
    stat = Gadfly.Stat.DensityStatistic(; kwargs...)
    return DensityGeometry(stat, order, tag)
end

# Convenience constructor: wrap an already-built statistic, with optional
# `order` and `tag` keywords.
DensityGeometry(stat; order=1, tag=empty_tag) = DensityGeometry(stat, order, tag)

# Lower-case alias for the geometry type (presumably what is exposed as
# `Geom.density` -- confirm against the module's exports).
const density = DensityGeometry

# This geometry declares no element aesthetics (empty `Symbol` vector).
element_aesthetics(::DensityGeometry) = Symbol[]
# NOTE(review): this passes the stored statistic positionally to
# `DensityStatistic`, while the only call form visible in this file is the
# keyword one. Confirm a matching method exists, or whether `geom.stat` should
# be returned directly.
default_statistic(geom::DensityGeometry) = Gadfly.Stat.DensityStatistic(geom.stat)

# Geometry element for violin plots. It stores the statistic to apply, a
# `split` flag, and the `order` and `tag` values supplied at construction.
struct ViolinGeometry <: Gadfly.GeometryElement
# Statistic associated with this geometry; the keyword constructor builds it
# via `Gadfly.Stat.DensityStatistic`.
stat::Gadfly.StatisticElement
# When true, `render` draws only one side of the violin per colour group and
# rejects data with more than two colours.
split::Bool
order::Int
tag::Symbol
end
# NOTE(review): this two-argument form matches the old two-field struct and
# appears to be the removed side of the diff. The struct now has four fields,
# and the definition below has the same keyword-only signature, so it replaces
# this method.
ViolinGeometry(; order=1, tag=empty_tag) = ViolinGeometry(order, tag)
# Keyword constructor: `order`, `tag` and `split` are stored on the geometry;
# every other keyword argument is forwarded to `Gadfly.Stat.DensityStatistic`.
function ViolinGeometry(; order=1, tag=empty_tag, split=false, kwargs...)
ViolinGeometry(Gadfly.Stat.DensityStatistic(; kwargs...), split, order, tag)
end

# Lower-case alias for the geometry type (presumably what is exposed as
# `Geom.violin` -- confirm against the module's exports).
const violin = ViolinGeometry

# Aesthetics this geometry declares: x, y and color.
element_aesthetics(::ViolinGeometry) = [:x, :y, :color]

# NOTE(review): the next two definitions share one signature, so the second
# replaces the first. The `Gadfly.Stat.violin()` line appears to be the removed
# side of the diff.
default_statistic(::ViolinGeometry) = Gadfly.Stat.violin()
# NOTE(review): passes the stored statistic positionally to `DensityStatistic`;
# confirm a matching method exists, or whether `geom.stat` should be returned
# directly.
default_statistic(geom::ViolinGeometry) = Gadfly.Stat.DensityStatistic(geom.stat)

function render(geom::ViolinGeometry, theme::Gadfly.Theme, aes::Gadfly.Aesthetics)
# TODO: What should we do with the color aesthetic?

Gadfly.assert_aesthetics_defined("Geom.violin", aes, :y, :width)
Gadfly.assert_aesthetics_equal_length("Geom.violin", aes, :y, :width)

default_aes = Gadfly.Aesthetics()
default_aes.color = fill(theme.default_color, length(aes.y))
aes = Gadfly.inherit(aes, default_aes)

# Group y, width and color by x
ux = unique(aes.x)
grouped_color = Dict(x => first(aes.color[aes.x.==x]) for x in ux)
grouped_y = Dict(x => aes.y[aes.x.==x] for x in ux)
grouped_width = Dict(x => aes.width[aes.x.==x] for x in ux)

kgy = keys(grouped_y)
violins = [vcat([(x - w/2, y) for (y, w) in zip(grouped_y[x], grouped_width[x])],
reverse!([(x + w/2, y) for (y, w) in zip(grouped_y[x], grouped_width[x])]))
for x in kgy]
colors = [grouped_color[x] for x in kgy]
grouped_data = Gadfly.groupby(aes, [:x, :color], :y)
violins = Array{NTuple{2, Float64}}[]

colors = []
(aes.color == nothing) && (aes.color = fill(theme.default_color, length(aes.x)))
color_opts = unique(aes.color)
if geom.split && length(color_opts) > 2
error("Split violins require 2 colors, not more")
end

for (keys, belongs) in grouped_data
x, color = keys
ys = aes.y[belongs]
ws = aes.width[belongs]

if geom.split
pos = findfirst(color_opts, color)
if pos == 1
push!(violins, [(x - w/2, y) for (y, w) in zip(ys, ws)])
else
push!(violins, reverse!([(x + w/2, y) for (y, w) in zip(ys, ws)]))
end
push!(colors, color)
else
push!(violins, vcat([(x - w/2, y) for (y, w) in zip(ys, ws)],
reverse!([(x + w/2, y) for (y, w) in zip(ys, ws)])))
push!(colors, color != nothing ? color : theme.default_color)
end
end

ctx = context(order=geom.order)
compose!(ctx, Compose.polygon(violins, geom.tag), fill(colors))
Expand Down
3 changes: 0 additions & 3 deletions src/geom/line.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ end
# any of the others will work with `preserve_order=true` right now
# Line geometry that keeps the points in the order they are given.
function path()
    return LineGeometry(preserve_order=true)
end

# Line geometry driven by `Gadfly.Stat.density` with the given `bandwidth`.
# NOTE(review): the diff header for this file reports 0 additions and 3
# deletions, so this definition appears to be the removed side of the commit;
# `density` is bound to `DensityGeometry` in src/geom/density.jl instead.
density(; bandwidth::Real=-Inf) =
LineGeometry(Gadfly.Stat.density(bandwidth=bandwidth))

# Line geometry driven by `Gadfly.Stat.density2d`. `bandwidth` and `levels` are
# forwarded to the statistic, and the geometry preserves point order.
function density2d(; bandwidth::Tuple{Real,Real}=(-Inf,-Inf), levels=15)
    stat = Gadfly.Stat.density2d(bandwidth=bandwidth, levels=levels)
    return LineGeometry(stat; preserve_order=true)
end

Expand Down
Loading

0 comments on commit 006c437

Please sign in to comment.