Skip to content

Commit

Permalink
Allow Shapefile to read .zip files (#113)
Browse files Browse the repository at this point in the history
* Allow Shapefile to read `.zip` files

We should subscribe to the FileIO interface so we can deal with streaming files as well...

Co-authored-by: David Gleich <[email protected]>

* Add compat for ZipFile

* Add a test

* Fix tests

* Fix Makie ambiguity in convert_arguments definition

* Switch from download to RemoteFile in tests

* Increment minor version, but this is technically breaking...do we want that?

* Update ShapefileMakieExt.jl

---------

Co-authored-by: David Gleich <[email protected]>
  • Loading branch information
asinghvi17 and dgleich authored Apr 27, 2024
1 parent a9acd12 commit 35c8d42
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 15 deletions.
22 changes: 13 additions & 9 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Shapefile"
uuid = "8e980c4a-a4fe-5da2-b3a7-4b4b0353a2f4"
license = "MIT"
version = "0.12.2"
version = "0.13.0"

[deps]
DBFTables = "75c7ada1-017a-5fb6-b8c7-2125ff2d6c93"
Expand All @@ -14,32 +14,36 @@ OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[weakdeps]
Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"

[extensions]
ShapefileMakieExt = "Makie"
ShapefileZipFileExt = "ZipFile"

[compat]
DBFTables = "1.2"
Extents = "0.1"
GeoFormatTypes = "0.4"
GeoInterface = "1.0"
GeoInterfaceMakie = "0.1"
GeoInterfaceRecipes = "1.0"
Makie = "0.20"
Makie = "0.20, 0.21"
OrderedCollections = "1"
RecipesBase = "1"
Tables = "0.2, 1"
ZipFile = "0.9, 0.10"
julia = "1.9"

[weakdeps]
Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"

[extensions]
ShapefileMakieExt = "Makie"

[extras]
ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
RemoteFiles = "cbe49d4c-5af1-5b60-bb70-0a60aa018e1b"
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["ArchGDAL", "DataFrames", "Makie", "Plots", "RemoteFiles", "Test"]
test = ["ArchGDAL", "DataFrames", "Makie", "Plots", "RemoteFiles", "ZipFile", "Test"]
39 changes: 39 additions & 0 deletions ext/ShapefileZipFileExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
module ShapefileZipFileExt

import ZipFile, Shapefile
import Shapefile: _read_shp_from_zipfile

"""
    _read_shp_from_zipfile(zipfile)

Read the shapefile stored inside the zip archive `zipfile` and return a
`Shapefile.Table`.

Archive members are matched by their lowercased file extension:

- `.shp`: geometry data (required)
- `.dbf`: attribute table (required)
- `.shx`: shape index (optional; used when present)
- `.prj`: projection (optional; becomes the `crs` of the handle)

If several members share an extension, the last one in archive order is used.
A `.prj` member that cannot be read or parsed only triggers a warning, and the
`crs` is then left as it was.

# Throws
- `ArgumentError` if the archive contains no `.shp` or no `.dbf` member.
"""
function _read_shp_from_zipfile(zipfile)
    shpdata, shxdata, dbfdata, prjdata = nothing, nothing, nothing, nothing
    r = ZipFile.Reader(zipfile)
    # Everything is read into memory inside the `try` so that the reader is closed
    # even when one of the members fails to read or parse.
    try
        for f in r.files
            lfn = lowercase(f.name)
            if endswith(lfn, ".shp")
                shpdata = IOBuffer(read(f))
            elseif endswith(lfn, ".shx")
                shxdata = read(f, Shapefile.IndexHandle)
            elseif endswith(lfn, ".dbf")
                dbfdata = Shapefile.DBFTables.Table(IOBuffer(read(f)))
            elseif endswith(lfn, ".prj")
                # Best effort: a broken projection file must not prevent reading the data.
                prjdata = try
                    Shapefile.GeoFormatTypes.ESRIWellKnownText(
                        Shapefile.GeoFormatTypes.CRS(), read(f, String)
                    )
                catch
                    @warn "Projection file $zipfile/$lfn appears to be corrupted. `nothing` used for `crs`"
                    nothing
                end
            end
        end
    finally
        close(r)
    end

    # Validate the archive contents explicitly; `@assert` is not meant for input checks.
    shpdata === nothing &&
        throw(ArgumentError("No .shp file found in zip archive: $zipfile"))
    dbfdata === nothing &&
        throw(ArgumentError("No .dbf file found in zip archive: $zipfile"))

    shp = if shxdata !== nothing # an index is available, let the reader use it
        read(shpdata, Shapefile.Handle, shxdata)
    else
        read(shpdata, Shapefile.Handle)
    end
    if prjdata !== nothing
        shp.crs = prjdata
    end
    return Shapefile.Table(shp, dbfdata)
end

end
17 changes: 17 additions & 0 deletions src/Shapefile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,21 @@ include("extent.jl")
include("plotrecipes.jl")
include("writer.jl")

function __init__()
    # Attach a hint to every `MethodError` raised for `_read_shp_from_zipfile` while the
    # `ShapefileZipFileExt` extension is not loaded, telling the user to load ZipFile.
    Base.Experimental.register_error_hint(MethodError) do io, exc, argtypes, kwargs
        # Only errors coming from the zip reader stub are of interest here.
        exc.f == _read_shp_from_zipfile || return nothing
        # With the extension loaded, the failure has some other cause; stay silent.
        zip_ext = Base.get_extension(Shapefile, :ShapefileZipFileExt)
        zip_ext === nothing || return nothing

        print(io, "\nPlease load the ")
        printstyled(io, "ZipFile"; color=:cyan)
        println(io, " package to read zipfiles into Shapefile.Table objects.")
        println(io, "You can do this by typing: ")
        printstyled(io, "using ZipFile"; color=:cyan, bold=true)
        println(io, "\ninto your REPL or code.")
    end
end

end # module
5 changes: 5 additions & 0 deletions src/table.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ function Table(shp::Handle{T}, dbf::DBFTables.Table) where {T}
Table{T}(shp, dbf)
end
function Table(path::AbstractString)
    # Paths ending in `.zip` are delegated to `_read_shp_from_zipfile`.
    if endswith(path, ".zip")
        return _read_shp_from_zipfile(path)
    end
    paths = _shape_paths(path)
    # Each check names the file that is actually missing.
    isfile(paths.shp) || throw(ArgumentError("File not found: $(paths.shp)"))
    isfile(paths.dbf) || throw(ArgumentError("File not found: $(paths.dbf)"))
Expand All @@ -80,6 +83,8 @@ function Table(path::AbstractString)
return Shapefile.Table(shp, dbf)
end

# Generic function declared without any method. `Table(path)` calls it for paths
# ending in ".zip"; the method itself is defined in the `ShapefileZipFileExt`
# package extension (triggered by ZipFile, see Project.toml).
function _read_shp_from_zipfile end

# Return the `shp` field of the table, read directly with `getfield`.
function getshp(tbl::Table)
    return getfield(tbl, :shp)
end
# Return the `dbf` field of the table, read directly with `getfield`.
function getdbf(tbl::Table)
    return getfield(tbl, :dbf)
end

Expand Down
33 changes: 27 additions & 6 deletions test/table.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ wkt = "GEOGCS[\"GCS_WGS_1984\",DATUM[\"D_WGS_1984\",SPHEROID[\"WGS_1984\",637813
@test propertynames(ne_land) == [:geometry, :featurecla, :scalerank, :min_zoom]
@test propertynames(first(ne_land)) == [:geometry, :featurecla, :scalerank, :min_zoom]
@test first(ne_land).geometry isa Shapefile.Polygon
@test ne_land.featurecla isa Vector{String}
@test ne_land.featurecla isa Vector{Union{String,Missing}}
@test length(ne_land.scalerank) == length(ne_land)
@test GeoInterface.crs(ne_land) == GeoFormatTypes.ESRIWellKnownText(GeoFormatTypes.CRS(), wkt)

Expand All @@ -110,7 +110,7 @@ wkt = "GEOGCS[\"GCS_WGS_1984\",DATUM[\"D_WGS_1984\",SPHEROID[\"WGS_1984\",637813
df_land = DataFrames.DataFrame(ne_land)
@test size(df_land) == (127, 4)
@test names(df_land) == ["geometry", "featurecla", "scalerank", "min_zoom"]
df_land.featurecla isa Vector{String}
df_land.featurecla isa Vector{Union{String,Missing}}
end

@testset "ne_coastline" begin
Expand All @@ -120,7 +120,7 @@ end
@test propertynames(ne_coastline) == [:geometry, :scalerank, :featurecla, :min_zoom]
@test propertynames(first(ne_coastline)) == [:geometry, :scalerank, :featurecla, :min_zoom]
@test first(ne_coastline).geometry isa Shapefile.Polyline
@test ne_coastline.featurecla isa Vector{String}
@test ne_coastline.featurecla isa Vector{Union{String,Missing}}
@test GeoInterface.crs(ne_coastline) == GeoFormatTypes.ESRIWellKnownText(GeoFormatTypes.CRS(), wkt)
@test length(ne_coastline.scalerank) == length(ne_coastline)
@test sum(ne_coastline.scalerank) == 59
Expand All @@ -141,7 +141,7 @@ end
df_coastline = DataFrames.DataFrame(ne_coastline)
@test size(df_coastline) == (134, 4)
@test names(df_coastline) == ["geometry", "scalerank", "featurecla", "min_zoom"]
df_coastline.featurecla isa Vector{String}
df_coastline.featurecla isa Vector{Union{String,Missing}}
end

@testset "ne_cities" begin
Expand All @@ -158,7 +158,7 @@ end
@test propertynames(ne_cities) == colnames
@test propertynames(first(ne_cities)) == colnames
@test first(ne_cities).geometry isa Shapefile.Point
@test ne_cities.featurecla isa Vector{String}
@test ne_cities.featurecla isa Vector{Union{String,Missing}}
@test GeoInterface.crs(ne_coastline) == GeoFormatTypes.ESRIWellKnownText(GeoFormatTypes.CRS(), wkt)
@test length(ne_cities.scalerank) == length(ne_cities)
@test sum(ne_cities.scalerank) == 612
Expand Down Expand Up @@ -186,7 +186,7 @@ end
df_cities = DataFrames.DataFrame(ne_cities)
@test size(df_cities) == (243, 39)
@test names(df_cities) == string.(colnames)
df_cities.featurecla isa Vector{String}
df_cities.featurecla isa Vector{Union{String,Missing}}
end

# no need to use shx in Shapefile.Tables since we read the shapes into a Vector and can thus index them
Expand Down Expand Up @@ -233,3 +233,24 @@ end
end

end # testset "Tables interface"

@testset "Reading with ZipFile" begin
    using ZipFile
    # Loading ZipFile must have activated the package extension.
    zip_ext = Base.get_extension(Shapefile, :ShapefileZipFileExt)
    @test zip_ext !== nothing
    mktempdir() do tmpdir
        cd(tmpdir) do
            zipfile = @RemoteFile "https://ndownloader.figshare.com/files/20460645" dir=datadir file="tracts.zip"
            download(zipfile)
            zippath = path(zipfile)
            # Reading must not emit any warning.
            @test_nowarn Shapefile.Table(zippath)
            table = Shapefile.Table(zippath)
            # The reader returns a proper table ...
            @test table isa Shapefile.Table
            # ... with the expected number of rows and column element type ...
            @test length(table) == 822
            @test eltype(table.STATEFP) <: Union{Missing, String}
            # ... and the projection from the archive ends up as the CRS.
            @test GeoInterface.crs(table) isa Shapefile.GeoFormatTypes.ESRIWellKnownText{Shapefile.GeoFormatTypes.CRS}
        end
    end
end

0 comments on commit 35c8d42

Please sign in to comment.