Skip to content

Commit

Permalink
add csv merge
Browse files Browse the repository at this point in the history
  • Loading branch information
olivierlabayle committed May 24, 2022
1 parent 214ac06 commit 9f730f0
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 1 deletion.
24 changes: 24 additions & 0 deletions bin/csvmerge.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using ArgParse
using UKBMain

"""
    parse_commandline()

Parse the command-line arguments of the csv merge script.

Three positional arguments are declared, in this order: `csv1` and `csv2` (the two
input `.csv` files) and `out` (the output file path). The value returned by
`parse_args` is handed back to the caller, which looks arguments up by these names.

All three arguments are marked as required so that a missing path is reported by the
argument parser itself rather than surfacing later as a failure while reading or
writing a file.
"""
function parse_commandline()
    s = ArgParseSettings(description="Merges 2 .csv file by SAMPLE_ID")

    @add_arg_table s begin
        "csv1"
            help = "First .csv file"
            arg_type = String
            required = true
        "csv2"
            help = "Second .csv file"
            arg_type = String
            required = true
        "out"
            help = "Output file path"
            arg_type = String
            required = true
    end

    return parse_args(s)
end

# Script entry point: parse the command-line arguments, then pass them to
# `csvmerge`, which performs the merge and writes the output file.
parsed_args = parse_commandline()

csvmerge(parsed_args)
9 changes: 8 additions & 1 deletion src/UKBMain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,12 @@ function decode(parsed_args)
CSV.write(outfile, dataset)
end

export decode
"""
    csvmerge(parsed_args)

Inner-join two CSV files on their `SAMPLE_ID` column and write the result to disk.

`parsed_args` is indexed with the keys `"csv1"` and `"csv2"` (paths of the two input
files) and `"out"` (path of the file to write). Both inputs are loaded as
`DataFrame`s; only rows whose `SAMPLE_ID` is present in both inputs are kept.

The value returned by `CSV.write` is returned unchanged.
"""
function csvmerge(parsed_args)
    left = CSV.read(parsed_args["csv1"], DataFrame)
    right = CSV.read(parsed_args["csv2"], DataFrame)
    merged = innerjoin(left, right; on=:SAMPLE_ID)
    return CSV.write(parsed_args["out"], merged)
end

export decode, csvmerge

end
28 changes: 28 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,31 @@ end

rm(parsed_args["out"])
end

@testset "Test csvmerge" begin
    # Work inside a temporary directory: it is removed when the `do` block exits,
    # even if `csvmerge` throws, so no fixture file is left behind in the
    # current working directory.
    mktempdir() do tmpdir
        parsed_args = Dict(
            "csv1" => joinpath(tmpdir, "test_csv1.csv"),
            "csv2" => joinpath(tmpdir, "test_csv2.csv"),
            "out" => joinpath(tmpdir, "test_out.csv"),
        )

        # The two inputs share SAMPLE_ID 2 and 3 only; 1 and 4 must be dropped.
        CSV.write(
            parsed_args["csv1"],
            DataFrame(SAMPLE_ID=[1, 2, 3], COL1=[1.0, 2.0, 3.0]),
        )
        CSV.write(
            parsed_args["csv2"],
            DataFrame(SAMPLE_ID=[4, 3, 2], COL2=[1.0, 2.0, 3.0]),
        )

        csvmerge(parsed_args)

        out = CSV.read(parsed_args["out"], DataFrame)

        # Sort before comparing so the assertion does not depend on the order
        # in which the join happens to emit the matching rows.
        @test sort(out, :SAMPLE_ID) == DataFrame(
            SAMPLE_ID=[2, 3],
            COL1=[2.0, 3.0],
            COL2=[3.0, 2.0],
        )
    end
end

0 comments on commit 9f730f0

Please sign in to comment.