From a9b264292a93776e364c620340b9b3c97e75943b Mon Sep 17 00:00:00 2001 From: Valentin Kaisermayer Date: Wed, 17 Jul 2024 14:18:02 +0200 Subject: [PATCH 1/9] adds method for splitting data into periods --- docs/src/split.md | 11 ++++++++++ src/TimeSeries.jl | 12 +++++------ src/split.jl | 52 +++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/docs/src/split.md b/docs/src/split.md index f7858910..417a80bd 100644 --- a/docs/src/split.md +++ b/docs/src/split.md @@ -124,3 +124,14 @@ using MarketData tail(cl) tail(cl, 3) ``` + +## Splitting by period + +Splitting data by a given function, e.g. `Dates.day` into periods. + +```@repl +using TimeSeries +using MarketData + +split(cl, week) +``` \ No newline at end of file diff --git a/src/TimeSeries.jl b/src/TimeSeries.jl index 727e3be2..8addb07b 100644 --- a/src/TimeSeries.jl +++ b/src/TimeSeries.jl @@ -12,12 +12,12 @@ using Tables using PrettyTables: pretty_table export TimeArray, AbstractTimeSeries, - when, from, to, findwhen, timestamp, values, colnames, meta, head, tail, - lag, lead, diff, percentchange, moving, upto, - uniformspaced, uniformspace, dropnan, - basecall, - merge, collapse, - readtimearray, writetimearray + when, from, to, findwhen, timestamp, values, colnames, meta, head, tail, split, + lag, lead, diff, percentchange, moving, upto, + uniformspaced, uniformspace, dropnan, + basecall, + merge, collapse, + readtimearray, writetimearray # modify.jl export rename, rename! diff --git a/src/split.jl b/src/split.jl index 13688d1e..ca150b56 100644 --- a/src/split.jl +++ b/src/split.jl @@ -7,17 +7,17 @@ when(ta::TimeArray, period::Function, t::String) = # from, to ###################### -from(ta::TimeArray{T, N, D}, d::D) where {T, N, D} = +from(ta::TimeArray{T,N,D}, d::D) where {T,N,D} = length(ta) == 0 ? ta : - d < timestamp(ta)[1] ? ta : - d > timestamp(ta)[end] ? ta[1:0] : - ta[searchsortedfirst(timestamp(ta), d):end] + d < timestamp(ta)[1] ? ta : + d > timestamp(ta)[end] ? ta[1:0] : + ta[searchsortedfirst(timestamp(ta), d):end] -to(ta::TimeArray{T, N, D}, d::D) where {T, N, D} = +to(ta::TimeArray{T,N,D}, d::D) where {T,N,D} = length(ta) == 0 ? ta : - d < timestamp(ta)[1] ? ta[1:0] : - d > timestamp(ta)[end] ? ta : - ta[1:searchsortedlast(timestamp(ta), d)] + d < timestamp(ta)[1] ? ta[1:0] : + d > timestamp(ta)[end] ? ta : + ta[1:searchsortedlast(timestamp(ta), d)] ###### findall ################## @@ -43,7 +43,7 @@ findwhen(ta::TimeArray{Bool,1}) = timestamp(ta)[findall(values(ta))] end end - @generated function tail(ta::TimeArray{T,N}, n::Int=6) where {T,N} +@generated function tail(ta::TimeArray{T,N}, n::Int=6) where {T,N} new_values = (N == 1) ? :(values(ta)[start:end]) : :(values(ta)[start:end, :]) quote @@ -58,3 +58,37 @@ end Base.first(ta::TimeArray) = head(ta, 1) Base.last(ta::TimeArray) = tail(ta, 1) + + +""" + split(data::TimeSeries.TimeArray, period::Function) + +Split `data` by `period` function, returns a vector of `TimeSeries.TimeArray`. + +## Arguments + +- `data::TimeSeries.TimeArray`: Data to split +- `period::Function`: Function, e.g. `Dates.day` that is used to split the `data`. +""" +split(data::TimeSeries.TimeArray, period::Function) = map(i -> data[i], _split(data, period)) +function _split(data::TimeSeries.TimeArray, period::Function) + isempty(data) && return data + + m = length(data) + ts = TimeSeries.timestamp(data) + idx = UnitRange{Int}[] + sizehint!(idx, m) + + t0 = period(ts[1]) + j = 1 + for i in 1:(m-1) + t1 = period(ts[i+1]) + t0 == t1 && continue + push!(idx, j:i) + j = i + 1 + t0 = t1 + end + push!(idx, j:m) + + return idx +end \ No newline at end of file From e7604ceab4fd872032b903191a8206de65d4a2d0 Mon Sep 17 00:00:00 2001 From: Valentin Kaisermayer Date: Wed, 17 Jul 2024 14:19:34 +0200 Subject: [PATCH 2/9] minor --- docs/src/split.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/split.md b/docs/src/split.md index 417a80bd..bb919264 100644 --- a/docs/src/split.md +++ b/docs/src/split.md @@ -133,5 +133,5 @@ Splitting data by a given function, e.g. `Dates.day` into periods. using TimeSeries using MarketData -split(cl, week) +split(cl, Dates.day) ``` \ No newline at end of file From bad718ae8af8e8ea49c931250dc9383cb9823284 Mon Sep 17 00:00:00 2001 From: Valentin Kaisermayer Date: Fri, 13 Dec 2024 21:40:23 +0100 Subject: [PATCH 3/9] return iterator and adds test --- src/split.jl | 2 +- test/split.jl | 215 ++++++++++++++++++++++++++------------------------ 2 files changed, 111 insertions(+), 106 deletions(-) diff --git a/src/split.jl b/src/split.jl index ca150b56..c00d0b9c 100644 --- a/src/split.jl +++ b/src/split.jl @@ -70,7 +70,7 @@ Split `data` by `period` function, returns a vector of `TimeSeries.TimeArray`. - `data::TimeSeries.TimeArray`: Data to split - `period::Function`: Function, e.g. `Dates.day` that is used to split the `data`. """ -split(data::TimeSeries.TimeArray, period::Function) = map(i -> data[i], _split(data, period)) +split(data::TimeSeries.TimeArray, period::Function) = Iterators.map(i -> data[i], _split(data, period)) function _split(data::TimeSeries.TimeArray, period::Function) isempty(data) && return data diff --git a/test/split.jl b/test/split.jl index 553b9da4..3edf563d 100644 --- a/test/split.jl +++ b/test/split.jl @@ -1,121 +1,126 @@ using Dates using Test - using MarketData - using TimeSeries - @testset "split" begin - -@testset "find methods" begin - @testset "find returns correct row numbers array" begin - @test timestamp(cl[findall(cl .> op)])[1] == Date(2000, 1, 3) - @test length(findall(cl .> op)) == 244 + @testset "find methods" begin + @testset "find returns correct row numbers array" begin + @test timestamp(cl[findall(cl .> op)])[1] == Date(2000, 1, 3) + @test length(findall(cl .> op)) == 244 + end + + @testset "findwhen returns correct Dates array" begin + @test findwhen(cl .> op)[2] == Date(2000, 1, 5) + @test length(findwhen(cl .> op)) == 244 + end + + @testset "findall(f::Function, ta)" begin + @test findall(cl .> 100) == findall(x -> x > 100, cl) + @test findall(cl .> 100) == findall(x -> x[4] > 100, ohlc) + end end - @testset "findwhen returns correct Dates array" begin - @test findwhen(cl .> op)[2] == Date(2000, 1, 5) - @test length(findwhen(cl .> op)) == 244 + @testset "split date operations" begin + @testset "from and to correctly subset non-zero and zero-length time arrays" begin + @test length(from(cl, Date(2001, 12, 28))) == 2 + @test length(from(cl, Date(2002, 1, 1))) == 0 + @test length(from(from(cl, Date(2002, 1, 1)), Date(2012, 1, 1))) == 0 + + @test length(to(cl, Date(2000, 1, 4))) == 2 + @test length(to(cl, Date(1999, 1, 4))) == 0 + @test length(to(to(cl, Date(1999, 1, 4)), Date(1912, 1, 1))) == 0 + end + + @testset "when method correctly subset" begin + @test timestamp(when(cl, day, 4))[1] == Date(2000, 1, 4) + @test timestamp(when(cl, dayname, "Friday"))[1] == Date(2000, 1, 7) + @test timestamp(when(cl, week, 5))[1] == Date(2000, 1, 31) + @test timestamp(when(cl, month, 5))[1] == Date(2000, 5, 1) + @test timestamp(when(cl, monthname, "June"))[1] == Date(2000, 6, 1) + @test timestamp(when(cl, year, 2001))[1] == Date(2001, 1, 2) + @test timestamp(when(cl, dayofweek, 1))[1] == Date(2000, 1, 3) + # all the days in the nth week of each month + @test timestamp(when(cl, dayofweekofmonth, 5))[1] == Date(2000, 1, 31) + @test timestamp(when(cl, dayofyear, 365))[1] == Date(2001, 12, 31) + @test timestamp(when(cl, quarterofyear, 4))[1] == Date(2000, 10, 2) + @test timestamp(when(cl, dayofquarter, 1))[1] == Date(2001, 10, 1) + end end - @testset "findall(f::Function, ta)" begin - @test findall(cl .> 100) == findall(x -> x > 100, cl) - @test findall(cl .> 100) == findall(x -> x[4] > 100, ohlc) + @testset "head, tail, first and last methods" begin + @testset "head, tail, first and last methods work with default n value on single column TimeArray" begin + @test length(head(cl, 6)) == 6 + @test timestamp(head(cl)) == [Date(2000, 1, 3), Date(2000, 1, 4), Date(2000, 1, 5), + Date(2000, 1, 6), Date(2000, 1, 7), Date(2000, 1, 10)] + @test values(head(cl)) == [111.94, 102.5, 104.0, 95.0, 99.5, 97.75] + + @test length(tail(cl, 6)) == 6 + @test timestamp(tail(cl)) == [Date(2001, 12, 21), Date(2001, 12, 24), Date(2001, 12, 26), + Date(2001, 12, 27), Date(2001, 12, 28), Date(2001, 12, 31)] + @test values(tail(cl)) == [21.0, 21.36, 21.49, 22.07, 22.43, 21.9] + + @test length(first(cl)) == 1 + @test timestamp(first(cl))[1] == Date(2000, 1, 3) + @test values(first(cl))[1] == 111.94 + @test meta(first(cl)) == "AAPL" + + @test length(last(cl)) == 1 + @test timestamp(last(cl))[1] == Date(2001, 12, 31) + @test values(last(cl))[1] == 21.9 + @test meta(last(cl)) == "AAPL" + end + + @testset "head, tail, first and last methods work with default n value on multi column TimeArray" begin + @test length(head(ohlc)) == 6 + @test values(head(ohlc, 1)) == [104.88 112.5 101.69 111.94] + + @test length(tail(ohlc)) == 6 + @test values(tail(ohlc, 1)) == [22.51 22.66 21.83 21.9] + + @test length(first(ohlc)) == 1 + @test timestamp(first(ohlc))[1] == Date(2000, 1, 3) + @test values(first(ohlc)) == [104.88 112.5 101.69 111.94] + @test meta(first(ohlc)) == "AAPL" + + @test length(last(ohlc)) == 1 + @test timestamp(last(ohlc))[1] == Date(2001, 12, 31) + @test values(last(ohlc)) == [22.51 22.66 21.83 21.9] + @test meta(last(ohlc)) == "AAPL" + end + + @testset "head, tail, first and last methods work with custom periods on single column TimeArray" begin + @test length(head(cl, 2)) == 2 + @test length(head(cl, 500)) == length(cl) + @test length(tail(cl, 2)) == 2 + @test length(tail(cl, 500)) == length(cl) + + @test length(first(cl)) == 1 + @test length(last(cl)) == 1 + end + + @testset "head, tail, first and last methods work with custom periods on multi column TimeArray" begin + @test length(head(ohlc, 2)) == 2 + @test length(head(ohlc, 500)) == length(ohlc) + @test length(tail(ohlc, 2)) == 2 + @test length(tail(ohlc, 500)) == length(ohlc) + + @test length(first(ohlc)) == 1 + @test length(last(ohlc)) == 1 + end end -end - - -@testset "split date operations" begin - @testset "from and to correctly subset non-zero and zero-length time arrays" begin - @test length(from(cl, Date(2001,12,28))) == 2 - @test length(from(cl, Date(2002,1,1))) == 0 - @test length(from(from(cl, Date(2002,1,1)), Date(2012,1,1))) == 0 - @test length(to(cl, Date(2000,1,4))) == 2 - @test length(to(cl, Date(1999,1,4))) == 0 - @test length(to(to(cl, Date(1999,1,4)), Date(1912,1,1))) == 0 + @testset "split period" begin + for period in [day, week, month, year] + for cl_ in split(cl, period) + @test allequal(period.(timestamp(cl_))) + end + end + @test length(split(cl, day)) == 500 + @test length(split(cl, week)) == 105 + @test length(split(cl, month)) == 24 + @test length(split(cl, year)) == 2 end - @testset "when method correctly subset" begin - @test timestamp(when(cl, day, 4))[1] == Date(2000,1,4) - @test timestamp(when(cl, dayname, "Friday"))[1] == Date(2000,1,7) - @test timestamp(when(cl, week, 5))[1] == Date(2000,1,31) - @test timestamp(when(cl, month, 5))[1] == Date(2000,5,1) - @test timestamp(when(cl, monthname, "June"))[1] == Date(2000,6,1) - @test timestamp(when(cl, year, 2001))[1] == Date(2001,1,2) - @test timestamp(when(cl, dayofweek, 1))[1] == Date(2000,1,3) - # all the days in the nth week of each month - @test timestamp(when(cl, dayofweekofmonth, 5))[1] == Date(2000,1,31) - @test timestamp(when(cl, dayofyear, 365))[1] == Date(2001,12,31) - @test timestamp(when(cl, quarterofyear, 4))[1] == Date(2000,10,2) - @test timestamp(when(cl, dayofquarter, 1))[1] == Date(2001,10,1) - end -end - - -@testset "head, tail, first and last methods" begin - @testset "head, tail, first and last methods work with default n value on single column TimeArray" begin - @test length(head(cl,6)) == 6 - @test timestamp(head(cl)) == [Date(2000,1,3), Date(2000,1,4), Date(2000,1,5), - Date(2000,1,6), Date(2000,1,7), Date(2000,1,10)] - @test values(head(cl)) == [111.94, 102.5, 104.0, 95.0, 99.5, 97.75] - - @test length(tail(cl,6)) == 6 - @test timestamp(tail(cl)) == [Date(2001,12,21), Date(2001,12,24), Date(2001,12,26), - Date(2001,12,27), Date(2001,12,28), Date(2001,12,31)] - @test values(tail(cl)) == [21.0, 21.36, 21.49, 22.07, 22.43, 21.9] - - @test length(first(cl)) == 1 - @test timestamp(first(cl))[1] == Date(2000,1,3) - @test values(first(cl))[1] == 111.94 - @test meta(first(cl)) == "AAPL" - - @test length(last(cl)) == 1 - @test timestamp(last(cl))[1] == Date(2001,12,31) - @test values(last(cl))[1] == 21.9 - @test meta(last(cl)) == "AAPL" - end - - @testset "head, tail, first and last methods work with default n value on multi column TimeArray" begin - @test length(head(ohlc)) == 6 - @test values(head(ohlc, 1)) == [104.88 112.5 101.69 111.94] - - @test length(tail(ohlc)) == 6 - @test values(tail(ohlc, 1)) == [22.51 22.66 21.83 21.9] - - @test length(first(ohlc)) == 1 - @test timestamp(first(ohlc))[1] == Date(2000,1,3) - @test values(first(ohlc)) == [104.88 112.5 101.69 111.94] - @test meta(first(ohlc)) == "AAPL" - - @test length(last(ohlc)) == 1 - @test timestamp(last(ohlc))[1] == Date(2001,12,31) - @test values(last(ohlc)) == [22.51 22.66 21.83 21.9] - @test meta(last(ohlc)) == "AAPL" - end - - @testset "head, tail, first and last methods work with custom periods on single column TimeArray" begin - @test length(head(cl, 2)) == 2 - @test length(head(cl, 500)) == length(cl) - @test length(tail(cl, 2)) == 2 - @test length(tail(cl, 500)) == length(cl) - - @test length(first(cl)) == 1 - @test length(last(cl)) == 1 - end - - @testset "head, tail, first and last methods work with custom periods on multi column TimeArray" begin - @test length(head(ohlc, 2)) == 2 - @test length(head(ohlc, 500)) == length(ohlc) - @test length(tail(ohlc, 2)) == 2 - @test length(tail(ohlc, 500)) == length(ohlc) - - @test length(first(ohlc)) == 1 - @test length(last(ohlc)) == 1 - end -end - - end # @testset "split" From 139aa77d4a7dfef4e37dc8ac5e794c2efd25df52 Mon Sep 17 00:00:00 2001 From: Valentin Kaisermayer Date: Fri, 13 Dec 2024 22:38:28 +0100 Subject: [PATCH 4/9] changes _split --- src/split.jl | 11 +++++------ test/split.jl | 3 +++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/split.jl b/src/split.jl index c00d0b9c..614a58e1 100644 --- a/src/split.jl +++ b/src/split.jl @@ -70,15 +70,14 @@ Split `data` by `period` function, returns a vector of `TimeSeries.TimeArray`. - `data::TimeSeries.TimeArray`: Data to split - `period::Function`: Function, e.g. `Dates.day` that is used to split the `data`. """ -split(data::TimeSeries.TimeArray, period::Function) = Iterators.map(i -> data[i], _split(data, period)) -function _split(data::TimeSeries.TimeArray, period::Function) - isempty(data) && return data +split(data::TimeSeries.TimeArray, period::Function) = Iterators.map(i -> data[i], _split(TimeSeries.timestamp(data), period)) - m = length(data) - ts = TimeSeries.timestamp(data) +function _split(ts::AbstractVector{D}, period::Function) where {D<:TimeType} + m = length(ts) idx = UnitRange{Int}[] - sizehint!(idx, m) + isempty(ts) && return idx + sizehint!(idx, m) t0 = period(ts[1]) j = 1 for i in 1:(m-1) diff --git a/test/split.jl b/test/split.jl index 3edf563d..8b6b3bb7 100644 --- a/test/split.jl +++ b/test/split.jl @@ -121,6 +121,9 @@ using TimeSeries @test length(split(cl, week)) == 105 @test length(split(cl, month)) == 24 @test length(split(cl, year)) == 2 + + # test empty timearray + @test length(split(to(cl, Date(2000)), week)) == 0 end end # @testset "split" From 16557939614cd64c371855d64fb4fa7911d976c9 Mon Sep 17 00:00:00 2001 From: Valentin Kaisermayer Date: Fri, 13 Dec 2024 22:44:37 +0100 Subject: [PATCH 5/9] changes LTS --- .github/workflows/CI.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 2f19f766..060857ff 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -15,9 +15,8 @@ jobs: fail-fast: false matrix: version: - - "1.6.7" # LTS - - "1.6" - - "1" # Latest Release + - "1.10.7" # LTS + - "1" # Latest Release os: - ubuntu-latest arch: @@ -56,7 +55,7 @@ jobs: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest with: - version: '1' + version: "1" - run: | git config --global user.name name git config --global user.email email From 99123e841bac75ebb1af7e3ca1f94efbe70ed094 Mon Sep 17 00:00:00 2001 From: Valentin Kaisermayer Date: Fri, 13 Dec 2024 22:48:07 +0100 Subject: [PATCH 6/9] overload Base.split --- src/TimeSeries.jl | 2 +- src/split.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TimeSeries.jl b/src/TimeSeries.jl index 8addb07b..cd6f98e3 100644 --- a/src/TimeSeries.jl +++ b/src/TimeSeries.jl @@ -12,7 +12,7 @@ using Tables using PrettyTables: pretty_table export TimeArray, AbstractTimeSeries, - when, from, to, findwhen, timestamp, values, colnames, meta, head, tail, split, + when, from, to, findwhen, timestamp, values, colnames, meta, head, tail, lag, lead, diff, percentchange, moving, upto, uniformspaced, uniformspace, dropnan, basecall, diff --git a/src/split.jl b/src/split.jl index 614a58e1..42f80443 100644 --- a/src/split.jl +++ b/src/split.jl @@ -70,7 +70,7 @@ Split `data` by `period` function, returns a vector of `TimeSeries.TimeArray`. - `data::TimeSeries.TimeArray`: Data to split - `period::Function`: Function, e.g. `Dates.day` that is used to split the `data`. """ -split(data::TimeSeries.TimeArray, period::Function) = Iterators.map(i -> data[i], _split(TimeSeries.timestamp(data), period)) +Base.split(data::TimeSeries.TimeArray, period::Function) = Iterators.map(i -> data[i], _split(TimeSeries.timestamp(data), period)) function _split(ts::AbstractVector{D}, period::Function) where {D<:TimeType} m = length(ts) From d887ea11a9e038a94c9ec9bd28ff8cf897005308 Mon Sep 17 00:00:00 2001 From: Valentin Kaisermayer Date: Sat, 14 Dec 2024 21:24:17 +0100 Subject: [PATCH 7/9] changes interface of _split to return timestamps --- src/split.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/split.jl b/src/split.jl index 42f80443..74d053c1 100644 --- a/src/split.jl +++ b/src/split.jl @@ -89,5 +89,5 @@ function _split(ts::AbstractVector{D}, period::Function) where {D<:TimeType} end push!(idx, j:m) - return idx + return Iterators.map(i -> ts[i], idx) end \ No newline at end of file From 6c83ea0f1e0fdf1102f6256a0e2ede009d003616 Mon Sep 17 00:00:00 2001 From: Valentin Kaisermayer Date: Sat, 14 Dec 2024 21:25:53 +0100 Subject: [PATCH 8/9] fix --- src/split.jl | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/split.jl b/src/split.jl index 74d053c1..64422474 100644 --- a/src/split.jl +++ b/src/split.jl @@ -75,19 +75,18 @@ Base.split(data::TimeSeries.TimeArray, period::Function) = Iterators.map(i -> da function _split(ts::AbstractVector{D}, period::Function) where {D<:TimeType} m = length(ts) idx = UnitRange{Int}[] - isempty(ts) && return idx - - sizehint!(idx, m) - t0 = period(ts[1]) - j = 1 - for i in 1:(m-1) - t1 = period(ts[i+1]) - t0 == t1 && continue - push!(idx, j:i) - j = i + 1 - t0 = t1 + if !isempty(ts) + sizehint!(idx, m) + t0 = period(ts[1]) + j = 1 + for i in 1:(m-1) + t1 = period(ts[i+1]) + t0 == t1 && continue + push!(idx, j:i) + j = i + 1 + t0 = t1 + end + push!(idx, j:m) end - push!(idx, j:m) - return Iterators.map(i -> ts[i], idx) end \ No newline at end of file From ee4c929438a739de1889dc2451698ccc7f08e8a9 Mon Sep 17 00:00:00 2001 From: Valentin Kaisermayer <50108075+ValentinKaisermayer@users.noreply.github.com> Date: Mon, 16 Dec 2024 11:30:40 +0100 Subject: [PATCH 9/9] Update src/split.jl Co-authored-by: Iblis Lin --- src/split.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/split.jl b/src/split.jl index 64422474..c0fb0109 100644 --- a/src/split.jl +++ b/src/split.jl @@ -89,4 +89,4 @@ function _split(ts::AbstractVector{D}, period::Function) where {D<:TimeType} push!(idx, j:m) end return Iterators.map(i -> ts[i], idx) -end \ No newline at end of file +end