-
Notifications
You must be signed in to change notification settings - Fork 58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/irregular_structure_and_operations #536
Changes from all commits
e314ee8
f504223
de32dfe
a7ab765
3904443
583a2be
ab15784
265cb7e
5a994d1
a9434c7
d0c295d
c28b451
1243291
87267b7
24bea75
a5a0c2b
21e099c
7e2aa65
db112b9
8ba5ef6
e562097
37cb3a7
4d4b797
6f8ca61
de4d470
13c476f
0f4275b
f52fd98
d7a0097
4260d68
ac37a25
c6f5859
4f8985a
de17d26
2b6e078
c3dba8b
00ad9de
f27420e
62401cf
42596f8
197fa28
bd0f9d8
4356d6d
73b79c1
46f187e
94644fe
5ce6a86
5a4ace7
2a845c4
8a81bd7
194cab5
f82e7e3
56aa321
0b10333
9cba9e8
786f186
e2ebf2e
55bbcb1
2c3d6c7
3f58908
0ca3f49
29da53e
95d86bc
bbe6e06
c657967
7fd0f44
aa9230c
00a348e
22beb0e
b1e67b1
f2244f3
c438deb
aff4baa
87d754f
5f7b4c9
c352d57
b7a7489
22388f3
494fa67
b750e74
59d10d4
1b7a25e
cbc03a9
810b5b0
598d5ac
3d9329e
a78673a
2d98830
8091e78
15454a6
10479ef
bdc7858
6d15f06
8f1ea02
7508581
71d08a6
742ecc1
272d0f6
8420f1e
2e933a4
d92d005
3c13035
91b74b6
80b814a
5183987
1a15e9c
8a6dd5d
07ffeff
47d1ba1
e225c3e
c4cd858
dec2db3
bd28940
fff8230
20fb03d
8081d68
7bbe8bc
0b9b5bb
e1ddca4
789aedc
e172815
eee7909
c7a6235
d82693f
e202aa7
c2afecc
0f90db2
26b5f0c
fb69594
ad71339
f6a87ad
eee51ad
3bd25fa
ea7e6dd
6e32109
5b0ba71
b107525
0e9949f
6a8a90d
22aa8c7
11508c1
5a841ed
1a1835e
f0fe0d7
4a2fc88
01fe0ba
3856e10
dfaad23
cc7d6be
d06e843
ee89c2c
6bf925c
16c8109
a875bd7
777bd5d
fb6502f
cd7e73e
21f7bad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ | |
from typing_extensions import Literal | ||
|
||
from ..representation import FDataGrid | ||
from ..representation.irregular import FDataIrregular | ||
from ..typing._numpy import NDArrayFloat, NDArrayInt | ||
|
||
|
||
|
@@ -162,7 +163,7 @@ def fetch_ucr( | |
return_X_y: bool = False, | ||
**kwargs: Any, | ||
) -> Bunch | Tuple[FDataGrid, NDArrayInt]: | ||
""" | ||
r""" | ||
Fetch a dataset from the UCR/UEA repository. | ||
|
||
The UCR/UEA Time Series Classification repository, hosted at | ||
|
@@ -173,6 +174,7 @@ def fetch_ucr( | |
|
||
Args: | ||
name: Dataset name. | ||
return_X_y: Return tuple (data, target) | ||
kwargs: Additional parameters for the function | ||
:func:`skdatasets.repositories.ucr.fetch`. | ||
|
||
|
@@ -247,7 +249,7 @@ def _fetch_fda_usc(name: str) -> Any: | |
Acoustic-Phonetic Continuous Speech Corpus, NTIS, US Dept of Commerce) | ||
which is a widely used resource for research in speech recognition. A | ||
dataset was formed by selecting five phonemes for | ||
classification based on digitized speech from this database. | ||
classification based on digitized speech from this database. | ||
phonemes are transcribed as follows: "sh" as in "she", "dcl" as in | ||
"dark", "iy" as the vowel in "she", "aa" as the vowel in "dark", and | ||
"ao" as the first vowel in "water". From continuous speech of 50 male | ||
|
@@ -1551,3 +1553,92 @@ def fetch_mco( | |
cite=":footcite:p:`ruiz-meana++_2003_cariporide`", | ||
bibliography=".. footbibliography::", | ||
) + _param_descr | ||
|
||
|
||
def _fetch_loon_data(name: str) -> Any:
    """
    Fetch a dataset from the R package ``loon.data``.

    Thin wrapper that delegates to ``_fetch_cran_no_encoding_warning`` with
    the package name and version fixed, so every caller reads the same
    release of the package.

    Args:
        name: Dataset name, forwarded unchanged to the underlying fetcher.

    Returns:
        Whatever ``_fetch_cran_no_encoding_warning`` returns for the
        requested dataset.

    """
    package_name = "loon.data"
    package_version = "0.1.3"

    return _fetch_cran_no_encoding_warning(
        name,
        package_name,
        version=package_version,
    )
|
||
|
||
# Human-readable description of the Bone Density dataset. It is exposed
# verbatim as the ``DESCR`` field of the Bunch built by
# ``fetch_bone_density``.
_bone_density_descr = """
The Bone Density dataset is a study of bone density
in boys and girls aged 8-17. It contains data from 423
individuals, measured irregularly in different times,
with an average of ~3 points per individual.

References:
https://cran.r-project.org/package=loon.data
Laura K. Bachrach, Trevor Hastie, May-Choo Wang,
Balasubramanian Narasimhan, and Robert Marcus (1999)
"Bone Mineral Acquisition in Healthy Asian, Hispanic, Black
and Caucasian Youth. A Longitudinal Study",
J Clin Endocrinol Metab, 84, 4702-12.
Trevor Hastie, Robert Tibshirani, and Jerome Friedman (2009)
"The Elements of Statistical Learning",
2nd Edition, Springer New York <doi:10.1007/978-0-387-84858-7>

"""
|
||
|
||
def fetch_bone_density(
    return_X_y: bool = False,
    as_frame: bool = False,
) -> Bunch | Tuple[FDataIrregular, NDArrayInt] | Tuple[DataFrame, Series]:
    """
    Load the Bone Density dataset. This is an irregular dataset.

    The data is obtained from the R package 'loon.data', which compiles
    several irregular datasets. Each curve gathers the ``spnbmd`` values of
    one individual (column ``idnum``) as a function of ``age``; the target
    is the ``sex`` column of the same table. The bibliographic references
    are included in the dataset description (``DESCR``).

    Args:
        return_X_y: If ``True``, return only the tuple ``(data, target)``
            instead of a :class:`Bunch`.
        as_frame: If ``True``, the data is a one-column ``DataFrame`` that
            holds the curves, the target is a ``Series`` and the ``frame``
            field of the Bunch contains both side by side. Otherwise the
            data is the curves object itself and the target is the array of
            categorical codes.

    Returns:
        A :class:`Bunch` with the fields ``data``, ``target``, ``frame``,
        ``categories``, ``feature_names``, ``target_names`` and ``DESCR``,
        or the tuple ``(data, target)`` when ``return_X_y`` is ``True``.

    """
    descr = _bone_density_descr
    frame = None

    # The same key names the dataset inside the package, the entry of the
    # fetched mapping and the resulting functional object.
    dataset_name = "bone_ext"

    raw_dataset = _fetch_loon_data(dataset_name)

    data = raw_dataset[dataset_name]

    curve_name = "idnum"
    argument_name = "age"
    target_name = "sex"
    coordinate_name = "spnbmd"

    curves = FDataIrregular._from_dataframe(
        data,
        id_column=curve_name,
        argument_columns=argument_name,
        coordinate_columns=coordinate_name,
        argument_names=[argument_name],
        coordinate_names=[coordinate_name],
        dataset_name=dataset_name,
    )

    # One target value per curve: keep the first row of every individual.
    # NOTE(review): this assumes the curves are ordered by first appearance
    # of each id in ``data`` - confirm against ``_from_dataframe``.
    target = pd.Series(
        data.drop_duplicates(subset=[curve_name])[target_name],
        name="group",
    )

    # ``dataset_name`` is presumably Optional on the curves object (mypy
    # flagged the bare ``.lower()`` call in review); fall back to the name
    # that was passed in so the attribute access cannot fail on ``None``.
    feature_name = (curves.dataset_name or dataset_name).lower()

    # NOTE(review): this lists one label per sample rather than the distinct
    # class names - confirm that this is the intended ``target_names``.
    target_names = target.values.tolist()

    if as_frame:
        curves = pd.DataFrame({feature_name: curves})
        # Realign the target with the fresh 0..n-1 index of ``curves``.
        target_as_frame = target.reset_index(drop=True).to_frame()
        frame = pd.concat([curves, target_as_frame], axis=1)
    else:
        # Relies on the target column being categorical.
        target = target.values.codes

    if return_X_y:
        return curves, target

    return Bunch(
        data=curves,
        target=target,
        frame=frame,
        categories={},
        feature_names=[argument_name],
        target_names=target_names,
        DESCR=descr,
    )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[pep8] reported by reviewdog 🐶
WPS360 Found an unnecessary use of a raw string: """