From 6f7c2c3e4ab4ede5fbb9d1cbd2ea8e9486a56cd3 Mon Sep 17 00:00:00 2001 From: antagomir Date: Wed, 27 Jul 2022 01:19:17 +0300 Subject: [PATCH 01/13] First shot on the intro --- paper/magnusson-kainu-lahti.bib | 2 +- paper/magnusson-kainu-lahti.tex | 169 +++++++++++++++++++++++++------- 2 files changed, 135 insertions(+), 36 deletions(-) diff --git a/paper/magnusson-kainu-lahti.bib b/paper/magnusson-kainu-lahti.bib index 972dcfb9..3399059a 100644 --- a/paper/magnusson-kainu-lahti.bib +++ b/paper/magnusson-kainu-lahti.bib @@ -1,6 +1,6 @@ @Article{Morin2012, author = {Morin A, Urban J, Sliz P}, - title = {A quick guide to software licensing for the scientist-programmer} + title = {A quick guide to software licensing for the scientist-programmer}, journal = {PLoS Computational Biology}, year = 2012, volume = 8, diff --git a/paper/magnusson-kainu-lahti.tex b/paper/magnusson-kainu-lahti.tex index 34382789..f393c1fd 100644 --- a/paper/magnusson-kainu-lahti.tex +++ b/paper/magnusson-kainu-lahti.tex @@ -1,35 +1,105 @@ -\title{Opening Official Statistics with the \CRANpkg{pxweb} Package} -\author{by Måns Magnusson, Markus Kainu, Leo Lahti} +\title{Opening Up Official Statistics with the \CRANpkg{pxweb} Package} +\author{by Måns Magnusson, Leo Lahti} + +% Other authors? +% Janne Huovari, many commits +% Markus Kainu, possibly figures & cheat sheet +% Pyry Kantanen, R-universe, technical & submission support, other \maketitle %An abstract of less than 150 words. \abstract{Abstract \CRANpkg{pxweb} R package here.} -\begin{itemize} - \item Motivation here. Why do we need this package? We want to use the open data pipeline, access, use and cite. - \item More and more data is available from National Statistical Agencies (see commented out text below). - \item We need tools to access the data that is simple and efficient to use (see commented out text below). - \item Citing data is a problem in itself, but important. It should be simplifed as much as possible. 
-\end{itemize} - -%% Potential text to use -% Various statistical authorities are increasingly sharing open data resources \cite{xxx}. For instance, international agencies such as Eurostat\footnote{\url{http://ec.europa.eu/eurostat/data/database}} \cite{xxx}, ILO\footnote{\url{https://www.ilo.org/ilostat}} \cite{xxx} , FAO\footnote{\url{http://www.fao.org/faostat}} \cite{xxx} and World Bank\footnote{\url{https://data.worldbank.org}} \cite{xxx} have released popular open data services. Altogether, national and international statistical authorities are now sharing massive amounts of open data on national and international aspects of politics, economics, demography, health, infrastructure, climate, and other areas. Such statistical data sets can be available with a great geographical resolution, with time series spanning several years to decades or even centuries \cite{xxx}. - - -% Opening statistical data collections is, however, only the first step towards realizing their full potential and value. Algorithmic tools for data access and analysis can greatly increase the value of such data resources and benefit reproducible research \citep{Gandrud13, Boettiger2015}. Dedicated software packages can be used to simplify, standardize, and automate analysis workflows, taking into account variations in raw data formats, access details, and typical use cases so that the end users can avoid repetitive programming tasks, avoid potential misinterpretations and coding errors, and save time. - -% Consequently, various developers have released software tools to facilitate the use of statistical data resources. Instead of merely providing tools for data browsing and standard retrieval, we emphasize the need for algorithms that provide the seamless bridge between original data sources and downstream analysis tasks in statistical software languages. 
For instance in R, various packages have been recently released for generic open data retrieval, including for instance quandl \cite{quandl} and pdfetch \cite{pdfetch}, and for more dedicated access to specific data sources such as open data from Eurostat \cite{Lahti17eurostat}, World Bank (\CRANpkg{WDI}; \citealt{WDI}), Open Street Map (\CRANpkg{osmar}; \citealt{osmar}) and other sources. - - -% som relevant initial references for citing data -%https://www.openoffice.org/bibliographic/bibtex-defs.pdf -%https://tex.stackexchange.com/questions/109127/how-would-i-cite-a-dataset-with-bibtex +%\begin{itemize} +% \item Motivation here. Why do we need this package? We want to use the open data pipeline, access, use and cite. +% \item More and more data is available from National Statistical Agencies (see commented out text below). +% \item We need tools to access the data that is simple and efficient to use (see commented out text below). +% \item Citing data is a problem in itself, but important. It should be simplifed as much as possible. +%\end{itemize} + + +% Motivation here. Why do we need this package? + +Open data science workflows rely heavily on algorithmic tools for data +retrieval and analysis. Automating major parts of the data science +workflow, such as finding, accessing, integrating, citing, and +reporting data, is helping the end users to dedicate more time on the +actual statistical analysis and interpretation. This can greatly +increase the value of the available data sources and facilitate +reproducible research \citep{Gandrud13, Boettiger2015} and FAIR data +sharing \cite{xxx}. However, the tools often remain missing even when +the data is made available. For instance, whereas dozens of +statistical authorities have started to share data through the PX-WEB +API, a dedicated R package that provides a unified access to these +data collections has been missing. 
We introduce here the \pkg{pxweb} R +package that has been designed to facilitate seamless access to open +data collections through the PC-Axis API \cite{xxx}, which is widely +adopted by national and international statistical organizations. + +% More and more data is available from National Statistical Agencies + +Statistical authorities are now sharing steadily increasing +collections of official statistics and other open data +resources \cite{xxx}. For instance, international agencies such as +Eurostat\footnote{\url{http://ec.europa.eu/eurostat/data/database}} \cite{xxx}, +ILO\footnote{\url{https://www.ilo.org/ilostat}} \cite{xxx} , +FAO\footnote{\url{http://www.fao.org/faostat}} \cite{xxx} and World +Bank\footnote{\url{https://data.worldbank.org}} \cite{xxx} have +released popular open data services. Altogether, national and +international statistical authorities are now sharing massive amounts +of open data on national and international aspects of politics, +economics, demography, health, infrastructure, climate, and other +areas. Such statistical data sets can be available with a great +geographical resolution, with time series spanning several years to +decades or even centuries \cite{xxx}. + +% We need tools to access the data that is simple and efficient to use + +Opening up official statistics is, however, only the first step +towards realizing their full potential and value. There is a clear +need for automated tools to access these data resources that are +simple and efficient to use. Dedicated software packages help to +simplify, standardize, and automate analysis workflows, taking into +account variations in raw data formats, access details, and typical +use cases so that the end users can avoid repetitive programming +tasks, avoid potential misinterpretations and coding errors, and save +time. Consequently, various developers have released software tools to +facilitate the use of statistical data resources. 
Instead of merely +providing tools for data browsing and standard retrieval, we emphasize +the need for algorithms that provide the seamless bridge between +original data sources and downstream analysis tasks in statistical +software languages. In R, various packages have been recently released +for generic open data retrieval, including for instance +quandl \cite{quandl} and pdfetch \cite{pdfetch}, and for more +dedicated access to specific data sources such as open data from +Eurostat \cite{Lahti17eurostat}, World Bank +(\CRANpkg{WDI}; \citealt{WDI}), Open Street Map +(\CRANpkg{osmar}; \citealt{osmar}) and other sources. + +% Citing data is a problem in itself, but important. It should be +% simplified as much as possible. + +Data citations are an important but often neglected aspect of data +reuse. Guidelines for data sharing have emphasized the need to +document the specific data versions, access times, and +sources. Ideally, this information should be cited in a standardized +format. This process should be simplified as much as possible. + +% TODO +% some relevant initial references for citing data +% https://www.openoffice.org/bibliographic/bibtex-defs.pdf +% https://tex.stackexchange.com/questions/109127/how-would-i-cite-a-dataset-with-bibtex % http://www.dcc.ac.uk/resources/how-guides/cite-datasets#fn11 % https://libguides.ub.uu.se/referensguiden/harvard\_exempel % https://www.scb.se/Upload/PC-Axis/Download/PX-Web/2017v1/Release-notes-pxweb-2017-v1.pdf % https://www.scb.se/sv\_/PC-Axis/Documentation/Error-codes-PC-Axis/ +The \pkg{pxweb} R package is addressing these needs and provides +mature and tested tools to find, access, and cite official statistics +and other information shared in the widely adopted PC-Axis format. 
+ + \subsection[PXWEB and PC-Axis]{PXWEB and PC-Axis} \begin{itemize} @@ -39,10 +109,10 @@ \end{itemize} -\subsection{Our contribution: the \CRANpkg{pxweb} package } +\subsection{The \CRANpkg{pxweb} package} \begin{itemize} - \item History fo the package + \item History of the package \item Design principles \item (See comment out text below) \item Extendibility of the package with new APIS @@ -58,14 +128,13 @@ \subsection{Our contribution: the \CRANpkg{pxweb} package } % In summary, the \CRANpkg{pxweb} package provides custom tools for open statistical data resources provided through the PX-WEB API. Currently, the pxweb package provides seamless algorithmic access from the R environment to dozens of data collections from national authorities in countries such as Estonia, Iceland, Finland, Norway, Sweden, The Netherlands, and elsewhere. Seamless integration with other data analysis tools is facilitated by support for features such as cache, date formatting, tidy data principles \citep{wickham2014}, and the \Cpkg{tibble} \citep{tibble} data format. In this article, we provide an overview of the functionality and use cases based on the current CRAN release version (0.8). The comprehensive on-line documentation, which is available via the package homepage\footnote{\url{http://ropengov.github.io/pxweb}}, includes simple examples for individual functions, generic tutorials, and links to more advanced case studies. Moreover, the package is following best practices in open source software development such as version control, automated unit tests, continuous integration, and collaborative development \citep{PerezRiverol2016}. -% The introduced tools can benefit researchers and data analysts in academia, government, and industry. Complete analytical workflow from raw data to statistical summaries and final publication can be greatly facilitated by combining programmatic data access with downstream data analysis and visualization tools. 
The pxweb package supports automated, transparent, reproducible, and well-documented data retrieval from statistical authorities. Utilities such as search, subsetting and cache support efficient data processing and analysis. Further custom tools and functionality can be built around this package. - -\section[Usage]{Usage} +\section[Usage]{Example case studies} \begin{itemize} \item Short version of the vignette. \item A nice figure and table should be the result. + \item Cover all relevant functionality, at least by mentioning it and citing the package documentation/website/vignette, if not all can be included here \end{itemize} @@ -76,8 +145,40 @@ \subsection{Using it for another API, not in the catalogue} \section[summary]{Discussion} +\begin{itemize} + \item Reiterate the gap that this package fills: data access for open workflows; summary of the functionality + \item The present version of the package is mature and stable; information on the userbase and downloads? + \item Quality control: CI, unit tests, open development/issues, CRAN checks etc + \item Justification for design choices that are potentially interesting or controversial + \item Examples of known case studies etc. that the package has enabled + \item Future extensions: additional data sources, additional functionality(?)..? +\end{itemize} + + +%Reiterate the gap that this package fills: data access for open workflows; summary of the functionality + +%The present version of the package is mature and stable; information on the userbase and downloads? + +%Quality control: CI, unit tests, open development/issues, CRAN checks etc + +%Justification for design choices that are potentially interesting or controversial + +%Examples of known case studies etc. that the package has enabled + +%Future extensions: additional data sources, additional functionality(?)..? + + + +% The introduced tools can benefit researchers and data analysts in academia, government, and industry. 
Complete analytical workflow from raw data to statistical summaries and final publication can be greatly facilitated by combining programmatic data access with downstream data analysis and visualization tools. The pxweb package supports automated, transparent, reproducible, and well-documented data retrieval from statistical authorities. Utilities such as search, subsetting and cache support efficient data processing and analysis. Further custom tools and functionality can be built around this package. + +%Data science methods and tools play a key role in bridging the +%The gap between data providers and end users +%Custom data science methods and tools can greatly facilitate briding + % The pxweb R package provides a seamless programmatic access to statistical data resources that are shared via the PX-WEB API. This popular interface has been adopted by dozens of official statistical authorities world-wide, and hence the pxweb package can facilitate the access and analysis of a remarkable vast collection of curated data collections. +%pxR package has been moved to Github, and some improvements have been done https://github.com/cjgb/pxR I think it is two different things. pxR is, to the best of my knowledge, a package to parse PC-Axis files (.px). That file format is old and I expect that JSON-stat will eventually replace the PC-Axis format. We could definitely mention it in the paper, but I think it is essentially a different thing compared to the pxweb package. pxweb is just an API package accessing the API to access the data. + % The available tools include utilities for data query, download, manipulation and visualization, and they can utilize information about the incorporated data hierarchies. The combination of algorithms provides a smooth, automated, reproducible and well-documented access to continuously evolving statistical data streams. 
The online documentation provides detailed examples on how the package can be used to investigate spatial, temporal, demographic, and other phenomena. % Algorithmic tools, such as the ones provided by the pxweb package, can help to realize the full potential of open statistical data collections. We have introduced a set of targeted tools for the PX-WEB API, which is a widely used data sharing platforms among national and other statistical authorities. Research and citizen science can benefit from the increasing availability of open statistical data resources. @@ -94,9 +195,7 @@ \subsection{Using it for another API, not in the catalogue} \section*{Acknowledgments} -We are grateful to all package contributors, including ... - -The work has been partially funded by Academy of Finland (decisions 295741, 307127 to LL), and is part of rOpenGov\footnote{\url{https://github.ropengov.io}}. +We are grateful to all package contributors. The work has been partially funded by Academy of Finland (decisions 295741, 345630 to LL), and is part of rOpenGov\footnote{\url{https://github.ropengov.io}}. 
\bibliography{magnusson-kainu-lahti} @@ -109,14 +208,14 @@ \section*{Acknowledgments} Finland\\} \email{mons.magnusson@gmail.com} -\address{Markus Kainu\\ - %Research Department, The Social Insurance Institution of Finland\\ - %PO Box 450, 00101 Helsinki\\ - Finland\\} -\email{markus.kainu@kela.fi} +%\address{Markus Kainu\\ +% %Research Department, The Social Insurance Institution of Finland\\ +% %PO Box 450, 00101 Helsinki\\ +% Finland\\} +%\email{markus.kainu@kela.fi} \address{Leo Lahti\\ - Department of Future Technologies\\ + Department of Computing\\ PO Box 20014 University of Turku\\ Finland\\} \email{leo.lahti@iki.fi} From 933ecc8d44726d9265c5940dbaaa061e6841b5a7 Mon Sep 17 00:00:00 2001 From: antagomir Date: Wed, 27 Jul 2022 13:35:01 +0300 Subject: [PATCH 02/13] Initiate the Discussion --- paper/magnusson-kainu-lahti.tex | 172 ++++++++++++++++++++++++++------ 1 file changed, 142 insertions(+), 30 deletions(-) diff --git a/paper/magnusson-kainu-lahti.tex b/paper/magnusson-kainu-lahti.tex index f393c1fd..2b788fff 100644 --- a/paper/magnusson-kainu-lahti.tex +++ b/paper/magnusson-kainu-lahti.tex @@ -27,12 +27,13 @@ reporting data, is helping the end users to dedicate more time on the actual statistical analysis and interpretation. This can greatly increase the value of the available data sources and facilitate -reproducible research \citep{Gandrud13, Boettiger2015} and FAIR data -sharing \cite{xxx}. However, the tools often remain missing even when +reproducible research \citep{Gandrud13, Boettiger2015} and the sharing +of findable, accessible, interoperable, and reusable (FAIR) data +\cite{xxx}. However, the tools often remain missing even when the data is made available. For instance, whereas dozens of statistical authorities have started to share data through the PX-WEB API, a dedicated R package that provides a unified access to these -data collections has been missing. We introduce here the \pkg{pxweb} R +data collections has been missing. 
We introduce here the \CRANpkg{pxweb} R package that has been designed to facilitate seamless access to open data collections through the PC-Axis API \cite{xxx}, which is widely adopted by national and international statistical organizations. @@ -95,7 +96,7 @@ % https://www.scb.se/Upload/PC-Axis/Download/PX-Web/2017v1/Release-notes-pxweb-2017-v1.pdf % https://www.scb.se/sv\_/PC-Axis/Documentation/Error-codes-PC-Axis/ -The \pkg{pxweb} R package is addressing these needs and provides +The \CRANpkg{pxweb} R package is addressing these needs and provides mature and tested tools to find, access, and cite official statistics and other information shared in the widely adopted PC-Axis format. @@ -124,11 +125,22 @@ \subsection{The \CRANpkg{pxweb} package} % Whereas dozens of statistical authorities have started to share data through the PX-WEB API, a dedicated R package that provides a unified access to these data collections has been missing. % The \CRANpkg{pxweb} package is now filling this gap [CLOSELY RELATED PKGS SHOULD BE CITED HERE?]. Following its first CRAN release in 2014, the \CRANpkg{pxweb}, several contributors and feedback from the user community have supported the package development. -% [HAS THE PKG BEEN APPLIED IN PUBLICATIONS. THIS WOULD BE A GOOD PLACE TO CITE THOSE?]. SOME brief WORDS ABOUT DATA STANDARDS AND POSSIBLE VARIATIONS BETWEEN DATA PROVIDERS; further details will be in the later section. The pxweb depends on further R packages including \pkg{checkmate} \citep{checkmate}, \pkg{httr} \citep{httr}, \pkg{jsonlite} \citep{jsonlite}. The \pkg{pxweb} package is part of the rOpenGov open data science project \citep{Lahti13icml}. +% [HAS THE PKG BEEN APPLIED IN PUBLICATIONS. THIS WOULD BE A GOOD PLACE TO CITE THOSE? -> OR in DISCUSSION?]. SOME brief WORDS ABOUT DATA STANDARDS AND POSSIBLE VARIATIONS BETWEEN DATA PROVIDERS; further details will be in the later section. 
The pxweb depends on further R packages including \pkg{checkmate} \citep{checkmate}, \pkg{httr} \citep{httr}, \pkg{jsonlite} \citep{jsonlite}. The \CRANpkg{pxweb} package is part of the rOpenGov open data science project \citep{Lahti13icml}. % In summary, the \CRANpkg{pxweb} package provides custom tools for open statistical data resources provided through the PX-WEB API. Currently, the pxweb package provides seamless algorithmic access from the R environment to dozens of data collections from national authorities in countries such as Estonia, Iceland, Finland, Norway, Sweden, The Netherlands, and elsewhere. Seamless integration with other data analysis tools is facilitated by support for features such as cache, date formatting, tidy data principles \citep{wickham2014}, and the \Cpkg{tibble} \citep{tibble} data format. In this article, we provide an overview of the functionality and use cases based on the current CRAN release version (0.8). The comprehensive on-line documentation, which is available via the package homepage\footnote{\url{http://ropengov.github.io/pxweb}}, includes simple examples for individual functions, generic tutorials, and links to more advanced case studies. Moreover, the package is following best practices in open source software development such as version control, automated unit tests, continuous integration, and collaborative development \citep{PerezRiverol2016}. +%The work has been released as open source under the permissive +%modified BSD-2-clause +%license\footnote{\url{https://opensource.org/licenses/BSD-2-Clause}}, +%which is permissive license and suited for research +%use \cite{Morin2012}. We appreciate feedback from the users through +%the Github issue +%tracker\footnote{\url{https://github.com/rOpenGov/pxweb/issues}}, or +%contributions through pull requests. 
+ + + \section[Usage]{Example case studies} \begin{itemize} @@ -145,53 +157,153 @@ \subsection{Using it for another API, not in the catalogue} \section[summary]{Discussion} -\begin{itemize} - \item Reiterate the gap that this package fills: data access for open workflows; summary of the functionality - \item The present version of the package is mature and stable; information on the userbase and downloads? - \item Quality control: CI, unit tests, open development/issues, CRAN checks etc - \item Justification for design choices that are potentially interesting or controversial - \item Examples of known case studies etc. that the package has enabled - \item Future extensions: additional data sources, additional functionality(?)..? -\end{itemize} +%\begin{itemize} +% \item Reiterate the gap that this package fills: data access for open workflows; summary of the functionality +% \item The present version of the package is mature and stable; information on the userbase and downloads? +% \item Quality control: CI, unit tests, open development/issues, CRAN checks etc +% \item Justification for design choices that are potentially interesting or controversial +% \item Examples of known case studies etc. that the package has enabled +% \item Future extensions: additional data sources, additional functionality(?)..? +%\end{itemize} -%Reiterate the gap that this package fills: data access for open workflows; summary of the functionality +% Summary of the package and motivation + + + +The \CRANpkg{pxweb} package provides a seamless programmatic access to +statistical data resources that are shared via the PX-WEB API. This is +helping to bridge the gap between the providers and end users of +official statistics. 
Whereas specialized web applications typically +focus on a particular data source or task \cite{xxx}, \CRANpkg{pxweb} +facilitates general programmatic access to open APIs that share data +in the PC-Axis format, which has been widely adopted by national and +international statistical organizations. A user gets a seamless and +standardized access to original online data sources, which allows the +implementation of open and reproducible data science workflows on +official statistics \citep{Gandrud13, Boettiger2015} and supports FAIR +data sharing \cite{xxx}. As such, the package solves a timely +bottleneck in governmental data analytics as the availability of open +data from National Statistical Agencies has been steadily increasing +\cite{xxx}. + +% Summary of the functionality + +The package facilitates algorithmic access and analysis of a +remarkably vast collection of curated data collections from the R +environment to data from national authorities in over a dozen +countries or international organizations, mainly from Europe. The data +catalogue integrated with the package lists 30 readily accessible +databases, and the methods allow the users to specify additional API +sources when necessary. The package automates major parts of the data +science workflow, such as finding, accessing, integrating, citing, and +reporting data. The available tools include utilities for data query, +download, manipulation and visualization, and they can utilize +information about the incorporated data hierarchies. The combination +of algorithms provides a smooth, automated, reproducible and +well-documented access to continuously evolving statistical data +sources. The online documentation provides detailed examples on how +the package can be used to investigate spatial, temporal, demographic, +and other phenomena. 
The implemented methods take into account +variations in raw data formats, access details, tidy data +principles \citep{wickham2014}, and typical use cases so that the end +users can avoid repetitive programming tasks, potential +misinterpretations, and coding errors. This facilitates integration +with other data analysis tools, and helps the end users to dedicate +more time to the statistical analysis and interpretation. In addition +to helping to identify and access data, the package simplifies and +standardizes the process of data citations with specific data +versions, access times, and sources. Our implementations provide +automatically collected citation information and details for the +accessed data sets and version numbers, thus facilitating transparent +and reproducible research in the ever-changing digital +landscape. Automation of the citation data collection is not only +saving time by increased efficiency but also improving the reliability +and accuracy of the citation data. By providing these tools we hope to +promote more wide-spread adoption of data citation +guidelines \cite{xxx}. + +The current, mature version is a result of active development and +testing by the user community since its first CRAN release in 2014 and +a major revision in 2018. The introduced tools can benefit researchers +and data analysts particularly in academia, government, and industry, +but also citizen scientists and NGOs. We expect that the package has +been adopted especially by those who are analysing official statistical data +in R and implementing their own data science workflows. The package +has a stable and thoroughly tested functionality. Following the major +rewrite of the package in 2018, the number of downloads has more than tripled +from 3000 downloads in 2017 to 11000 downloads in 2021. 
The package is +currently the second most downloaded package of the rOpenGov project +after the \CRANpkg{eurostat} package \cite{Lahti17eurostat}, and +has roughly the same number of downloads as the \CRANpkg{osmar} +package for the Open Street Map \cite{osmar}. + -%The present version of the package is mature and stable; information on the userbase and downloads? %Quality control: CI, unit tests, open development/issues, CRAN checks etc +The package follows best practices in open source software development +such as version control, automated unit tests, continuous integration, +and collaborative development \citep{PerezRiverol2016}. We hope that +our active commitment to the project maintenance and development of +the package will encourage further feedback and contributions from the +user community. + +Quality control: CI, unit tests, open development/issues, CRAN checks etc + + + %Justification for design choices that are potentially interesting or controversial -%Examples of known case studies etc. that the package has enabled +Whereas \CRANpkg{pxweb} has been designed to access the PC-Axis API, +this should not be confused with the PC-Axis file format (typically +abbreviated as '.px'). We anticipate that the more flexible PC-Axis +API is gradually taking over the PC-Axis file format as the data +sharing platform for official statistics. Those who need to access and +parse legacy px files can have a look at the independently developed +pxR package, which is currently maintained in +GitHub \url{https://github.com/cjgb/pxR}. -%Future extensions: additional data sources, additional functionality(?)..? +%Examples of known case studies etc. that the package has enabled + +%[HAS THE PKG BEEN APPLIED IN PUBLICATIONS. THIS WOULD BE A GOOD PLACE +%TO CITE THOSE?] -% The introduced tools can benefit researchers and data analysts in academia, government, and industry. 
Complete analytical workflow from raw data to statistical summaries and final publication can be greatly facilitated by combining programmatic data access with downstream data analysis and visualization tools. The pxweb package supports automated, transparent, reproducible, and well-documented data retrieval from statistical authorities. Utilities such as search, subsetting and cache support efficient data processing and analysis. Further custom tools and functionality can be built around this package. -%Data science methods and tools play a key role in bridging the -%The gap between data providers and end users -%Custom data science methods and tools can greatly facilitate briding +%Future extensions: additional data sources, additional functionality(?)..? + +Complete analytical workflow from raw data to statistical summaries +and final publication can be greatly facilitated by combining +programmatic data access with downstream data analysis and +visualization tools. The pxweb package supports automated, +transparent, reproducible, and well-documented data retrieval from +statistical authorities. Further custom tools and functionality can be +built around this package. -% The pxweb R package provides a seamless programmatic access to statistical data resources that are shared via the PX-WEB API. This popular interface has been adopted by dozens of official statistical authorities world-wide, and hence the pxweb package can facilitate the access and analysis of a remarkable vast collection of curated data collections. +Future developments of the package will include improved query +options, analytical, and visualization capabilities. The pxweb package +provides the core functionality. This can be, and has been +complemented by other packages that provide additional utilities built +around it.. here discuss the new pkg by our collaborators. 
-%pxR package has been moved to Github, and some improvements have been done https://github.com/cjgb/pxR I think it is two different things. pxR is, to the best of my knowledge, a package to parse PC-Axis files (.px). That file format is old and I expect that JSON-stat will eventually replace the PC-Axis format. We could definitely mention it in the paper, but I think it is essentially a different thing compared to the pxweb package. pxweb is just an API package accessing the API to access the data. +Whereas the methods can be used with any PX-WEB API that is locally +accessible, an increasing number of the official statistical resources +are being shared openly. For instance, the statistical authorities in +many nordic countries have invested in open data sharing, which has +supported various use cases by governmental authorities, companies, +and citizen scientists \cite{xxx}. -% The available tools include utilities for data query, download, manipulation and visualization, and they can utilize information about the incorporated data hierarchies. The combination of algorithms provides a smooth, automated, reproducible and well-documented access to continuously evolving statistical data streams. The online documentation provides detailed examples on how the package can be used to investigate spatial, temporal, demographic, and other phenomena. -% Algorithmic tools, such as the ones provided by the pxweb package, can help to realize the full potential of open statistical data collections. We have introduced a set of targeted tools for the PX-WEB API, which is a widely used data sharing platforms among national and other statistical authorities. Research and citizen science can benefit from the increasing availability of open statistical data resources. -% Whereas the pxweb tools can be used with any PX-WEB API that is locally accessible, an increasing number of the official statistical resources are being shared openly. 
For instance, the statistical authorities in many nordic countries have invested in open data sharing, which has supported various use cases by governmental authorities, companies, and citizen scientists \cite{xxx}. More about connections to the overall open data framework... +% Concluding -% Our work is also advancing data citation practices. In particular, our implementations provide automatically collected citation information and details for the accessed data sets and version numbers, thus facilitating transparent and reproducible research in the ever-changing digital landscape. Automation of the citation data collection is not only saving time by increased efficiency but also improving the reliability and accuracy of the citation data. Data citation practices have been recently discussed \cite{xxx}, with recommended best practices \cite{xxx}. By providing these tools we hope to promote more wide-spread adoption of data citation guidelines. +More about connections to the overall open data science framework... -% Future developments of the package will include improved query options, analytical, and visualization capabilities. The pxweb package provides the core functionality. This can be, and has been complemented by other packages that provide additional utilities built around it.. here discuss the new pkg by our collaborators. +As such, our work contributes to the rapidly growing field of open data science \cite{Lahti2018IDA}, helping to bring state-of-art and up-to-date data sets from dozens of statistical authorities more accessible for the statistical community. This work provides substantial improvements over the previously available tools, and has been extensively tested by an active user community. Open access to data resources facilitates opening of the complete data analytical workflows. Example data sets for statistical methods development. Encourages further data sharing. Unexpected use cases by integration with external sources. 
-% As such, our work contributes to the rapidly growing field of open data science \cite{Lahti2018IDA}, helping to bring state-of-art and up-to-date data sets from dozens of statistical authorities more accessible for the statistical community. This work provides substantial improvements over the previously available tools, and has been extensively tested by an active user community. Open access to data resources facilitates opening of the complete data analytical workflows. Example data sets for statistical methods development. Encourages further data sharing. Unexpected use cases by integration with external sources. -% The work has been released as open source under the permissive modified BSD-2-clause license\footnote{\url{https://opensource.org/licenses/BSD-2-Clause}}, which is permissive license and suited for research use \cite{Morin2012}. We appreciate feedback from the users through the Github issue tracker\footnote{\url{https://github.com/rOpenGov/pxweb/issues}}, or contributions through pull requests. We hope that our active commitment to the project maintenance and development of the package will encourage further feedback and contributions from the user community. 
\section*{Acknowledgments} From 403155946d4d6090679476213e73c877d723cd7d Mon Sep 17 00:00:00 2001 From: antagomir Date: Thu, 28 Jul 2022 12:39:05 +0300 Subject: [PATCH 03/13] Discussion and Table added --- paper/magnusson-kainu-lahti.bib | 21 +++++ paper/magnusson-kainu-lahti.tex | 135 +++++++++++++++++++------------- 2 files changed, 102 insertions(+), 54 deletions(-) diff --git a/paper/magnusson-kainu-lahti.bib b/paper/magnusson-kainu-lahti.bib index 3399059a..e2f7fac2 100644 --- a/paper/magnusson-kainu-lahti.bib +++ b/paper/magnusson-kainu-lahti.bib @@ -1,3 +1,24 @@ + + + + + +@Article{Raisamo2019, + author = {Susanna Raisamo and Arho Toikka and Jani Selin and + Maria Heiskanen}, + title = {The density of electronic gambling machines and + area-level socioeconomic status in Finland: a + country with a legal monopoly on gambling and a + decentralised system of {EGM}s}, + journal = {{BMC Public Health}}, + year = 2019, + volume = 19, + pages = 1198, + doi = {10.1186/s12889-019-7535-1} +} + + + @Article{Morin2012, author = {Morin A, Urban J, Sliz P}, title = {A quick guide to software licensing for the scientist-programmer}, diff --git a/paper/magnusson-kainu-lahti.tex b/paper/magnusson-kainu-lahti.tex index 2b788fff..2b2977e7 100644 --- a/paper/magnusson-kainu-lahti.tex +++ b/paper/magnusson-kainu-lahti.tex @@ -139,17 +139,35 @@ \subsection{The \CRANpkg{pxweb} package} %tracker\footnote{\url{https://github.com/rOpenGov/pxweb/issues}}, or %contributions through pull requests. +The package facilitates algorithmic access to data from national and +regional authorities in 18 countries, territories, and international +organizations, mainly from Europe.
The current data catalogue provides +integrated access to 30 readily accessible databases +(Table~\ref{tab:databases}), and support for specifying additional +sources is available\footnote{Further organizations using PX-WEB are +listed in +\url{https://www.scb.se/en/services/statistical-programs-for-px-files/px-web/pxweb-examples}}. +% It would be very good to systematically add in the API catalog these +% and others we can find now, should be straightforward. + +\begin{table} +\input{api} +\caption{\label{tab:databases}PX-Web databases that are integrated in the pxweb R package API catalog. The online sources are listed in the pxweb R package. The language codes refer to the ISO 2 Letter Language Codes.} +\end{table} \section[Usage]{Example case studies} \begin{itemize} - \item Short version of the vignette. - \item A nice figure and table should be the result. - \item Cover all relevant functionality, at least by mentioning it and citing the package documentation/website/vignette, if not all can be included here + \item Short version of the vignette. \item A nice figure and + table should be the result. \item Cover all relevant + functionality, at least by mentioning it and citing the package + documentation/website/vignette, if not all can be included here \end{itemize} + + \subsection{Citing data using pxweb} \subsection{Using it for another API, not in the catalogue} @@ -169,8 +187,6 @@ \subsection{Using it for another API, not in the catalogue} % Summary of the package and motivation - - The \CRANpkg{pxweb} package provides a seamless programmatic access to statistical data resources that are shared via the PX-WEB API. This is helping to bridge the gap between the providers and end users of @@ -238,76 +254,87 @@ \subsection{Using it for another API, not in the catalogue} has roughly the same number of downloads with the \CRANpkg{osmar} package for the Open Street Map \cite{osmar}.
- - %Quality control: CI, unit tests, open development/issues, CRAN checks etc The package follows best practices in open source software development such as version control, automated unit tests, continuous integration, -and collaborative development \citep{PerezRiverol2016}. We hope that -our active commitment to the project maintenance and development of -the package will encourage further feedback and contributions from the -user community. - -Quality control: CI, unit tests, open development/issues, CRAN checks etc - - +and collaborative development \citep{PerezRiverol2016}. Release +through CRAN ensures compatibility with the broader R ecosystem. We +hope that our active commitment to the project maintenance and +development of the package will encourage further feedback and +contributions from the user community. %Justification for design choices that are potentially interesting or controversial -Whereas \CRANpkg{pxweb} has been designed to access the PC-Axis API, -this should not be confused with the PC-Axis file format (typically -abbreviated as '.px'). We anticipate that the more flexible PC Axis -API is gradually taking over the PC-Axis file format as the data -sharing platform for official statistics. Those who need to access and -parse legacy px files can have a look at the independently developed -pxR package, which is currently maintained in +Whereas \CRANpkg{pxweb} has been designed to access the PX-Web API, +this should not be confused with the related PC-Axis file format +(typically abbreviated as '.px'). We anticipate that the more flexible +PX-Web API is gradually taking over the PC-Axis file format as the +data sharing platform for official statistics. Those who need to +access and parse legacy px files can have a look at the independently +developed pxR package, which is currently maintained in Github \url{https://github.com/cjgb/pxR}. - - %Examples of known case studies etc.
that the package has enabled -%[HAS THE PKG BEEN APPLIED IN PUBLICATIONS. THIS WOULD BE A GOOD PLACE -%TO CITE THOSE?] - - -%Future extensions: additional data sources, additional functionality(?)..? - -Complete analytical workflow from raw data to statistical summaries -and final publication can be greatly facilitated by combining -programmatic data access with downstream data analysis and -visualization tools. The pxweb package supports automated, -transparent, reproducible, and well-documented data retrieval from -statistical authorities. Further custom tools and functionality can be -built around this package. - -Future developments of the package will include improved query -options, analytical, and visualization capabilities. The pxweb package -provides the core functionality. This can be, and has been -complemented by other packages that provide additional utilities built -around it.. here discuss the new pkg by our collaborators. - Whereas the methods can be used with any PX-WEB API that is locally accessible, an increasing number of the official statistical resources -are being shared openly. For instance, the statistical authorities in -many nordic countries have invested in open data sharing, which has -supported various use cases by governmental authorities, companies, -and citizen scientists \cite{xxx}. - - +are open access. The statistical authorities in many nordic countries +have invested in open data sharing, which supports use cases by +governmental authorities, companies, and citizen scientists. The +package has been used, for instance, in independent studies on +electronic gambling machines and socioeconomic +status \cite{Raisamo2019}. % Didn't find other citations. + +% Future extensions: additional data sources, additional functionality(?)..? +% Currently unclear to me what is the added value in PxWebApiData / LL +% here discuss the new pkg by our collaborators -> does this refer to PxWebApiData? 
+% Unexpected use cases by integration with external sources -> Any ideas? + +The \CRANpkg{pxweb} package has been designed to provide the core +functionality for API access, around which further custom tools and +functionality can be built. Future developments of the package will +include improved query options as well as analytical and visualization +capabilities. Examples include the independently +developed \CRANpkg{PxWebApiData}, which adds specific functionality for +the Nordic countries (Norway, Sweden, Finland), and the +\CRANpkg{geofi} package, which combines statistical information with tools for +geospatial visualization. Besides research use, official statistics +provide ample material for training in statistics as well as in +computational humanities and social sciences and other fields. Thus, +adding interactive features or specialized tools targeting selected +data sources could support pedagogical case studies. % Concluding -More about connections to the overall open data science framework... - -As such, our work contributes to the rapidly growing field of open data science \cite{Lahti2018IDA}, helping to bring state-of-art and up-to-date data sets from dozens of statistical authorities more accessible for the statistical community. This work provides substantial improvements over the previously available tools, and has been extensively tested by an active user community. Open access to data resources facilitates opening of the complete data analytical workflows. Example data sets for statistical methods development. Encourages further data sharing. Unexpected use cases by integration with external sources. +Transparency and reproducibility of statistical workflows from raw +data to statistical summaries and final publication can be greatly +facilitated by combining programmatic data access with downstream data +analysis and visualization tools.
The pxweb package supports +automated, transparent, reproducible, and well-documented data +retrieval from statistical authorities. Programmatic access to data +resources and the availability of well-tested downstream analysis +methods facilitate the implementation of open and reproducible data +science workflows. The \CRANpkg{pxweb} package provides improvements +over the previously available methods, and it has been extensively +tested and refined by an active user community. The work contributes +to the rapidly growing field of open data science \cite{Lahti2018IDA, +xxx} and helps to make up-to-date historical and contemporary data +collections from dozens of statistical authorities more easily +accessible for statistical research and education. This +could be anticipated to encourage further data sharing by the +authorities as the value of the data is increasing together with the +user base and the number of complementary methods, workflows, and +applications. \section*{Acknowledgments} -We are grateful to all package contributors. The work has been partially funded by Academy of Finland (decisions 295741, 345630 to LL), and is part of rOpenGov\footnote{\url{https://github.ropengov.io}}. +We are grateful to all package contributors. The work has been +partially funded by the Academy of Finland (decisions 295741, 345630 to +LL), and is part of +rOpenGov\footnote{\url{https://github.ropengov.io}}.
\bibliography{magnusson-kainu-lahti} From 40a39a97244513f54baa0a8e3ea0bab9ca615f05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Magnusson?= Date: Mon, 31 Oct 2022 12:31:37 +0100 Subject: [PATCH 04/13] Updates from Overleaf --- paper/magnusson-kainu-lahti.bib | 18 +- paper/magnusson-kainu-lahti.tex | 338 +++++++++++++++++++++++++++----- paper/main.R | 0 3 files changed, 305 insertions(+), 51 deletions(-) mode change 100755 => 100644 paper/main.R diff --git a/paper/magnusson-kainu-lahti.bib b/paper/magnusson-kainu-lahti.bib index 972dcfb9..0a133436 100644 --- a/paper/magnusson-kainu-lahti.bib +++ b/paper/magnusson-kainu-lahti.bib @@ -1,6 +1,22 @@ +@Article{Raisamo2019, + author = {Susanna Raisamo and Arho Toikka and Jani Selin and + Maria Heiskanen}, + title = {The density of electronic gambling machines and + area-level socioeconomic status in Finland: a + country with a legal monopoly on gambling and a + decentralised system of {EGM}s}, + journal = {{BMC Public Health}}, + year = 2019, + volume = 19, + pages = 1198, + doi = {10.1186/s12889-019-7535-1} +} + + + @Article{Morin2012, author = {Morin A, Urban J, Sliz P}, - title = {A quick guide to software licensing for the scientist-programmer} + title = {A quick guide to software licensing for the scientist-programmer}, journal = {PLoS Computational Biology}, year = 2012, volume = 8, diff --git a/paper/magnusson-kainu-lahti.tex b/paper/magnusson-kainu-lahti.tex index 34382789..2b2977e7 100644 --- a/paper/magnusson-kainu-lahti.tex +++ b/paper/magnusson-kainu-lahti.tex @@ -1,35 +1,106 @@ -\title{Opening Official Statistics with the \CRANpkg{pxweb} Package} -\author{by Måns Magnusson, Markus Kainu, Leo Lahti} +\title{Opening Up Official Statistics with the \CRANpkg{pxweb} Package} +\author{by Måns Magnusson, Leo Lahti} + +% Other authors?
+% Janne Huovari, many commits +% Markus Kainu, possibly figures & cheat sheet +% Pyry Kantanen, R-universe, technical & submission support, other \maketitle %An abstract of less than 150 words. \abstract{Abstract \CRANpkg{pxweb} R package here.} -\begin{itemize} - \item Motivation here. Why do we need this package? We want to use the open data pipeline, access, use and cite. - \item More and more data is available from National Statistical Agencies (see commented out text below). - \item We need tools to access the data that is simple and efficient to use (see commented out text below). - \item Citing data is a problem in itself, but important. It should be simplifed as much as possible. -\end{itemize} - -%% Potential text to use -% Various statistical authorities are increasingly sharing open data resources \cite{xxx}. For instance, international agencies such as Eurostat\footnote{\url{http://ec.europa.eu/eurostat/data/database}} \cite{xxx}, ILO\footnote{\url{https://www.ilo.org/ilostat}} \cite{xxx} , FAO\footnote{\url{http://www.fao.org/faostat}} \cite{xxx} and World Bank\footnote{\url{https://data.worldbank.org}} \cite{xxx} have released popular open data services. Altogether, national and international statistical authorities are now sharing massive amounts of open data on national and international aspects of politics, economics, demography, health, infrastructure, climate, and other areas. Such statistical data sets can be available with a great geographical resolution, with time series spanning several years to decades or even centuries \cite{xxx}. - - -% Opening statistical data collections is, however, only the first step towards realizing their full potential and value. Algorithmic tools for data access and analysis can greatly increase the value of such data resources and benefit reproducible research \citep{Gandrud13, Boettiger2015}. 
Dedicated software packages can be used to simplify, standardize, and automate analysis workflows, taking into account variations in raw data formats, access details, and typical use cases so that the end users can avoid repetitive programming tasks, avoid potential misinterpretations and coding errors, and save time. - -% Consequently, various developers have released software tools to facilitate the use of statistical data resources. Instead of merely providing tools for data browsing and standard retrieval, we emphasize the need for algorithms that provide the seamless bridge between original data sources and downstream analysis tasks in statistical software languages. For instance in R, various packages have been recently released for generic open data retrieval, including for instance quandl \cite{quandl} and pdfetch \cite{pdfetch}, and for more dedicated access to specific data sources such as open data from Eurostat \cite{Lahti17eurostat}, World Bank (\CRANpkg{WDI}; \citealt{WDI}), Open Street Map (\CRANpkg{osmar}; \citealt{osmar}) and other sources. - - -% som relevant initial references for citing data -%https://www.openoffice.org/bibliographic/bibtex-defs.pdf -%https://tex.stackexchange.com/questions/109127/how-would-i-cite-a-dataset-with-bibtex +%\begin{itemize} +% \item Motivation here. Why do we need this package? We want to use the open data pipeline, access, use and cite. +% \item More and more data is available from National Statistical Agencies (see commented out text below). +% \item We need tools to access the data that is simple and efficient to use (see commented out text below). +% \item Citing data is a problem in itself, but important. It should be simplifed as much as possible. +%\end{itemize} + + +% Motivation here. Why do we need this package? + +Open data science workflows rely heavily on algorithmic tools for data +retrieval and analysis. 
Automating major parts of the data science +workflow, such as finding, accessing, integrating, citing, and +reporting data, is helping the end users to dedicate more time on the +actual statistical analysis and interpretation. This can greatly +increase the value of the available data sources and facilitate +reproducible research \citep{Gandrud13, Boettiger2015} and the sharing +of findable, accessible, interoperable, and reusable (FAIR) data +\cite{xxx}. However, the tools often remain missing even when +the data is made available. For instance, whereas dozens of +statistical authorities have started to share data through the PX-WEB +API, a dedicated R package that provides a unified access to these +data collections has been missing. We introduce here the \CRANpkg{pxweb} R +package that has been designed to facilitate seamless access to open +data collections through the PC-Axis API \cite{xxx}, which is widely +adopted by national and international statistical organizations. + +% More and more data is available from National Statistical Agencies + +Statistical authorities are now sharing steadily increasing +collections of official statistics and other open data +resources \cite{xxx}. For instance, international agencies such as +Eurostat\footnote{\url{http://ec.europa.eu/eurostat/data/database}} \cite{xxx}, +ILO\footnote{\url{https://www.ilo.org/ilostat}} \cite{xxx} , +FAO\footnote{\url{http://www.fao.org/faostat}} \cite{xxx} and World +Bank\footnote{\url{https://data.worldbank.org}} \cite{xxx} have +released popular open data services. Altogether, national and +international statistical authorities are now sharing massive amounts +of open data on national and international aspects of politics, +economics, demography, health, infrastructure, climate, and other +areas. Such statistical data sets can be available with a great +geographical resolution, with time series spanning several years to +decades or even centuries \cite{xxx}. 
+ +% We need tools to access the data that is simple and efficient to use + +Opening up official statistics is, however, only the first step +towards realizing their full potential and value. There is a clear +need for automated tools to access these data resources that are +simple and efficient to use. Dedicated software packages help to +simplify, standardize, and automate analysis workflows, taking into +account variations in raw data formats, access details, and typical +use cases so that the end users can avoid repetitive programming +tasks, avoid potential misinterpretations and coding errors, and save +time. Consequently, various developers have released software tools to +facilitate the use of statistical data resources. Instead of merely +providing tools for data browsing and standard retrieval, we emphasize +the need for algorithms that provide the seamless bridge between +original data sources and downstream analysis tasks in statistical +software languages. In R, various packages have been recently released +for generic open data retrieval, including for instance +quandl \cite{quandl} and pdfetch \cite{pdfetch}, and for more +dedicated access to specific data sources such as open data from +Eurostat \cite{Lahti17eurostat}, World Bank +(\CRANpkg{WDI}; \citealt{WDI}), Open Street Map +(\CRANpkg{osmar}; \citealt{osmar}) and other sources. + +% Citing data is a problem in itself, but important. It should be +% simplified as much as possible. + +Data citations are an important but often neglected aspect of data +reuse. Guidelines for data sharing have emphasized the need to +document the specific data versions, access times, and +sources. Ideally, this information should be cited in a standardized +format. This process should be simplified as much as possible.
+ +% TODO +% some relevant initial references for citing data +% https://www.openoffice.org/bibliographic/bibtex-defs.pdf +% https://tex.stackexchange.com/questions/109127/how-would-i-cite-a-dataset-with-bibtex % http://www.dcc.ac.uk/resources/how-guides/cite-datasets#fn11 % https://libguides.ub.uu.se/referensguiden/harvard\_exempel % https://www.scb.se/Upload/PC-Axis/Download/PX-Web/2017v1/Release-notes-pxweb-2017-v1.pdf % https://www.scb.se/sv\_/PC-Axis/Documentation/Error-codes-PC-Axis/ +The \CRANpkg{pxweb} R package is addressing these needs and provides +mature and tested tools to find, access, and cite official statistics +and other information shared in the widely adopted PC-Axis format. + + \subsection[PXWEB and PC-Axis]{PXWEB and PC-Axis} \begin{itemize} @@ -39,10 +110,10 @@ \end{itemize} -\subsection{Our contribution: the \CRANpkg{pxweb} package } +\subsection{The \CRANpkg{pxweb} package} \begin{itemize} - \item History fo the package + \item History of the package \item Design principles \item (See comment out text below) \item Extendibility of the package with new APIS @@ -54,49 +125,216 @@ \subsection{Our contribution: the \CRANpkg{pxweb} package } % Whereas dozens of statistical authorities have started to share data through the PX-WEB API, a dedicated R package that provides a unified access to these data collections has been missing. % The \CRANpkg{pxweb} package is now filling this gap [CLOSELY RELATED PKGS SHOULD BE CITED HERE?]. Following its first CRAN release in 2014, the \CRANpkg{pxweb}, several contributors and feedback from the user community have supported the package development. -% [HAS THE PKG BEEN APPLIED IN PUBLICATIONS. THIS WOULD BE A GOOD PLACE TO CITE THOSE?]. SOME brief WORDS ABOUT DATA STANDARDS AND POSSIBLE VARIATIONS BETWEEN DATA PROVIDERS; further details will be in the later section. The pxweb depends on further R packages including \pkg{checkmate} \citep{checkmate}, \pkg{httr} \citep{httr}, \pkg{jsonlite} \citep{jsonlite}. 
The \pkg{pxweb} package is part of the rOpenGov open data science project \citep{Lahti13icml}. +% [HAS THE PKG BEEN APPLIED IN PUBLICATIONS. THIS WOULD BE A GOOD PLACE TO CITE THOSE? -> OR in DISCUSSION?]. SOME brief WORDS ABOUT DATA STANDARDS AND POSSIBLE VARIATIONS BETWEEN DATA PROVIDERS; further details will be in the later section. The pxweb depends on further R packages including \pkg{checkmate} \citep{checkmate}, \pkg{httr} \citep{httr}, \pkg{jsonlite} \citep{jsonlite}. The \CRANpkg{pxweb} package is part of the rOpenGov open data science project \citep{Lahti13icml}. % In summary, the \CRANpkg{pxweb} package provides custom tools for open statistical data resources provided through the PX-WEB API. Currently, the pxweb package provides seamless algorithmic access from the R environment to dozens of data collections from national authorities in countries such as Estonia, Iceland, Finland, Norway, Sweden, The Netherlands, and elsewhere. Seamless integration with other data analysis tools is facilitated by support for features such as cache, date formatting, tidy data principles \citep{wickham2014}, and the \Cpkg{tibble} \citep{tibble} data format. In this article, we provide an overview of the functionality and use cases based on the current CRAN release version (0.8). The comprehensive on-line documentation, which is available via the package homepage\footnote{\url{http://ropengov.github.io/pxweb}}, includes simple examples for individual functions, generic tutorials, and links to more advanced case studies. Moreover, the package is following best practices in open source software development such as version control, automated unit tests, continuous integration, and collaborative development \citep{PerezRiverol2016}. -% The introduced tools can benefit researchers and data analysts in academia, government, and industry. 
Complete analytical workflow from raw data to statistical summaries and final publication can be greatly facilitated by combining programmatic data access with downstream data analysis and visualization tools. The pxweb package supports automated, transparent, reproducible, and well-documented data retrieval from statistical authorities. Utilities such as search, subsetting and cache support efficient data processing and analysis. Further custom tools and functionality can be built around this package. +%The work has been released as open source under the permissive +%modified BSD-2-clause +%license\footnote{\url{https://opensource.org/licenses/BSD-2-Clause}}, +%which is permissive license and suited for research +%use \cite{Morin2012}. We appreciate feedback from the users through +%the Github issue +%tracker\footnote{\url{https://github.com/rOpenGov/pxweb/issues}}, or +%contributions through pull requests. -\section[Usage]{Usage} +The package facilitates algorithmic access to data from national and +regional authorities in 18 countries, territories, and international +organizations, mainly from Europe. The current data catalogue provides +integrated access to 30 readily accessible databases +(Table~\ref{tab:databases}), and support for specifying additional +sources is available\footnote{Further organizations using PX-WEB are +listed in +\url{https://www.scb.se/en/services/statistical-programs-for-px-files/px-web/pxweb-examples}}. +% It would be very good to systematically add in the API catalog these +% and others we can find now, should be straightforward. -\begin{itemize} - \item Short version of the vignette. - \item A nice figure and table should be the result. -\end{itemize} +\begin{table} +\input{api} +\caption{\label{tab:databases}PX-Web databases that are integrated in the pxweb R package API catalog. The online sources are listed in the pxweb R package.
The language codes refer to the ISO 2 Letter Language Codes.} +\end{table} -\subsection{Citing data using pxweb} +\section[Usage]{Example case studies} -\subsection{Using it for another API, not in the catalogue} +\begin{itemize} + \item Short version of the vignette. \item A nice figure and + table should be the result. \item Cover all relevant + functionality, at least by mentioning it and citing the package + documentation/website/vignette, if not all can be included here +\end{itemize} -\section[summary]{Discussion} -% The pxweb R package provides a seamless programmatic access to statistical data resources that are shared via the PX-WEB API. This popular interface has been adopted by dozens of official statistical authorities world-wide, and hence the pxweb package can facilitate the access and analysis of a remarkable vast collection of curated data collections. -% The available tools include utilities for data query, download, manipulation and visualization, and they can utilize information about the incorporated data hierarchies. The combination of algorithms provides a smooth, automated, reproducible and well-documented access to continuously evolving statistical data streams. The online documentation provides detailed examples on how the package can be used to investigate spatial, temporal, demographic, and other phenomena. +\subsection{Citing data using pxweb} -% Algorithmic tools, such as the ones provided by the pxweb package, can help to realize the full potential of open statistical data collections. We have introduced a set of targeted tools for the PX-WEB API, which is a widely used data sharing platforms among national and other statistical authorities. Research and citizen science can benefit from the increasing availability of open statistical data resources. 
+\subsection{Using it for another API, not in the catalogue} -% Whereas the pxweb tools can be used with any PX-WEB API that is locally accessible, an increasing number of the official statistical resources are being shared openly. For instance, the statistical authorities in many nordic countries have invested in open data sharing, which has supported various use cases by governmental authorities, companies, and citizen scientists \cite{xxx}. More about connections to the overall open data framework... -% Our work is also advancing data citation practices. In particular, our implementations provide automatically collected citation information and details for the accessed data sets and version numbers, thus facilitating transparent and reproducible research in the ever-changing digital landscape. Automation of the citation data collection is not only saving time by increased efficiency but also improving the reliability and accuracy of the citation data. Data citation practices have been recently discussed \cite{xxx}, with recommended best practices \cite{xxx}. By providing these tools we hope to promote more wide-spread adoption of data citation guidelines. +\section[summary]{Discussion} -% Future developments of the package will include improved query options, analytical, and visualization capabilities. The pxweb package provides the core functionality. This can be, and has been complemented by other packages that provide additional utilities built around it.. here discuss the new pkg by our collaborators. +%\begin{itemize} +% \item Reiterate the gap that this package fills: data access for open workflows; summary of the functionality +% \item The present version of the package is mature and stable; information on the userbase and downloads? +% \item Quality control: CI, unit tests, open development/issues, CRAN checks etc +% \item Justification for design choices that are potentially interesting or controversial +% \item Examples of known case studies etc. 
that the package has enabled +% \item Future extensions: additional data sources, additional functionality(?)..? +%\end{itemize} + + +% Summary of the package and motivation + +The \CRANpkg{pxweb} package provides seamless programmatic access to +statistical data resources that are shared via the PX-WEB API. This is +helping to bridge the gap between the providers and end users of +official statistics. Whereas specialized web applications typically +focus on a particular data source or task \cite{xxx}, \CRANpkg{pxweb} +facilitates general programmatic access to open APIs that share data +in the PC-Axis format, which has been widely adopted by national and +international statistical organizations. A user gets a seamless and +standardized access to original online data sources, which allows the +implementation of open and reproducible data science workflows on +official statistics \citep{Gandrud13, Boettiger2015} and supports FAIR +data sharing \cite{xxx}. As such, the package solves a timely +bottleneck in governmental data analytics as the availability of open +data from National Statistical Agencies has been steadily increasing +\cite{xxx}. + +% Summary of the functionality + +The package facilitates algorithmic access and analysis of a +remarkably vast collection of curated data sets, providing access from the R +environment to data from national authorities in over a dozen +countries or international organizations, mainly from Europe. The data +catalogue integrated with the package lists 30 readily accessible +databases, and the methods allow the users to specify additional API +sources when necessary. The package automates major parts of the data +science workflow, such as finding, accessing, integrating, citing, and +reporting data. The available tools include utilities for data query, +download, manipulation and visualization, and they can utilize +information about the incorporated data hierarchies.
The combination +of algorithms provides a smooth, automated, reproducible and +well-documented access to continuously evolving statistical data +sources. The online documentation provides detailed examples on how +the package can be used to investigate spatial, temporal, demographic, +and other phenomena. The implemented methods take into account +variations in raw data formats, access details, tidy data +principles \citep{wickham2014}, and typical use cases so that the end +users can avoid repetitive programming tasks, avoid potential +misinterpretations and coding errors. This facilitates integration +with other data analysis tools, and helps the end users to dedicate +more time to the statistical analysis and interpretation. In addition +to helping to identify and access data, the package simplifies and +standardizes the process of data citations with specific data +versions, access times, and sources. Our implementations provide +automatically collected citation information and details for the +accessed data sets and version numbers, thus facilitating transparent +and reproducible research in the ever-changing digital +landscape. Automation of the citation data collection is not only +saving time by increased efficiency but also improving the reliability +and accuracy of the citation data. By providing these tools we hope to +promote more wide-spread adoption of data citation +guidelines \cite{xxx}. + +The current, mature version is a result of active development and +testing by the user community since its first CRAN release in 2014 and +a major revision in 2018. The introduced tools can benefit researchers +and data analysts particularly in academia, government, and industry, +but also citizen scientists and NGOs. We expect that the package has +been adopted especially by those who are analysing official statistical data +in R and implementing their own data science workflows. The package +has a stable and thoroughly tested functionality.
Following the major +rewrite of the package in 2018, the number of downloads has more than tripled +from 3000 downloads in 2017 to 11000 downloads in 2021. The package is +currently the second most downloaded package of the rOpenGov project +after the \CRANpkg{eurostat} package \cite{Lahti17eurostat}, and +has roughly the same number of downloads as the \CRANpkg{osmar} +package for OpenStreetMap \cite{osmar}. + +%Quality control: CI, unit tests, open development/issues, CRAN checks etc + +The package follows best practices in open source software development +such as version control, automated unit tests, continuous integration, +and collaborative development \citep{PerezRiverol2016}. Release +through CRAN ensures compatibility with the broader R ecosystem. We +hope that our active commitment to the project maintenance and +development of the package will encourage further feedback and +contributions from the user community. + +%Justification for design choices that are potentially interesting or controversial + +Whereas \CRANpkg{pxweb} has been designed to access the PX-Web API, +this should not be confused with the related PC-Axis file format +(typically abbreviated as '.px'). We anticipate that the more flexible +PX-Web API is gradually taking over the PC-Axis file format as the +data sharing platform for official statistics. Those who need to +access and parse legacy px files can have a look at the independently +developed pxR package, which is currently maintained on +GitHub \url{https://github.com/cjgb/pxR}. + +%Examples of known case studies etc. that the package has enabled + +Whereas the methods can be used with any PX-WEB API that is locally +accessible, an increasing number of the official statistical resources +are open access. The statistical authorities in many Nordic countries +have invested in open data sharing, which supports use cases by +governmental authorities, companies, and citizen scientists.
The +package has been used, for instance, in independent studies on +electronic gambling machines and socioeconomic +status \cite{Raisamo2019}. % Didn't find other citations. + +% Future extensions: additional data sources, additional functionality(?)..? +% Currently unclear to me what is the added value in PxWebApiData / LL +% here discuss the new pkg by our collaborators -> does this refer to PxWebApiData? +% Unexpected use cases by integration with external sources -> Any ideas? + +The \CRANpkg{pxweb} package has been designed to provide the core +functionality for API access, around which further custom tools and +functionality can be built. Future developments of the package will +include improved query options as well as analytical and visualization +capabilities. Examples of complementary tools include the independently +developed \CRANpkg{PxWebApiData}, which adds specific functionality for +the Nordic countries (Norway, Sweden, Finland), and the +\CRANpkg{geofi} package, which combines statistical information with tools for +geospatial visualization. Besides research use, official statistics +provide ample material for training in statistics as well as in +computational humanities and social sciences and other fields. Thus, +adding interactive features or specialized tools targeting selected +data sources could support pedagogical case studies. + +% Concluding + +Transparency and reproducibility of statistical workflows from raw +data to statistical summaries and final publication can be greatly +facilitated by combining programmatic data access with downstream data +analysis and visualization tools. The \CRANpkg{pxweb} package supports +automated, transparent, reproducible, and well-documented data +retrieval from statistical authorities. Programmatic access to data +resources and the availability of well-tested downstream analysis +methods facilitate the implementation of open and reproducible data +science workflows.
The \CRANpkg{pxweb} package provides improvements +over the previously available methods, and it has been extensively +tested and refined by an active user community. The work contributes +to the rapidly growing field of open data science \cite{Lahti2018IDA, +xxx} and helps to make up-to-date historical and contemporary data +collections from dozens of statistical authorities more easily +accessible for statistical analysis, research, and education. This +could be anticipated to encourage further data sharing by the +authorities as the value of the data is increasing together with the +user base and the number of complementary methods, workflows, and +applications. -% As such, our work contributes to the rapidly growing field of open data science \cite{Lahti2018IDA}, helping to bring state-of-art and up-to-date data sets from dozens of statistical authorities more accessible for the statistical community. This work provides substantial improvements over the previously available tools, and has been extensively tested by an active user community. Open access to data resources facilitates opening of the complete data analytical workflows. Example data sets for statistical methods development. Encourages further data sharing. Unexpected use cases by integration with external sources. -% The work has been released as open source under the permissive modified BSD-2-clause license\footnote{\url{https://opensource.org/licenses/BSD-2-Clause}}, which is permissive license and suited for research use \cite{Morin2012}. We appreciate feedback from the users through the Github issue tracker\footnote{\url{https://github.com/rOpenGov/pxweb/issues}}, or contributions through pull requests. We hope that our active commitment to the project maintenance and development of the package will encourage further feedback and contributions from the user community. \section*{Acknowledgments} -We are grateful to all package contributors, including ...
- -The work has been partially funded by Academy of Finland (decisions 295741, 307127 to LL), and is part of rOpenGov\footnote{\url{https://github.ropengov.io}}. +We are grateful to all package contributors. The work has been +partially funded by Academy of Finland (decisions 295741, 345630 to +LL), and is part of +rOpenGov\footnote{\url{https://github.ropengov.io}}. \bibliography{magnusson-kainu-lahti} @@ -109,14 +347,14 @@ \section*{Acknowledgments} Finland\\} \email{mons.magnusson@gmail.com} -\address{Markus Kainu\\ - %Research Department, The Social Insurance Institution of Finland\\ - %PO Box 450, 00101 Helsinki\\ - Finland\\} -\email{markus.kainu@kela.fi} +%\address{Markus Kainu\\ +% %Research Department, The Social Insurance Institution of Finland\\ +% %PO Box 450, 00101 Helsinki\\ +% Finland\\} +%\email{markus.kainu@kela.fi} \address{Leo Lahti\\ - Department of Future Technologies\\ + Department of Computing\\ PO Box 20014 University of Turku\\ Finland\\} \email{leo.lahti@iki.fi} diff --git a/paper/main.R b/paper/main.R old mode 100755 new mode 100644 From ce58900764b385faa9b4ec5cb150243efc1cd794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Magnusson?= Date: Mon, 31 Oct 2022 14:06:12 +0100 Subject: [PATCH 05/13] Updates from Overleaf --- paper/magnusson-kainu-lahti.tex | 3 + tests/testthat/test-get_pxweb_dims.R | 18 + tests/testthat/test-get_pxweb_levels.R | 19 + tests/testthat/test-get_pxweb_metadata.R | 47 ++ tests/testthat/test-interactive_pxweb.R | 81 +++ tests/testthat/test-multiple_queries_data.R | 49 ++ tests/testthat/test-pxweb_api_class.R | 63 ++ tests/testthat/test-pxweb_examples.R | 31 + tests/testthat/test-test_pxweb_api.R | 34 + tests/testthat/test-utils_internal.R | 45 ++ .../test-x_deprecated_get_pxweb_data.R | 187 +++++ tests/testthat/test_data/filter_query.json | 28 - tests/testthat/test_data/pxm1_test.rda | Bin 4202 -> 0 bytes tests/testthat/test_data/test_query_px.json | 42 -- tests/testthat/test_data/test_query_sdmx.json | 42 -- 
tests_bash/pxweb.R | 12 +- tests_bash/pxweb.sh | 18 + vignettes/pxweb.md | 676 ++++++++++++++++++ 18 files changed, 1276 insertions(+), 119 deletions(-) create mode 100644 tests/testthat/test-get_pxweb_dims.R create mode 100644 tests/testthat/test-get_pxweb_levels.R create mode 100644 tests/testthat/test-get_pxweb_metadata.R create mode 100644 tests/testthat/test-interactive_pxweb.R create mode 100644 tests/testthat/test-multiple_queries_data.R create mode 100644 tests/testthat/test-pxweb_api_class.R create mode 100644 tests/testthat/test-pxweb_examples.R create mode 100644 tests/testthat/test-test_pxweb_api.R create mode 100644 tests/testthat/test-utils_internal.R create mode 100644 tests/testthat/test-x_deprecated_get_pxweb_data.R delete mode 100644 tests/testthat/test_data/filter_query.json delete mode 100644 tests/testthat/test_data/pxm1_test.rda delete mode 100644 tests/testthat/test_data/test_query_px.json delete mode 100644 tests/testthat/test_data/test_query_sdmx.json mode change 100755 => 100644 tests_bash/pxweb.sh create mode 100644 vignettes/pxweb.md diff --git a/paper/magnusson-kainu-lahti.tex b/paper/magnusson-kainu-lahti.tex index 2b2977e7..dd153332 100644 --- a/paper/magnusson-kainu-lahti.tex +++ b/paper/magnusson-kainu-lahti.tex @@ -112,6 +112,9 @@ \subsection{The \CRANpkg{pxweb} package} +% In early 2013, Statistics Sweden released PX-WEB as a way to simplify the use and re-use of the statistics produced by the agency \cite{xxx}. The PXWEB API + + \begin{itemize} \item History of the package \item Design principles diff --git a/tests/testthat/test-get_pxweb_dims.R b/tests/testthat/test-get_pxweb_dims.R new file mode 100644 index 00000000..5897e63e --- /dev/null +++ b/tests/testthat/test-get_pxweb_dims.R @@ -0,0 +1,18 @@ +# Test suits for the examples in the documentation + +context("get_pxweb_dims.R") + +test_that(desc="get_pxweb_dims()",{ + + # CRAN seem to run tests in parallel, hence API tests cannot be run on CRAN. 
+ skip_on_cran() + + expect_warning(bottom_node <- get_pxweb_metadata("http://api.scb.se/OV0104/v1/doris/sv/ssd/AM/AM0114/LCIArbKv"), regexp = "deprecated") + expect_warning(dims <- suppressMessages(get_pxweb_dims(bottom_node)), regexp = "deprecated") + +}) + + + + + diff --git a/tests/testthat/test-get_pxweb_levels.R b/tests/testthat/test-get_pxweb_levels.R new file mode 100644 index 00000000..426c6674 --- /dev/null +++ b/tests/testthat/test-get_pxweb_levels.R @@ -0,0 +1,19 @@ +# Test suits for the examples in the documentation + +context("get_pxweb_levels.R") + +test_that(desc="get_pxweb_levels",{ + + # CRAN seem to run tests in parallel, hence API tests cannot be run on CRAN. + skip_on_cran() + + expect_warning( + lev <- get_pxweb_levels(baseURL = + paste0(pxweb_api$new("api.scb.se")$base_url(language = "sv"), "/ssd")), regexp = "deprecated") + +}) + + + + + diff --git a/tests/testthat/test-get_pxweb_metadata.R b/tests/testthat/test-get_pxweb_metadata.R new file mode 100644 index 00000000..af94054d --- /dev/null +++ b/tests/testthat/test-get_pxweb_metadata.R @@ -0,0 +1,47 @@ +# Test suite for get_pxweb_metadata() + +context("get_pxweb_metadata.R") + + +test_that(desc="baseURL 1",{ + + skip_on_cran() + + suppressWarnings( + api_tests_get_pxweb_metadata_baseURL <- list( + list(baseURL = paste0(pxweb_api$new("api.scb.se")$base_url(language = "sv"), "/ssd"), + test_dims = c(21, 4)), + list(baseURL = paste0(pxweb_api$new("api.scb.se")$base_url(), "/ssd"), + test_dims = c(17, 4)) + )) + + for (test in api_tests_get_pxweb_metadata_baseURL){ + expect_warning(test_file <- get_pxweb_metadata(baseURL = test$baseURL), regexp = "deprecated") + expect_that(test_file, is_a("data.frame"), info = test$baseURL) + expect_that(dim(test_file), is_equivalent_to(test$test_dims)) + } +}) + + + + +test_that(desc="baseURL 2",{ + skip("Until next version") + skip_on_cran() + + api_tests_get_pxweb_metadata_path <- list( + "http://api.scb.se/OV0104/v1/doris/sv/ssd/AM/AM0114/LCIArbKv", + 
"http://api.scb.se/OV0104/v1/doris/sv/ssd/NV/NV0119/IVPKNLonAr", + "http://api.scb.se/OV0104/v1/doris/sv/ssd/HA/HA0201/HA0201B/ExpTotalKNMan", + "http://api.scb.se/OV0104/v1/doris/sv/ssd/HA/HA0201/HA0201B/ExpTotalKNAr", + "http://api.scb.se/OV0104/v1/doris/sv/ssd/HA/HA0201/HA0201B/ImpTotalKNMan", + "http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0401/BE0401A/BefolkprognRev2015", + "http://api.scb.se/OV0104/v1/doris/en/ssd/UF/UF0536/Fullfoljt" + ) + + for (test in api_tests_get_pxweb_metadata_path){ + expect_warning(api_test_file <- get_pxweb_metadata(path = test), regexp = "deprecated") + expect_is(object = api_test_file$variables$variables[[1]], "list") + } +}) + diff --git a/tests/testthat/test-interactive_pxweb.R b/tests/testthat/test-interactive_pxweb.R new file mode 100644 index 00000000..33a100ac --- /dev/null +++ b/tests/testthat/test-interactive_pxweb.R @@ -0,0 +1,81 @@ +# Test suite interactive_pxweb() + +context("interactive_pxweb.R") + +test_that(desc="findData.inputBaseCat",{ + load(system.file("extdata/test_files/testFiles.Rdata", package = "pxweb")) + expect_equal(pxweb:::findData.inputBaseCat(1:2,test_codedAlt), + "\n('q' = Quit, 'b' = Back)") + expect_equal(pxweb:::findData.inputBaseCat(c(3,6),test_codedAlt), + "\n('*' = Select all, 'a' = Show all)") + +}) + +test_that(desc="findData.printNode",{ + xscb <-data.frame(id=c("01","02","03"), + text=c("Värde 1","Värde 2", "Värde 3")) + + expect_output(pxweb:::findData.printNode(xscb, print=TRUE),"Värde 3") + expect_output(pxweb:::findData.printNode(xscb, print=TRUE),"2. 
") + expect_that(pxweb:::findData.printNode(xscb, print=FALSE),is_a("character")) + expect_match(pxweb:::findData.printNode(xscb, print=FALSE),"Värde 1") +}) + +test_that(desc="findData.printCode",{ + varListText <- c("first","second","last") + + expect_output(pxweb:::findData.printCode(url="urladress", varListText, clean=TRUE), + "urladress") + expect_output(pxweb:::findData.printCode(url="urladress", varListText, clean=TRUE), + "clean = TRUE") + expect_output(pxweb:::findData.printCode(url="urladress", varListText, clean=TRUE), + "list\\(first") +}) + +test_that(desc="findData.inputConvert",{ + expect_that(pxweb:::findData.inputConvert(input=c("2","2:3","3:7","6")), + is_equivalent_to(c("2","3","4","5","6","7"))) + expect_that(pxweb:::findData.inputConvert(c("4:5")), + is_equivalent_to(c("4","5"))) + expect_that(pxweb:::findData.inputConvert(c("2","10:11","5")), + is_equivalent_to(c("2","5","10","11"))) + expect_that(pxweb:::findData.inputConvert("*"), + is_equivalent_to("*")) + expect_that(pxweb:::findData.inputConvert(input=":3"), + is_equivalent_to(as.character(1:3))) + expect_that(pxweb:::findData.inputConvert(c(":3","5")), + is_equivalent_to(as.character(c(1:3,5)))) + expect_that(pxweb:::findData.inputConvert(c("1", "3", "5:"), max_value=7), + is_equivalent_to(as.character(c(1,3,5:7)))) + expect_that(pxweb:::findData.inputConvert(c("1", "3", "5:")), + is_equivalent_to(as.character(c(1,3,5)))) +}) + +test_that(desc="download_pxweb",{ + load(system.file("extdata/test_files/testFiles.Rdata", package = "pxweb")) + expect_output(pxweb:::download_pxweb(dataNode = testNullNode, test_input = c("n", "n", "y")), + "To download the same data again, use the following code:") +}) + +test_that(desc="findData.input",{ + load(system.file("extdata/test_files/testFiles.Rdata", package = "pxweb")) + + expect_that(pxweb:::findData.input(type="yesno","Testing 'y'", test_input="y", silent=TRUE),is_equivalent_to("y")) + expect_that(pxweb:::findData.input(type="yesno","Testing 
'n'", test_input="n", silent=TRUE),is_equivalent_to("n")) + expect_that(pxweb:::findData.input(type="text","Testing 'MyData1'", test_input="MyData1", silent=TRUE), is_equivalent_to("MyData1")) + expect_that(pxweb:::findData.input(type="node", testBaseNode, test_input="3", silent=TRUE), + is_equivalent_to("3")) + expect_that(pxweb:::findData.input(type="node", testBaseNode, test_input="b", silent=TRUE), + is_equivalent_to("b")) + + test_varDF <- list(data.frame(id = as.character(seq(0.5,10,0.5)), + text = paste("Värde", as.character(seq(0.5,10,0.5))), + stringsAsFactors = FALSE), + "testingVärde") + expect_that(pxweb:::findData.input(type="alt", input=test_varDF, test_input="10:12, 1 ,3:1, 2", silent=TRUE), + is_equivalent_to(c("1","2","3","10","11","12"))) + expect_that(pxweb:::findData.input(type="alt", input=test_varDF, test_input="7:,3", silent=TRUE), + is_equivalent_to(c("3",as.character(7:20)))) + +}) + diff --git a/tests/testthat/test-multiple_queries_data.R b/tests/testthat/test-multiple_queries_data.R new file mode 100644 index 00000000..d8ea9f40 --- /dev/null +++ b/tests/testthat/test-multiple_queries_data.R @@ -0,0 +1,49 @@ +# Test suite for doing multiple downloads from the SCB api + +# Tests to run multiple queries (calls) +context("multiple_queries_data.R") + + + +test_that(desc="multiple data calls",{ + skip("Until next version") + skip_on_cran() + + api_tests_multiple_data <- list( + list( + url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/PR/PR0101/PR0101E/Basbeloppet", + dims = list(ContentsCode = c('PR0101A1'), + Tid = c('2001')), + clean = FALSE) + ) + + api_config <- pxweb::api_parameters(url=api_tests_multiple_data[[1]]$url)[[1]] + Sys.sleep(time=api_config$period_in_seconds) + api_file <- paste(tempdir(), "api_time_stamp.Rdata", sep="/") + if(file.exists(api_file)) file.remove(api_file) + + for (test in api_tests_multiple_data){ + api_config <- pxweb::api_parameters(url=test$url)[[1]] + + expect_warning( + for(i in 
1:(api_config$calls_per_period + 10)){ + test_data <- + get_pxweb_data(url = test$url, dims = test$dims, clean = test$clean) + }, + info = test$url) + } +}) + + +test_that(desc="multiple metadata calls",{ + skip("Until next version") + skip_on_cran() + + api_tests_multiple_metadata <- list( + pxweb::base_url("api.scb.se", version = "v1", language = "sv") + ) + +}) + + + diff --git a/tests/testthat/test-pxweb_api_class.R b/tests/testthat/test-pxweb_api_class.R new file mode 100644 index 00000000..018b9d0f --- /dev/null +++ b/tests/testthat/test-pxweb_api_class.R @@ -0,0 +1,63 @@ +# Test suite for utils functions + +context("pxweb_api_class.R") + +test_that(desc="pxweb_api_class",{ + + expect_warning( + test_api <- + pxweb_api$new(api = "foo.bar", + url="http://httpbin.org/[lang]/[version]", + description = "test api", + languages = "status", + versions = "404", + calls_per_period = 1, + period_in_seconds = 2, + max_values_to_download = 10)) + + expect_silent(suppressMessages(test_api$write_to_catalogue())) + expect_true("foo.bar" %in% unlist(lapply(suppressWarnings(api_catalogue()), function(X) X$api))) + + expect_error(suppressWarnings( + test_api2 <- + pxweb_api$new(api = c("foo", "bar"), + url="http://httpbin.org/[lang]/[version]", + description = "test api", + languages = "status", + versions = "404", + calls_per_period = 1, + period_in_seconds = 2, + max_values_to_download = 10))) + + expect_error(suppressWarnings( + test_api2 <- + pxweb_api$new(api = "foo.bar", + url="http://httpbin.org/[lang]/[version]", + description = "test api", + calls_per_period = 1, + period_in_seconds = 2, + max_values_to_download = 10))) + + expect_warning(test_api <- test_api$copy()) + + expect_warning(test_api <- pxweb_api$new("api.scb.se")) + + expect_warning(test_api <- pxweb_api$new("scb")) + + expect_warning( + test_api3 <- + pxweb_api$new()) + expect_silent(test_api3$check_input()) + + expect_equal({ + test_api$base_url() + }, "http://api.scb.se/OV0104/v1/doris/en") + + api_cat 
<- suppressWarnings(api_catalogue()) + for(api in api_cat){ + expect_silent(api$check_input()) + expect_is({api$pxweb_api_to_list()}, "list") + } + +}) + diff --git a/tests/testthat/test-pxweb_examples.R b/tests/testthat/test-pxweb_examples.R new file mode 100644 index 00000000..473faa7e --- /dev/null +++ b/tests/testthat/test-pxweb_examples.R @@ -0,0 +1,31 @@ +# Test suits for the examples in the documentation + +context("tests_pxweb_examples.R") + +test_that(desc="Example tests",{ + # CRAN seem to run tests in parallel, hence API tests cannot be run on CRAN. + skip_on_cran() + + skip("Skip temporarily (until new version)") + + expect_warning( + url <- paste(c(pxweb_api$new("api.scb.se")$base_url(language = "sv"),"ssd","AM","AM0102","AM0102A","KLStabell14LpMan"), collapse="/") + ) + + expect_warning( + metadata <- get_pxweb_metadata(url) + ) + + expect_warning( + test <- get_pxweb_data(metadata$URL, dims=list( + Myndighet = "C02", + Kon = "*", + Heltiddeltid = "*", + ContentsCode = "*", + Tid = "*"))) +}) + + + + + diff --git a/tests/testthat/test-test_pxweb_api.R b/tests/testthat/test-test_pxweb_api.R new file mode 100644 index 00000000..6f3030e3 --- /dev/null +++ b/tests/testthat/test-test_pxweb_api.R @@ -0,0 +1,34 @@ +# Test suite for test_pxweb_api() + +context("test_pxweb_api.R") + + +test_that(desc="test_pxweb_api()",{ + + skip_on_cran() + skip("Until next version") + + api_tests_test_pxweb_api <- list( + list( + url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/TK", + test_dim = c(11, 8) + ), + + list( + url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/KU", + test_dim = c(16, 8) + ) + ) + + test_seeds <- c(as.integer(Sys.time()), 1408310599) + + for (test in api_tests_test_pxweb_api){ + for (seed in test_seeds){ + expect_warning( + test_data <- suppressMessages(test_pxweb_api(url=test$url, seed=seed)), + info = paste(test$url, ", seed ", seed, sep="")) + + expect_equal(object=dim(test_data[[1]]), test$test_dim, info=test$url) + } + } +}) diff --git 
a/tests/testthat/test-utils_internal.R b/tests/testthat/test-utils_internal.R new file mode 100644 index 00000000..c9aee861 --- /dev/null +++ b/tests/testthat/test-utils_internal.R @@ -0,0 +1,45 @@ +# Test suite for utils functions + +context("utils_internal.R") + +test_that(desc="api_timer()",{ + + skip_on_cran() + + api_file <- paste(tempdir(), "api_time_stamp.Rdata", sep="/") + if(file.exists(api_file)) file.remove(api_file) + suppressWarnings( + test_api <- + pxweb_api$new(api="foo.bar", + url="http://httpbin.org/[lang]/[version]", + description = "test api", + languages = "status", + versions = "404", + calls_per_period = 1, + period_in_seconds = 2, + max_values_to_download = 10)) + suppressMessages(test_api$write_to_catalogue()) + + if(file.exists(api_file)) file.remove(api_file) +}) + + + + + +test_that(desc="create_batch_list()",{ + + api_tests_create_batch_list <- list( + list( + url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/PR/PR0101/PR0101E/Basbeloppet", + dims = list(ContentsCode = c('*'), + Tid = c('*')) + ) + ) + + for (test in api_tests_create_batch_list){ + expect_warning( + res <- pxweb:::create_batch_list(url=test$url, dims=test$dims)) + } +}) + diff --git a/tests/testthat/test-x_deprecated_get_pxweb_data.R b/tests/testthat/test-x_deprecated_get_pxweb_data.R new file mode 100644 index 00000000..a3d9366d --- /dev/null +++ b/tests/testthat/test-x_deprecated_get_pxweb_data.R @@ -0,0 +1,187 @@ +# Test suite for get_pxweb_data() + +# Below is the tests that should be conducted as a list. +# Each listelement is a named object that contains url and dims +# that make up the call through get_pxweb_data(). +# Test will be done that downloading works, that the function returns a data.frame and that +# the size of the data.frame is test_dim, if missing values the dimension is not tested. +# in test_dim. If NA in test_dim, the dimension is ignored. 
+ +context("get_pxweb_data.R") + +test_that(desc="get_pxweb_data()",{ + + skip_on_cran() + pxweb:::pxweb_clear_cache() + + api_tests_get_pxweb_data <- list( + list( + url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/PR/PR0101/PR0101E/Basbeloppet", + dims = list(ContentsCode = c('PR0101A1'), + Tid = c('1995', '1996', '1997')), + clean = TRUE, + test_dim = c(NA, 3), + test_sum = 108200), + + list( + url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/BE/BE0101/BE0101A/BefolkningNy", + dims = list(Region = c('00', '01'), + Civilstand = c('*'), + Alder = c('0', 'tot'), + Kon = c('*'), + ContentsCode = c('BE0101N1'), + Tid = c('2010', '2011', '2012', '2013')), + clean = TRUE, + test_dim = c(128, 7), + test_sum = 47107124), + + list( + url="http://api.scb.se/OV0104/v1/doris/sv/ssd/BE/BE0101/BE0101A/BefolkningNy", + dims = list(Region = c('00', '01'), + Civilstand = c('*'), + Alder = c('0', 'tot'), + Kon = c('*'), + ContentsCode = c('BE0101N1'), + Tid = c('2010', '2011', '2012', '2013')), + clean = FALSE, + test_dim = c(32, 8), + test_sum = NA), + + list( + url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/AM/AM0114/LCIArbKv", + dims = list(SNI2007 = c('*'), + ContentsCode = c('*'), + Tid = c('*')), + clean = FALSE, + test_dim = c(NA, NA), + test_sum = NA), + + # Test swedish letters + list( + url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/ME/ME0104/ME0104C/ME0104T3", + dims = list(Region = c('*'), + Partimm = c('M','C','FP','KD','MP','S','V','SD','\u00D6VRIGA'), + ContentsCode = c('ME0104B7'), + Tid = c('2010')), + clean = TRUE, + test_dim = c(2907, 5), + test_sum = 31999.3), + + list( + url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/TK/TK1001/TK1001S/SnabbStatTK1001", + dims = list("ContentsCode" = c("TK1001AE"), + "Tid" = c("2014M02") + ), + clean = TRUE, + test_sum = 18.3 + ), + + list( + url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/BE/BE0101/BE0101A/BefolkningNy", + dims = list(Region = c('2584'), + Civilstand = c('*'), + Alder = c('1'), + Kon = c('1'), + ContentsCode 
= c('BE0101N1'), + Tid = c('2017')), + clean = TRUE, + test_dim = c(NA, NA), + test_sum = 144) + + ) + + + for (i in seq_along(api_tests_get_pxweb_data)){ +# if(test$url == "http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0401/BE0401A/BefolkprognRev2014") { +# skip("Known error: comma bug in csv files")} + +# skip("Skip temporarily (until new version)") + test <- api_tests_get_pxweb_data[[i]] + expect_warning( + test_data <- + get_pxweb_data(url = test$url, + dims = test$dims, + clean = test$clean)) + + test_dim_size <- suppressWarnings(pxweb:::calculate_data_dim(pxweb:::get_dim_size(url = test$url, dims=test$dims)[[1]], test$clean)) + expect_equal(object=dim(test_data), test_dim_size, info=test$url) + expect_equal(object=class(test_data), "data.frame", info=test$url) + if(!is.na(test$test_sum)){ + expect_equal(sum(test_data$values, na.rm = TRUE), expected = test$test_sum, label = test$url) + } + } +}) + + +test_that(desc="get_pxweb_data()",{ + skip_on_cran() + skip_on_os("windows") # Due to http error, this is solved in new version + + # PXWEB query + pxweb_query_url <- "http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy" + pxweb_query_list <- + list("Region"=c("00","01","0114","0115","0117","0120","0123"), + "Civilstand"=c("OG","G"), + "Alder"=c("0","1","2","3","4","5","6","7","8","9","10"), + "Kon"=c("1","2"), + "ContentsCode"=c("BE0101N1","BE0101N2"), + "Tid"=c("1968","1969","1970","1971","1972")) + + # Download data + expect_silent(px_data <- + pxweb_get(url = pxweb_query_url, + query = pxweb_query_list)) + expect_warning(pxd1 <- as.data.frame(px_data)) + + + expect_warning(pxd2 <- get_pxweb_data(url = pxweb_query_url, dims = pxweb_query_list, clean = TRUE, encoding = NULL)) + + expect_equal(dim(pxd2)[1], dim(pxd1)[1]*2) + expect_equal(dim(pxd2)[2], dim(pxd1)[2]) + + pxd2pop <- pxd2[pxd2$ContentsCode == "Population", ] + pxd1pop <- pxd1[, 1:6] + pxd2grw <- pxd2[pxd2$ContentsCode == "Population growth", ] + pxd1grw <- pxd1[, c(1:5, 7)] + + 
expect_equal(sum(pxd1pop$Population, na.rm = TRUE), + sum(pxd2pop$values, na.rm = TRUE)) + expect_equal(sum(pxd1grw$`Population growth`, na.rm = TRUE), + sum(pxd2grw$values, na.rm = TRUE)) + expect_true(all(pxd1pop$Population >= 0 )) + expect_true(all(pxd2pop$values >= 0)) + +}) + +test_that(desc="get_pxweb_data()",{ + skip_on_cran() + skip_on_os("windows") # Due to 429 error, this is solved in new version of pxweb + + # PXWEB query + pxweb_query_url <- "http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy" + pxweb_query_list <- + list("Region"=c("00","01","0114","0115","0117","0120","0123"), + "Civilstand"=c("OG","G"), + "Alder"=c("0","1","2","3","4","5","6","7","8","9","10"), + "Kon"=c("1","2"), + "ContentsCode"=c("BE0101N1"), + "Tid"=c("1968","1969","1970","1971","1972")) + + # Download data + expect_silent(px_data <- + pxweb_get(url = pxweb_query_url, + query = pxweb_query_list)) + expect_silent(pxd1 <- as.data.frame(px_data)) + + expect_warning(pxd2 <- get_pxweb_data(url = pxweb_query_url, dims = pxweb_query_list, clean = TRUE, encoding = NULL)) + + expect_equal(dim(pxd2)[1], dim(pxd1)[1]) + expect_equal(dim(pxd2)[2]-1, dim(pxd1)[2]) + + expect_equal(sum(pxd1$Population, na.rm = TRUE), + sum(pxd2$values, na.rm = TRUE)) + expect_true(all(pxd1$Population >= 0 )) + expect_true(all(pxd2$values >= 0)) + expect_true(all(table(pxd1$Population) == table(pxd2$values))) + +}) diff --git a/tests/testthat/test_data/filter_query.json b/tests/testthat/test_data/filter_query.json deleted file mode 100644 index 88589567..00000000 --- a/tests/testthat/test_data/filter_query.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "query": [ - { - "code": "Region", - "selection": { - "filter": "all", - "values": ["039*"] - } - }, - { - "code": "ContentsCode", - "selection": { - "filter": "all", - "values": ["*"] - } - }, - { - "code": "Tid", - "selection": { - "filter": "top", - "values": ["4"] - } - } - ], - "response": { - "format": "json-stat" - } -} diff --git 
a/tests/testthat/test_data/pxm1_test.rda b/tests/testthat/test_data/pxm1_test.rda deleted file mode 100644 index 51b4ce6870a8cd04385c932d34f76325cab4760e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4202 zcmaLRXE+-S*9UN;5=yGA3SyPIYg4sXP}C}lno(M#V$UKKyOgSx+Pg+$Q!8dsv$d%e zMbs`Kc8u`!x}W!bKfdQW{}2D)hjSgy7;3VAhY}AP^7_@Ecwgn1J`-YYyX&@{c#<9M zdO@k-rIsjGs1}i7GlK?6uoiYarlcdA=XMWZZu_P{esmof8YDVr4!?V#Iy3|1+?FNg zJq17QzYq`<6fYA$T{_~#k8x_iU@&=D3#@laFTTHGp>IHX5eS2|06-0!Jsc#Z#$aN* zx~00r5xvB-Dyz<|<&#s=!`M=uksF`mf$B(|pMJoYV$D)MMZU6PiUHtf6_OI%E=T(J_I~LpXitdy)>SmzUB7!kOUK zaA&$Yki;NHA#ZO^8P2*kLsYnfB{%072akqug~*bZ3x=GM55GGCgz%CLlE0u0b%1{X zL=-53g=apOcQ^DY(=xG|*aJ1YvXpsftbjh7{txHKMf5^Og!5eULU}hjXu3`8ba%|A zRx-e&-^L;&n?eR7o_oKXwp8AxuwlB{LO$#la?D)x;zU`WW2SQ5Oqo`LJ^_(aK1FHA zk+(+{3xv0lwNoj$==Yk967zUD4_Ct7kR#++DhQEGJNL-pLl2ZfrxDoTL5D+9ag0CU z^kE1oB=Z%Z_J{*X^xMoczfk4!mUyz}8mE{?S*V-$_pdc>ec`=VE5*IanaXpIS~y0( zj)wH^<6!i5h>kwWrWyw|%3ylpdB-9!2TC{$ji*N47PZRGVEEAWkYKVj$^^!c zax%M!4y9|CTm*W1Q#F*ZXV0~Ghp;|zS7k>^?MwqmH6~8&LB4PvRTZTn``*B3+fEF4 z5E$U;LNwuK9P8){J-tEjA&d8C%O`KAykMt?cPvwIR%;M0)Rx&2sH8SRKyokq;^>hb z0QPE?D53jmU)G=N?&W@>IbQ~QG0cn zRC0a!cGziytOhJb%2qSrI-?sx8bWmGI@-FcpgBP|_9+(t)(+ag0AUQ6Q2qa)X)hJPblpHZ&e zO`w~C44m>}0a!}8JJ`t6+gj0)vK)iAy1{A>X}Qy>5(iP5pW!&sGs^BYX2*2b-)ykP z%n&*+d)oiqfor7ibWyS3ZdKaf@l<<#VMJS%i!VYr5q-SaxX5U}>)cz}YByHj(a6_2 zgJMEs%-uKt{^%K}`MTTaCOb8Vnr-ysJjw07Jw5q;i+Tyn}2)7ZJnDV*> zWn`Z4i2+Y0yzyLl_Zw9CvAarx+y8BeOIyInwm!74R>t{p)qUe2#j7NAO!yWu-8dcw zyQ<2$+F2$AYD+Kf6UrJPgqEpk2DLP(XvQk}jd=ukwoc@X$<1$+nk<|J(_ z1^FI~=lEKvo@xMj?N{yBd?`1CNDb09Jt)?+wK~HI>6qA0<}WBFo3;(`i0sTEKqrwC z3!YNeN{bWMEfju`Q4euDom?qNK`hkLvF}An+XU;6v1Sl&h3_^I`3T*B$b_9M|$>)?a61TWyR#-x8e^NLR0Ck!W4uF-NptX4JP@jXGxZ6gGM# z56)5L@@9mcB&+*y2G)*woL7&n%(WVx|1>)`mMF0Q0tUG6PK|8VJ)hnUwp7A+VHu@k z+IC#_AMbb$u*+QedpWK!(8_jX9-|--h|_!gUhbyx7d^ z;1#|y=1LXHzX$@A80uftNX15>d>2yv{IaQodd{jgY5pj-KgX2jEVC0~nes(w1+|d; zn*s1{^ZFAXO%2CA$8;0?A?8+-fcbg|c<*i+aNTcUJ0*Tf+QC4ZbwRSGlYiEfU%P!U 
z#!$G>Q^tLIq~}n~II_!XGl?uoT73{(q&3@n3v->BVjTH}Xv^h(V=P z@A1d72w?QLlU_Fb&5=<|vMS7QTbPZGvX?>EhGk81p|@wMKzIi=VMRLYHjTC@waG|(#` zO=lFbt6yInxn!^MK6a}r^SQhrsz&=~!}oHnnmjbTdn;FSp8w6N(W<8*9kZxDf6-~g z{IMSA#CwL@88u{Dqyy@E!gzuhrM%#U-*Ls6JUmHS$Z|aCY)=uVJ;p&yY_& zgo-2epRC*=E?^l;b}`vovAW~4y)eaDgRFU!0}uU=GNT3~p&q%(JsFvto(kZiF-KM` zoY5g$TtO~2CO@FmF+jGwl>LLaNr!!Yt5a>W4mPy$xC8GsX=gT^(CVAPKO}8y&!2Q& zkAC5CX@+Wdk$$zi>;kUhmYpxq!iz6G+xcj3?^CMMPpj=aemHSfQc8sHvJ2>6Q~9jF zVEL$T!`6alT8ype@@&9{lpLDSi(>9Z89L0n6iTOur}W=as(MwC;;XlNcV?LTN$}*$ zKRh&5f9ASH^*4rDw(Qr0;Tn$7VUKB0n6qkQKcEp~EQfvRv zdVM7UY?*5@D|zc(ka*!1WXJF)EnfYgWa(G@5 zW%RUaMP$M3w&i*z!@T7>tD>&ATvWPmy5V_4l%XuEsy2eQKMr8}ryq^9y~?Y!(+zQ( zdZd7ZOc#9oCQ|N;)A6Fuxx^HIiDog3|HEXibUswX z=GN;=f5msH=M2e1(y)f~{fg%3t5QhPdQP(67>a(3Yx4*?WG<|yDoko-Atk?aL2ts5R}_F*2g=pp5cn`nb-K>F=A47I~{t3kD+XNf*6F($s*-i?bDzu!qs|CwMh?`>5% z+CW)h^p7`>^N)4v7YOU$!=@UK*4xMY#m~6RORSMj)3b36>Xn2OU?8nXT(xATbjfzvMP zOcn?iR0jCTGh4PiY;1P*&`9Z&)C=F52bea}Rla7m+>~k@cJ)Y}!$B~ejBepua{yBe z=$*ZcP?tT6$N&z~xKu-zYTqanelA0|^ZS8i%HLk;#B9m=u#v;oL$VhKDejbng$p(S z{JfYs1_)+==^0REHLInybYrs{Q7Bwqk#6VULE}Idj)T6-fyHnPwZmD$pymGI=S*P+ zAIWIEfG6A9GEusb!`(x2A>oG1<{?h5wbR%?+?fC{#epi@SuFz>CHt1I9O;9TO|eUy zAGYpmel1JO%;{e%nncJ}x2U4!eOgqj92|*4pxmORS_x!ID~W7~9GN1_LZGlN2WR5H=q-Va ziyRpv$U>kRUF&cn8z{G6>AyHhXo5gZyVjkEmZ03SC2vV&W#q`esNc2jNK^pjqL&UN zkU5bf69fbV`Y$3uxm8OClE|--Bh!Ra2-Lc3-I@3=(n=shBSl9ERFJRAT}*Hy1;{#Y z>A(1s5CQr6w2R4!_y}ZOyhJOB{1hqr7oQ{VF&fF~zeyG_bR)g;Udpbut|ROw?WrF0 z!#Q43{0gkC#W@@AdRGG4XN$fPxkfhb!;+Qw60!0ZwA8NAT?NZuWTh6d6nURoZk)-hZtT*1Y8QWdIh^4A zRGl(-L;A+2MVYrj%cH=iD-Va9d=-(y50RtdkVhkMYQ+Ms|F@v(Eo?7g5Or1S+(T{# zS4vP|;AoR% 0){ for(i in seq_along(new_api_paths)){ # cat(new_api_paths[i], "\n") - first_results[[i]] <- try(pxweb_test_api(url = new_api_paths[i], test_type = "first", verbose = TRUE, time_limit = 15*60), silent = TRUE) + first_results[[i]] <- try(pxweb_test_api(url = new_api_paths[i], test_type = "first", verbose = TRUE, time_limit = 10*60), silent = TRUE) if(inherits(first_results[[i]], 
"try-error")){ new_api_errored[i] <- TRUE } @@ -147,7 +146,6 @@ for (i in seq_along(apis)) { if(any(errored) | length(warns) > 0 | any(config_diff) | any(new_api_errored) | any(duplicated_alias) | any(parameter_error) | any(duplicated_names)){ - # quit(save = "no", status = 1) - quit(save = "no", status = 0) + quit(save = "no", status = 1) } diff --git a/tests_bash/pxweb.sh b/tests_bash/pxweb.sh old mode 100755 new mode 100644 index 2c33f0e6..81411eeb --- a/tests_bash/pxweb.sh +++ b/tests_bash/pxweb.sh @@ -1,6 +1,24 @@ #!/bin/bash # Need to be in Project root +# git log -n 1 R --version +script_start_time=$(date +"%T") +# Set Pxweb Error to 1 (to break Travis) +echo "PXWEB_ERROR=1" > "PXWEB_ERROR.sh" + +echo $(date +"%T") + +echo " " && echo -en "travis_fold:start:test-pxweb\n" Rscript --vanilla tests_bash/pxweb.R +r_exit=$?; echo $r_exit; if [[ $r_exit != 0 ]]; then exit $r_exit; fi +echo $r_exit +echo "travis_fold:end:test-pxweb" + +echo $(date +"%T") + +script_end_time=$(date +"%T") +echo "Start: $script_start_time Stop: $script_end_time" +# Set PXWEB Error to 0 +echo "PXWEB_ERROR=0" > "PXWEB_ERROR.sh" diff --git a/vignettes/pxweb.md b/vignettes/pxweb.md new file mode 100644 index 00000000..4cad47d3 --- /dev/null +++ b/vignettes/pxweb.md @@ -0,0 +1,676 @@ +--- +title: "PX-WEB API Interface for R" +author: "Mans Magnusson, Leo Lahti et al." +date: "2018-12-25" +output: + rmarkdown::html_vignette: + toc: true +vignette: > + %\VignetteIndexEntry{pxweb tutorial} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8]{inputenc} +--- + + +This R package provides tools to access [PX-WEB +API](http://www.scb.se/Grupp/OmSCB/API/API-description.pdf). Your +[contributions](http://ropengov.github.io/contribute/) and [bug +reports and other feedback](https://github.com/ropengov/pxweb) are +welcome! + + +More information on the PX-Web/PC-Axis API can be found [here](http://www.scb.se/Grupp/OmSCB/API/API-description.pdf). 
+ +## Table of contents + +[Introduction](#introduction) (Introduction) +[Installation](#installation) (Installation) +[Using the PXWEB R package](#usage) (Using PXWEB from R) + +## Introduction + +PXWEB is an API structure developed by Statistics Sweden together with other national statistical institutions (NSI) to disseminate public statistics in a structured way. This enables downloading and usage of data from statistical agencies without using a web browser direct over HTTP/HTTPS. + +The `pxweb` R package connects any PXWEB API to R and hence facilitate the access, use and referencing of data from PXWEB APIs. + +### Available data sources and tools + +[A number of organizations](http://www.scb.se/sv_/PC-Axis/Programs/PX-Web/PX-Web-examples/) use PXWEB to distribute hierarchical data. You can browse the available data sets at: + + * [Statistics Sweden](http://www.statistikdatabasen.scb.se/pxweb/en/ssd/) with [API Description](http://www.scb.se/Grupp/OmSCB/API/API-description.pdf) + * [Statistics Finland](http://tilastokeskus.fi/til/aihealuejako.html) [StatFi API Description](http://pxnet2.stat.fi/api1.html) + * [Other organizations using PX-WEB](http://www.scb.se/sv_/PC-Axis/Programs/PX-Web/PX-Web-examples/) + +### About PXWEB APIs + +The data in PXWEB APIs consists of a metadata part and a data +part. Metadata is structured in a hierarchical node tree, where each +node contains information about subnodes that are below it in the tree +or, if the nodes are at the bottom of the tree structure, the data +referenced by the node as well as what dimensions are available for +the data at that subnode. 
+ +## Installation + +To install the latest stable release version from CRAN, just use: + + +```r +install.packages("pxweb") +``` + + +To install the latest stable release version from GitHub, just use: + + +```r +library("devtools") +devtools::install_github("ropengov/pxweb") +``` + +Test the installation by loading the library: + + +```r +library(pxweb) +``` + +A tutorial is included with the package with: +```r +vignette(topic="pxweb") +``` + + +### Installation issues + +We also recommend setting the UTF-8 encoding since each individual API may have local specificl letters: + + +```r +Sys.setlocale(locale="UTF-8") +``` + + +## Accessing PXWEB from R + +There are two ways of using the `pxweb` R package to access data, either interactively of using the core functions. To access data, two parts are needed, an URL to the data table in the API and a query specifying what data is of interest. + +## Interactive use + +The simplest way of using `pxweb` is to use it interactively and navigate the API to the data of interest and then set up the data query of interest. + + +```r +# Navigate through all pxweb api:s in the R package API catalogue +d <- pxweb_interactive() + +# Get data from SCB (Statistics Sweden) +d <- pxweb_interactive(api = "api.scb.se") + +# Fetching data from statfi (Statistics Finland) +d <- interactive_pxweb("pxnet2.stat.fi") + +# Fetching data from StatBank (Statistics Norway) +d <- interactive_pxweb("data.ssb.no") + +# To see all available PXWEB APIs use +pxweb_apis <- pxweb_api_catalogue() +``` + +In the example above we use the interactive functionality from the PXWEB API root, but we could use any path to the API. + + +```r +# Start with a specific path. +d <- pxweb_interactive("http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A") +``` + +This also means that we can navigate any PXWEB API, irrespectively of if they are a part of the R package API catalog or not. 
Just supply an URL to somewhere in the API and then navigate the API from there. + +Due to new CRAN policies, it is not possible to use an R function to edit the api catalogue of the R package, but editing the can be done easily from R using `file.edit()`. + + +```r +file.edit(pxweb_api_catalogue_path()) +``` + +Although, if the `pxweb` is installed again, it will overwrite the old api catalogue. So the easiest way is to do add a PXWEB API to the global catalogue. To do this, just do a pull request at the pxweb GitHub page [here](https://github.com/rOpenGov/pxweb). + +## Direct use + +Under the hood, the pxweb package uses the `pxweb_get()` function to access data from the PXWEB API. It also keeps track of the time limits of the API and split up to big queries into optimal downloadable chunks. If we use `pxweb_get()` without a query, the function either returns a PXWEB LEVELS object or a PXWEB METADATA object, depending if the URL points to a table in the API or not. Here is an example of a PXWEB LEVELS object. + + +```r +# Get PXWEB levels +px_levels <- pxweb_get("http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/") +px_levels +``` + +``` +## PXWEB LEVELS +## BefolkningNy (t): Population by region, marital status, age and sex. Year 1968 - 2017 +## BefolkningR1860 (t): Population by age and sex. Year 1860 - 2017 +## FolkmangdNov (t): Population 1 November by region, age and sex. Year 2002 - 2018 +## FolkmangdSmaort (t): Population by smaller localities (places with 50-199 inhabitants). Every fifth year 1995 - 2010 +## FolkmangdTatort (t): Population by localities. Every fifth year 1960 - 2017 +## FolkmangdTatortH (t): Population by localities with older/changed names. Every fifth year 1960 - 1980 +## FolkmangdDistrikt (t): Population by district, Landscape or Part of the country by sex. Year 2015 - 2017 +``` + +And if we use `pxweb_get()` for a table, a PXWEB METADATA object is returned. 
+ + +```r +# Get PXWEB metadata about a table +px_meta <- pxweb_get("http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy") +px_meta +``` + +``` +## PXWEB METADATA +## Population by region, marital status, age, sex, observations and year +## variables: +## [[1]] Region: region +## [[2]] Civilstand: marital status +## [[3]] Alder: age +## [[4]] Kon: sex +## [[5]] ContentsCode: observations +## [[6]] Tid: year +``` + +### Creating data queries + +To download data we need both the URL to the table and a query specifying what parts of the table are of interest. An URL to a table is an URL that will return a metadata object if not a query is supplied. Creating a query can be done in three main ways. The first and simplest approach is to use `pxweb_interactive()` to explore the table URL and create a query interactively. + + +```r +d <- pxweb_interactive("http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy") +``` + +The interactive function will return the query and the url, even if the data is not downloaded. + + + + +```r +d$url +``` + +``` +## [1] "http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy" +``` + +```r +d$query +``` + +``` +## PXWEB QUERY +## query: +## [[1]] Region (item): +## 00 +## [[2]] Civilstand (item): +## OG, G, ÄNKL, SK +## [[3]] Alder (item): +## tot +## [[4]] ContentsCode (item): +## BE0101N1 +## [[5]] Tid (item): +## 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 +``` + +We can also turn the query to a json query that can be used outside R. 
+ + +```r +pxweb_query_as_json(d$query, pretty = TRUE) +``` + +``` +## { +## "query": [ +## { +## "code": "Region", +## "selection": { +## "filter": "item", +## "values": ["00"] +## } +## }, +## { +## "code": "Civilstand", +## "selection": { +## "filter": "item", +## "values": ["OG", "G", "ÄNKL", "SK"] +## } +## }, +## { +## "code": "Alder", +## "selection": { +## "filter": "item", +## "values": ["tot"] +## } +## }, +## { +## "code": "ContentsCode", +## "selection": { +## "filter": "item", +## "values": ["BE0101N1"] +## } +## }, +## { +## "code": "Tid", +## "selection": { +## "filter": "item", +## "values": ["2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017"] +## } +## } +## ], +## "response": { +## "format": "json" +## } +## } +``` + + +The second approach is to specify the query either as an R list or a JSON object. Some Statistical Agencies, such as Statistics Sweden, supply queries directly as a JSON object on their web pages. These queries can be used directly. Below is another example of a JSON query for the table above. For details on how to set up a JSON query, see the PXWEB API documentation. + +``` +{ + "query": [ + { + "code": "Civilstand", + "selection": { + "filter": "item", + "values": ["OG", "G", "ÄNKL", "SK"] + } + }, + { + "code": "Kon", + "selection": { + "filter": "item", + "values": ["1", "2"] + } + }, + { + "code": "ContentsCode", + "selection": { + "filter": "item", + "values": ["BE0101N1"] + } + }, + { + "code": "Tid", + "selection": { + "filter": "item", + "values": ["2015", "2016", "2017"] + } + } + ], + "response": { + "format": "json" + } +} +``` + +To use this JSON query we just store the JSON query as a file and supply the path to the file to the ```pxweb_query()``` function. + + +```r +pxq <- pxweb_query("path/to/the/json/query.json") +``` + +Finally, we can create a PXWEB query from an R list where each list element is a variable and selected observation. 
+ + +```r +pxweb_query_list <- + list("Civilstand"=c("*"), # Use "*" to select all + "Kon"=c("1","2"), + "ContentsCode"=c("BE0101N1"), + "Tid"=c("2015","2016","2017")) +pxq <- pxweb_query(pxweb_query_list) +pxq +``` + +``` +## PXWEB QUERY +## query: +## [[1]] Civilstand (all): +## * +## [[2]] Kon (item): +## 1, 2 +## [[3]] ContentsCode (item): +## BE0101N1 +## [[4]] Tid (item): +## 2015, 2016, 2017 +``` + +The query can be validated against the metadata object to asses that the query can be used. This is done automatically when the data is fetched with ```pxweb_get()```, but can also be done manually. + + +```r +pxweb_validate_query_with_metadata(pxq, px_meta) +``` + +### Downloading data + +When we have the URL to a data table and a query we can simply download the data with ```pxweb_get()```. The function returns a `pxweb_data` object that contains the downloaded data. + + +```r +pxd <- pxweb_get("http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy", + pxq) +pxd +``` + +``` +## PXWEB DATA +## With 4 variables and 24 observations. +``` + +If we instead want a JSON-stat object, we just change the response format to JSON-stat and we will get a JSON-stat object returned. Only JSON and JSON-stat formats are implemented in the PXWEB API. 
+ + +```r +pxq$response$format <- "json-stat" +pxjstat <- pxweb_get("http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy", + pxq) +pxjstat +``` + +``` +## { +## "dataset": { +## "dimension": { +## "Civilstand": { +## "label": ["marital status"], +## "category": { +## "index": { +## "OG": [0], +## "G": [1], +## "ÄNKL": [2], +## "SK": [3] +## }, +## "label": { +## "OG": ["single"], +## "G": ["married"], +## "ÄNKL": ["widowers/widows"], +## "SK": ["divorced"] +## } +## } +## }, +## "Kon": { +## "label": ["sex"], +## "category": { +## "index": { +## "1": [0], +## "2": [1] +## }, +## "label": { +## "1": ["men"], +## "2": ["women"] +## } +## } +## }, +## "ContentsCode": { +## "label": ["observations"], +## "category": { +## "index": { +## "BE0101N1": [0] +## }, +## "label": { +## "BE0101N1": ["Population"] +## }, +## "unit": { +## "BE0101N1": { +## "base": ["number"], +## "decimals": [0] +## } +## } +## } +## }, +## "Tid": { +## "label": ["year"], +## "category": { +## "index": { +## "2015": [0], +## "2016": [1], +## "2017": [2] +## }, +## "label": { +## "2015": ["2015"], +## "2016": ["2016"], +## "2017": ["2017"] +## } +## } +## }, +## "id": [ +## ["Civilstand"], +## ["Kon"], +## ["ContentsCode"], +## ["Tid"] +## ], +## "size": [ +## [4], +## [2], +## [1], +## [3] +## ], +## "role": { +## "metric": [ +## ["ContentsCode"] +## ], +## "time": [ +## ["Tid"] +## ] +## } +## }, +## "label": ["Population by marital status, sex, observations and year"], +## "source": ["Statistics Sweden"], +## "updated": ["2018-12-25T09:58:00Z"], +## "value": [ +## [2762601], +## [2820248], +## [2870477], +## [2394842], +## [2437315], +## [2477012], +## [1651482], +## [1672460], +## [1687016], +## [1639519], +## [1657129], +## [1671381], +## [99751], +## [99654], +## [99682], +## [345008], +## [340709], +## [335961], +## [417132], +## [420985], +## [425487], +## [540682], +## [546653], +## [553226] +## ] +## } +## } +``` + +If the queries are large (contain more values than 
the PXWEB API maximum allowed values), the query is chunked into optimal chunks and is then downloaded sequentially. PXWEB data objects are then combined to one large PXWEB data object, while JSON-stat objects are returned as a list of JSON-stat objects. + +For more advanced connections to the API, the `pxweb_advanced_get()` gives the flexibility to access the underlying HTTP calls using `httr` as well as logging the HTTP calls for debugging. + +The downloaded PXWEB data objects can then be converted to either `data.frame`s or to a character matrix. The character matrix contains the "raw" data while the data.frame returns a data.frame for analysis in a tidy format. This means that missing values (such as ".." are converted to `NA`) in a data.frame. Using the arguments `variable.value.type` and `column.name.type` we can also choose if we want the code or the text column names and value types. + + +```r +pxdf <- as.data.frame(pxd, column.name.type = "text", variable.value.type = "text") +head(pxdf) +``` + +``` +## marital status sex year Population +## 1 single men 2015 2762601 +## 2 single men 2016 2820248 +## 3 single men 2017 2870477 +## 4 single women 2015 2394842 +## 5 single women 2016 2437315 +## 6 single women 2017 2477012 +``` + + + +```r +pxdf <- as.data.frame(pxd, column.name.type = "code", variable.value.type = "code") +head(pxdf) +``` + +``` +## Civilstand Kon Tid BE0101N1 +## 1 OG 1 2015 2762601 +## 2 OG 1 2016 2820248 +## 3 OG 1 2017 2870477 +## 4 OG 2 2015 2394842 +## 5 OG 2 2016 2437315 +## 6 OG 2 2017 2477012 +``` + +In a similar way, we can access the raw data as a character matrix with `as.matrix`. 
+ + +```r +pxmat <- as.matrix(pxd, column.name.type = "code", variable.value.type = "code") +head(pxmat) +``` + +``` +## Civilstand Kon Tid BE0101N1 +## [1,] "OG" "1" "2015" "2762601" +## [2,] "OG" "1" "2016" "2820248" +## [3,] "OG" "1" "2017" "2870477" +## [4,] "OG" "2" "2015" "2394842" +## [5,] "OG" "2" "2016" "2437315" +## [6,] "OG" "2" "2017" "2477012" +``` + +### Access data footnotes/comments + +In addition to the data, the PXWEB DATA object may also contain comments for the data. This can be accessed using `pxweb_data_comments()` function. + + +```r +pxdc <- pxweb_data_comments(pxd) +pxdc +``` + +``` +## NO PXWEB DATA COMMENTS +``` + +In this case, we did not have any comments. If we have comments we can turn the comments into a data.frame with one comment per row. + + +```r +as.data.frame(pxdc) +``` + +## Citation + +Finally, if we use the data, we can easily create a citation for a `pxweb_data` object using the `pxweb_cite()` function. For full reproducibility, please also cite the package. + + +```r +pxweb_cite(pxd) +``` + +``` +## +## Statistics Sweden (2018). "Population by region, marital status, +## age, sex, observations and year." [Data accessed 2018-12-25 +## 11:58:53 using pxweb R package 0.8.32], . +## +## A BibTeX entry for LaTeX users is +## +## @Misc{, +## title = {Population by region, marital status, age, sex, observations and year}, +## author = {{Statistics Sweden}}, +## organization = {Statistics Sweden}, +## address = {Stockholm, Sweden}, +## year = {2018}, +## url = {http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy}, +## note = {[Data accessed 2018-12-25 11:58:53 using pxweb R package 0.8.32]}, +## } +``` + +``` +## +## Kindly cite the pxweb R package as follows: +## +## (C) Mans Magnusson, Markus Kainu, Janne Huovari, and Leo Lahti +## (rOpenGov 2014-2016). pxweb: R tools for PXWEB API. 
URL: +## http://github.com/ropengov/pxweb +## +## A BibTeX entry for LaTeX users is +## +## @Misc{, +## title = {pxweb: R tools for PX-WEB API}, +## author = {Mans Magnusson and Markus Kainu and Janne Huovari and Leo Lahti}, +## year = {2014-2018}, +## } +``` + + + + +### Known issues + +Currently, the `pxweb` package is not thread-safe, and hence it is not safe to runt multiple get functions in parallel or in different R sessions. + +## Licensing + +This work can be freely used, modified and distributed under the open license specified in the [DESCRIPTION file](https://github.com/rOpenGov/pxweb/blob/master/DESCRIPTION). + + +## Session info + +This vignette was created with + + +```r +sessionInfo() +``` + +``` +## R version 3.5.1 (2018-07-02) +## Platform: x86_64-pc-linux-gnu (64-bit) +## Running under: Ubuntu 18.04.1 LTS +## +## Matrix products: default +## BLAS: /home/lei/bin/R-3.5.1/lib/libRblas.so +## LAPACK: /home/lei/bin/R-3.5.1/lib/libRlapack.so +## +## locale: +## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C +## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 +## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 +## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C +## [9] LC_ADDRESS=C LC_TELEPHONE=C +## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C +## +## attached base packages: +## [1] stats graphics grDevices utils datasets methods base +## +## other attached packages: +## [1] pxweb_0.8.32 rmarkdown_1.10 knitr_1.20 +## +## loaded via a namespace (and not attached): +## [1] Rcpp_1.0.0 digest_0.6.18 rprojroot_1.3-2 R6_2.3.0 +## [5] jsonlite_1.5 backports_1.1.2 magrittr_1.5 evaluate_0.12 +## [9] httr_1.3.1 stringi_1.2.4 curl_3.2 checkmate_1.8.5 +## [13] tools_3.5.1 stringr_1.3.1 yaml_2.2.0 compiler_3.5.1 +## [17] htmltools_0.3.6 +``` From 38c68cf903f57d3839d637ab68b463095e60e847 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Magnusson?= Date: Mon, 31 Oct 2022 14:06:46 +0100 Subject: [PATCH 06/13] Test to not start GA --- paper/magnusson-kainu-lahti.tex | 1 + 1 
file changed, 1 insertion(+) diff --git a/paper/magnusson-kainu-lahti.tex b/paper/magnusson-kainu-lahti.tex index dd153332..b6500bbc 100644 --- a/paper/magnusson-kainu-lahti.tex +++ b/paper/magnusson-kainu-lahti.tex @@ -56,6 +56,7 @@ decades or even centuries \cite{xxx}. % We need tools to access the data that is simple and efficient to use +% TEST! Opening up official statistics is, however, only the first step towards realizing their full potential and value. There is a clear From 6f13ec2a9e9bdd34a752901bfbb804f2817e3139 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Magnusson?= Date: Mon, 31 Oct 2022 14:11:48 +0100 Subject: [PATCH 07/13] TEST GA --- paper/magnusson-kainu-lahti.tex | 1 - 1 file changed, 1 deletion(-) diff --git a/paper/magnusson-kainu-lahti.tex b/paper/magnusson-kainu-lahti.tex index b6500bbc..dd153332 100644 --- a/paper/magnusson-kainu-lahti.tex +++ b/paper/magnusson-kainu-lahti.tex @@ -56,7 +56,6 @@ decades or even centuries \cite{xxx}. % We need tools to access the data that is simple and efficient to use -% TEST! Opening up official statistics is, however, only the first step towards realizing their full potential and value. There is a clear From 1a08ac327b1ff7a9ffa855e6c35ddd837884c3ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=CC=8Ans=20Magnusson?= Date: Mon, 31 Oct 2022 14:23:30 +0100 Subject: [PATCH 08/13] test --- paper/magnusson-kainu-lahti.tex | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/paper/magnusson-kainu-lahti.tex b/paper/magnusson-kainu-lahti.tex index dd153332..f1a1965d 100644 --- a/paper/magnusson-kainu-lahti.tex +++ b/paper/magnusson-kainu-lahti.tex @@ -10,6 +10,7 @@ %An abstract of less than 150 words. \abstract{Abstract \CRANpkg{pxweb} R package here.} +% TEST %\begin{itemize} % \item Motivation here. Why do we need this package? We want to use the open data pipeline, access, use and cite. 
@@ -123,12 +124,12 @@ \subsection{The \CRANpkg{pxweb} package} \item Can be used in other packages as the workhorse for accessing API data (See Oyvinds project). \end{itemize} -% In 2018, we made major design decisions and largely rewrote the package in order to simplify the overall design while improving the overall capabilities and efficiency. Hence the current, mature version, is a result of active development and testing by the user community +% In 2018, we made major design decisions and largely rewrote the package in order to simplify the overall design while improving the overall capabilities and efficiency. Hence the current, mature version, is a result of active development and testing by the user community -% Whereas dozens of statistical authorities have started to share data through the PX-WEB API, a dedicated R package that provides a unified access to these data collections has been missing. +% Whereas dozens of statistical authorities have started to share data through the PX-WEB API, a dedicated R package that provides a unified access to these data collections has been missing. -% The \CRANpkg{pxweb} package is now filling this gap [CLOSELY RELATED PKGS SHOULD BE CITED HERE?]. Following its first CRAN release in 2014, the \CRANpkg{pxweb}, several contributors and feedback from the user community have supported the package development. -% [HAS THE PKG BEEN APPLIED IN PUBLICATIONS. THIS WOULD BE A GOOD PLACE TO CITE THOSE? -> OR in DISCUSSION?]. SOME brief WORDS ABOUT DATA STANDARDS AND POSSIBLE VARIATIONS BETWEEN DATA PROVIDERS; further details will be in the later section. The pxweb depends on further R packages including \pkg{checkmate} \citep{checkmate}, \pkg{httr} \citep{httr}, \pkg{jsonlite} \citep{jsonlite}. The \CRANpkg{pxweb} package is part of the rOpenGov open data science project \citep{Lahti13icml}. +% The \CRANpkg{pxweb} package is now filling this gap [CLOSELY RELATED PKGS SHOULD BE CITED HERE?]. 
Following its first CRAN release in 2014, the \CRANpkg{pxweb}, several contributors and feedback from the user community have supported the package development. +% [HAS THE PKG BEEN APPLIED IN PUBLICATIONS. THIS WOULD BE A GOOD PLACE TO CITE THOSE? -> OR in DISCUSSION?]. SOME brief WORDS ABOUT DATA STANDARDS AND POSSIBLE VARIATIONS BETWEEN DATA PROVIDERS; further details will be in the later section. The pxweb depends on further R packages including \pkg{checkmate} \citep{checkmate}, \pkg{httr} \citep{httr}, \pkg{jsonlite} \citep{jsonlite}. The \CRANpkg{pxweb} package is part of the rOpenGov open data science project \citep{Lahti13icml}. % In summary, the \CRANpkg{pxweb} package provides custom tools for open statistical data resources provided through the PX-WEB API. Currently, the pxweb package provides seamless algorithmic access from the R environment to dozens of data collections from national authorities in countries such as Estonia, Iceland, Finland, Norway, Sweden, The Netherlands, and elsewhere. Seamless integration with other data analysis tools is facilitated by support for features such as cache, date formatting, tidy data principles \citep{wickham2014}, and the \Cpkg{tibble} \citep{tibble} data format. In this article, we provide an overview of the functionality and use cases based on the current CRAN release version (0.8). The comprehensive on-line documentation, which is available via the package homepage\footnote{\url{http://ropengov.github.io/pxweb}}, includes simple examples for individual functions, generic tutorials, and links to more advanced case studies. Moreover, the package is following best practices in open source software development such as version control, automated unit tests, continuous integration, and collaborative development \citep{PerezRiverol2016}. 
@@ -184,7 +185,7 @@ \subsection{Using it for another API, not in the catalogue} % \item Quality control: CI, unit tests, open development/issues, CRAN checks etc % \item Justification for design choices that are potentially interesting or controversial % \item Examples of known case studies etc. that the package has enabled -% \item Future extensions: additional data sources, additional functionality(?)..? +% \item Future extensions: additional data sources, additional functionality(?)..? %\end{itemize} @@ -289,7 +290,7 @@ \subsection{Using it for another API, not in the catalogue} electronic gambling machines and socioeconomic status \cite{Raisamo2019}. % Didn't find other citations. -% Future extensions: additional data sources, additional functionality(?)..? +% Future extensions: additional data sources, additional functionality(?)..? % Currently unclear to me what is the added value in PxWebApiData / LL % here discuss the new pkg by our collaborators -> does this refer to PxWebApiData? % Unexpected use cases by integration with external sources -> Any ideas? 
@@ -342,7 +343,7 @@ \section*{Acknowledgments} \bibliography{magnusson-kainu-lahti} - + \address{M\r{a}ns Magnusson\\ Department of Computer Science\\ From 4be15c234fe29d0ea8a63314b776f926aa532ffb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=CC=8Ans=20Magnusson?= Date: Mon, 31 Oct 2022 14:25:45 +0100 Subject: [PATCH 09/13] Minor fix --- R/utils_tests.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/utils_tests.R b/R/utils_tests.R index 1db6f8a6..059dbfc1 100644 --- a/R/utils_tests.R +++ b/R/utils_tests.R @@ -1,6 +1,5 @@ # Functions only used for testing - on_github_actions <- function() identical(Sys.getenv("GITHUB_ACTIONS"), "true") get_root_path <- function() { From 36ef70d297c03686f8e4dc3645e95368a4945ba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Magnusson?= Date: Mon, 31 Oct 2022 14:27:08 +0100 Subject: [PATCH 10/13] Test GA from OL --- paper/magnusson-kainu-lahti.tex | 1 - 1 file changed, 1 deletion(-) diff --git a/paper/magnusson-kainu-lahti.tex b/paper/magnusson-kainu-lahti.tex index f1a1965d..0bf0e811 100644 --- a/paper/magnusson-kainu-lahti.tex +++ b/paper/magnusson-kainu-lahti.tex @@ -10,7 +10,6 @@ %An abstract of less than 150 words. \abstract{Abstract \CRANpkg{pxweb} R package here.} -% TEST %\begin{itemize} % \item Motivation here. Why do we need this package? We want to use the open data pipeline, access, use and cite. From f6bfb8ef90af63a47f643d499207d972cb7ad2f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=CC=8Ans=20Magnusson?= Date: Mon, 31 Oct 2022 14:59:00 +0100 Subject: [PATCH 11/13] Revert "Updates from Overleaf" This reverts commit ce58900764b385faa9b4ec5cb150243efc1cd794. 
--- paper/magnusson-kainu-lahti.tex | 3 - tests/testthat/test-get_pxweb_dims.R | 18 - tests/testthat/test-get_pxweb_levels.R | 19 - tests/testthat/test-get_pxweb_metadata.R | 47 -- tests/testthat/test-interactive_pxweb.R | 81 --- tests/testthat/test-multiple_queries_data.R | 49 -- tests/testthat/test-pxweb_api_class.R | 63 -- tests/testthat/test-pxweb_examples.R | 31 - tests/testthat/test-test_pxweb_api.R | 34 - tests/testthat/test-utils_internal.R | 45 -- .../test-x_deprecated_get_pxweb_data.R | 187 ----- tests/testthat/test_data/filter_query.json | 28 + tests/testthat/test_data/pxm1_test.rda | Bin 0 -> 4202 bytes tests/testthat/test_data/test_query_px.json | 42 ++ tests/testthat/test_data/test_query_sdmx.json | 42 ++ tests_bash/pxweb.R | 12 +- tests_bash/pxweb.sh | 18 - vignettes/pxweb.md | 676 ------------------ 18 files changed, 119 insertions(+), 1276 deletions(-) delete mode 100644 tests/testthat/test-get_pxweb_dims.R delete mode 100644 tests/testthat/test-get_pxweb_levels.R delete mode 100644 tests/testthat/test-get_pxweb_metadata.R delete mode 100644 tests/testthat/test-interactive_pxweb.R delete mode 100644 tests/testthat/test-multiple_queries_data.R delete mode 100644 tests/testthat/test-pxweb_api_class.R delete mode 100644 tests/testthat/test-pxweb_examples.R delete mode 100644 tests/testthat/test-test_pxweb_api.R delete mode 100644 tests/testthat/test-utils_internal.R delete mode 100644 tests/testthat/test-x_deprecated_get_pxweb_data.R create mode 100644 tests/testthat/test_data/filter_query.json create mode 100644 tests/testthat/test_data/pxm1_test.rda create mode 100644 tests/testthat/test_data/test_query_px.json create mode 100644 tests/testthat/test_data/test_query_sdmx.json mode change 100644 => 100755 tests_bash/pxweb.sh delete mode 100644 vignettes/pxweb.md diff --git a/paper/magnusson-kainu-lahti.tex b/paper/magnusson-kainu-lahti.tex index 0bf0e811..c1bdc4e3 100644 --- a/paper/magnusson-kainu-lahti.tex +++ b/paper/magnusson-kainu-lahti.tex @@ 
-112,9 +112,6 @@ \subsection{The \CRANpkg{pxweb} package} -% In early 2013, Statistics Sweden released PX-WEB as a way to simplify the use and re-use of the statistics produced by the agency \cite{xxx}. The PXWEB API - - \begin{itemize} \item History of the package \item Design principles diff --git a/tests/testthat/test-get_pxweb_dims.R b/tests/testthat/test-get_pxweb_dims.R deleted file mode 100644 index 5897e63e..00000000 --- a/tests/testthat/test-get_pxweb_dims.R +++ /dev/null @@ -1,18 +0,0 @@ -# Test suits for the examples in the documentation - -context("get_pxweb_dims.R") - -test_that(desc="get_pxweb_dims()",{ - - # CRAN seem to run tests in parallel, hence API tests cannot be run on CRAN. - skip_on_cran() - - expect_warning(bottom_node <- get_pxweb_metadata("http://api.scb.se/OV0104/v1/doris/sv/ssd/AM/AM0114/LCIArbKv"), regexp = "deprecated") - expect_warning(dims <- suppressMessages(get_pxweb_dims(bottom_node)), regexp = "deprecated") - -}) - - - - - diff --git a/tests/testthat/test-get_pxweb_levels.R b/tests/testthat/test-get_pxweb_levels.R deleted file mode 100644 index 426c6674..00000000 --- a/tests/testthat/test-get_pxweb_levels.R +++ /dev/null @@ -1,19 +0,0 @@ -# Test suits for the examples in the documentation - -context("get_pxweb_levels.R") - -test_that(desc="get_pxweb_levels",{ - - # CRAN seem to run tests in parallel, hence API tests cannot be run on CRAN. 
- skip_on_cran() - - expect_warning( - lev <- get_pxweb_levels(baseURL = - paste0(pxweb_api$new("api.scb.se")$base_url(language = "sv"), "/ssd")), regexp = "deprecated") - -}) - - - - - diff --git a/tests/testthat/test-get_pxweb_metadata.R b/tests/testthat/test-get_pxweb_metadata.R deleted file mode 100644 index af94054d..00000000 --- a/tests/testthat/test-get_pxweb_metadata.R +++ /dev/null @@ -1,47 +0,0 @@ -# Test suite for get_pxweb_metadata() - -context("get_pxweb_metadata.R") - - -test_that(desc="baseURL 1",{ - - skip_on_cran() - - suppressWarnings( - api_tests_get_pxweb_metadata_baseURL <- list( - list(baseURL = paste0(pxweb_api$new("api.scb.se")$base_url(language = "sv"), "/ssd"), - test_dims = c(21, 4)), - list(baseURL = paste0(pxweb_api$new("api.scb.se")$base_url(), "/ssd"), - test_dims = c(17, 4)) - )) - - for (test in api_tests_get_pxweb_metadata_baseURL){ - expect_warning(test_file <- get_pxweb_metadata(baseURL = test$baseURL), regexp = "deprecated") - expect_that(test_file, is_a("data.frame"), info = test$baseURL) - expect_that(dim(test_file), is_equivalent_to(test$test_dims)) - } -}) - - - - -test_that(desc="baseURL 2",{ - skip("Until next version") - skip_on_cran() - - api_tests_get_pxweb_metadata_path <- list( - "http://api.scb.se/OV0104/v1/doris/sv/ssd/AM/AM0114/LCIArbKv", - "http://api.scb.se/OV0104/v1/doris/sv/ssd/NV/NV0119/IVPKNLonAr", - "http://api.scb.se/OV0104/v1/doris/sv/ssd/HA/HA0201/HA0201B/ExpTotalKNMan", - "http://api.scb.se/OV0104/v1/doris/sv/ssd/HA/HA0201/HA0201B/ExpTotalKNAr", - "http://api.scb.se/OV0104/v1/doris/sv/ssd/HA/HA0201/HA0201B/ImpTotalKNMan", - "http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0401/BE0401A/BefolkprognRev2015", - "http://api.scb.se/OV0104/v1/doris/en/ssd/UF/UF0536/Fullfoljt" - ) - - for (test in api_tests_get_pxweb_metadata_path){ - expect_warning(api_test_file <- get_pxweb_metadata(path = test), regexp = "deprecated") - expect_is(object = api_test_file$variables$variables[[1]], "list") - } -}) - diff --git 
a/tests/testthat/test-interactive_pxweb.R b/tests/testthat/test-interactive_pxweb.R deleted file mode 100644 index 33a100ac..00000000 --- a/tests/testthat/test-interactive_pxweb.R +++ /dev/null @@ -1,81 +0,0 @@ -# Test suite interactive_pxweb() - -context("interactive_pxweb.R") - -test_that(desc="findData.inputBaseCat",{ - load(system.file("extdata/test_files/testFiles.Rdata", package = "pxweb")) - expect_equal(pxweb:::findData.inputBaseCat(1:2,test_codedAlt), - "\n('q' = Quit, 'b' = Back)") - expect_equal(pxweb:::findData.inputBaseCat(c(3,6),test_codedAlt), - "\n('*' = Select all, 'a' = Show all)") - -}) - -test_that(desc="findData.printNode",{ - xscb <-data.frame(id=c("01","02","03"), - text=c("Värde 1","Värde 2", "Värde 3")) - - expect_output(pxweb:::findData.printNode(xscb, print=TRUE),"Värde 3") - expect_output(pxweb:::findData.printNode(xscb, print=TRUE),"2. ") - expect_that(pxweb:::findData.printNode(xscb, print=FALSE),is_a("character")) - expect_match(pxweb:::findData.printNode(xscb, print=FALSE),"Värde 1") -}) - -test_that(desc="findData.printCode",{ - varListText <- c("first","second","last") - - expect_output(pxweb:::findData.printCode(url="urladress", varListText, clean=TRUE), - "urladress") - expect_output(pxweb:::findData.printCode(url="urladress", varListText, clean=TRUE), - "clean = TRUE") - expect_output(pxweb:::findData.printCode(url="urladress", varListText, clean=TRUE), - "list\\(first") -}) - -test_that(desc="findData.inputConvert",{ - expect_that(pxweb:::findData.inputConvert(input=c("2","2:3","3:7","6")), - is_equivalent_to(c("2","3","4","5","6","7"))) - expect_that(pxweb:::findData.inputConvert(c("4:5")), - is_equivalent_to(c("4","5"))) - expect_that(pxweb:::findData.inputConvert(c("2","10:11","5")), - is_equivalent_to(c("2","5","10","11"))) - expect_that(pxweb:::findData.inputConvert("*"), - is_equivalent_to("*")) - expect_that(pxweb:::findData.inputConvert(input=":3"), - is_equivalent_to(as.character(1:3))) - 
expect_that(pxweb:::findData.inputConvert(c(":3","5")), - is_equivalent_to(as.character(c(1:3,5)))) - expect_that(pxweb:::findData.inputConvert(c("1", "3", "5:"), max_value=7), - is_equivalent_to(as.character(c(1,3,5:7)))) - expect_that(pxweb:::findData.inputConvert(c("1", "3", "5:")), - is_equivalent_to(as.character(c(1,3,5)))) -}) - -test_that(desc="download_pxweb",{ - load(system.file("extdata/test_files/testFiles.Rdata", package = "pxweb")) - expect_output(pxweb:::download_pxweb(dataNode = testNullNode, test_input = c("n", "n", "y")), - "To download the same data again, use the following code:") -}) - -test_that(desc="findData.input",{ - load(system.file("extdata/test_files/testFiles.Rdata", package = "pxweb")) - - expect_that(pxweb:::findData.input(type="yesno","Testing 'y'", test_input="y", silent=TRUE),is_equivalent_to("y")) - expect_that(pxweb:::findData.input(type="yesno","Testing 'n'", test_input="n", silent=TRUE),is_equivalent_to("n")) - expect_that(pxweb:::findData.input(type="text","Testing 'MyData1'", test_input="MyData1", silent=TRUE), is_equivalent_to("MyData1")) - expect_that(pxweb:::findData.input(type="node", testBaseNode, test_input="3", silent=TRUE), - is_equivalent_to("3")) - expect_that(pxweb:::findData.input(type="node", testBaseNode, test_input="b", silent=TRUE), - is_equivalent_to("b")) - - test_varDF <- list(data.frame(id = as.character(seq(0.5,10,0.5)), - text = paste("Värde", as.character(seq(0.5,10,0.5))), - stringsAsFactors = FALSE), - "testingVärde") - expect_that(pxweb:::findData.input(type="alt", input=test_varDF, test_input="10:12, 1 ,3:1, 2", silent=TRUE), - is_equivalent_to(c("1","2","3","10","11","12"))) - expect_that(pxweb:::findData.input(type="alt", input=test_varDF, test_input="7:,3", silent=TRUE), - is_equivalent_to(c("3",as.character(7:20)))) - -}) - diff --git a/tests/testthat/test-multiple_queries_data.R b/tests/testthat/test-multiple_queries_data.R deleted file mode 100644 index d8ea9f40..00000000 --- 
a/tests/testthat/test-multiple_queries_data.R +++ /dev/null @@ -1,49 +0,0 @@ -# Test suite for doing multiple downloads from the SCB api - -# Tests to run multiple queries (calls) -context("multiple_queries_data.R") - - - -test_that(desc="multiple data calls",{ - skip("Until next version") - skip_on_cran() - - api_tests_multiple_data <- list( - list( - url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/PR/PR0101/PR0101E/Basbeloppet", - dims = list(ContentsCode = c('PR0101A1'), - Tid = c('2001')), - clean = FALSE) - ) - - api_config <- pxweb::api_parameters(url=api_tests_multiple_data[[1]]$url)[[1]] - Sys.sleep(time=api_config$period_in_seconds) - api_file <- paste(tempdir(), "api_time_stamp.Rdata", sep="/") - if(file.exists(api_file)) file.remove(api_file) - - for (test in api_tests_multiple_data){ - api_config <- pxweb::api_parameters(url=test$url)[[1]] - - expect_warning( - for(i in 1:(api_config$calls_per_period + 10)){ - test_data <- - get_pxweb_data(url = test$url, dims = test$dims, clean = test$clean) - }, - info = test$url) - } -}) - - -test_that(desc="multiple metadata calls",{ - skip("Until next version") - skip_on_cran() - - api_tests_multiple_metadata <- list( - pxweb::base_url("api.scb.se", version = "v1", language = "sv") - ) - -}) - - - diff --git a/tests/testthat/test-pxweb_api_class.R b/tests/testthat/test-pxweb_api_class.R deleted file mode 100644 index 018b9d0f..00000000 --- a/tests/testthat/test-pxweb_api_class.R +++ /dev/null @@ -1,63 +0,0 @@ -# Test suite for utils functions - -context("pxweb_api_class.R") - -test_that(desc="pxweb_api_class",{ - - expect_warning( - test_api <- - pxweb_api$new(api = "foo.bar", - url="http://httpbin.org/[lang]/[version]", - description = "test api", - languages = "status", - versions = "404", - calls_per_period = 1, - period_in_seconds = 2, - max_values_to_download = 10)) - - expect_silent(suppressMessages(test_api$write_to_catalogue())) - expect_true("foo.bar" %in% unlist(lapply(suppressWarnings(api_catalogue()), 
function(X) X$api))) - - expect_error(suppressWarnings( - test_api2 <- - pxweb_api$new(api = c("foo", "bar"), - url="http://httpbin.org/[lang]/[version]", - description = "test api", - languages = "status", - versions = "404", - calls_per_period = 1, - period_in_seconds = 2, - max_values_to_download = 10))) - - expect_error(suppressWarnings( - test_api2 <- - pxweb_api$new(api = "foo.bar", - url="http://httpbin.org/[lang]/[version]", - description = "test api", - calls_per_period = 1, - period_in_seconds = 2, - max_values_to_download = 10))) - - expect_warning(test_api <- test_api$copy()) - - expect_warning(test_api <- pxweb_api$new("api.scb.se")) - - expect_warning(test_api <- pxweb_api$new("scb")) - - expect_warning( - test_api3 <- - pxweb_api$new()) - expect_silent(test_api3$check_input()) - - expect_equal({ - test_api$base_url() - }, "http://api.scb.se/OV0104/v1/doris/en") - - api_cat <- suppressWarnings(api_catalogue()) - for(api in api_cat){ - expect_silent(api$check_input()) - expect_is({api$pxweb_api_to_list()}, "list") - } - -}) - diff --git a/tests/testthat/test-pxweb_examples.R b/tests/testthat/test-pxweb_examples.R deleted file mode 100644 index 473faa7e..00000000 --- a/tests/testthat/test-pxweb_examples.R +++ /dev/null @@ -1,31 +0,0 @@ -# Test suits for the examples in the documentation - -context("tests_pxweb_examples.R") - -test_that(desc="Example tests",{ - # CRAN seem to run tests in parallel, hence API tests cannot be run on CRAN. 
- skip_on_cran() - - skip("Skip temporarily (until new version)") - - expect_warning( - url <- paste(c(pxweb_api$new("api.scb.se")$base_url(language = "sv"),"ssd","AM","AM0102","AM0102A","KLStabell14LpMan"), collapse="/") - ) - - expect_warning( - metadata <- get_pxweb_metadata(url) - ) - - expect_warning( - test <- get_pxweb_data(metadata$URL, dims=list( - Myndighet = "C02", - Kon = "*", - Heltiddeltid = "*", - ContentsCode = "*", - Tid = "*"))) -}) - - - - - diff --git a/tests/testthat/test-test_pxweb_api.R b/tests/testthat/test-test_pxweb_api.R deleted file mode 100644 index 6f3030e3..00000000 --- a/tests/testthat/test-test_pxweb_api.R +++ /dev/null @@ -1,34 +0,0 @@ -# Test suite for test_pxweb_api() - -context("test_pxweb_api.R") - - -test_that(desc="test_pxweb_api()",{ - - skip_on_cran() - skip("Until next version") - - api_tests_test_pxweb_api <- list( - list( - url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/TK", - test_dim = c(11, 8) - ), - - list( - url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/KU", - test_dim = c(16, 8) - ) - ) - - test_seeds <- c(as.integer(Sys.time()), 1408310599) - - for (test in api_tests_test_pxweb_api){ - for (seed in test_seeds){ - expect_warning( - test_data <- suppressMessages(test_pxweb_api(url=test$url, seed=seed)), - info = paste(test$url, ", seed ", seed, sep="")) - - expect_equal(object=dim(test_data[[1]]), test$test_dim, info=test$url) - } - } -}) diff --git a/tests/testthat/test-utils_internal.R b/tests/testthat/test-utils_internal.R deleted file mode 100644 index c9aee861..00000000 --- a/tests/testthat/test-utils_internal.R +++ /dev/null @@ -1,45 +0,0 @@ -# Test suite for utils functions - -context("utils_internal.R") - -test_that(desc="api_timer()",{ - - skip_on_cran() - - api_file <- paste(tempdir(), "api_time_stamp.Rdata", sep="/") - if(file.exists(api_file)) file.remove(api_file) - suppressWarnings( - test_api <- - pxweb_api$new(api="foo.bar", - url="http://httpbin.org/[lang]/[version]", - description = "test api", 
- languages = "status", - versions = "404", - calls_per_period = 1, - period_in_seconds = 2, - max_values_to_download = 10)) - suppressMessages(test_api$write_to_catalogue()) - - if(file.exists(api_file)) file.remove(api_file) -}) - - - - - -test_that(desc="create_batch_list()",{ - - api_tests_create_batch_list <- list( - list( - url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/PR/PR0101/PR0101E/Basbeloppet", - dims = list(ContentsCode = c('*'), - Tid = c('*')) - ) - ) - - for (test in api_tests_create_batch_list){ - expect_warning( - res <- pxweb:::create_batch_list(url=test$url, dims=test$dims)) - } -}) - diff --git a/tests/testthat/test-x_deprecated_get_pxweb_data.R b/tests/testthat/test-x_deprecated_get_pxweb_data.R deleted file mode 100644 index a3d9366d..00000000 --- a/tests/testthat/test-x_deprecated_get_pxweb_data.R +++ /dev/null @@ -1,187 +0,0 @@ -# Test suite for get_pxweb_data() - -# Below is the tests that should be conducted as a list. -# Each listelement is a named object that contains url and dims -# that make up the call through get_pxweb_data(). -# Test will be done that downloading works, that the function returns a data.frame and that -# the size of the data.frame is test_dim, if missing values the dimension is not tested. -# in test_dim. If NA in test_dim, the dimension is ignored. 
- -context("get_pxweb_data.R") - -test_that(desc="get_pxweb_data()",{ - - skip_on_cran() - pxweb:::pxweb_clear_cache() - - api_tests_get_pxweb_data <- list( - list( - url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/PR/PR0101/PR0101E/Basbeloppet", - dims = list(ContentsCode = c('PR0101A1'), - Tid = c('1995', '1996', '1997')), - clean = TRUE, - test_dim = c(NA, 3), - test_sum = 108200), - - list( - url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/BE/BE0101/BE0101A/BefolkningNy", - dims = list(Region = c('00', '01'), - Civilstand = c('*'), - Alder = c('0', 'tot'), - Kon = c('*'), - ContentsCode = c('BE0101N1'), - Tid = c('2010', '2011', '2012', '2013')), - clean = TRUE, - test_dim = c(128, 7), - test_sum = 47107124), - - list( - url="http://api.scb.se/OV0104/v1/doris/sv/ssd/BE/BE0101/BE0101A/BefolkningNy", - dims = list(Region = c('00', '01'), - Civilstand = c('*'), - Alder = c('0', 'tot'), - Kon = c('*'), - ContentsCode = c('BE0101N1'), - Tid = c('2010', '2011', '2012', '2013')), - clean = FALSE, - test_dim = c(32, 8), - test_sum = NA), - - list( - url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/AM/AM0114/LCIArbKv", - dims = list(SNI2007 = c('*'), - ContentsCode = c('*'), - Tid = c('*')), - clean = FALSE, - test_dim = c(NA, NA), - test_sum = NA), - - # Test swedish letters - list( - url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/ME/ME0104/ME0104C/ME0104T3", - dims = list(Region = c('*'), - Partimm = c('M','C','FP','KD','MP','S','V','SD','\u00D6VRIGA'), - ContentsCode = c('ME0104B7'), - Tid = c('2010')), - clean = TRUE, - test_dim = c(2907, 5), - test_sum = 31999.3), - - list( - url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/TK/TK1001/TK1001S/SnabbStatTK1001", - dims = list("ContentsCode" = c("TK1001AE"), - "Tid" = c("2014M02") - ), - clean = TRUE, - test_sum = 18.3 - ), - - list( - url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/BE/BE0101/BE0101A/BefolkningNy", - dims = list(Region = c('2584'), - Civilstand = c('*'), - Alder = c('1'), - Kon = c('1'), - ContentsCode 
= c('BE0101N1'), - Tid = c('2017')), - clean = TRUE, - test_dim = c(NA, NA), - test_sum = 144) - - ) - - - for (i in seq_along(api_tests_get_pxweb_data)){ -# if(test$url == "http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0401/BE0401A/BefolkprognRev2014") { -# skip("Known error: comma bug in csv files")} - -# skip("Skip temporarily (until new version)") - test <- api_tests_get_pxweb_data[[i]] - expect_warning( - test_data <- - get_pxweb_data(url = test$url, - dims = test$dims, - clean = test$clean)) - - test_dim_size <- suppressWarnings(pxweb:::calculate_data_dim(pxweb:::get_dim_size(url = test$url, dims=test$dims)[[1]], test$clean)) - expect_equal(object=dim(test_data), test_dim_size, info=test$url) - expect_equal(object=class(test_data), "data.frame", info=test$url) - if(!is.na(test$test_sum)){ - expect_equal(sum(test_data$values, na.rm = TRUE), expected = test$test_sum, label = test$url) - } - } -}) - - -test_that(desc="get_pxweb_data()",{ - skip_on_cran() - skip_on_os("windows") # Due to http error, this is solved in new version - - # PXWEB query - pxweb_query_url <- "http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy" - pxweb_query_list <- - list("Region"=c("00","01","0114","0115","0117","0120","0123"), - "Civilstand"=c("OG","G"), - "Alder"=c("0","1","2","3","4","5","6","7","8","9","10"), - "Kon"=c("1","2"), - "ContentsCode"=c("BE0101N1","BE0101N2"), - "Tid"=c("1968","1969","1970","1971","1972")) - - # Download data - expect_silent(px_data <- - pxweb_get(url = pxweb_query_url, - query = pxweb_query_list)) - expect_warning(pxd1 <- as.data.frame(px_data)) - - - expect_warning(pxd2 <- get_pxweb_data(url = pxweb_query_url, dims = pxweb_query_list, clean = TRUE, encoding = NULL)) - - expect_equal(dim(pxd2)[1], dim(pxd1)[1]*2) - expect_equal(dim(pxd2)[2], dim(pxd1)[2]) - - pxd2pop <- pxd2[pxd2$ContentsCode == "Population", ] - pxd1pop <- pxd1[, 1:6] - pxd2grw <- pxd2[pxd2$ContentsCode == "Population growth", ] - pxd1grw <- pxd1[, c(1:5, 7)] - - 
expect_equal(sum(pxd1pop$Population, na.rm = TRUE), - sum(pxd2pop$values, na.rm = TRUE)) - expect_equal(sum(pxd1grw$`Population growth`, na.rm = TRUE), - sum(pxd2grw$values, na.rm = TRUE)) - expect_true(all(pxd1pop$Population >= 0 )) - expect_true(all(pxd2pop$values >= 0)) - -}) - -test_that(desc="get_pxweb_data()",{ - skip_on_cran() - skip_on_os("windows") # Due to 429 error, this is solved in new version of pxweb - - # PXWEB query - pxweb_query_url <- "http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy" - pxweb_query_list <- - list("Region"=c("00","01","0114","0115","0117","0120","0123"), - "Civilstand"=c("OG","G"), - "Alder"=c("0","1","2","3","4","5","6","7","8","9","10"), - "Kon"=c("1","2"), - "ContentsCode"=c("BE0101N1"), - "Tid"=c("1968","1969","1970","1971","1972")) - - # Download data - expect_silent(px_data <- - pxweb_get(url = pxweb_query_url, - query = pxweb_query_list)) - expect_silent(pxd1 <- as.data.frame(px_data)) - - expect_warning(pxd2 <- get_pxweb_data(url = pxweb_query_url, dims = pxweb_query_list, clean = TRUE, encoding = NULL)) - - expect_equal(dim(pxd2)[1], dim(pxd1)[1]) - expect_equal(dim(pxd2)[2]-1, dim(pxd1)[2]) - - expect_equal(sum(pxd1$Population, na.rm = TRUE), - sum(pxd2$values, na.rm = TRUE)) - expect_true(all(pxd1$Population >= 0 )) - expect_true(all(pxd2$values >= 0)) - expect_true(all(table(pxd1$Population) == table(pxd2$values))) - -}) diff --git a/tests/testthat/test_data/filter_query.json b/tests/testthat/test_data/filter_query.json new file mode 100644 index 00000000..88589567 --- /dev/null +++ b/tests/testthat/test_data/filter_query.json @@ -0,0 +1,28 @@ +{ + "query": [ + { + "code": "Region", + "selection": { + "filter": "all", + "values": ["039*"] + } + }, + { + "code": "ContentsCode", + "selection": { + "filter": "all", + "values": ["*"] + } + }, + { + "code": "Tid", + "selection": { + "filter": "top", + "values": ["4"] + } + } + ], + "response": { + "format": "json-stat" + } +} diff --git 
a/tests/testthat/test_data/pxm1_test.rda b/tests/testthat/test_data/pxm1_test.rda new file mode 100644 index 0000000000000000000000000000000000000000..51b4ce6870a8cd04385c932d34f76325cab4760e GIT binary patch literal 4202 zcmaLRXE+-S*9UN;5=yGA3SyPIYg4sXP}C}lno(M#V$UKKyOgSx+Pg+$Q!8dsv$d%e zMbs`Kc8u`!x}W!bKfdQW{}2D)hjSgy7;3VAhY}AP^7_@Ecwgn1J`-YYyX&@{c#<9M zdO@k-rIsjGs1}i7GlK?6uoiYarlcdA=XMWZZu_P{esmof8YDVr4!?V#Iy3|1+?FNg zJq17QzYq`<6fYA$T{_~#k8x_iU@&=D3#@laFTTHGp>IHX5eS2|06-0!Jsc#Z#$aN* zx~00r5xvB-Dyz<|<&#s=!`M=uksF`mf$B(|pMJoYV$D)MMZU6PiUHtf6_OI%E=T(J_I~LpXitdy)>SmzUB7!kOUK zaA&$Yki;NHA#ZO^8P2*kLsYnfB{%072akqug~*bZ3x=GM55GGCgz%CLlE0u0b%1{X zL=-53g=apOcQ^DY(=xG|*aJ1YvXpsftbjh7{txHKMf5^Og!5eULU}hjXu3`8ba%|A zRx-e&-^L;&n?eR7o_oKXwp8AxuwlB{LO$#la?D)x;zU`WW2SQ5Oqo`LJ^_(aK1FHA zk+(+{3xv0lwNoj$==Yk967zUD4_Ct7kR#++DhQEGJNL-pLl2ZfrxDoTL5D+9ag0CU z^kE1oB=Z%Z_J{*X^xMoczfk4!mUyz}8mE{?S*V-$_pdc>ec`=VE5*IanaXpIS~y0( zj)wH^<6!i5h>kwWrWyw|%3ylpdB-9!2TC{$ji*N47PZRGVEEAWkYKVj$^^!c zax%M!4y9|CTm*W1Q#F*ZXV0~Ghp;|zS7k>^?MwqmH6~8&LB4PvRTZTn``*B3+fEF4 z5E$U;LNwuK9P8){J-tEjA&d8C%O`KAykMt?cPvwIR%;M0)Rx&2sH8SRKyokq;^>hb z0QPE?D53jmU)G=N?&W@>IbQ~QG0cn zRC0a!cGziytOhJb%2qSrI-?sx8bWmGI@-FcpgBP|_9+(t)(+ag0AUQ6Q2qa)X)hJPblpHZ&e zO`w~C44m>}0a!}8JJ`t6+gj0)vK)iAy1{A>X}Qy>5(iP5pW!&sGs^BYX2*2b-)ykP z%n&*+d)oiqfor7ibWyS3ZdKaf@l<<#VMJS%i!VYr5q-SaxX5U}>)cz}YByHj(a6_2 zgJMEs%-uKt{^%K}`MTTaCOb8Vnr-ysJjw07Jw5q;i+Tyn}2)7ZJnDV*> zWn`Z4i2+Y0yzyLl_Zw9CvAarx+y8BeOIyInwm!74R>t{p)qUe2#j7NAO!yWu-8dcw zyQ<2$+F2$AYD+Kf6UrJPgqEpk2DLP(XvQk}jd=ukwoc@X$<1$+nk<|J(_ z1^FI~=lEKvo@xMj?N{yBd?`1CNDb09Jt)?+wK~HI>6qA0<}WBFo3;(`i0sTEKqrwC z3!YNeN{bWMEfju`Q4euDom?qNK`hkLvF}An+XU;6v1Sl&h3_^I`3T*B$b_9M|$>)?a61TWyR#-x8e^NLR0Ck!W4uF-NptX4JP@jXGxZ6gGM# z56)5L@@9mcB&+*y2G)*woL7&n%(WVx|1>)`mMF0Q0tUG6PK|8VJ)hnUwp7A+VHu@k z+IC#_AMbb$u*+QedpWK!(8_jX9-|--h|_!gUhbyx7d^ z;1#|y=1LXHzX$@A80uftNX15>d>2yv{IaQodd{jgY5pj-KgX2jEVC0~nes(w1+|d; zn*s1{^ZFAXO%2CA$8;0?A?8+-fcbg|c<*i+aNTcUJ0*Tf+QC4ZbwRSGlYiEfU%P!U 
z#!$G>Q^tLIq~}n~II_!XGl?uoT73{(q&3@n3v->BVjTH}Xv^h(V=P z@A1d72w?QLlU_Fb&5=<|vMS7QTbPZGvX?>EhGk81p|@wMKzIi=VMRLYHjTC@waG|(#` zO=lFbt6yInxn!^MK6a}r^SQhrsz&=~!}oHnnmjbTdn;FSp8w6N(W<8*9kZxDf6-~g z{IMSA#CwL@88u{Dqyy@E!gzuhrM%#U-*Ls6JUmHS$Z|aCY)=uVJ;p&yY_& zgo-2epRC*=E?^l;b}`vovAW~4y)eaDgRFU!0}uU=GNT3~p&q%(JsFvto(kZiF-KM` zoY5g$TtO~2CO@FmF+jGwl>LLaNr!!Yt5a>W4mPy$xC8GsX=gT^(CVAPKO}8y&!2Q& zkAC5CX@+Wdk$$zi>;kUhmYpxq!iz6G+xcj3?^CMMPpj=aemHSfQc8sHvJ2>6Q~9jF zVEL$T!`6alT8ype@@&9{lpLDSi(>9Z89L0n6iTOur}W=as(MwC;;XlNcV?LTN$}*$ zKRh&5f9ASH^*4rDw(Qr0;Tn$7VUKB0n6qkQKcEp~EQfvRv zdVM7UY?*5@D|zc(ka*!1WXJF)EnfYgWa(G@5 zW%RUaMP$M3w&i*z!@T7>tD>&ATvWPmy5V_4l%XuEsy2eQKMr8}ryq^9y~?Y!(+zQ( zdZd7ZOc#9oCQ|N;)A6Fuxx^HIiDog3|HEXibUswX z=GN;=f5msH=M2e1(y)f~{fg%3t5QhPdQP(67>a(3Yx4*?WG<|yDoko-Atk?aL2ts5R}_F*2g=pp5cn`nb-K>F=A47I~{t3kD+XNf*6F($s*-i?bDzu!qs|CwMh?`>5% z+CW)h^p7`>^N)4v7YOU$!=@UK*4xMY#m~6RORSMj)3b36>Xn2OU?8nXT(xATbjfzvMP zOcn?iR0jCTGh4PiY;1P*&`9Z&)C=F52bea}Rla7m+>~k@cJ)Y}!$B~ejBepua{yBe z=$*ZcP?tT6$N&z~xKu-zYTqanelA0|^ZS8i%HLk;#B9m=u#v;oL$VhKDejbng$p(S z{JfYs1_)+==^0REHLInybYrs{Q7Bwqk#6VULE}Idj)T6-fyHnPwZmD$pymGI=S*P+ zAIWIEfG6A9GEusb!`(x2A>oG1<{?h5wbR%?+?fC{#epi@SuFz>CHt1I9O;9TO|eUy zAGYpmel1JO%;{e%nncJ}x2U4!eOgqj92|*4pxmORS_x!ID~W7~9GN1_LZGlN2WR5H=q-Va ziyRpv$U>kRUF&cn8z{G6>AyHhXo5gZyVjkEmZ03SC2vV&W#q`esNc2jNK^pjqL&UN zkU5bf69fbV`Y$3uxm8OClE|--Bh!Ra2-Lc3-I@3=(n=shBSl9ERFJRAT}*Hy1;{#Y z>A(1s5CQr6w2R4!_y}ZOyhJOB{1hqr7oQ{VF&fF~zeyG_bR)g;Udpbut|ROw?WrF0 z!#Q43{0gkC#W@@AdRGG4XN$fPxkfhb!;+Qw60!0ZwA8NAT?NZuWTh6d6nURoZk)-hZtT*1Y8QWdIh^4A zRGl(-L;A+2MVYrj%cH=iD-Va9d=-(y50RtdkVhkMYQ+Ms|F@v(Eo?7g5Or1S+(T{# zS4vP|;AoR% 0){ for(i in seq_along(new_api_paths)){ # cat(new_api_paths[i], "\n") - first_results[[i]] <- try(pxweb_test_api(url = new_api_paths[i], test_type = "first", verbose = TRUE, time_limit = 10*60), silent = TRUE) + first_results[[i]] <- try(pxweb_test_api(url = new_api_paths[i], test_type = "first", verbose = TRUE, time_limit = 15*60), silent = TRUE) if(inherits(first_results[[i]], 
"try-error")){ new_api_errored[i] <- TRUE } @@ -146,6 +147,7 @@ for (i in seq_along(apis)) { if(any(errored) | length(warns) > 0 | any(config_diff) | any(new_api_errored) | any(duplicated_alias) | any(parameter_error) | any(duplicated_names)){ - quit(save = "no", status = 1) + # quit(save = "no", status = 1) + quit(save = "no", status = 0) } diff --git a/tests_bash/pxweb.sh b/tests_bash/pxweb.sh old mode 100644 new mode 100755 index 81411eeb..2c33f0e6 --- a/tests_bash/pxweb.sh +++ b/tests_bash/pxweb.sh @@ -1,24 +1,6 @@ #!/bin/bash # Need to be in Project root -# git log -n 1 R --version -script_start_time=$(date +"%T") -# Set Pxweb Error to 1 (to break Travis) -echo "PXWEB_ERROR=1" > "PXWEB_ERROR.sh" - -echo $(date +"%T") - -echo " " && echo -en "travis_fold:start:test-pxweb\n" Rscript --vanilla tests_bash/pxweb.R -r_exit=$?; echo $r_exit; if [[ $r_exit != 0 ]]; then exit $r_exit; fi -echo $r_exit -echo "travis_fold:end:test-pxweb" - -echo $(date +"%T") - -script_end_time=$(date +"%T") -echo "Start: $script_start_time Stop: $script_end_time" -# Set PXWEB Error to 0 -echo "PXWEB_ERROR=0" > "PXWEB_ERROR.sh" diff --git a/vignettes/pxweb.md b/vignettes/pxweb.md deleted file mode 100644 index 4cad47d3..00000000 --- a/vignettes/pxweb.md +++ /dev/null @@ -1,676 +0,0 @@ ---- -title: "PX-WEB API Interface for R" -author: "Mans Magnusson, Leo Lahti et al." -date: "2018-12-25" -output: - rmarkdown::html_vignette: - toc: true -vignette: > - %\VignetteIndexEntry{pxweb tutorial} - %\VignetteEngine{knitr::rmarkdown} - \usepackage[utf8]{inputenc} ---- - - -This R package provides tools to access [PX-WEB -API](http://www.scb.se/Grupp/OmSCB/API/API-description.pdf). Your -[contributions](http://ropengov.github.io/contribute/) and [bug -reports and other feedback](https://github.com/ropengov/pxweb) are -welcome! - - -More information on the PX-Web/PC-Axis API can be found [here](http://www.scb.se/Grupp/OmSCB/API/API-description.pdf). 
- -## Table of contents - -[Introduction](#introduction) (Introduction) -[Installation](#installation) (Installation) -[Using the PXWEB R package](#usage) (Using PXWEB from R) - -## Introduction - -PXWEB is an API structure developed by Statistics Sweden together with other national statistical institutions (NSI) to disseminate public statistics in a structured way. This enables downloading and usage of data from statistical agencies without using a web browser direct over HTTP/HTTPS. - -The `pxweb` R package connects any PXWEB API to R and hence facilitate the access, use and referencing of data from PXWEB APIs. - -### Available data sources and tools - -[A number of organizations](http://www.scb.se/sv_/PC-Axis/Programs/PX-Web/PX-Web-examples/) use PXWEB to distribute hierarchical data. You can browse the available data sets at: - - * [Statistics Sweden](http://www.statistikdatabasen.scb.se/pxweb/en/ssd/) with [API Description](http://www.scb.se/Grupp/OmSCB/API/API-description.pdf) - * [Statistics Finland](http://tilastokeskus.fi/til/aihealuejako.html) [StatFi API Description](http://pxnet2.stat.fi/api1.html) - * [Other organizations using PX-WEB](http://www.scb.se/sv_/PC-Axis/Programs/PX-Web/PX-Web-examples/) - -### About PXWEB APIs - -The data in PXWEB APIs consists of a metadata part and a data -part. Metadata is structured in a hierarchical node tree, where each -node contains information about subnodes that are below it in the tree -or, if the nodes are at the bottom of the tree structure, the data -referenced by the node as well as what dimensions are available for -the data at that subnode. 
- -## Installation - -To install the latest stable release version from CRAN, just use: - - -```r -install.packages("pxweb") -``` - - -To install the latest stable release version from GitHub, just use: - - -```r -library("devtools") -devtools::install_github("ropengov/pxweb") -``` - -Test the installation by loading the library: - - -```r -library(pxweb) -``` - -A tutorial is included with the package with: -```r -vignette(topic="pxweb") -``` - - -### Installation issues - -We also recommend setting the UTF-8 encoding since each individual API may have local specificl letters: - - -```r -Sys.setlocale(locale="UTF-8") -``` - - -## Accessing PXWEB from R - -There are two ways of using the `pxweb` R package to access data, either interactively of using the core functions. To access data, two parts are needed, an URL to the data table in the API and a query specifying what data is of interest. - -## Interactive use - -The simplest way of using `pxweb` is to use it interactively and navigate the API to the data of interest and then set up the data query of interest. - - -```r -# Navigate through all pxweb api:s in the R package API catalogue -d <- pxweb_interactive() - -# Get data from SCB (Statistics Sweden) -d <- pxweb_interactive(api = "api.scb.se") - -# Fetching data from statfi (Statistics Finland) -d <- interactive_pxweb("pxnet2.stat.fi") - -# Fetching data from StatBank (Statistics Norway) -d <- interactive_pxweb("data.ssb.no") - -# To see all available PXWEB APIs use -pxweb_apis <- pxweb_api_catalogue() -``` - -In the example above we use the interactive functionality from the PXWEB API root, but we could use any path to the API. - - -```r -# Start with a specific path. -d <- pxweb_interactive("http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A") -``` - -This also means that we can navigate any PXWEB API, irrespectively of if they are a part of the R package API catalog or not. 
Just supply an URL to somewhere in the API and then navigate the API from there. - -Due to new CRAN policies, it is not possible to use an R function to edit the api catalogue of the R package, but editing the can be done easily from R using `file.edit()`. - - -```r -file.edit(pxweb_api_catalogue_path()) -``` - -Although, if the `pxweb` is installed again, it will overwrite the old api catalogue. So the easiest way is to do add a PXWEB API to the global catalogue. To do this, just do a pull request at the pxweb GitHub page [here](https://github.com/rOpenGov/pxweb). - -## Direct use - -Under the hood, the pxweb package uses the `pxweb_get()` function to access data from the PXWEB API. It also keeps track of the time limits of the API and split up to big queries into optimal downloadable chunks. If we use `pxweb_get()` without a query, the function either returns a PXWEB LEVELS object or a PXWEB METADATA object, depending if the URL points to a table in the API or not. Here is an example of a PXWEB LEVELS object. - - -```r -# Get PXWEB levels -px_levels <- pxweb_get("http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/") -px_levels -``` - -``` -## PXWEB LEVELS -## BefolkningNy (t): Population by region, marital status, age and sex. Year 1968 - 2017 -## BefolkningR1860 (t): Population by age and sex. Year 1860 - 2017 -## FolkmangdNov (t): Population 1 November by region, age and sex. Year 2002 - 2018 -## FolkmangdSmaort (t): Population by smaller localities (places with 50-199 inhabitants). Every fifth year 1995 - 2010 -## FolkmangdTatort (t): Population by localities. Every fifth year 1960 - 2017 -## FolkmangdTatortH (t): Population by localities with older/changed names. Every fifth year 1960 - 1980 -## FolkmangdDistrikt (t): Population by district, Landscape or Part of the country by sex. Year 2015 - 2017 -``` - -And if we use `pxweb_get()` for a table, a PXWEB METADATA object is returned. 
- - -```r -# Get PXWEB metadata about a table -px_meta <- pxweb_get("http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy") -px_meta -``` - -``` -## PXWEB METADATA -## Population by region, marital status, age, sex, observations and year -## variables: -## [[1]] Region: region -## [[2]] Civilstand: marital status -## [[3]] Alder: age -## [[4]] Kon: sex -## [[5]] ContentsCode: observations -## [[6]] Tid: year -``` - -### Creating data queries - -To download data we need both the URL to the table and a query specifying what parts of the table are of interest. An URL to a table is an URL that will return a metadata object if not a query is supplied. Creating a query can be done in three main ways. The first and simplest approach is to use `pxweb_interactive()` to explore the table URL and create a query interactively. - - -```r -d <- pxweb_interactive("http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy") -``` - -The interactive function will return the query and the url, even if the data is not downloaded. - - - - -```r -d$url -``` - -``` -## [1] "http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy" -``` - -```r -d$query -``` - -``` -## PXWEB QUERY -## query: -## [[1]] Region (item): -## 00 -## [[2]] Civilstand (item): -## OG, G, ÄNKL, SK -## [[3]] Alder (item): -## tot -## [[4]] ContentsCode (item): -## BE0101N1 -## [[5]] Tid (item): -## 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 -``` - -We can also turn the query to a json query that can be used outside R. 
- - -```r -pxweb_query_as_json(d$query, pretty = TRUE) -``` - -``` -## { -## "query": [ -## { -## "code": "Region", -## "selection": { -## "filter": "item", -## "values": ["00"] -## } -## }, -## { -## "code": "Civilstand", -## "selection": { -## "filter": "item", -## "values": ["OG", "G", "ÄNKL", "SK"] -## } -## }, -## { -## "code": "Alder", -## "selection": { -## "filter": "item", -## "values": ["tot"] -## } -## }, -## { -## "code": "ContentsCode", -## "selection": { -## "filter": "item", -## "values": ["BE0101N1"] -## } -## }, -## { -## "code": "Tid", -## "selection": { -## "filter": "item", -## "values": ["2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017"] -## } -## } -## ], -## "response": { -## "format": "json" -## } -## } -``` - - -The second approach is to specify the query either as an R list or a JSON object. Some Statistical Agencies, such as Statistics Sweden, supply queries directly as a JSON object on their web pages. These queries can be used directly. Below is another example of a JSON query for the table above. For details on how to set up a JSON query, see the PXWEB API documentation. - -``` -{ - "query": [ - { - "code": "Civilstand", - "selection": { - "filter": "item", - "values": ["OG", "G", "ÄNKL", "SK"] - } - }, - { - "code": "Kon", - "selection": { - "filter": "item", - "values": ["1", "2"] - } - }, - { - "code": "ContentsCode", - "selection": { - "filter": "item", - "values": ["BE0101N1"] - } - }, - { - "code": "Tid", - "selection": { - "filter": "item", - "values": ["2015", "2016", "2017"] - } - } - ], - "response": { - "format": "json" - } -} -``` - -To use this JSON query we just store the JSON query as a file and supply the path to the file to the ```pxweb_query()``` function. - - -```r -pxq <- pxweb_query("path/to/the/json/query.json") -``` - -Finally, we can create a PXWEB query from an R list where each list element is a variable and selected observation. 
- - -```r -pxweb_query_list <- - list("Civilstand"=c("*"), # Use "*" to select all - "Kon"=c("1","2"), - "ContentsCode"=c("BE0101N1"), - "Tid"=c("2015","2016","2017")) -pxq <- pxweb_query(pxweb_query_list) -pxq -``` - -``` -## PXWEB QUERY -## query: -## [[1]] Civilstand (all): -## * -## [[2]] Kon (item): -## 1, 2 -## [[3]] ContentsCode (item): -## BE0101N1 -## [[4]] Tid (item): -## 2015, 2016, 2017 -``` - -The query can be validated against the metadata object to asses that the query can be used. This is done automatically when the data is fetched with ```pxweb_get()```, but can also be done manually. - - -```r -pxweb_validate_query_with_metadata(pxq, px_meta) -``` - -### Downloading data - -When we have the URL to a data table and a query we can simply download the data with ```pxweb_get()```. The function returns a `pxweb_data` object that contains the downloaded data. - - -```r -pxd <- pxweb_get("http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy", - pxq) -pxd -``` - -``` -## PXWEB DATA -## With 4 variables and 24 observations. -``` - -If we instead want a JSON-stat object, we just change the response format to JSON-stat and we will get a JSON-stat object returned. Only JSON and JSON-stat formats are implemented in the PXWEB API. 
- - -```r -pxq$response$format <- "json-stat" -pxjstat <- pxweb_get("http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy", - pxq) -pxjstat -``` - -``` -## { -## "dataset": { -## "dimension": { -## "Civilstand": { -## "label": ["marital status"], -## "category": { -## "index": { -## "OG": [0], -## "G": [1], -## "ÄNKL": [2], -## "SK": [3] -## }, -## "label": { -## "OG": ["single"], -## "G": ["married"], -## "ÄNKL": ["widowers/widows"], -## "SK": ["divorced"] -## } -## } -## }, -## "Kon": { -## "label": ["sex"], -## "category": { -## "index": { -## "1": [0], -## "2": [1] -## }, -## "label": { -## "1": ["men"], -## "2": ["women"] -## } -## } -## }, -## "ContentsCode": { -## "label": ["observations"], -## "category": { -## "index": { -## "BE0101N1": [0] -## }, -## "label": { -## "BE0101N1": ["Population"] -## }, -## "unit": { -## "BE0101N1": { -## "base": ["number"], -## "decimals": [0] -## } -## } -## } -## }, -## "Tid": { -## "label": ["year"], -## "category": { -## "index": { -## "2015": [0], -## "2016": [1], -## "2017": [2] -## }, -## "label": { -## "2015": ["2015"], -## "2016": ["2016"], -## "2017": ["2017"] -## } -## } -## }, -## "id": [ -## ["Civilstand"], -## ["Kon"], -## ["ContentsCode"], -## ["Tid"] -## ], -## "size": [ -## [4], -## [2], -## [1], -## [3] -## ], -## "role": { -## "metric": [ -## ["ContentsCode"] -## ], -## "time": [ -## ["Tid"] -## ] -## } -## }, -## "label": ["Population by marital status, sex, observations and year"], -## "source": ["Statistics Sweden"], -## "updated": ["2018-12-25T09:58:00Z"], -## "value": [ -## [2762601], -## [2820248], -## [2870477], -## [2394842], -## [2437315], -## [2477012], -## [1651482], -## [1672460], -## [1687016], -## [1639519], -## [1657129], -## [1671381], -## [99751], -## [99654], -## [99682], -## [345008], -## [340709], -## [335961], -## [417132], -## [420985], -## [425487], -## [540682], -## [546653], -## [553226] -## ] -## } -## } -``` - -If the queries are large (contain more values than 
the PXWEB API maximum allowed values), the query is chunked into optimal chunks and is then downloaded sequentially. PXWEB data objects are then combined to one large PXWEB data object, while JSON-stat objects are returned as a list of JSON-stat objects. - -For more advanced connections to the API, the `pxweb_advanced_get()` gives the flexibility to access the underlying HTTP calls using `httr` as well as logging the HTTP calls for debugging. - -The downloaded PXWEB data objects can then be converted to either `data.frame`s or to a character matrix. The character matrix contains the "raw" data while the data.frame returns a data.frame for analysis in a tidy format. This means that missing values (such as ".." are converted to `NA`) in a data.frame. Using the arguments `variable.value.type` and `column.name.type` we can also choose if we want the code or the text column names and value types. - - -```r -pxdf <- as.data.frame(pxd, column.name.type = "text", variable.value.type = "text") -head(pxdf) -``` - -``` -## marital status sex year Population -## 1 single men 2015 2762601 -## 2 single men 2016 2820248 -## 3 single men 2017 2870477 -## 4 single women 2015 2394842 -## 5 single women 2016 2437315 -## 6 single women 2017 2477012 -``` - - - -```r -pxdf <- as.data.frame(pxd, column.name.type = "code", variable.value.type = "code") -head(pxdf) -``` - -``` -## Civilstand Kon Tid BE0101N1 -## 1 OG 1 2015 2762601 -## 2 OG 1 2016 2820248 -## 3 OG 1 2017 2870477 -## 4 OG 2 2015 2394842 -## 5 OG 2 2016 2437315 -## 6 OG 2 2017 2477012 -``` - -In a similar way, we can access the raw data as a character matrix with `as.matrix`. 
- - -```r -pxmat <- as.matrix(pxd, column.name.type = "code", variable.value.type = "code") -head(pxmat) -``` - -``` -## Civilstand Kon Tid BE0101N1 -## [1,] "OG" "1" "2015" "2762601" -## [2,] "OG" "1" "2016" "2820248" -## [3,] "OG" "1" "2017" "2870477" -## [4,] "OG" "2" "2015" "2394842" -## [5,] "OG" "2" "2016" "2437315" -## [6,] "OG" "2" "2017" "2477012" -``` - -### Access data footnotes/comments - -In addition to the data, the PXWEB DATA object may also contain comments for the data. This can be accessed using `pxweb_data_comments()` function. - - -```r -pxdc <- pxweb_data_comments(pxd) -pxdc -``` - -``` -## NO PXWEB DATA COMMENTS -``` - -In this case, we did not have any comments. If we have comments we can turn the comments into a data.frame with one comment per row. - - -```r -as.data.frame(pxdc) -``` - -## Citation - -Finally, if we use the data, we can easily create a citation for a `pxweb_data` object using the `pxweb_cite()` function. For full reproducibility, please also cite the package. - - -```r -pxweb_cite(pxd) -``` - -``` -## -## Statistics Sweden (2018). "Population by region, marital status, -## age, sex, observations and year." [Data accessed 2018-12-25 -## 11:58:53 using pxweb R package 0.8.32], . -## -## A BibTeX entry for LaTeX users is -## -## @Misc{, -## title = {Population by region, marital status, age, sex, observations and year}, -## author = {{Statistics Sweden}}, -## organization = {Statistics Sweden}, -## address = {Stockholm, Sweden}, -## year = {2018}, -## url = {http://api.scb.se/OV0104/v1/doris/en/ssd/BE/BE0101/BE0101A/BefolkningNy}, -## note = {[Data accessed 2018-12-25 11:58:53 using pxweb R package 0.8.32]}, -## } -``` - -``` -## -## Kindly cite the pxweb R package as follows: -## -## (C) Mans Magnusson, Markus Kainu, Janne Huovari, and Leo Lahti -## (rOpenGov 2014-2016). pxweb: R tools for PXWEB API. 
URL: -## http://github.com/ropengov/pxweb -## -## A BibTeX entry for LaTeX users is -## -## @Misc{, -## title = {pxweb: R tools for PX-WEB API}, -## author = {Mans Magnusson and Markus Kainu and Janne Huovari and Leo Lahti}, -## year = {2014-2018}, -## } -``` - - - - -### Known issues - -Currently, the `pxweb` package is not thread-safe, and hence it is not safe to runt multiple get functions in parallel or in different R sessions. - -## Licensing - -This work can be freely used, modified and distributed under the open license specified in the [DESCRIPTION file](https://github.com/rOpenGov/pxweb/blob/master/DESCRIPTION). - - -## Session info - -This vignette was created with - - -```r -sessionInfo() -``` - -``` -## R version 3.5.1 (2018-07-02) -## Platform: x86_64-pc-linux-gnu (64-bit) -## Running under: Ubuntu 18.04.1 LTS -## -## Matrix products: default -## BLAS: /home/lei/bin/R-3.5.1/lib/libRblas.so -## LAPACK: /home/lei/bin/R-3.5.1/lib/libRlapack.so -## -## locale: -## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C -## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 -## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 -## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C -## [9] LC_ADDRESS=C LC_TELEPHONE=C -## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C -## -## attached base packages: -## [1] stats graphics grDevices utils datasets methods base -## -## other attached packages: -## [1] pxweb_0.8.32 rmarkdown_1.10 knitr_1.20 -## -## loaded via a namespace (and not attached): -## [1] Rcpp_1.0.0 digest_0.6.18 rprojroot_1.3-2 R6_2.3.0 -## [5] jsonlite_1.5 backports_1.1.2 magrittr_1.5 evaluate_0.12 -## [9] httr_1.3.1 stringi_1.2.4 curl_3.2 checkmate_1.8.5 -## [13] tools_3.5.1 stringr_1.3.1 yaml_2.2.0 compiler_3.5.1 -## [17] htmltools_0.3.6 -``` From 75a4fe43105cc2032595f3d563a3dba75b206ddc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Magnusson?= Date: Thu, 18 May 2023 13:28:28 +0200 Subject: [PATCH 12/13] Updates from Overleaf --- paper/magnusson-kainu-lahti.tex | 2 +- 
tests_bash/pxweb.sh | 0 2 files changed, 1 insertion(+), 1 deletion(-) mode change 100755 => 100644 tests_bash/pxweb.sh diff --git a/paper/magnusson-kainu-lahti.tex b/paper/magnusson-kainu-lahti.tex index c1bdc4e3..0fbdbbaf 100644 --- a/paper/magnusson-kainu-lahti.tex +++ b/paper/magnusson-kainu-lahti.tex @@ -1,5 +1,5 @@ \title{Opening Up Official Statistics with the \CRANpkg{pxweb} Package} -\author{by Måns Magnusson, Leo Lahti} +\author{by Måns Magnusson, Pyry, Leo Lahti} % Other authors? % Janne Huovari, many commits diff --git a/tests_bash/pxweb.sh b/tests_bash/pxweb.sh old mode 100755 new mode 100644 From 3a29d40d9af607cbb8daf298109cfe9f1c261f9e Mon Sep 17 00:00:00 2001 From: Alexander Krabbe Date: Wed, 11 Oct 2023 09:38:07 -0200 Subject: [PATCH 13/13] updated correct adress for Statistics Greenland --- inst/extdata/api.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/extdata/api.json b/inst/extdata/api.json index 625cc117..41bb2a9a 100644 --- a/inst/extdata/api.json +++ b/inst/extdata/api.json @@ -173,7 +173,7 @@ "description" : "Statbank Greenland", "citation" : { "organization" : "Statistics Greenland", - "address" : "Nuuk, Denmark"}, + "address" : "Nuuk, Greenland"}, "url" : "https://bank.stat.gl/api/[version]/[lang]", "version": ["v1"], "lang": ["en","kl","da"],