diff --git a/Makefile b/Makefile
index 9ff983a..22bf0f0 100644
--- a/Makefile
+++ b/Makefile
@@ -24,11 +24,11 @@ format:
test:
env/bin/pytest .
-docs:
+doc:
env/bin/sphinx-build -b html docs docs/_build
env/bin/python -m webbrowser -t "docs/_build/index.html"
-clean_docs:
+clean_doc:
rm -rf docs/_build
clean_build:
diff --git a/docs/getting_started_with_epidatpy.rst b/docs/getting_started_with_epidatpy.rst
new file mode 100644
index 0000000..a020326
--- /dev/null
+++ b/docs/getting_started_with_epidatpy.rst
@@ -0,0 +1,201 @@
+Getting started with epidatpy
+=============================
+
+The epidatpy package provides access to all the endpoints of the `Delphi Epidata
+API <https://cmu-delphi.github.io/delphi-epidata/>`_, and can be used to make
+requests for specific signals on specific dates and in select geographic
+regions.
+
+Setup
+-----
+
+**Installation**
+
+You can install the stable version of this package from PyPI:
+
+>>> pip install epidatpy
+
+Or if you want the development version, install from GitHub:
+
+>>> pip install -e "git+https://github.com/cmu-delphi/epidatpy.git#egg=epidatpy"
+
+**API Keys**
+
+The Delphi API requires a (free) API key for full functionality. While most
+endpoints are available without one, there are
+`limits on API usage for anonymous users <https://cmu-delphi.github.io/delphi-epidata/api/api_keys.html>`_,
+including a rate limit.
+
+To generate your key,
+`register for a pseudo-anonymous account <https://api.delphi.cmu.edu/epidata/admin/registration_form>`_.
+
+*Note* that private endpoints (i.e. those prefixed with ``pvt_``) require a
+separate key that needs to be passed as an argument. These endpoints require
+specific data use agreements to access.
+
+Basic Usage
+-----------
+
+Fetching data from the Delphi Epidata API is simple. Suppose we are
+interested in the ``covidcast``
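+`endpoint <https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html>`_,
+which provides access to a
+`wide range of data <https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html>`_
+on COVID-19. Reviewing the endpoint documentation, we see that we
+`need to specify <https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html#constructing-api-queries>`_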
+a data source name, a signal name, a geographic level, a time resolution, and
+the location and times of interest.
+
+The ``pub_covidcast`` function lets us access the ``covidcast`` endpoint:
+
+>>> from epidatpy import EpiDataContext, EpiRange
+>>> epidata = EpiDataContext(use_cache=True, cache_max_age_days=1)
+>>> # Obtain the most up-to-date version of the smoothed covid-like illness (CLI)
+>>> # signal from the COVID-19 Trends and Impact survey for the US
+>>> apicall = epidata.pub_covidcast(
+... data_source = "fb-survey",
+... signals = "smoothed_cli",
+... geo_type = "nation",
+... time_type = "day",
+... geo_values = "us",
+... time_values = EpiRange(20210405, 20210410))
+EpiDataCall(endpoint=covidcast/, params={'data_source': 'fb-survey', 'signals': 'smoothed_cli', 'geo_type': 'nation', 'time_type': 'day', 'geo_values': 'us', 'time_values': '20210405-20210410'})
+
+``pub_covidcast`` returns an ``EpiDataCall``, which can be further converted into different output formats - such as a Pandas DataFrame:
+
+>>> data = apicall.df()
+>>> data.head()
+ source signal geo_type geo_value time_type time_value issue lag value stderr sample_size direction missing_value missing_stderr missing_sample_size
+0 fb-survey smoothed_cli nation us day 2021-04-05 2021-04-10 5 0.675832 0.014826 244046 0 0 0
+1 fb-survey smoothed_cli nation us day 2021-04-06 2021-04-11 5 0.690687 0.014998 242979 0 0 0
+2 fb-survey smoothed_cli nation us day 2021-04-07 2021-04-12 5 0.690664 0.015023 242153 0 0 0
+3 fb-survey smoothed_cli nation us day 2021-04-08 2021-04-13 5 0.706503 0.015236 241380 0 0 0
+4 fb-survey smoothed_cli nation us day 2021-04-09 2021-04-14 5 0.724306 0.015466 240256 0 0 0
+
+Each row represents one observation in the US on one
+day. The geographical abbreviation is given in the ``geo_value`` column, the date in
+the ``time_value`` column. Here ``value`` is the requested signal -- in this
+case, the smoothed estimate of the percentage of people with COVID-like
+illness, based on the symptom surveys, and ``stderr`` is its standard error.
+
+The Epidata API makes signals available at different geographic levels,
+depending on the endpoint. To request signals for all states instead of the
+entire US, we use the ``geo_type`` argument paired with ``*`` for the
+``geo_values`` argument. (Only some endpoints allow for the use of ``*`` to
+access data at all locations. Check the help for a given endpoint to see if
+it supports ``*``.)
+
+>>> apicall = epidata.pub_covidcast(
+... data_source = "fb-survey",
+... signals = "smoothed_cli",
+... geo_type = "state",
+... time_type = "day",
+... geo_values = "*",
+... time_values = EpiRange(20210405, 20210410))
+EpiDataCall(endpoint=covidcast/, params={'data_source': 'fb-survey', 'signals': 'smoothed_cli', 'geo_type': 'state', 'time_type': 'day', 'geo_values': '*', 'time_values': '20210405-20210410'})
+>>> apicall.df().head()
+ source signal geo_type geo_value time_type time_value issue lag value stderr sample_size direction missing_value missing_stderr missing_sample_size
+0 fb-survey smoothed_cli state ak day 2021-04-05 2021-04-10 5 0.736883 0.275805 720.0 0 0 0
+1 fb-survey smoothed_cli state al day 2021-04-05 2021-04-10 5 0.796627 0.137734 3332.1117 0 0 0
+2 fb-survey smoothed_cli state ar day 2021-04-05 2021-04-10 5 0.561916 0.131108 2354.9911 0 0 0
+3 fb-survey smoothed_cli state az day 2021-04-05 2021-04-10 5 0.62283 0.105354 4742.2778 0 0 0
+4 fb-survey smoothed_cli state ca day 2021-04-05 2021-04-10 5 0.444169 0.040576 21382.3806 0 0 0
+
+We can fetch a subset of states by listing out the desired locations:
+
+>>> apicall = epidata.pub_covidcast(
+... data_source = "fb-survey",
+... signals = "smoothed_cli",
+... geo_type = "state",
+... time_type = "day",
+... geo_values = "pa,ca,fl",
+... time_values = EpiRange(20210405, 20210410))
+EpiDataCall(endpoint=covidcast/, params={'data_source': 'fb-survey', 'signals': 'smoothed_cli', 'geo_type': 'state', 'time_type': 'day', 'geo_values': 'pa,ca,fl', 'time_values': '20210405-20210410'})
+>>> apicall.df().head()
+ source signal geo_type geo_value time_type time_value issue lag value stderr sample_size direction missing_value missing_stderr missing_sample_size
+0 fb-survey smoothed_cli state ca day 2021-04-05 2021-04-10 5 0.444169 0.040576 21382.3806 0 0 0
+1 fb-survey smoothed_cli state fl day 2021-04-05 2021-04-10 5 0.690415 0.058204 16099.0005 0 0 0
+2 fb-survey smoothed_cli state pa day 2021-04-05 2021-04-10 5 0.715758 0.072999 10894.0057 0 0 0
+3 fb-survey smoothed_cli state ca day 2021-04-06 2021-04-11 5 0.45604 0.04127 21176.3902 0 0 0
+4 fb-survey smoothed_cli state fl day 2021-04-06 2021-04-11 5 0.730692 0.059907 15975.0007 0 0 0
+
+We can also request data for a single location at a time, via the ``geo_values`` argument.
+
+>>> apicall = epidata.pub_covidcast(
+... data_source = "fb-survey",
+... signals = "smoothed_cli",
+... geo_type = "state",
+... time_type = "day",
+... geo_values = "pa",
+... time_values = EpiRange(20210405, 20210410))
+EpiDataCall(endpoint=covidcast/, params={'data_source': 'fb-survey', 'signals': 'smoothed_cli', 'geo_type': 'state', 'time_type': 'day', 'geo_values': 'pa', 'time_values': '20210405-20210410'})
+>>> apicall.df().head()
+ source signal geo_type geo_value time_type time_value issue lag value stderr sample_size direction missing_value missing_stderr missing_sample_size
+0 fb-survey smoothed_cli state pa day 2021-04-05 2021-04-10 5 0.715758 0.072999 10894.0057 0 0 0
+1 fb-survey smoothed_cli state pa day 2021-04-06 2021-04-11 5 0.69321 0.070869 10862.0055 0 0 0
+2 fb-survey smoothed_cli state pa day 2021-04-07 2021-04-12 5 0.685934 0.070654 10790.0054 0 0 0
+3 fb-survey smoothed_cli state pa day 2021-04-08 2021-04-13 5 0.681511 0.071394 10731.0044 0 0 0
+4 fb-survey smoothed_cli state pa day 2021-04-09 2021-04-14 5 0.709416 0.072162 10590.0049 0 0 0
+
+Getting versioned data
+----------------------
+
+The Epidata API stores a historical record of all data, including corrections
+and updates, which is particularly useful for accurately backtesting
+forecasting models. To fetch versioned data, we can use the ``as_of``
+argument:
+
+>>> apicall = epidata.pub_covidcast(
+... data_source = "fb-survey",
+... signals = "smoothed_cli",
+... geo_type = "state",
+... time_type = "day",
+... geo_values = "pa,ca,fl",
+... time_values = EpiRange(20210405, 20210410),
+... as_of = "2021-06-01")
+
+Plotting
+--------
+
+Because the output data is a standard Pandas DataFrame, we can easily plot
+it using any of the available Python libraries:
+
+>>> data.plot(x="time_value", y="value", title="Smoothed CLI from Facebook Survey", xlabel="Date", ylabel="CLI")
+
+.. image:: images/Figure_1.png
+ :width: 800
+ :alt: Smoothed CLI from Facebook Survey
+
+Finding locations of interest
+-----------------------------
+
+Most data is only available for the US. Select endpoints report other countries at the national and/or regional levels. Endpoint descriptions explicitly state when they cover non-US locations.
+
+For endpoints that report US data, see the
+`geographic coding documentation <https://cmu-delphi.github.io/delphi-epidata/api/covidcast_geography.html>`_
+for available geographic levels.
+
+International data
+------------------
+
+International data is available via
+
+- ``pub_dengue_nowcast`` (North and South America)
+- ``pub_ecdc_ili`` (Europe)
+- ``pub_kcdc_ili`` (Korea)
+- ``pub_nidss_dengue`` (Taiwan)
+- ``pub_nidss_flu`` (Taiwan)
+- ``pub_paho_dengue`` (North and South America)
+- ``pvt_dengue_sensors`` (North and South America)
+
+Finding data sources and signals of interest
+--------------------------------------------
+
+Above we used data from `Delphi’s symptom surveys <https://delphi.cmu.edu/covid19/ctis/>`_,
+but the Epidata API includes numerous data streams: medical claims data, cases
+and deaths, mobility, and many others. This can make it a challenge to find
+the data stream that you are most interested in.
+
+The Epidata documentation lists all the data sources and signals available
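+through the API for `COVID-19 <https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html>`_
+and for `other diseases <https://cmu-delphi.github.io/delphi-epidata/api/README.html#source-specific-parameters>`_.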
diff --git a/docs/images/Figure_1.png b/docs/images/Figure_1.png
new file mode 100644
index 0000000..44fd250
Binary files /dev/null and b/docs/images/Figure_1.png differ
diff --git a/docs/index.rst b/docs/index.rst
index a046b74..c7cfd5e 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -74,3 +74,5 @@ Contents
epidatpy
+ getting_started_with_epidatpy
+
diff --git a/docs_smoke_test.py b/docs_smoke_test.py
new file mode 100644
index 0000000..830754c
--- /dev/null
+++ b/docs_smoke_test.py
@@ -0,0 +1,75 @@
+from epidatpy import CovidcastEpidata, EpiDataContext, EpiRange  # NOTE(review): CovidcastEpidata is not used below
+import pandas as pd
+# Smoke script: replays the calls shown in docs/getting_started_with_epidatpy.rst and prints their results.
+epidata = EpiDataContext(use_cache=True, cache_max_age_days=1)  # one shared context for every call below
+apicall = epidata.pub_covidcast(  # example 1: national level, geo_values "us"
+ data_source = "fb-survey",
+ signals = "smoothed_cli",
+ geo_type = "nation",
+ time_type = "day",
+ geo_values = "us",
+ time_values = EpiRange(20210405, 20210410))  # presumably YYYYMMDD integers -- confirm against EpiRange
+print(apicall)  # prints the call object itself, before conversion to a DataFrame
+
+pd.set_option('display.max_columns', None)  # None removes the column limit when printing frames
+pd.set_option('display.max_rows', None)  # None removes the row limit when printing frames
+pd.set_option('display.width', 1000)  # wide display so rows are not wrapped across lines
+
+data = apicall.df()  # convert the call into a DataFrame; reused for the plot at the end
+print(data.head())
+
+apicall2 = epidata.pub_covidcast(  # example 2: state level with the "*" wildcard for geo_values
+ data_source = "fb-survey",
+ signals = "smoothed_cli",
+ geo_type = "state",
+ time_type = "day",
+ geo_values = "*",
+ time_values = EpiRange(20210405, 20210410))
+print(apicall2)
+
+data2 = apicall2.df()
+print(data2.head())
+
+apicall3 = epidata.pub_covidcast(  # example 3: comma-separated subset of states
+ data_source = "fb-survey",
+ signals = "smoothed_cli",
+ geo_type = "state",
+ time_type = "day",
+ geo_values = "pa,ca,fl",
+ time_values = EpiRange(20210405, 20210410))
+print(apicall3)
+
+data3 = apicall3.df()
+print(data3.head())
+
+apicall4 = epidata.pub_covidcast(  # example 4: a single state
+ data_source = "fb-survey",
+ signals = "smoothed_cli",
+ geo_type = "state",
+ time_type = "day",
+ geo_values = "pa",
+ time_values = EpiRange(20210405, 20210410))
+print(apicall4)
+
+data4 = apicall4.df()
+print(data4.head())
+
+apicall5 = epidata.pub_covidcast(  # example 5: same single-state query, pinned to a version via as_of
+ data_source = "fb-survey",
+ signals = "smoothed_cli",
+ geo_type = "state",
+ time_type = "day",
+ geo_values = "pa",
+ time_values = EpiRange(20210405, 20210410),
+ as_of = "2021-06-01")  # NOTE(review): string date here vs. integer dates in EpiRange -- confirm accepted format
+print(apicall5)
+
+data5 = apicall5.df()
+print(data5.head())
+
+# requires matplotlib
+import matplotlib.pyplot as plt  # imported late so the fetch examples above run without matplotlib installed
+
+data.plot(x="time_value", y="value", title="Smoothed CLI from Facebook Survey", xlabel="Date", ylabel="CLI")  # plots the national frame from example 1
+plt.subplots_adjust(bottom=.2)  # extra bottom margin, presumably so the date tick labels are not clipped
+plt.show()  # opens an interactive window when a GUI backend is active
\ No newline at end of file
diff --git a/epidatpy/_covidcast.py b/epidatpy/_covidcast.py
index 4b6557e..f24d8c2 100644
--- a/epidatpy/_covidcast.py
+++ b/epidatpy/_covidcast.py
@@ -72,7 +72,7 @@ def define_covidcast_fields() -> List[EpidataFieldInfo]:
EpidataFieldInfo("lag", EpidataFieldType.int),
EpidataFieldInfo("value", EpidataFieldType.float),
EpidataFieldInfo("stderr", EpidataFieldType.float),
- EpidataFieldInfo("sample_size", EpidataFieldType.int),
+ EpidataFieldInfo("sample_size", EpidataFieldType.text),
EpidataFieldInfo("direction", EpidataFieldType.float),
EpidataFieldInfo("missing_value", EpidataFieldType.int),
EpidataFieldInfo("missing_stderr", EpidataFieldType.int),