Skip to content

Commit

Permalink
Release 0.39.1
Browse files Browse the repository at this point in the history
See release notes.
  • Loading branch information
nmacholl authored Aug 13, 2024
2 parents aee1b09 + a10edad commit f0dbdc9
Show file tree
Hide file tree
Showing 14 changed files with 480 additions and 83 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## 0.39.1 - 2024-08-13

#### Bug fixes
- Fixed an issue where a symbol list which contained a `None` would produce a convoluted exception

## 0.39.0 - 2024-07-30

#### Enhancements
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ The library is fully compatible with the latest distribution of Anaconda 3.8 and
The minimum dependencies as found in the `pyproject.toml` are also listed below:
- python = "^3.8"
- aiohttp = "^3.8.3"
- databento-dbn = "0.19.1"
- databento-dbn = "0.20.0"
- numpy= ">=1.23.5"
- pandas = ">=1.5.3"
- pip-system-certs = ">=4.0" (Windows only)
Expand Down
82 changes: 56 additions & 26 deletions databento/common/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,60 +58,90 @@ def optional_values_list_to_string(
return values_list_to_string(values)


def optional_string_to_list(
    value: Iterable[str] | str | None,
) -> Iterable[str] | list[str] | None:
    """
    Convert a comma-separated string into a list of strings, or return the
    original input if not a string.

    Parameters
    ----------
    value : iterable of str or str, optional
        The input value to be parsed.

    Returns
    -------
    Iterable[str] | list[str] | `None`

    """
    # Trim surrounding whitespace and dangling commas before splitting;
    # non-string inputs (including None) pass through unchanged.
    return value.strip().strip(",").split(",") if isinstance(value, str) else value


def optional_symbols_list_to_list(
    symbols: Iterable[str | int | Integral] | str | int | Integral | None,
    stype_in: SType,
) -> list[str]:
    """
    Create a list from an optional symbols string or iterable of symbol
    strings. If symbols is `None`, this function returns `[ALL_SYMBOLS]`.

    Parameters
    ----------
    symbols : Iterable of str or int or Number, or str or int or Number, optional
        The symbols to concatenate; or `None`.
    stype_in : SType
        The input symbology type for the request.

    Returns
    -------
    list[str]

    See Also
    --------
    symbols_list_to_list

    """
    # None is the sentinel for "request all symbols"; every other input is
    # delegated to the type-dispatched symbols_list_to_list.
    if symbols is None:
        return [ALL_SYMBOLS]
    return symbols_list_to_list(symbols, stype_in)


@singledispatch
def symbols_list_to_list(
    symbols: Iterable[str | int | Integral] | str | int | Integral,
    stype_in: SType,
) -> list[str]:
    """
    Create a list from a symbols string or iterable of symbol strings.

    Parameters
    ----------
    symbols : Iterable of str or int or Number, or str or int or Number
        The symbols to concatenate.
    stype_in : SType
        The input symbology type for the request.

    Returns
    -------
    list[str]

    See Also
    --------
    optional_symbols_list_to_list

    """
    # Default dispatch target: any type without a registered handler
    # (str, Integral, Iterable are registered below) is rejected here.
    raise TypeError(
        f"`{symbols}` is not a valid type for symbol input; "
        "allowed types are Iterable[str | int], str, and int.",
    )


@optional_symbols_list_to_list.register(cls=Integral)
@symbols_list_to_list.register(cls=Integral)
def _(symbols: Integral, stype_in: SType) -> list[str]:
"""
Dispatch method for optional_symbols_list_to_list. Handles integral types,
alerting when an integer is given for STypes that expect strings.
See Also
--------
optional_symbols_list_to_list
symbols_list_to_list
"""
if stype_in == SType.INSTRUMENT_ID:
Expand All @@ -122,15 +152,15 @@ def _(symbols: Integral, stype_in: SType) -> list[str]:
)


@optional_symbols_list_to_list.register(cls=str)
@symbols_list_to_list.register(cls=str)
def _(symbols: str, stype_in: SType) -> list[str]:
"""
Dispatch method for optional_symbols_list_to_list. Handles str, splitting
on commas and validating smart symbology.
See Also
--------
optional_symbols_list_to_list
symbols_list_to_list
"""
if not symbols:
Expand All @@ -147,19 +177,19 @@ def _(symbols: str, stype_in: SType) -> list[str]:
return list(map(str.upper, map(str.strip, symbol_list)))


@optional_symbols_list_to_list.register(cls=Iterable)
@symbols_list_to_list.register(cls=Iterable)
def _(symbols: Iterable[Any], stype_in: SType) -> list[str]:
"""
Dispatch method for optional_symbols_list_to_list. Handles Iterables by
dispatching the individual members.
See Also
--------
optional_symbols_list_to_list
symbols_list_to_list
"""
symbol_to_list = partial(
optional_symbols_list_to_list,
symbols_list_to_list,
stype_in=stype_in,
)
aggregated: list[str] = []
Expand Down
4 changes: 2 additions & 2 deletions databento/historical/api/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
from databento.common.http import check_http_error
from databento.common.parsing import datetime_to_string
from databento.common.parsing import optional_datetime_to_string
from databento.common.parsing import optional_symbols_list_to_list
from databento.common.parsing import optional_values_list_to_string
from databento.common.parsing import symbols_list_to_list
from databento.common.publishers import Dataset
from databento.common.types import Default
from databento.common.validation import validate_enum
Expand Down Expand Up @@ -147,7 +147,7 @@ def submit_job(
"""
stype_in_valid = validate_enum(stype_in, SType, "stype_in")
symbols_list = optional_symbols_list_to_list(symbols, stype_in_valid)
symbols_list = symbols_list_to_list(symbols, stype_in_valid)
data: dict[str, object | None] = {
"dataset": validate_semantic_string(dataset, "dataset"),
"start": datetime_to_string(start),
Expand Down
4 changes: 2 additions & 2 deletions databento/live/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from databento.common.error import BentoError
from databento.common.iterator import chunk
from databento.common.parsing import optional_datetime_to_unix_nanoseconds
from databento.common.parsing import optional_symbols_list_to_list
from databento.common.parsing import symbols_list_to_list
from databento.common.publishers import Dataset
from databento.common.types import DBNRecord
from databento.common.validation import validate_enum
Expand Down Expand Up @@ -310,7 +310,7 @@ def subscribe(
)

stype_in_valid = validate_enum(stype_in, SType, "stype_in")
symbols_list = optional_symbols_list_to_list(symbols, stype_in_valid)
symbols_list = symbols_list_to_list(symbols, stype_in_valid)

subscriptions: list[SubscriptionRequest] = []
for batch in chunk(symbols_list, SYMBOL_LIST_BATCH_SIZE):
Expand Down
98 changes: 77 additions & 21 deletions databento/reference/api/corporate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,10 @@
from databento.common.http import BentoHttpAPI
from databento.common.parsing import convert_date_columns
from databento.common.parsing import convert_datetime_columns
from databento.common.parsing import datetime_to_date_string
from databento.common.parsing import optional_date_to_string
from databento.common.parsing import datetime_to_string
from databento.common.parsing import optional_datetime_to_string
from databento.common.parsing import optional_string_to_list
from databento.common.parsing import optional_symbols_list_to_list
from databento.common.publishers import Dataset
from databento.common.validation import validate_semantic_string


class CorporateActionsHttpAPI(BentoHttpAPI):
Expand All @@ -31,12 +30,16 @@ def __init__(self, key: str, gateway: str) -> None:

def get_range(
self,
start_date: date | str,
end_date: date | str | None = None,
dataset: Dataset | str | None = None,
start: pd.Timestamp | date | str | int,
end: pd.Timestamp | date | str | int | None = None,
index: str = "event_date",
symbols: Iterable[str] | str | None = None,
stype_in: SType | str = "raw_symbol",
events: Iterable[str] | str | None = None,
countries: Iterable[str] | str | None = None,
security_types: Iterable[str] | str | None = None,
flatten: bool = True,
pit: bool = False,
) -> pd.DataFrame:
"""
Request a new corporate actions time series from Databento.
Expand All @@ -45,12 +48,17 @@ def get_range(
Parameters
----------
start_date : date or str
The start date (UTC) of the request time range (inclusive).
end_date : date or str, optional
The end date (UTC) of the request time range (exclusive).
dataset : Dataset or str, optional
The dataset code (string identifier) for the request.
start : pd.Timestamp or date or str or int
The start datetime of the request time range (inclusive).
Assumes UTC as timezone unless passed a tz-aware object.
If an integer is passed, then this represents nanoseconds since the UNIX epoch.
end : pd.Timestamp or date or str or int, optional
The end datetime of the request time range (exclusive).
Assumes UTC as timezone unless passed a tz-aware object.
If an integer is passed, then this represents nanoseconds since the UNIX epoch.
index : str, default 'event_date'
The index column to filter the `start` and `end` time range on.
Use any of 'event_date', 'ex_date' or 'ts_record'.
symbols : Iterable[str] or str, optional
The symbols to filter for. Takes up to 2,000 symbols per request.
If more than 1 symbol is specified, the data is merged and sorted by time.
Expand All @@ -62,28 +70,48 @@ def get_range(
events : Iterable[str] or str, optional
The event types to filter for.
Takes any number of event types per request.
If not specified then will be for **all** event types.
If not specified then will select **all** event types by default.
See [EVENT](https://databento.com/docs/standards-and-conventions/reference-data-enums#event) enum.
countries : Iterable[str] or str, optional
The listing countries to filter for.
Takes any number of two letter ISO 3166-1 alpha-2 country codes per request.
If not specified then will select **all** listing countries by default.
See [CNTRY](https://databento.com/docs/standards-and-conventions/reference-data-enums#cntry) enum.
security_types : Iterable[str] or str, optional
The security types to filter for.
Takes any number of security types per request.
If not specified then will select **all** security types by default.
See [SECTYPE](https://databento.com/docs/standards-and-conventions/reference-data-enums#sectype) enum.
flatten : bool, default True
If nested JSON objects within the `date_info`, `rate_info`, and `event_info` fields
should be flattened into separate columns in the resulting DataFrame.
pit : bool, default False
Determines whether to retain all historical records or only the latest records.
If True, all historical records for each `event_unique_id` will be retained, preserving
the complete point-in-time history.
If False (default), the DataFrame will include only the most recent record for each
`event_unique_id` based on the `ts_record` timestamp.
Returns
-------
pandas.DataFrame
The data converted into a data frame.
"""
dataset = validate_semantic_string(dataset, "dataset") if dataset is not None else None
symbols_list = optional_symbols_list_to_list(symbols, SType.RAW_SYMBOL)

if isinstance(events, str):
events = events.strip().strip(",").split(",")
events = optional_string_to_list(events)
countries = optional_string_to_list(countries)
security_types = optional_string_to_list(security_types)

data: dict[str, object | None] = {
"start_date": datetime_to_date_string(start_date),
"end_date": optional_date_to_string(end_date),
"dataset": dataset,
"start": datetime_to_string(start),
"end": optional_datetime_to_string(end),
"index": index,
"symbols": ",".join(symbols_list),
"stype_in": stype_in,
"events": ",".join(events) if events else None,
"countries": ",".join(countries) if countries else None,
"security_types": ",".join(security_types) if security_types else None,
}

response = self._post(
Expand All @@ -93,7 +121,35 @@ def get_range(
)

df = pd.read_json(StringIO(response.text), lines=True)
if df.empty:
return df

convert_datetime_columns(df, CORPORATE_ACTIONS_DATETIME_COLUMNS)
convert_date_columns(df, CORPORATE_ACTIONS_DATE_COLUMNS)

if flatten:
# Normalize the dynamic JSON fields
date_info_normalized = pd.json_normalize(df["date_info"]).set_index(df.index)
rate_info_normalized = pd.json_normalize(df["rate_info"]).set_index(df.index)
event_info_normalized = pd.json_normalize(df["event_info"]).set_index(df.index)

# Merge normalized columns
df = df.merge(date_info_normalized, left_index=True, right_index=True)
df = df.merge(rate_info_normalized, left_index=True, right_index=True)
df = df.merge(event_info_normalized, left_index=True, right_index=True)

# Drop the original JSON columns
df.drop(columns=["date_info", "rate_info", "event_info"], inplace=True)

if pit:
df.set_index(index, inplace=True)
df.sort_index(inplace=True)
else:
# Filter for the latest record of each unique event
df.sort_values("ts_record", inplace=True)
df = df.groupby("event_unique_id").agg("last").reset_index()
df.set_index(index, inplace=True)
if index != "ts_record":
df.sort_index(inplace=True)

return df
2 changes: 1 addition & 1 deletion databento/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.39.0"
__version__ = "0.39.1"
Loading

0 comments on commit f0dbdc9

Please sign in to comment.