From d5e785af840255a57db4a168d15d091e27dfaccd Mon Sep 17 00:00:00 2001 From: naga Date: Wed, 21 Feb 2024 19:10:32 -0500 Subject: [PATCH 1/2] Add DataSource, SimAsset, & TimeSequence - Add DataSource, SimAsset, and TimeSequence template classes - Add SimAssets: OnChainAsset, OnChainAssetPair - Add TimeSequence: DateTimeSequence - Add DataSources: CoinGeckoPriceVolumeSource, CsvDataSource - Add get_asset_data and get_pool_data pipeline convenience functions - Add pool_data.get_pool_assets() - Incorporate new template classes into relevant function signatures - Make CoinGecko data processing more robust - Fix error in unit conversion for CoinGecko volume data --- .github/workflows/CI.yml | 3 + .github/workflows/make_test_data.yml | 3 + .github/workflows/volume_limited_arb.yml | 3 + ...20240221_175629_nagakingg_data_sources.rst | 34 +++ curvesim/exceptions/__init__.py | 15 ++ .../iterators/price_samplers/price_volume.py | 93 ++++---- curvesim/metrics/base.py | 7 +- curvesim/metrics/metrics.py | 4 +- curvesim/network/coingecko.py | 133 ++---------- curvesim/pipelines/common/__init__.py | 4 + curvesim/pipelines/common/get_asset_data.py | 26 +++ curvesim/pipelines/common/get_pool_data.py | 54 +++++ curvesim/pipelines/simple/__init__.py | 48 ++--- curvesim/pipelines/simple/__main__.py | 2 +- .../pipelines/vol_limited_arb/__init__.py | 46 ++-- curvesim/pool/sim_interface/cryptoswap.py | 15 -- curvesim/pool/sim_interface/metapool.py | 18 -- curvesim/pool/sim_interface/pool.py | 15 -- curvesim/pool_data/__init__.py | 3 +- curvesim/pool_data/queries/metadata.py | 2 - curvesim/pool_data/queries/pool_assets.py | 51 +++++ curvesim/pool_data/queries/pool_volume.py | 45 ++-- curvesim/price_data/__init__.py | 84 ++++---- curvesim/price_data/data_sources/__init__.py | 15 ++ curvesim/price_data/data_sources/coingecko.py | 133 ++++++++++++ curvesim/price_data/data_sources/local.py | 56 +++++ curvesim/price_data/sources.py | 36 ---- curvesim/templates/__init__.py | 23 +- 
curvesim/templates/data_source.py | 107 ++++++++++ curvesim/templates/sim_asset.py | 65 ++++++ curvesim/templates/sim_assets.py | 18 -- curvesim/templates/sim_pool.py | 13 -- curvesim/templates/time_sequence.py | 103 +++++++++ requirements.txt | 142 +++++++------ requirements_dev.txt | 3 + test/__init__.py | 3 + test/ci.py | 22 +- test/integration/test_get_asset_data.py | 44 ++++ test/integration/test_get_pool_assets.py | 39 ++++ ..._metadata.py => test_get_pool_metadata.py} | 4 +- test/integration/test_get_pool_volume.py | 8 +- test/pool_metadata.py | 194 +++++++++++++++++ test/simple_ci.py | 15 +- test/unit/test_metrics.py | 1 - test/unit/test_pool_metadata.py | 200 +----------------- 45 files changed, 1254 insertions(+), 698 deletions(-) create mode 100644 changelog.d/20240221_175629_nagakingg_data_sources.rst create mode 100644 curvesim/pipelines/common/get_asset_data.py create mode 100644 curvesim/pipelines/common/get_pool_data.py create mode 100644 curvesim/pool_data/queries/pool_assets.py create mode 100644 curvesim/price_data/data_sources/__init__.py create mode 100644 curvesim/price_data/data_sources/coingecko.py create mode 100644 curvesim/price_data/data_sources/local.py delete mode 100644 curvesim/price_data/sources.py create mode 100644 curvesim/templates/data_source.py create mode 100644 curvesim/templates/sim_asset.py delete mode 100644 curvesim/templates/sim_assets.py create mode 100644 curvesim/templates/time_sequence.py create mode 100644 test/integration/test_get_asset_data.py create mode 100644 test/integration/test_get_pool_assets.py rename test/integration/{test_pool_metadata.py => test_get_pool_metadata.py} (99%) create mode 100644 test/pool_metadata.py diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 32bf44c85..fc0477cec 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -8,6 +8,9 @@ on: pull_request: branches: [ main, develop ] +env: + ALCHEMY_API_KEY: ${{ secrets.ALCHEMY_API_KEY }} + jobs: 
generate: name: Generate comparison results diff --git a/.github/workflows/make_test_data.yml b/.github/workflows/make_test_data.yml index d04b57d70..58cb2c219 100644 --- a/.github/workflows/make_test_data.yml +++ b/.github/workflows/make_test_data.yml @@ -2,6 +2,9 @@ name: Generate comparison results for CI test on: workflow_dispatch +env: + ALCHEMY_API_KEY: ${{ secrets.ALCHEMY_API_KEY }} + jobs: build: runs-on: ubuntu-latest diff --git a/.github/workflows/volume_limited_arb.yml b/.github/workflows/volume_limited_arb.yml index 9ac4cce0b..cc2afaacc 100644 --- a/.github/workflows/volume_limited_arb.yml +++ b/.github/workflows/volume_limited_arb.yml @@ -4,6 +4,9 @@ name: Run volume limited arbitrage pipeline # events but only for the main and develop branches. on: workflow_dispatch +env: + ALCHEMY_API_KEY: ${{ secrets.ALCHEMY_API_KEY }} + jobs: build: runs-on: ubuntu-latest diff --git a/changelog.d/20240221_175629_nagakingg_data_sources.rst b/changelog.d/20240221_175629_nagakingg_data_sources.rst new file mode 100644 index 000000000..1bb7ca57a --- /dev/null +++ b/changelog.d/20240221_175629_nagakingg_data_sources.rst @@ -0,0 +1,34 @@ +Removed +------- +- Removed SimAssets type and SimPool.assets property +- Removed coin_names property from PricingMetrics +- Removed Coingecko pool_prices and coin_ids_from_addresses + + +Added +----- +- Added DataSource, SimAsset, and TimeSequence template classes +- Added OnChainAsset and OnChainAssetPair as common SimAsset types +- Added DateTimeSequence for TimeSequences of datetimes +- Added get_asset_data() and get_pool_data() convenience functions to pipelines.common +- Added pool_data.get_pool_assets() +- Added CoinGeckoPriceVolumeSource and CsvDataSource in price_data.data_sources + +Changed +------- +- Moved price/volume data retrieval outside of PriceVolume iterator +- Made explicit price and volume properties for PriceVolume iterator +- Changed Coingecko price data resampling to hourly samples with 10 minute tolerance +- 
Moved Coingecko resampling and DataFrame processing into CoinGeckoPriceVolumeSource +- Unified simple and volume-limited arbitrage pipeline interfaces +- Replaced pipeline arguments 'end_ts' & 'days' with 'time_sequence' & 'pool_ts' +- Renamed price_data.get() to price_data.get_price_data() +- Changed get_price_data() interface to use SimAsset, TimeSequence, and DataSource +- Replaced get_pool_volume() 'days' and 'end' arguments with 'start' and 'end' + +Fixed +----- +- Fixed error in unit conversion for CoinGecko volume data. + Bug was introduced in commit df79810. + + diff --git a/curvesim/exceptions/__init__.py b/curvesim/exceptions/__init__.py index 6ad609ccb..46e3490b7 100644 --- a/curvesim/exceptions/__init__.py +++ b/curvesim/exceptions/__init__.py @@ -1,3 +1,6 @@ +"""Contains various exceptions used in curvesim.""" + + class CurvesimException(Exception): """Base exception class""" @@ -27,6 +30,10 @@ def __repr__(self): return f"HttpClientError({self.status}, {self.message}, url={self.url})" +class CurvesimTypeError(CurvesimException, TypeError): + """Raised when an argument is the wrong type.""" + + class CurvesimValueError(CurvesimException, ValueError): """Raised when an argument has an inappropriate value (but the right type).""" @@ -77,3 +84,11 @@ class StateLogError(CurvesimException): class UnregisteredPoolError(StateLogError): """Error raised when a pool type is not recognized by the metrics framework.""" + + +class TimeSequenceError(CurvesimException): + """Error using a TimeSequence object.""" + + +class DataSourceError(CurvesimException): + """Error using a DataSource object.""" diff --git a/curvesim/iterators/price_samplers/price_volume.py b/curvesim/iterators/price_samplers/price_volume.py index b7728d4d5..ba6722f22 100644 --- a/curvesim/iterators/price_samplers/price_volume.py +++ b/curvesim/iterators/price_samplers/price_volume.py @@ -1,7 +1,12 @@ +""" +Contains PriceVolume price sampler and PriceVolumeSample dataclass. 
+""" + from typing import Iterator +from pandas import DataFrame + from curvesim.logging import get_logger -from curvesim.price_data import get from curvesim.templates.price_samplers import PriceSample, PriceSampler from curvesim.utils import dataclass, override @@ -26,48 +31,22 @@ class PriceVolumeSample(PriceSample): class PriceVolume(PriceSampler): """ - An iterator that retrieves price/volume and iterates over timepoints in the data. + Iterates over price and volume data in the provided DataFrame. """ - def __init__( - self, - assets, - *, - days=60, - data_dir="data", - src="coingecko", - end=None, - ): + def __init__(self, data: DataFrame): """ - Retrieves price/volume data and prepares it for iteration. - Parameters ---------- - assets: SimAssets - Object giving the properties of the assets for simulation - (e.g., symbols, addresses, chain) - - days: int, defaults to 60 - Number of days to pull data for. - - data_dir: str, defaults to "data" - Relative path to saved data folder. - - src: str, defaults to "coingecko" - Identifies pricing source: coingecko or local. + data: DataFrame + DataFrame with prices and volumes for each asset pair. + Format should match output of :func:`curvesim.price_data.get_price_data`. + Row indices: datetime.datetime or pandas.Timestamp. + Column indices: MultiIndex with "price" and "volume" level 1 for each tuple + of symbols in level 2. 
""" - prices, volumes, _ = get( - assets.addresses, - chain=assets.chain, - days=days, - data_dir=data_dir, - src=src, - end=end, - ) - - self.prices = prices.set_axis(assets.symbol_pairs, axis="columns") - self.volumes = volumes.set_axis(assets.symbol_pairs, axis="columns") + self.data = data @override def __iter__(self) -> Iterator[PriceVolumeSample]: @@ -76,16 +55,32 @@ def __iter__(self) -> Iterator[PriceVolumeSample]: ------- :class:`PriceVolumeSample` """ - for price_row, volume_row in zip( - self.prices.iterrows(), self.volumes.iterrows() - ): - price_timestamp, prices = price_row - volume_timestamp, volumes = volume_row - assert ( - price_timestamp == volume_timestamp - ), "Price/volume timestamps don't match" - - prices = prices.to_dict() - volumes = volumes.to_dict() - - yield PriceVolumeSample(price_timestamp, prices, volumes) # type:ignore + for row in self.data.iterrows(): + timestamp, row_data = row + + prices = row_data["price"].to_dict() + volumes = row_data["volume"].to_dict() + + yield PriceVolumeSample(timestamp, prices, volumes) # type:ignore + + @property + def prices(self): + """ + Returns price data for all asset pairs. + + Returns + ------- + pandas.DataFrame + """ + return self.data["price"] + + @property + def volumes(self): + """ + Returns volume data for all asset pairs. + + Returns + ------- + pandas.DataFrame + """ + return self.data["volume"] diff --git a/curvesim/metrics/base.py b/curvesim/metrics/base.py index f61e734aa..47d18848b 100644 --- a/curvesim/metrics/base.py +++ b/curvesim/metrics/base.py @@ -312,8 +312,6 @@ def __init__(self, coin_names, **kwargs): Symbols for the coins used in a simulation. A numeraire is selected from the specified coins. 
""" - - self.coin_names = coin_names self.numeraire = get_numeraire(coin_names) super().__init__(**kwargs) @@ -348,9 +346,6 @@ def get_market_price(self, base, quote, prices): return prices[(base, quote)] -pandas_coin_pair_attr = {DataFrame: "columns", Series: "index"} - - def get_coin_pairs(prices): """ Returns the coin pairs available in the price data. @@ -418,4 +413,4 @@ def __init__(self, pool, **kwargs): :func:`pool_config` and stored as :python:`self._pool` for access during metric computations. Number and names of coins derived from pool metadata. """ - super().__init__(pool.assets.symbols, pool=pool) + super().__init__(pool.asset_names, pool=pool) diff --git a/curvesim/metrics/metrics.py b/curvesim/metrics/metrics.py index 9e421f9f7..e12fb8e42 100644 --- a/curvesim/metrics/metrics.py +++ b/curvesim/metrics/metrics.py @@ -88,7 +88,7 @@ def config(self): } def __init__(self, pool, **kwargs): - super().__init__(pool.assets.symbols) + super().__init__(pool.asset_names) def compute_arb_metrics(self, **kwargs): """Computes all metrics for each timestamp in an individual run.""" @@ -218,7 +218,7 @@ def get_stableswap_metapool_volume(self, **kwargs): """ trade_data = kwargs["trade_data"] - meta_asset = self._pool.assets.symbols[0] + meta_asset = self._pool.asset_names[0] def per_timestamp_function(trade_data): volume = 0 diff --git a/curvesim/network/coingecko.py b/curvesim/network/coingecko.py index 554ac9530..f02472e21 100644 --- a/curvesim/network/coingecko.py +++ b/curvesim/network/coingecko.py @@ -2,14 +2,8 @@ Network connector for Coingecko. 
""" # pylint: disable=redefined-outer-name -import asyncio -from datetime import datetime, timedelta, timezone - -import numpy as np import pandas as pd -from curvesim.utils import get_pairs - from .http import HTTP from .utils import sync @@ -40,103 +34,15 @@ async def get_prices(coin_id, vs_currency, start, end): r = await _get_prices(coin_id, vs_currency, start, end) # Format data - data = pd.DataFrame(r["prices"], columns=["timestamp", "prices"]) - data = data.merge( - pd.DataFrame(r["total_volumes"], columns=["timestamp", "volumes"]) - ) + data = pd.DataFrame(r["prices"], columns=["timestamp", "price"]) + data = data.merge(pd.DataFrame(r["total_volumes"], columns=["timestamp", "volume"])) data["timestamp"] = pd.to_datetime(data["timestamp"], unit="ms", utc="True") data = data.set_index("timestamp") return data -async def _pool_prices(coins, vs_currency, days, end=None): - if end is not None: - # Times to reindex to: daily intervals - # Coingecko only allows daily data when more than 90 days in the past - # for the free REST endpoint - t_end = datetime.fromtimestamp(end, tz=timezone.utc) - t_start = t_end - timedelta(days=days + 1) - t_samples = pd.date_range(start=t_start, end=t_end, freq="1D", tz=timezone.utc) - else: - # Times to reindex to: hourly intervals starting on half hour mark - t_end = datetime.now(timezone.utc) - timedelta(days=1) - t_end = t_end.replace(hour=23, minute=30, second=0, microsecond=0) - t_start = t_end - timedelta(days=days + 1) - t_samples = pd.date_range(start=t_start, end=t_end, freq="60T", tz=timezone.utc) - end = t_end.timestamp() - - # Fetch data - tasks = [] - for coin in coins: - start = t_start.timestamp() - 86400 * 3 - tasks.append(get_prices(coin, vs_currency, start, end)) - - data = await asyncio.gather(*tasks) - - # Format data - qprices = [] - qvolumes = [] - for d in data: - d.drop(d.tail(1).index, inplace=True) # remove last row - d = d.reindex(t_samples, method="ffill") - qprices.append(d["prices"]) - 
qvolumes.append(d["volumes"]) - - qprices = pd.concat(qprices, axis=1) - qvolumes = pd.concat(qvolumes, axis=1) - qvolumes = qvolumes / np.array(qprices) - - return qprices, qvolumes - - -def pool_prices(coins, vs_currency, days, chain="mainnet", end=None): - """ - Pull price and volume data for given coins, quoted in given - quote currency for given days. - - Parameters - ---------- - coins: list of str - List of coin addresses. - vs_currency: str - Symbol for quote currency. - days: int - Number of days to pull data for. - - Returns - ------- - pair of pandas.Series - prices Series and volumes Series - """ - # Get data - coins = coin_ids_from_addresses_sync(coins, chain) - qprices, qvolumes = _pool_prices_sync(coins, vs_currency, days, end) - - # Compute prices by coin pairs - combos = get_pairs(len(coins)) - prices = [] - volumes = [] - - for pair in combos: - base_price = qprices.iloc[:, pair[0]] - base_volume = qvolumes.iloc[:, pair[0]] - - quote_price = qprices.iloc[:, pair[1]] - quote_volume = qvolumes.iloc[:, pair[1]] - - # divide prices: (usd/base) / (usd/quote) = quote/base - prices.append(base_price / quote_price) - # sum volumes and convert to base: usd / (usd/base) = base - volumes.append((base_volume + quote_volume) / base_price) - - prices = pd.concat(prices, axis=1) - volumes = pd.concat(volumes, axis=1) - - return prices, volumes - - -async def _coin_id_from_address(address, chain): +async def coin_id_from_address(address, chain): address = address.lower() chain = PLATFORMS[chain.lower()] url = URL + f"coins/{chain}/contract/{address}" @@ -148,23 +54,9 @@ async def _coin_id_from_address(address, chain): return coin_id -async def coin_ids_from_addresses(addresses, chain): - if isinstance(addresses, str): - coin_ids = await _coin_id_from_address(addresses, chain) - - else: - tasks = [] - for addr in addresses: - tasks.append(_coin_id_from_address(addr, chain)) - - coin_ids = await asyncio.gather(*tasks) - - return coin_ids - - # Sync 
-_pool_prices_sync = sync(_pool_prices) -coin_ids_from_addresses_sync = sync(coin_ids_from_addresses) +get_prices_sync = sync(get_prices) +coin_id_from_address_sync = sync(coin_id_from_address) if __name__ == "__main__": @@ -174,11 +66,12 @@ async def coin_ids_from_addresses(addresses, chain): "0xdAC17F958D2ee523a2206206994597C13D831ec7", ] chain = "mainnet" - print("Coin addresses:", coin_addresses) - print("Chain", chain) - vs_ccy = "USD" - days = 1 - prices, volumes = pool_prices(coin_addresses, vs_ccy, days, chain) - print(prices.head()) - print(volumes.head()) + end = 1708403238 + start = end - 68400 + + for address in coin_addresses: + coin_id = coin_id_from_address_sync(address, chain) + data = get_prices_sync(coin_id, vs_ccy, start, end) + print(f"\n{coin_id.upper()}: {address} ({chain})") + print(data.head()) diff --git a/curvesim/pipelines/common/__init__.py b/curvesim/pipelines/common/__init__.py index ce0110541..33b1329c0 100644 --- a/curvesim/pipelines/common/__init__.py +++ b/curvesim/pipelines/common/__init__.py @@ -1,6 +1,7 @@ """ Contains variables and functions common to the arbitrage pipelines. """ +__all__ = ["DEFAULT_METRICS", "get_arb_trades", "get_asset_data", "get_pool_data"] from scipy.optimize import root_scalar @@ -8,6 +9,9 @@ from curvesim.metrics import metrics as Metrics from curvesim.templates.trader import ArbTrade +from .get_asset_data import get_asset_data +from .get_pool_data import get_pool_data + logger = get_logger(__name__) DEFAULT_METRICS = [ Metrics.Timestamp, diff --git a/curvesim/pipelines/common/get_asset_data.py b/curvesim/pipelines/common/get_asset_data.py new file mode 100644 index 000000000..707da05a8 --- /dev/null +++ b/curvesim/pipelines/common/get_asset_data.py @@ -0,0 +1,26 @@ +""" +Contains convenience functions for fetching asset price/volume data. 
+""" +from datetime import datetime, timedelta, timezone + +from curvesim.pool_data import get_pool_assets +from curvesim.price_data import get_price_data +from curvesim.templates import DateTimeSequence + + +def get_asset_data(pool_metadata, time_sequence, data_source): + """ + Fetches price/volume data for a pool's assets. + """ + sim_assets = get_pool_assets(pool_metadata) + time_sequence = time_sequence or _make_default_time_sequence() + asset_data = get_price_data(sim_assets, time_sequence, data_source=data_source) + return asset_data, time_sequence + + +def _make_default_time_sequence(): + t_end = datetime.now(timezone.utc) - timedelta(days=1) + t_end = t_end.replace(hour=23, minute=0, second=0, microsecond=0) + t_start = t_end - timedelta(days=60) + timedelta(hours=1) + time_sequence = DateTimeSequence.from_range(start=t_start, end=t_end, freq="1h") + return time_sequence diff --git a/curvesim/pipelines/common/get_pool_data.py b/curvesim/pipelines/common/get_pool_data.py new file mode 100644 index 000000000..a3fe8c94e --- /dev/null +++ b/curvesim/pipelines/common/get_pool_data.py @@ -0,0 +1,54 @@ +""" +Contains convenience functions to get Curve sim pools and/or metadata. +""" + +from datetime import datetime + +from curvesim.exceptions import CurvesimTypeError +from curvesim.pool import get_sim_pool +from curvesim.pool_data import get_metadata +from curvesim.pool_data.metadata import PoolMetaDataInterface + + +def get_pool_data(metadata_or_address, chain, env, pool_ts): + """ + Gets sim pool and (if needed) pool metadata. 
+ """ + pool_ts = _parse_timestamp(pool_ts) + pool_metadata = _parse_metadata_or_address(metadata_or_address, chain, pool_ts) + pool = get_sim_pool(pool_metadata, env=env) + + return pool, pool_metadata + + +def _parse_timestamp(timestamp): + if not timestamp: + return timestamp + + if isinstance(timestamp, datetime): + timestamp = int(timestamp.timestamp()) + + if not isinstance(timestamp, int): + _type = type(timestamp).__name__ + raise CurvesimTypeError(f"'Pool_ts' must be 'int' or 'timestamp', not {_type}.") + + return timestamp + + +def _parse_metadata_or_address(metadata_or_address, chain, pool_ts): + if isinstance(metadata_or_address, str): + pool_metadata: PoolMetaDataInterface = get_metadata( + metadata_or_address, chain, end_ts=pool_ts + ) + + elif isinstance(metadata_or_address, PoolMetaDataInterface): + pool_metadata = metadata_or_address + + else: + _type = type(metadata_or_address).__name__ + raise CurvesimTypeError( + "'Metadata_or_address' must be 'PoolMetaDataInterface' or 'str'," + f"not {_type}." 
+ ) + + return pool_metadata diff --git a/curvesim/pipelines/simple/__init__.py b/curvesim/pipelines/simple/__init__.py index 64307c4e8..9ddce1f67 100644 --- a/curvesim/pipelines/simple/__init__.py +++ b/curvesim/pipelines/simple/__init__.py @@ -11,21 +11,19 @@ from curvesim.metrics.results import make_results from curvesim.pipelines import run_pipeline from curvesim.pipelines.simple.strategy import SimpleStrategy -from curvesim.pool import get_sim_pool -from ..common import DEFAULT_METRICS +from ..common import DEFAULT_METRICS, get_asset_data, get_pool_data def pipeline( # pylint: disable=too-many-locals - pool_address, - chain, + metadata_or_address, *, + chain="mainnet", variable_params=None, fixed_params=None, - end_ts=None, - days=60, src="coingecko", - data_dir="data", + time_sequence=None, + pool_ts=None, ncpu=None, env="prod", ): @@ -38,14 +36,13 @@ def pipeline( # pylint: disable=too-many-locals Parameters ---------- - pool_address : str - '0x'-prefixed string representing the pool address. + metadata_or_address: :class:`~curvesim.pool_data.metadata.PoolMetaDataInterface` or str + Pool metadata object or address to fetch metadata for. - chain: str - Identifier for blockchain or layer2. Supported values are: - "mainnet", "arbitrum", "optimism", "fantom", "avalanche", "matic", "xdai" + chain : str or :class:`curvesim.constants.Chain`, default="mainnet" + Chain to use if fetching metadata by address. - variable_params : dict, defaults to broad range of A/fee values + variable_params : dict Pool parameters to vary across simulations. keys: pool parameters, values: iterables of ints @@ -61,18 +58,14 @@ def pipeline( # pylint: disable=too-many-locals -------- >>> fixed_params = {"D": 1000000*10**18} - end_ts : int, optional - End timestamp in Unix time. Defaults to 30 minutes before midnight of the - current day in UTC. - - days : int, default=60 - Number of days to pull price/volume data for. 
- - src : str, default="coingecko" + src : str or :class:`~curvesim.templates.DataSource`, default="coingecko" Source for price/volume data: "coingecko" or "local". - data_dir : str, default="data" - relative path to saved price data folder + time_sequence : :class:`~curvesim.templates.DateTimeSequence`, optional + Timepoints for price/volume data and simulated trades. + + pool_ts : datetime.datetime or int, optional + Optional timestamp to use when fetching metadata by address. ncpu : int, default=os.cpu_count() Number of cores to use. @@ -84,15 +77,12 @@ def pipeline( # pylint: disable=too-many-locals """ ncpu = ncpu or os.cpu_count() - pool = get_sim_pool(pool_address, chain, env=env, end_ts=end_ts) - - sim_assets = pool.assets - price_sampler = PriceVolume( - sim_assets, days=days, end=end_ts, data_dir=data_dir, src=src - ) + pool, pool_metadata = get_pool_data(metadata_or_address, chain, env, pool_ts) + asset_data, _ = get_asset_data(pool_metadata, time_sequence, src) # pylint: disable-next=abstract-class-instantiated param_sampler = ParameterizedPoolIterator(pool, variable_params, fixed_params) + price_sampler = PriceVolume(asset_data) _metrics = init_metrics(DEFAULT_METRICS, pool=pool) strategy = SimpleStrategy(_metrics) diff --git a/curvesim/pipelines/simple/__main__.py b/curvesim/pipelines/simple/__main__.py index 7de769d56..e2d233d99 100644 --- a/curvesim/pipelines/simple/__main__.py +++ b/curvesim/pipelines/simple/__main__.py @@ -3,4 +3,4 @@ if __name__ == "__main__": pool_address = "0xbebc44782c7db0a1a60cb6fe97d0b483032ff1c7" chain = "mainnet" - results = pipeline(pool_address, chain, ncpu=1) + results = pipeline(pool_address, chain=chain, ncpu=1) diff --git a/curvesim/pipelines/vol_limited_arb/__init__.py b/curvesim/pipelines/vol_limited_arb/__init__.py index e7b03b09e..86bf1d45e 100644 --- a/curvesim/pipelines/vol_limited_arb/__init__.py +++ b/curvesim/pipelines/vol_limited_arb/__init__.py @@ -8,11 +8,10 @@ from curvesim.iterators.price_samplers 
import PriceVolume from curvesim.logging import get_logger from curvesim.metrics import init_metrics, make_results -from curvesim.pool import get_sim_pool from curvesim.pool_data import get_pool_volume from .. import run_pipeline -from ..common import DEFAULT_METRICS +from ..common import DEFAULT_METRICS, get_asset_data, get_pool_data from .strategy import VolumeLimitedStrategy logger = get_logger(__name__) @@ -20,17 +19,18 @@ # pylint: disable-next=too-many-locals def pipeline( - pool_metadata, + metadata_or_address, *, + chain="mainnet", variable_params=None, fixed_params=None, metrics=None, - days=60, src="coingecko", - data_dir="data", + time_sequence=None, vol_mult=None, + pool_ts=None, ncpu=None, - end=None, + env="prod", ): """ Implements the volume-limited arbitrage pipeline. @@ -42,10 +42,13 @@ def pipeline( Parameters ---------- - pool_metadata : :class:`~curvesim.pool_data.metadata.PoolMetaDataInterface` - Pool metadata object for the pool of interest. + metadata_or_address: :class:`~curvesim.pool_data.metadata.PoolMetaDataInterface` or str + Pool metadata object or address to fetch metadata for. - variable_params : dict, defaults to broad range of A/fee values + chain : str or :class:`curvesim.constants.Chain`, default="mainnet" + Chain to use if fetching metadata by address. + + variable_params : dict Pool parameters to vary across simulations. keys: pool parameters, values: iterables of ints @@ -61,14 +64,15 @@ def pipeline( -------- >>> fixed_params = {"D": 1000000*10**18} - days : int, default=60 - Number of days to pull pool and price data for. + metrics : list of :class:`~curvesim.metrics.base.Metric` classes, optional + Metrics to compute for each simulation run. + Defaults to `curvesim.pipelines.common.DEFAULT_METRICS` - src : str, default="coingecko" + src : str or :class:`~curvesim.templates.DataSource`, default="coingecko" Source for price/volume data: "coingecko" or "local". 
- data_dir : str, default="data" - relative path to saved price data folder + time_sequence : :class:`~curvesim.templates.DateTimeSequence`, optional + Timepoints for price/volume data and simulated trades. vol_mult : dict, default computed from data Value(s) multiplied by market volume to specify volume limits @@ -80,6 +84,9 @@ def pipeline( {('DAI', 'USDC'): 0.1, ('DAI', 'USDT'): 0.1, ('USDC', 'USDT'): 0.1} + pool_ts : datetime.datetime or int, optional + Optional timestamp to use when fetching metadata by address. + ncpu : int, default=os.cpu_count() Number of cores to use. @@ -91,16 +98,17 @@ def pipeline( cpu_count = os.cpu_count() ncpu = cpu_count if cpu_count is not None else 1 - pool = get_sim_pool(pool_metadata) + pool, pool_metadata = get_pool_data(metadata_or_address, chain, env, pool_ts) + asset_data, time_sequence = get_asset_data(pool_metadata, time_sequence, src) # pylint: disable-next=abstract-class-instantiated param_sampler = ParameterizedPoolIterator(pool, variable_params, fixed_params) - price_sampler = PriceVolume( - pool.assets, days=days, data_dir=data_dir, src=src, end=end - ) + price_sampler = PriceVolume(asset_data) if vol_mult is None: - pool_volume = get_pool_volume(pool_metadata, days=days, end=end) + pool_volume = get_pool_volume( + pool_metadata, time_sequence[0], time_sequence[-1] + ) vol_mult = pool_volume.sum() / price_sampler.volumes.sum() logger.info("Volume Multipliers:\n%s", vol_mult.to_string()) vol_mult = vol_mult.to_dict() diff --git a/curvesim/pool/sim_interface/cryptoswap.py b/curvesim/pool/sim_interface/cryptoswap.py index 45ebb2443..ef3bc68a0 100644 --- a/curvesim/pool/sim_interface/cryptoswap.py +++ b/curvesim/pool/sim_interface/cryptoswap.py @@ -2,7 +2,6 @@ from math import prod from curvesim.exceptions import SimPoolError -from curvesim.templates import SimAssets from curvesim.templates.sim_pool import SimPool from curvesim.utils import cache, override @@ -213,17 +212,3 @@ def prepare_for_run(self, prices): 
self.virtual_price = self.get_virtual_price() self.xcp_profit = 10**18 self.xcp_profit_a = 10**18 - - @property - @override - @cache - def assets(self): - """ - Return :class:`.SimAssets` object with the properties of the pool's assets. - - Returns - ------- - SimAssets - SimAssets object that stores the properties of the pool's assets. - """ - return SimAssets(self.coin_names, self.coin_addresses, self.chain) diff --git a/curvesim/pool/sim_interface/metapool.py b/curvesim/pool/sim_interface/metapool.py index a9be3161c..c35eff1a4 100644 --- a/curvesim/pool/sim_interface/metapool.py +++ b/curvesim/pool/sim_interface/metapool.py @@ -1,5 +1,4 @@ from curvesim.exceptions import CurvesimValueError, SimPoolError -from curvesim.templates import SimAssets from curvesim.templates.sim_pool import SimPool from curvesim.utils import cache, override @@ -214,20 +213,3 @@ def get_min_trade_size(self, coin_in): The minimal trade size """ return 0 - - @property - @override - @cache - def assets(self): - """ - Return :class:`.SimAssets` object with the properties of the pool's assets. - - Returns - ------- - SimAssets - SimAssets object that stores the properties of the pool's assets. 
- """ - symbols = self.coin_names[:-1] + self.basepool.coin_names - addresses = self.coin_addresses[:-1] + self.basepool.coin_addresses - - return SimAssets(symbols, addresses, self.chain) diff --git a/curvesim/pool/sim_interface/pool.py b/curvesim/pool/sim_interface/pool.py index b1e73f564..51bcac605 100644 --- a/curvesim/pool/sim_interface/pool.py +++ b/curvesim/pool/sim_interface/pool.py @@ -1,5 +1,4 @@ from curvesim.exceptions import SimPoolError -from curvesim.templates import SimAssets from curvesim.templates.sim_pool import SimPool from curvesim.utils import cache, override @@ -141,17 +140,3 @@ def get_min_trade_size(self, coin_in): The minimal trade size """ return 0 - - @property - @override - @cache - def assets(self): - """ - Return :class:`.SimAssets` object with the properties of the pool's assets. - - Returns - ------- - SimAssets - SimAssets object that stores the properties of the pool's assets. - """ - return SimAssets(self.coin_names, self.coin_addresses, self.chain) diff --git a/curvesim/pool_data/__init__.py b/curvesim/pool_data/__init__.py index a00749a80..b6acb0fff 100644 --- a/curvesim/pool_data/__init__.py +++ b/curvesim/pool_data/__init__.py @@ -5,8 +5,9 @@ and 2-token cryptopools. 
""" -__all__ = ["get_metadata", "get_pool_volume"] +__all__ = ["get_metadata", "get_pool_assets", "get_pool_volume"] from .queries.metadata import get_metadata +from .queries.pool_assets import get_pool_assets from .queries.pool_volume import get_pool_volume diff --git a/curvesim/pool_data/queries/metadata.py b/curvesim/pool_data/queries/metadata.py index f05c2f4c3..eacc249e6 100644 --- a/curvesim/pool_data/queries/metadata.py +++ b/curvesim/pool_data/queries/metadata.py @@ -12,8 +12,6 @@ def from_address(address, chain, env="prod", end_ts=None): """ - Returns - Parameters ---------- address: str diff --git a/curvesim/pool_data/queries/pool_assets.py b/curvesim/pool_data/queries/pool_assets.py new file mode 100644 index 000000000..64015945d --- /dev/null +++ b/curvesim/pool_data/queries/pool_assets.py @@ -0,0 +1,51 @@ +""" +Functions to get assets for Curve pools. +""" + +from typing import List, Union + +from curvesim.constants import Chain +from curvesim.pool_data.metadata import PoolMetaDataInterface +from curvesim.pool_data.queries.metadata import get_metadata +from curvesim.templates import OnChainAsset, OnChainAssetPair +from curvesim.utils import get_pairs + + +def get_pool_assets( + metadata_or_address, chain: Union[str, Chain] = Chain.MAINNET +) -> List[OnChainAssetPair]: + """ + Gets asset pairs tradeable for the specified pool. + + Parameters + ---------- + metadata_or_address: PoolMetaDataInterface or str + Pool metadata or pool address to fetch metadata. + + chain: str or Chain, default=Chain.MAINNET + Chain to use if pool address is provided to fetch metadata. 
+ + Returns + ------- + List[OnChainAssetPair] + + """ + if isinstance(metadata_or_address, str): + pool_metadata: PoolMetaDataInterface = get_metadata(metadata_or_address, chain) + else: + pool_metadata = metadata_or_address + + symbol_pairs = get_pairs(pool_metadata.coin_names) + address_pairs = get_pairs(pool_metadata.coins) + + sim_assets = [] + for pair_info in zip(symbol_pairs, symbol_pairs, address_pairs): + base_info, quote_info = tuple(zip(*pair_info)) + + base_asset = OnChainAsset(*base_info, pool_metadata.chain) # type: ignore [call-arg] + quote_asset = OnChainAsset(*quote_info, pool_metadata.chain) # type: ignore [call-arg] + + asset_pair = OnChainAssetPair(base_asset, quote_asset) + sim_assets.append(asset_pair) + + return sim_assets diff --git a/curvesim/pool_data/queries/pool_volume.py b/curvesim/pool_data/queries/pool_volume.py index ae2a5aa7e..be60f18e5 100644 --- a/curvesim/pool_data/queries/pool_volume.py +++ b/curvesim/pool_data/queries/pool_volume.py @@ -2,27 +2,28 @@ Functions to get historical volume for Curve pools. 
""" -from datetime import datetime, timezone +from datetime import datetime from math import comb -from typing import List, Optional, Tuple, Union +from typing import List, Tuple, Union from pandas import DataFrame, Series +from curvesim.constants import Chain from curvesim.logging import get_logger from curvesim.network.curve_prices import get_pool_pair_volume_sync from curvesim.pool_data.metadata import PoolMetaDataInterface from curvesim.utils import get_event_loop, get_pairs -from .metadata import Chain, get_metadata +from .metadata import get_metadata logger = get_logger(__name__) def get_pool_volume( metadata_or_address: Union[PoolMetaDataInterface, str], - days: int = 60, - end: Optional[int] = None, - chain: Union[str, Chain] = "mainnet", + start: Union[int, datetime], + end: Union[int, datetime], + chain: Union[str, Chain] = Chain.MAINNET, ) -> DataFrame: """ Gets historical daily volume for each pair of coins traded in a Curve pool. @@ -32,11 +33,11 @@ def get_pool_volume( metadata_or_address: PoolMetaDataInterface or str Pool metadata or pool address to fetch metadata. - days: int, defaults to 60 - Number of days to pull volume data for. + start: datetime.datetime or int (POSIX timestamp) + Timestamp of the first time to pull data for. - end: int, defaults to start of current date - Posix timestamp of the last time to pull data for. + end: datetime.datetime or int (POSIX timestamp) + Timestamp of the last time to pull data for. chain: str, default "mainnet" Chain to use if pool address is provided to fetch metadata. 
@@ -56,7 +57,7 @@ def get_pool_volume( pool_metadata = metadata_or_address pair_data = _get_pair_data(pool_metadata) - start_ts, end_ts = _process_timestamps(days, end) + start_ts, end_ts = _process_timestamps(start, end) loop = get_event_loop() volumes: dict[Tuple[str, str], Series] = {} @@ -71,7 +72,7 @@ def get_pool_volume( ) volumes[pair_symbols] = data["volume"] - volume_df = _make_volume_df(volumes, days) + volume_df = _make_volume_df(volumes) return volume_df @@ -108,21 +109,19 @@ def _get_metapool_addresses(pool_metadata) -> List[str]: return [address_meta] * n_pairs_meta + [address_base] * n_pairs_base -def _process_timestamps(days, end) -> Tuple[int, int]: - end = end or int( - datetime.now(timezone.utc) - .replace(hour=0, minute=0, second=0, microsecond=0) - .timestamp() - ) - start = end - days * 86400 +def _process_timestamps(start, end) -> Tuple[int, int]: + if isinstance(start, datetime): + start = int(start.timestamp()) + + if isinstance(end, datetime): + end = int(end.timestamp()) + return start, end -def _make_volume_df(volumes, days) -> DataFrame: +def _make_volume_df(volumes) -> DataFrame: df = DataFrame(volumes) df.columns = df.columns.to_flat_index() - if len(df) > days: - df = df[-days:] logger.info("Days of volume returned:\n%s", df.count().to_string()) - df.fillna(0, inplace=True) + df.fillna(0.0, inplace=True) return df diff --git a/curvesim/price_data/__init__.py b/curvesim/price_data/__init__.py index c66625c5c..f3f2a4455 100644 --- a/curvesim/price_data/__init__.py +++ b/curvesim/price_data/__init__.py @@ -7,59 +7,71 @@ Nomics data is deprecated. 
""" -from curvesim.exceptions import NetworkError -from .sources import coingecko +from typing import List, Union +from pandas import concat -def get( - coins, - chain="mainnet", - *, - days=60, - data_dir="data", - src="coingecko", - end=None, +from curvesim.exceptions import CurvesimTypeError +from curvesim.templates import DataSource, SimAsset, TimeSequence + +from .data_sources import DataSourceEnum + + +def get_price_data( + sim_assets: List[SimAsset], + time_sequence: TimeSequence, + data_source: Union[str, DataSource, DataSourceEnum] = DataSourceEnum.COINGECKO, ): """ - Pull price and volume data for given coins. + Pull price and volume data for each sim_asset. - Data is returned for all pairwise combinations of the input coins. Parameters ---------- - coins : list of str - List of coin addresses. + sim_assets: List[SimAsset] + The assets to pull data for. - days : int, default=60 - Number of days to pull data for. - - data_dir : str, default="data" - Directory to load local data from. - - src : str, default="coingecko" - Data source ("coingecko", "nomics", or "local"). + time_sequence: TimeSequence + Timestamps to pull data for. If the specified source can't provide data for + the specified times, the data will be resampled. + data_source: str, DataSource, or DataSourceEnum + DataSource object to query. Returns ------- - prices : pandas.DataFrame - Timestamped prices for each pair of coins. + pandas.DataFrame + + """ - volumes : pandas.DataFrame - Timestamped volumes for each pair of coins. + data_source_instance = _instantiate_data_source(data_source) - pzero : int or pandas.Series - Proportion of timestamps with zero volume. 
+ data = [] + for sim_asset in sim_assets: + _data = data_source_instance.query(sim_asset, time_sequence) + data.append(_data) - """ - if src == "coingecko": - prices, volumes, pzero = coingecko(coins, chain=chain, days=days, end=end) + df = concat(data, axis=1) + return df + + +def _instantiate_data_source(data_source): + if isinstance(data_source, str): + data_source_instance = DataSourceEnum[data_source.upper()].value() + + elif isinstance(data_source, DataSourceEnum): + data_source_instance = data_source.value() + + elif isinstance(data_source, DataSource): + data_source_instance = data_source - elif src == "nomics": - raise NetworkError("Nomics data is no longer supported.") + elif issubclass(data_source, DataSource): + data_source_instance = data_source() - elif src == "local": - raise NetworkError("Local data currently not supported.") + else: + raise CurvesimTypeError( + "'data_source' must be str, DataSourceEnum, or DataSource subclass/instance" + ) - return prices, volumes, pzero + return data_source_instance diff --git a/curvesim/price_data/data_sources/__init__.py b/curvesim/price_data/data_sources/__init__.py new file mode 100644 index 000000000..0d02a239b --- /dev/null +++ b/curvesim/price_data/data_sources/__init__.py @@ -0,0 +1,15 @@ +"""Contains data sources used by curvesim.price_data.get_price_data()""" + +from enum import Enum + +from .coingecko import CoinGeckoPriceVolumeSource +from .local import CsvDataSource + + +class DataSourceEnum(Enum): + """ + Enum of data sources used by curvesim.price_data.get_price_data() + """ + + COINGECKO = CoinGeckoPriceVolumeSource + LOCAL = CsvDataSource diff --git a/curvesim/price_data/data_sources/coingecko.py b/curvesim/price_data/data_sources/coingecko.py new file mode 100644 index 000000000..660e8fd7c --- /dev/null +++ b/curvesim/price_data/data_sources/coingecko.py @@ -0,0 +1,133 @@ +""" +Coingecko price/volume Data Source and helper functions. 
+""" + + +from datetime import datetime + +from pandas import DataFrame, concat + +from curvesim.exceptions import DataSourceError +from curvesim.logging import get_logger +from curvesim.network.coingecko import coin_id_from_address_sync, get_prices_sync +from curvesim.templates import ( + ApiDataSource, + DateTimeSequence, + OnChainAssetPair, + TimeSequence, +) +from curvesim.utils import cache, get_event_loop + +logger = get_logger(__name__) + + +class CoinGeckoPriceVolumeSource(ApiDataSource): + """ + DataSource for Coingecko price/volume data. + """ + + def query( + self, sim_asset: OnChainAssetPair, time_sequence: TimeSequence[datetime] + ) -> DataFrame: + """ + Fetches asset data for a particular range of times. Timestamps are matched with + a tolerance of 10 minutes, then missing prices are frontfilled and missing + volume is filled with zeros. + + Parameters + ---------- + sim_asset: OnChainAssetPair + The asset-pair to pull data for. + + time_sequence: TimeSequence + Timestamps to pull data for. 
+ + Returns + ------- + pandas.DataFrame + """ + + symbol_pair = (sim_asset.base.symbol, sim_asset.quote.symbol) + logger.info("Fetching CoinGecko price data for %s...", "-".join(symbol_pair)) + + _validate_arguments(sim_asset, time_sequence) + t_start, t_end = _get_time_endpoints(time_sequence) + + data = [] + for asset in sim_asset: + coingecko_id = self._get_coingecko_id(asset.address, asset.chain) + _data = self._get_usd_price(coingecko_id, t_start, t_end) + _data = _reindex_to_time_sequence(_data, time_sequence, asset.id) + data.append(_data) + + # divide prices: (usd/base) / (usd/quote) = quote/base + # sum volumes and convert to base: usd / (usd/base) = base + base_data, quote_data = data + prices = base_data["price"] / quote_data["price"] + volumes = (base_data["volume"] + quote_data["volume"]) / base_data["price"] + + df = concat( + [prices, volumes], + axis=1, + keys=[("price", symbol_pair), ("volume", symbol_pair)], + names=["metric", "symbol"], + ) + + return df + + @staticmethod + @cache + def _get_coingecko_id(address, chain): + loop = get_event_loop() + return coin_id_from_address_sync(address, chain, event_loop=loop) + + @staticmethod + @cache + def _get_usd_price(coingecko_id, t_start, t_end): + loop = get_event_loop() + return get_prices_sync(coingecko_id, "USD", t_start, t_end, event_loop=loop) + + +def _validate_arguments(sim_asset, time_sequence): + if not isinstance(sim_asset, OnChainAssetPair): + _type = type(sim_asset).__name__ + raise DataSourceError( + f"For CoinGecko, sim_asset must be 'OnChainAssetPair', not '{_type}'." + ) + + if not isinstance(time_sequence, DateTimeSequence): + _type = type(time_sequence).__name__ + raise DataSourceError( + f"For CoinGecko, time_sequence must be 'DateTimeSequence', not '{_type}'." 
+ ) + + +def _get_time_endpoints(time_sequence, buffer=3600): + t_start = time_sequence[0].timestamp() - buffer + t_end = time_sequence[-1].timestamp() + buffer + return t_start, t_end + + +def _reindex_to_time_sequence(df, time_sequence, asset_id): + # Use "nearest" because CoinGecko timestamps are usually slightly delayed + df_reindexed = df.reindex(time_sequence, method="nearest", tolerance="10min") + nan_count = df_reindexed.isna().sum() + + logger.info( + ( + "\nResampling '%s'...\n" + "Average data frequency: %s\n" + "Resampling to: %s\n" + "Filling NaN values:\n%s" + ), + asset_id, + df.index.to_series().diff().mean(), + time_sequence.freq, + nan_count.to_string(), + ) + + if any(nan_count > 0): + df_reindexed["price"] = df_reindexed["price"].ffill() + df_reindexed["volume"] = df_reindexed["volume"].fillna(0) + + return df_reindexed diff --git a/curvesim/price_data/data_sources/local.py b/curvesim/price_data/data_sources/local.py new file mode 100644 index 000000000..ffdb3e4b0 --- /dev/null +++ b/curvesim/price_data/data_sources/local.py @@ -0,0 +1,56 @@ +""" +DataSources for local files and helper functions. +""" + +from pandas import MultiIndex, read_csv + +from curvesim.logging import get_logger +from curvesim.templates import FileDataSource + +logger = get_logger(__name__) + + +class CsvDataSource(FileDataSource): + """ + DataSource for local price/volume data stored in CSV files. 
+ """ + + @property + def file_extension(self): + return "csv" + + def _default_read_function(self, filepath, sim_asset, time_sequence): + symbol_pair = (sim_asset.base.symbol, sim_asset.quote.symbol) + df = read_csv(filepath, index_col=0, parse_dates=True) + + if len(df.index) != len(time_sequence) or any(df.index != time_sequence): + df = _reindex_to_time_sequence(df, time_sequence, symbol_pair) + + columns = [(col, symbol_pair) for col in df.columns] + df.columns = MultiIndex.from_tuples(columns, names=["metric", "symbol"]) + + return df + + +def _reindex_to_time_sequence(df, time_sequence, asset_id): + df_reindexed = df.reindex(time_sequence, method="ffill", limit=1) + nan_count = df_reindexed.isna().sum() + + logger.info( + ( + "\nResampling %s...\n" + "Average data frequency: %s\n" + "Resampling to: %s\n" + "Filling NaN values:\n%s" + ), + asset_id, + df.index.to_series().diff().mean(), + time_sequence.freq, + nan_count.to_string(), + ) + + if any(nan_count > 0): + df_reindexed["price"] = df_reindexed["price"].ffill() + df_reindexed["volume"] = df_reindexed["volume"].fillna(0) + + return df_reindexed diff --git a/curvesim/price_data/sources.py b/curvesim/price_data/sources.py deleted file mode 100644 index ad14516e6..000000000 --- a/curvesim/price_data/sources.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Helper functions for the different data sources we pull from. -""" - -from curvesim.logging import get_logger -from curvesim.network import coingecko as _coingecko - -logger = get_logger(__name__) - - -def coingecko(coins, chain="mainnet", days=60, end=None): - """ - Fetch CoinGecko price data for specified coins. - - Parameters - ---------- - coins : list of str - List of coin symbols to fetch data for. - chain : str, optional - Blockchain network to consider. Default is "mainnet". - days : int, optional - Number of past days to fetch data for. Default is 60. - end : int, optional - End timestamp for the data in seconds since epoch. 
- If None, the end time will be the current time. Default is None. - - Returns - ------- - tuple of (dict, dict, int) - Tuple of prices, volumes, and pzero (fixed as 0 for this function). - """ - logger.info("Fetching CoinGecko price data...") - prices, volumes = _coingecko.pool_prices(coins, "usd", days, chain=chain, end=end) - pzero = 0 - - return prices, volumes, pzero diff --git a/curvesim/templates/__init__.py b/curvesim/templates/__init__.py index 15cb2ad44..cabcdc583 100644 --- a/curvesim/templates/__init__.py +++ b/curvesim/templates/__init__.py @@ -3,22 +3,31 @@ """ __all__ = [ + "ApiDataSource", + "DataSource", + "FileDataSource", "Log", - "Trader", - "Strategy", - "SimAssets", - "SimPool", - "Trade", - "TradeResult", "ParameterSampler", "PriceSample", "PriceSampler", + "OnChainAsset", + "OnChainAssetPair", + "SimAsset", + "SimPool", + "Strategy", + "DateTimeSequence", + "TimeSequence", + "Trade", + "Trader", + "TradeResult", ] +from .data_source import ApiDataSource, DataSource, FileDataSource from .log import Log from .param_samplers import ParameterSampler from .price_samplers import PriceSample, PriceSampler -from .sim_assets import SimAssets +from .sim_asset import OnChainAsset, OnChainAssetPair, SimAsset from .sim_pool import SimPool from .strategy import Strategy +from .time_sequence import DateTimeSequence, TimeSequence from .trader import Trade, Trader, TradeResult diff --git a/curvesim/templates/data_source.py b/curvesim/templates/data_source.py new file mode 100644 index 000000000..7f97f47f0 --- /dev/null +++ b/curvesim/templates/data_source.py @@ -0,0 +1,107 @@ +""" +Interfaces for DataSources, used to fetch asset data over time (e.g., price/volume). 
+""" + +from abc import ABC, abstractmethod +from os import extsep +from os.path import join +from typing import Callable, Optional + +from pandas import DataFrame + +from .sim_asset import OnChainAssetPair +from .time_sequence import TimeSequence + + +class DataSource(ABC): + """ + Abstract base class implementing the DataSource interface. + """ + + @abstractmethod + def query( + self, sim_asset: OnChainAssetPair, time_sequence: TimeSequence + ) -> DataFrame: + """ + Fetches asset data for a particular range of times. + + Parameters + ---------- + sim_asset: OnChainAssetPair + The asset-pair to pull data for. + + time_sequence: TimeSequence + Timestamps to pull data for. + + Returns + ------- + pandas.DataFrame + """ + raise NotImplementedError + + +class ApiDataSource(DataSource): + """ + DataSource that pulls data from a network API. + """ + + +class FileDataSource(DataSource): + """ + DataSource that pulls data from local files. + """ + + def __init__(self, directory: str = "", read_function: Optional[Callable] = None): + """ + Fetches asset data for a particular range of times. + + Parameters + ---------- + directory: str, default="" + Directory to pull data from. + + read_function: Callable, optional + Optional custom function to read data file. + + """ + self.directory = directory + self.read_function = read_function or self._default_read_function + + def query( + self, sim_asset: OnChainAssetPair, time_sequence: TimeSequence + ) -> DataFrame: + """ + Fetches asset data for a particular range of times. + Uses filepath: {directory}/{base_symbol}-{quote_symbol}.{file_extension} + + Parameters + ---------- + sim_asset: OnChainAssetPair + The asset-pair to pull data for. + + time_sequence: TimeSequence + Timestamps to pull data for. 
+ + Returns + ------- + pandas.DataFrame + """ + filename = sim_asset.base.symbol + "-" + sim_asset.quote.symbol + filepath = join(self.directory, filename + extsep + self.file_extension) + df = self.read_function(filepath, sim_asset, time_sequence) + return df + + @property + def file_extension(self): + """ + The file extension used when loading data. + """ + raise NotImplementedError + + def _default_read_function( + self, filepath: str, sim_asset: OnChainAssetPair, time_sequence: TimeSequence + ): + """ + The default read function used by the DataSource. + """ + raise NotImplementedError diff --git a/curvesim/templates/sim_asset.py b/curvesim/templates/sim_asset.py new file mode 100644 index 000000000..97b4e5d58 --- /dev/null +++ b/curvesim/templates/sim_asset.py @@ -0,0 +1,65 @@ +"""Interfaces for SimAssets, which store data about assets used in simulations.""" + +from typing import NamedTuple + +from curvesim.constants import Chain +from curvesim.utils import dataclass + + +@dataclass(frozen=True) +class SimAsset: + """ + Base SimAsset dataclass to store data about assets used in simulations. + + Attributes + ---------- + id : str + Unique asset ID. + + symbol : str + Asset symbol. + """ + + id: str + symbol: str + + +@dataclass(frozen=True) +class OnChainAsset(SimAsset): + """ + SimAsset dataclass to store data about on-chain assets. + + Attributes + ---------- + id : str + Unique asset ID. + + symbol : str + Asset symbol. + + address : str + Asset's blockchain address. + + chain : Chain + Asset's blockchain. + """ + + address: str + chain: Chain + + +class OnChainAssetPair(NamedTuple): + """ + Container for base/quote pair of on-chain assets. + + Attributes + ---------- + base : OnChainAsset + The base asset. + + quote : OnChainAsset + The quote asset. 
+ """ + + base: OnChainAsset + quote: OnChainAsset diff --git a/curvesim/templates/sim_assets.py b/curvesim/templates/sim_assets.py deleted file mode 100644 index f701d58da..000000000 --- a/curvesim/templates/sim_assets.py +++ /dev/null @@ -1,18 +0,0 @@ -from curvesim.utils import get_pairs - - -# pylint: disable-next=too-few-public-methods -class SimAssets: - """ - Stores the properties of the assets to be used in a simulation. Currently, only - specific coins identified by their address/chain are supported. This will be - expanded to "abstract" assets (e.g., some function of multiple coins) in the near - future. - """ - - def __init__(self, symbols, addresses, chain): - self.symbols = symbols - self.addresses = addresses - self.chain = chain - self.symbol_pairs = get_pairs(symbols) - self.address_pairs = get_pairs(addresses) diff --git a/curvesim/templates/sim_pool.py b/curvesim/templates/sim_pool.py index 205b88df0..026e45063 100644 --- a/curvesim/templates/sim_pool.py +++ b/curvesim/templates/sim_pool.py @@ -136,16 +136,3 @@ def get_min_trade_size(self, coin_in): The minimal trade size """ raise NotImplementedError - - @property - @abstractmethod - def assets(self): - """ - Return :class:`.SimAssets` object with the properties of the pool's assets. - - Returns - ------- - SimAssets - SimAssets object that stores the properties of the pool's assets. 
- """ - raise NotImplementedError diff --git a/curvesim/templates/time_sequence.py b/curvesim/templates/time_sequence.py new file mode 100644 index 000000000..71ce34bc5 --- /dev/null +++ b/curvesim/templates/time_sequence.py @@ -0,0 +1,103 @@ +"""Interfaces for TimeSequences, used to track time within simulations.""" + + +from datetime import datetime, timezone +from typing import Generic, Iterable, Optional, TypeVar, Union + +from pandas import DateOffset, date_range +from pandas.tseries.frequencies import to_offset + +from curvesim.exceptions import TimeSequenceError + +T = TypeVar("T") + + +class TimeSequence(Generic[T]): + """ + Generic class for time-like sequences. + Abstraction to encompass different ways of tracking "time", + useful for trading strategies involving a blockchain. + This could be timestamps, block times, block numbers, etc. + """ + + def __init__(self, sequence: Iterable[T]): + _validate_sequence(sequence) + self._sequence = tuple(sequence) + + def __getitem__(self, index): + return self._sequence[index] + + def __iter__(self): + for time in self._sequence: + yield time + + def __len__(self): + return len(self._sequence) + + def __repr__(self): + return f"<{self.__class__.__name__} start={self[0]} end={self[-1]}>" + + +class DateTimeSequence(TimeSequence[datetime]): + """ + TimeSequence composed of datetimes. + """ + + def __init__( + self, + sequence: Iterable[datetime], + freq: Optional[Union[str, DateOffset]] = None, + ): + _validate_datetime_sequence(sequence) + super().__init__(sequence) + self.freq = to_offset(freq) + + @classmethod + def from_range( + cls, + *, + start=None, + end=None, + periods=None, + freq=None, + tz=timezone.utc, + inclusive="both", + unit=None, + ): + """ + Instantiates a DateTimeSequence from a pandas date range. + The function signature is analogous to pandas.date_range. 
+ """ + + times = date_range( + start=start, + end=end, + periods=periods, + freq=freq, + tz=tz, + inclusive=inclusive, + unit=unit, + ) + + return cls(times, freq=times.freq) + + +def _validate_sequence(times): + if not isinstance(times, Iterable) or isinstance(times, str): + type_name = type(times).__name__ + raise TimeSequenceError( + f"Input time sequence must be a non-string iterable, not '{type_name}'." + ) + + if sorted(times) != list(times): + raise TimeSequenceError("Input time sequence must be in ascending order.") + + if len(set(times)) != len(times): + raise TimeSequenceError("Input time sequence must not contain duplicates.") + + +def _validate_datetime_sequence(times): + if not all(isinstance(t, datetime) for t in times): + raise TimeSequenceError( + "DateTimeSequence may only contain iterables of datetime.datetime." + ) diff --git a/requirements.txt b/requirements.txt index 3f5a7f9a6..524cf5486 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,16 +2,16 @@ # "Core" dependencies used for the package. # These bounds should match the ones in `setup.cfg` -numpy==1.25.2 -pandas==2.1.0 -scipy==1.11.1 +numpy==1.26.4 +pandas==2.2.0 +scipy==1.12.0 gmpy2==2.1.5 -matplotlib==3.7.3 -web3==6.9.0 +matplotlib==3.8.3 +web3==6.15.1 requests==2.31.0 tenacity==8.2.3 -python-dotenv==1.0.0 -altair==5.1.1 +python-dotenv==1.0.1 +altair==5.2.0 # Dependencies used for setting up a venv for development. # These should be pinned to specific versions. 
black==22.6.0 @@ -25,104 +25,106 @@ pytest-cov==4.1.0 pytest-xdist==3.3.1 hypothesis==6.54.6 coverage==7.0.5 +py-evm==0.7.0a4 +vyper==0.3.9 titanoboa==0.1.7 Sphinx==5.3.0 Pallets-Sphinx-Themes==2.0.3 mypy==1.5.1 +pandas-stubs==2.1.4.231227 ## The following requirements were added by pip freeze: -aiohttp==3.8.5 +aiohttp==3.9.3 aiosignal==1.3.1 -alabaster==0.7.13 -asttokens==2.4.0 +alabaster==0.7.16 +asttokens==2.4.1 async-timeout==4.0.3 -attrs==23.1.0 -Babel==2.12.1 -bitarray==2.8.1 +attrs==23.2.0 +Babel==2.14.0 +bitarray==2.9.2 cached-property==1.5.2 -certifi==2023.7.22 -charset-normalizer==3.2.0 +certifi==2024.2.2 +charset-normalizer==3.3.2 click==8.1.7 click-log==0.4.0 -contourpy==1.1.0 -cycler==0.11.0 -cytoolz==0.12.2 -dill==0.3.7 +contourpy==1.2.0 +cycler==0.12.1 +cytoolz==0.12.3 +dill==0.3.8 docutils==0.19 -eth-abi==4.2.1 -eth-account==0.9.0 -eth-bloom==2.0.0 -eth-hash==0.5.2 -eth-keyfile==0.6.1 +eth-account==0.11.0 +eth-bloom==3.0.0 +eth-hash==0.6.0 +eth-keyfile==0.7.0 eth-keys==0.4.0 -eth-rlp==0.3.0 +eth-rlp==1.0.1 eth-stdlib==0.2.7 -eth-typing==3.4.0 -eth-utils==2.2.1 -exceptiongroup==1.1.3 +eth-typing==3.5.2 +eth-utils==2.3.1 +eth_abi==5.0.0 +exceptiongroup==1.2.0 execnet==2.0.2 -fonttools==4.42.1 -frozenlist==1.4.0 +fonttools==4.49.0 +frozenlist==1.4.1 hexbytes==0.3.1 -idna==3.4 +idna==3.6 imagesize==1.4.1 -importlib-metadata==6.8.0 +importlib-metadata==7.0.1 iniconfig==2.0.0 -isort==5.12.0 -Jinja2==3.1.2 -jsonschema==4.19.0 -jsonschema-specifications==2023.7.1 +isort==5.13.2 +Jinja2==3.1.3 +jsonschema==4.21.1 +jsonschema-specifications==2023.12.1 kiwisolver==1.4.5 -lazy-object-proxy==1.9.0 +lazy-object-proxy==1.10.0 lru-dict==1.2.0 markdown-it-py==3.0.0 -MarkupSafe==2.1.3 +MarkupSafe==2.1.5 mccabe==0.7.0 mdurl==0.1.2 -multidict==6.0.4 +multidict==6.0.5 mypy-extensions==1.0.0 -packaging==23.1 +packaging==23.2 parsimonious==0.9.0 -pathspec==0.11.2 -Pillow==10.0.0 -platformdirs==3.10.0 -pluggy==1.3.0 -protobuf==4.24.3 +pathspec==0.12.1 +pillow==10.2.0 
+platformdirs==4.2.0 +pluggy==1.4.0 +protobuf==4.25.3 py==1.11.0 py-ecc==6.0.0 -py-evm==0.7.0a4 pycodestyle==2.9.1 -pycryptodome==3.18.0 +pycryptodome==3.20.0 pyethash==0.1.27 pyflakes==2.5.0 -Pygments==2.16.1 +Pygments==2.17.2 pyparsing==3.1.1 python-dateutil==2.8.2 -pytz==2023.3.post1 -pyunormalize==15.0.0 -referencing==0.30.2 -regex==2023.8.8 -rich==13.5.2 +pytz==2024.1 +pyunormalize==15.1.0 +referencing==0.33.0 +regex==2023.12.25 +rich==13.7.0 rlp==3.0.0 -rpds-py==0.10.3 +rpds-py==0.18.0 semantic-version==2.10.0 six==1.16.0 snowballstemmer==2.2.0 sortedcontainers==2.4.0 -sphinxcontrib-applehelp==1.0.7 -sphinxcontrib-devhelp==1.0.5 -sphinxcontrib-htmlhelp==2.0.4 +sphinxcontrib-applehelp==1.0.8 +sphinxcontrib-devhelp==1.0.6 +sphinxcontrib-htmlhelp==2.0.5 sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.6 -sphinxcontrib-serializinghtml==1.1.9 +sphinxcontrib-qthelp==1.0.7 +sphinxcontrib-serializinghtml==1.1.10 tomli==2.0.1 -tomlkit==0.12.1 -toolz==0.12.0 -trie==2.1.1 -typing_extensions==4.7.1 -tzdata==2023.3 -urllib3==2.0.4 -vyper==0.3.9 -websockets==11.0.3 -wrapt==1.15.0 -yarl==1.9.2 -zipp==3.16.2 +tomlkit==0.12.3 +toolz==0.12.1 +trie==2.2.0 +types-pytz==2024.1.0.20240203 +typing_extensions==4.9.0 +tzdata==2024.1 +urllib3==2.2.1 +websockets==12.0 +wrapt==1.16.0 +yarl==1.9.4 +zipp==3.17.0 diff --git a/requirements_dev.txt b/requirements_dev.txt index eed86f369..6dfd33f88 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -16,9 +16,12 @@ pytest-xdist==3.3.1 hypothesis[numpy,pandas]==6.54.6 coverage==7.0.5 +py-evm==0.7.0a4 # this version compatible with boa 0.1.7 +vyper==0.3.9 titanoboa==0.1.7 # 0.1.6 for python versions before 3.10 sphinx==5.3.0 Pallets-Sphinx-Themes==2.0.3 mypy==1.5.1 +pandas-stubs==2.1.4.231227 diff --git a/test/__init__.py b/test/__init__.py index e69de29bb..1267f70ef 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -0,0 +1,3 @@ +__all__ = ["pool_metadata"] + +from . 
import pool_metadata diff --git a/test/ci.py b/test/ci.py index c382f554c..3e83a29a9 100644 --- a/test/ci.py +++ b/test/ci.py @@ -12,6 +12,7 @@ import pandas as pd from curvesim import autosim +from curvesim.templates import DateTimeSequence TEST_PARAMS = {"A": [100, 1000], "fee": [3000000, 4000000]} TEST_CRYPTO_PARAMS = { @@ -25,25 +26,22 @@ # 3CRV { "address": "0xbebc44782c7db0a1a60cb6fe97d0b483032ff1c7", - "end_timestamp": 1638316800, "params": TEST_PARAMS, }, # aCRV - { - "address": "0xdebf20617708857ebe4f679508e7b7863a8a8eee", - "end_timestamp": 1622505600, - "params": TEST_PARAMS, - }, + # Disabled pending fixes to lending pool volume queries + # { + # "address": "0xdebf20617708857ebe4f679508e7b7863a8a8eee", + # "params": TEST_PARAMS, + # }, # # frax3CRV" { "address": "0xd632f22692fac7611d2aa1c0d552930d43caed3b", - "end_timestamp": 1643673600, "params": TEST_PARAMS, }, # triCRV { "address": "0x4ebdf703948ddcea3b11f675b4d1fba9d2414a14", - "end_timestamp": 1692215156, "params": TEST_CRYPTO_PARAMS, }, ] @@ -69,7 +67,10 @@ def main(generate=False, ncpu=None): for pool in pools: pool_address = pool["address"] - end_ts = pool["end_timestamp"] + pool_ts = 1707868800 + time_sequence = DateTimeSequence.from_range( + end=pool_ts * 1e9, freq="1h", periods=1440 + ) params = pool["params"] vol_mult = pool.get("vol_mult", None) env = pool.get("env", "prod") @@ -78,7 +79,8 @@ def main(generate=False, ncpu=None): pool=pool_address, chain="mainnet", **params, - end=end_ts, + time_sequence=time_sequence, + pool_ts=pool_ts, vol_mult=vol_mult, ncpu=ncpu, env=env, diff --git a/test/integration/test_get_asset_data.py b/test/integration/test_get_asset_data.py new file mode 100644 index 000000000..aef47041e --- /dev/null +++ b/test/integration/test_get_asset_data.py @@ -0,0 +1,44 @@ +import os + +from curvesim.pipelines.common import get_asset_data +from curvesim.pool_data.metadata import PoolMetaData +from curvesim.utils import get_pairs + +from ..pool_metadata import 
metapool_test_metadata + + +def test_get_asset_data(): + """ + Tests interworking of SimAssets, TimeSequence, DataSource, and associated functions: + get_asset_data(), get_pool_assets(), and get_price_data() + """ + + pool_metadata = PoolMetaData(metapool_test_metadata) + asset_pairs = get_pairs(pool_metadata.coin_names) + + # Test with default TimeSequence and DataSource + asset_data, time_sequence = get_asset_data(pool_metadata, None, "coingecko") + + assert len(time_sequence) == 1440 # default 60-day sequence + assert all(asset_data.index == time_sequence) + assert all(asset_data["price"].columns == asset_pairs) + assert all(asset_data["volume"].columns == asset_pairs) + + # Save data and test local DataSource + symbols = asset_data.columns.get_level_values("symbol").unique() + for symbol in symbols: + df = asset_data.xs(symbol, level="symbol", axis=1) + df.to_csv("-".join(symbol) + ".csv") + + asset_data_local, time_sequence_local = get_asset_data(pool_metadata, None, "local") + + price_diff = (asset_data["price"] - asset_data_local["price"]).abs() + volume_diff = (asset_data["volume"] - asset_data_local["volume"]).abs() + + assert price_diff.max().max() < 1e-10 + assert volume_diff.max().max() < 1e-3 + assert all([a == b for a, b in zip(time_sequence, time_sequence_local)]) + + # Clean up + for symbol in symbols: + os.remove("-".join(symbol) + ".csv") diff --git a/test/integration/test_get_pool_assets.py b/test/integration/test_get_pool_assets.py new file mode 100644 index 000000000..08c692100 --- /dev/null +++ b/test/integration/test_get_pool_assets.py @@ -0,0 +1,39 @@ +from curvesim.pool_data import get_pool_assets +from curvesim.pool_data.metadata import PoolMetaData +from curvesim.utils import get_pairs + +from ..pool_metadata import ( + cryptopool_test_metadata, + metapool_test_metadata, + pool_test_metadata, + tricrypto_ng_test_metadata, +) + + +def test_get_pool_assets(): + """Test get_pool_assets query.""" + metadata_list = [ + 
cryptopool_test_metadata, + metapool_test_metadata, + pool_test_metadata, + tricrypto_ng_test_metadata, + ] + + for metadata in metadata_list: + pool_metadata = PoolMetaData(metadata) + asset_pairs = get_pairs(pool_metadata.coin_names) + + # Test using metadata + assets1 = get_pool_assets(pool_metadata) + _pairs1 = [(asset.base.symbol, asset.quote.symbol) for asset in assets1] + + assert _pairs1 == asset_pairs + + # Test using address and chain + address = pool_metadata.address + chain = pool_metadata.chain + assets2 = get_pool_assets(address, chain=chain) + _pairs2 = [(asset.base.symbol, asset.quote.symbol) for asset in assets2] + + assert _pairs2 == asset_pairs + assert assets1 == assets2 diff --git a/test/integration/test_pool_metadata.py b/test/integration/test_get_pool_metadata.py similarity index 99% rename from test/integration/test_pool_metadata.py rename to test/integration/test_get_pool_metadata.py index 3f6827242..b8c10de71 100644 --- a/test/integration/test_pool_metadata.py +++ b/test/integration/test_get_pool_metadata.py @@ -1,8 +1,6 @@ -from curvesim.pool_data import get_metadata - from curvesim.pool.sim_interface import SimCurveMetaPool, SimCurvePool from curvesim.pool.stableswap import CurveMetaPool, CurvePool - +from curvesim.pool_data import get_metadata properties = ["chain", "pool_type", "sim_pool_type", "coins", "coin_names", "n"] diff --git a/test/integration/test_get_pool_volume.py b/test/integration/test_get_pool_volume.py index bb8f726bf..f79813efd 100644 --- a/test/integration/test_get_pool_volume.py +++ b/test/integration/test_get_pool_volume.py @@ -2,7 +2,7 @@ from curvesim.pool_data.metadata import PoolMetaData from curvesim.utils import get_pairs -from ..unit.test_pool_metadata import ( +from ..pool_metadata import ( cryptopool_test_metadata, metapool_test_metadata, pool_test_metadata, @@ -23,14 +23,16 @@ def test_get_pool_volume(): pool_metadata = PoolMetaData(metadata) # Test using metadata - volumes1 = get_pool_volume(pool_metadata, 
days=2, end=1698292800) + volumes1 = get_pool_volume(pool_metadata, start=1707696000, end=1707868800) assert len(volumes1) == 2 assert volumes1.columns.to_list() == get_pairs(pool_metadata.coin_names) # Test using address and chain address = pool_metadata.address chain = pool_metadata.chain - volumes2 = get_pool_volume(address, chain=chain, days=2, end=1698292800) + volumes2 = get_pool_volume( + address, chain=chain, start=1707696000, end=1707868800 + ) assert len(volumes2) == 2 assert volumes2.columns.to_list() == get_pairs(pool_metadata.coin_names) diff --git a/test/pool_metadata.py b/test/pool_metadata.py new file mode 100644 index 000000000..a25405b5e --- /dev/null +++ b/test/pool_metadata.py @@ -0,0 +1,194 @@ +import json + +POOL_TEST_METADATA_JSON = """ +{ + "name": "Curve.fi DAI/USDC/USDT", + "address": "0xbEbc44782C7dB0a1A60Cb6fe97d0b483032FF1C7", + "chain": "mainnet", + "symbol": "3Crv", + "version": 1, + "pool_type": "REGISTRY_V1", + "params": {"A": 2000, "fee": 1000000, "fee_mul": null, "admin_fee": 5000000000}, + "coins": { + "names": ["DAI", "USDC", "USDT"], + "addresses": [ + "0x6B175474E89094C44Da98b954EedeAC495271d0F", + "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48", + "0xdAC17F958D2ee523a2206206994597C13D831ec7" + ], + "decimals": [18, 6, 6] + }, + "reserves": { + "by_coin": [ + 171485829393046867353492287, + 175414686134396000000000000, + 88973989934190000000000000 + ], + "unnormalized_by_coin": [ + 171485829393046867353492287, + 175414686134396, + 88973989934190 + ], + "virtual_price": 1025499623208090719 + }, + "basepool": null, + "timestamp": 1677628800 +} +""" + +METAPOOL_TEST_METADATA_JSON = """ +{ + "name": "Curve.fi Factory USD Metapool: GUSDFRAXBP", + "address": "0x4e43151b78b5fbb16298C1161fcbF7531d5F8D93", + "chain":"mainnet", + "symbol": "GUSDFRAXBP3CRV-f", + "version": 1, + "pool_type": "STABLE_FACTORY", + "params": { + "A": 1500, "fee": 4000000, "fee_mul": null, "admin_fee": 5000000000}, + "coins": { + "names": ["GUSD", "crvFRAX"], + 
"addresses": [ + "0x056Fd409E1d7A124BD7017459dFEa2F387b6d5Cd", + "0x3175Df0976dFA876431C2E9eE6Bc45b65d3473CC" + ], + "decimals": [2, 18] + }, + "reserves": { + "by_coin": [4580491420000000000000000, 4584663086890532793313572], + "unnormalized_by_coin": [458049142, 4584663086890532793313572], + "virtual_price": 1002128768748324821 + }, + "basepool": { + "name": "Curve.fi FRAX/USDC", + "address": "0xDcEF968d416a41Cdac0ED8702fAC8128A64241A2", + "chain": "mainnet", + "symbol": "crvFRAX", + "version": 1, + "pool_type": + "REGISTRY_V1", + "params": { + "A": 1500, + "fee": 1000000, + "fee_mul": null, + "admin_fee": 5000000000 + }, + "coins": { + "names": ["FRAX", "USDC"], + "addresses": [ + "0x853d955aCEf822Db058eb8505911ED77F175b99e", + "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48" + ], + "decimals": [18, 6] + }, + "reserves": { + "by_coin": [305660498155854651779818562, 187140798282666000000000000], + "unnormalized_by_coin": [305660498155854651779818562, 187140798282666], + "virtual_price": 1001200369105166674 + }, + "basepool": null, + "timestamp": 1677715200 + }, + "timestamp": 1677715200 +} +""" + +CRYPTOPOOL_TEST_METADATA_JSON = """ +{ + "name": "Curve.fi Factory Crypto Pool: STG/USDC", + "address": "0x3211C6cBeF1429da3D0d58494938299C92Ad5860", + "chain": "mainnet", + "symbol": "STGUSDC-f", + "version": 2, + "pool_type": "CRYPTO_FACTORY", + "params": { + "A": 400000, + "gamma": 72500000000000, + "fee_gamma": 230000000000000, + "mid_fee": 26000000, + "out_fee": 45000000, + "allowed_extra_profit": 2000000000000, + "adjustment_step": 146000000000000, + "ma_half_time": 600, + "price_scale": [1532848669525694314], + "price_oracle": [1629891359676425537], + "last_prices": [1625755383082188296], + "last_prices_timestamp": 1684107935, + "admin_fee": 5000000000, + "xcp_profit": 1073065310463073367, + "xcp_profit_a": 1073065310463073367 + }, + "coins": { + "names": ["STG", "USDC"], + "addresses": [ + "0xAf5191B0De278C7286d6C7CC6ab6BB8A73bA2Cd6", + 
"0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48" + ], + "decimals": [18, 6] + }, + "reserves": { + "by_coin": [11278350350009782994292193, 6837820334873000000000000], + "unnormalized_by_coin": [11278350350009782994292193, 6837820334873], + "virtual_price": 1036543672382221695 + }, + "basepool": null, + "timestamp": 1684108800 +} +""" + +TRICRYPTO_NG_TEST_METADATA_JSON = """ +{ + "name": "TriCRV", + "address": "0x4eBdF703948ddCEA3B11f675B4D1Fba9d2414A14", + "chain": "mainnet", + "symbol": "crvUSDETHCRV", + "version": 2, + "pool_type": "TRICRYPTO_FACTORY", + "params": { + "A": 2700000, + "gamma": 1300000000000, + "fee_gamma": 350000000000000, + "mid_fee": 2999999, + "out_fee": 80000000, + "allowed_extra_profit": 100000000000, + "adjustment_step": 100000000000, + "ma_half_time": 600, + "price_scale": [1649177296373068449425, 446562202678699631], + "price_oracle": [1648041807040538375682, 447066843075586148], + "last_prices": [1645044680220385710284, 446876572801432826], + "last_prices_timestamp": 1694130839, + "admin_fee": 5000000000, + "xcp_profit": 1018853337326661730, + "xcp_profit_a": 1018852684256364084 + }, + "coins": { + "names": ["crvUSD", "WETH", "CRV"], + "addresses": [ + "0xf939E0A03FB07F59A73314E73794Be0E57ac1b4E", + "0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2", + "0xD533a949740bb3306d119CC777fa900bA034cd52" + ], + "decimals": [18, 18, 18] + }, + "reserves": { + "by_coin": [ + 3724679717441585468224357, + 2268620966125133833261, + 8327951931226366295069133 + ], + "unnormalized_by_coin": [ + 3724679717441585468224357, + 2268620966125133833261, + 8327951931226366295069133 + ], + "virtual_price": 1027263450430060608 + }, + "basepool": null, + "timestamp": 1694131200 +} +""" + +pool_test_metadata = json.loads(POOL_TEST_METADATA_JSON) +metapool_test_metadata = json.loads(METAPOOL_TEST_METADATA_JSON) +cryptopool_test_metadata = json.loads(CRYPTOPOOL_TEST_METADATA_JSON) +tricrypto_ng_test_metadata = json.loads(TRICRYPTO_NG_TEST_METADATA_JSON) diff --git 
a/test/simple_ci.py b/test/simple_ci.py index c103813a8..73905bb3c 100644 --- a/test/simple_ci.py +++ b/test/simple_ci.py @@ -11,6 +11,7 @@ import pandas as pd from curvesim.pipelines.simple import pipeline as simple_pipeline +from curvesim.templates import DateTimeSequence TEST_PARAMS = {"A": [100, 1000], "fee": [3000000, 4000000]} TEST_CRYPTO_PARAMS = { @@ -24,25 +25,21 @@ # 3CRV { "address": "0xbebc44782c7db0a1a60cb6fe97d0b483032ff1c7", - "end_timestamp": 1638316800, "params": TEST_PARAMS, }, # aCRV { "address": "0xdebf20617708857ebe4f679508e7b7863a8a8eee", - "end_timestamp": 1622505600, "params": TEST_PARAMS, }, # # frax3CRV" { "address": "0xd632f22692fac7611d2aa1c0d552930d43caed3b", - "end_timestamp": 1643673600, "params": TEST_PARAMS, }, # triCRV { "address": "0x4ebdf703948ddcea3b11f675b4d1fba9d2414a14", - "end_timestamp": 1692215156, "params": TEST_CRYPTO_PARAMS, }, ] @@ -69,15 +66,19 @@ def main(generate=False, ncpu=None): for pool in pools: pool_address = pool["address"] - end_ts = pool["end_timestamp"] + pool_ts = 1707868800 + time_sequence = DateTimeSequence.from_range( + end=pool_ts * 1e9, freq="1h", periods=1440 + ) params = pool["params"] env = pool.get("env", "prod") results = simple_pipeline( - pool_address=pool_address, + pool_address, chain="mainnet", variable_params=params, - end_ts=end_ts, + time_sequence=time_sequence, + pool_ts=pool_ts, ncpu=ncpu, env=env, ) diff --git a/test/unit/test_metrics.py b/test/unit/test_metrics.py index a8916809e..42657db67 100644 --- a/test/unit/test_metrics.py +++ b/test/unit/test_metrics.py @@ -92,7 +92,6 @@ def _test_pricing_metric_class_init(metric, coin_names, prices): """Test attributes and methods specific to PricingMetric (sub)class.""" # Test PricingMetric attributes - assert metric.coin_names == coin_names assert metric.numeraire == coin_names[0] # Test get_market_price diff --git a/test/unit/test_pool_metadata.py b/test/unit/test_pool_metadata.py index bc6cc044d..8a6ea8b9b 100644 --- 
a/test/unit/test_pool_metadata.py +++ b/test/unit/test_pool_metadata.py @@ -1,5 +1,3 @@ -import json - from curvesim.pool.cryptoswap.pool import CurveCryptoPool from curvesim.pool.sim_interface.cryptoswap import SimCurveCryptoPool from curvesim.pool.sim_interface.metapool import SimCurveMetaPool @@ -8,198 +6,12 @@ from curvesim.pool.stableswap.pool import CurvePool from curvesim.pool_data.metadata import PoolMetaData -POOL_TEST_METADATA_JSON = """ -{ - "name": "Curve.fi DAI/USDC/USDT", - "address": "0xbEbc44782C7dB0a1A60Cb6fe97d0b483032FF1C7", - "chain": "mainnet", - "symbol": "3Crv", - "version": 1, - "pool_type": "REGISTRY_V1", - "params": {"A": 2000, "fee": 1000000, "fee_mul": null, "admin_fee": 5000000000}, - "coins": { - "names": ["DAI", "USDC", "USDT"], - "addresses": [ - "0x6B175474E89094C44Da98b954EedeAC495271d0F", - "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48", - "0xdAC17F958D2ee523a2206206994597C13D831ec7" - ], - "decimals": [18, 6, 6] - }, - "reserves": { - "by_coin": [ - 171485829393046867353492287, - 175414686134396000000000000, - 88973989934190000000000000 - ], - "unnormalized_by_coin": [ - 171485829393046867353492287, - 175414686134396, - 88973989934190 - ], - "virtual_price": 1025499623208090719 - }, - "basepool": null, - "timestamp": 1677628800 -} -""" - -METAPOOL_TEST_METADATA_JSON = """ -{ - "name": "Curve.fi Factory USD Metapool: GUSDFRAXBP", - "address": "0x4e43151b78b5fbb16298C1161fcbF7531d5F8D93", - "chain":"mainnet", - "symbol": "GUSDFRAXBP3CRV-f", - "version": 1, - "pool_type": "STABLE_FACTORY", - "params": { - "A": 1500, "fee": 4000000, "fee_mul": null, "admin_fee": 5000000000}, - "coins": { - "names": ["GUSD", "crvFRAX"], - "addresses": [ - "0x056Fd409E1d7A124BD7017459dFEa2F387b6d5Cd", - "0x3175Df0976dFA876431C2E9eE6Bc45b65d3473CC" - ], - "decimals": [2, 18] - }, - "reserves": { - "by_coin": [4580491420000000000000000, 4584663086890532793313572], - "unnormalized_by_coin": [458049142, 4584663086890532793313572], - "virtual_price": 
1002128768748324821 - }, - "basepool": { - "name": "Curve.fi FRAX/USDC", - "address": "0xDcEF968d416a41Cdac0ED8702fAC8128A64241A2", - "chain": "mainnet", - "symbol": "crvFRAX", - "version": 1, - "pool_type": - "REGISTRY_V1", - "params": { - "A": 1500, - "fee": 1000000, - "fee_mul": null, - "admin_fee": 5000000000 - }, - "coins": { - "names": ["FRAX", "USDC"], - "addresses": [ - "0x853d955aCEf822Db058eb8505911ED77F175b99e", - "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48" - ], - "decimals": [18, 6] - }, - "reserves": { - "by_coin": [305660498155854651779818562, 187140798282666000000000000], - "unnormalized_by_coin": [305660498155854651779818562, 187140798282666], - "virtual_price": 1001200369105166674 - }, - "basepool": null, - "timestamp": 1677715200 - }, - "timestamp": 1677715200 -} -""" - -CRYPTOPOOL_TEST_METADATA_JSON = """ -{ - "name": "Curve.fi Factory Crypto Pool: STG/USDC", - "address": "0x3211C6cBeF1429da3D0d58494938299C92Ad5860", - "chain": "mainnet", - "symbol": "STGUSDC-f", - "version": 2, - "pool_type": "CRYPTO_FACTORY", - "params": { - "A": 400000, - "gamma": 72500000000000, - "fee_gamma": 230000000000000, - "mid_fee": 26000000, - "out_fee": 45000000, - "allowed_extra_profit": 2000000000000, - "adjustment_step": 146000000000000, - "ma_half_time": 600, - "price_scale": [1532848669525694314], - "price_oracle": [1629891359676425537], - "last_prices": [1625755383082188296], - "last_prices_timestamp": 1684107935, - "admin_fee": 5000000000, - "xcp_profit": 1073065310463073367, - "xcp_profit_a": 1073065310463073367 - }, - "coins": { - "names": ["STG", "USDC"], - "addresses": [ - "0xAf5191B0De278C7286d6C7CC6ab6BB8A73bA2Cd6", - "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48" - ], - "decimals": [18, 6] - }, - "reserves": { - "by_coin": [11278350350009782994292193, 6837820334873000000000000], - "unnormalized_by_coin": [11278350350009782994292193, 6837820334873], - "virtual_price": 1036543672382221695 - }, - "basepool": null, - "timestamp": 1684108800 -} -""" - 
-TRICRYPTO_NG_TEST_METADATA_JSON = """ -{ - "name": "TriCRV", - "address": "0x4eBdF703948ddCEA3B11f675B4D1Fba9d2414A14", - "chain": "mainnet", - "symbol": "crvUSDETHCRV", - "version": 2, - "pool_type": "TRICRYPTO_FACTORY", - "params": { - "A": 2700000, - "gamma": 1300000000000, - "fee_gamma": 350000000000000, - "mid_fee": 2999999, - "out_fee": 80000000, - "allowed_extra_profit": 100000000000, - "adjustment_step": 100000000000, - "ma_half_time": 600, - "price_scale": [1649177296373068449425, 446562202678699631], - "price_oracle": [1648041807040538375682, 447066843075586148], - "last_prices": [1645044680220385710284, 446876572801432826], - "last_prices_timestamp": 1694130839, - "admin_fee": 5000000000, - "xcp_profit": 1018853337326661730, - "xcp_profit_a": 1018852684256364084 - }, - "coins": { - "names": ["crvUSD", "WETH", "CRV"], - "addresses": [ - "0xf939E0A03FB07F59A73314E73794Be0E57ac1b4E", - "0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2", - "0xD533a949740bb3306d119CC777fa900bA034cd52" - ], - "decimals": [18, 18, 18] - }, - "reserves": { - "by_coin": [ - 3724679717441585468224357, - 2268620966125133833261, - 8327951931226366295069133 - ], - "unnormalized_by_coin": [ - 3724679717441585468224357, - 2268620966125133833261, - 8327951931226366295069133 - ], - "virtual_price": 1027263450430060608 - }, - "basepool": null, - "timestamp": 1694131200 -} -""" - -pool_test_metadata = json.loads(POOL_TEST_METADATA_JSON) -metapool_test_metadata = json.loads(METAPOOL_TEST_METADATA_JSON) -cryptopool_test_metadata = json.loads(CRYPTOPOOL_TEST_METADATA_JSON) -tricrypto_ng_test_metadata = json.loads(TRICRYPTO_NG_TEST_METADATA_JSON) +from ..pool_metadata import ( + cryptopool_test_metadata, + metapool_test_metadata, + pool_test_metadata, + tricrypto_ng_test_metadata, +) def test_pool(): From 9a889c97d82624f2f061e06da324cfdd45bf4541 Mon Sep 17 00:00:00 2001 From: naga Date: Thu, 22 Feb 2024 10:53:11 -0500 Subject: [PATCH 2/2] Fix pandas downcast warning --- 
curvesim/network/curve_prices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/curvesim/network/curve_prices.py b/curvesim/network/curve_prices.py index bf486e076..f598f0617 100644 --- a/curvesim/network/curve_prices.py +++ b/curvesim/network/curve_prices.py @@ -108,7 +108,7 @@ async def get_pool_pair_volume( interval=interval, ) - df = DataFrame(data, columns=["timestamp", "volume", "fees"]) + df = DataFrame(data, columns=["timestamp", "volume", "fees"], dtype="float64") df["timestamp"] = to_datetime(df["timestamp"], unit="s") df.set_index("timestamp", inplace=True) return df