18 synthetic returns (#24)

* typo * synthetic returns * removed playground * merged long-only and unconstrained * remove uncommented function * explained return predictions model * Small updates --------- Co-authored-by: phschiele <[email protected]>
cvxgrp · Oct 24, 2023 · e1b0462 · e1b0462
1 parent 8f86b71
commit e1b0462
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 18 deletions.
diff --git a/experiments/backtest.py b/experiments/backtest.py
@@ -46,9 +46,8 @@ def run_backtest(
 ) -> tuple[pd.Series, pd.DataFrame]:
     """
     Run a simplified backtest for a given strategy.
-    At time t we use data from t-500 to t-1 to forecast the data and
-    compute the optimal portfolio weights and cash holdings.
-    We then trade to these weights at time t.
+    At time t we use data from t-lookback to t to compute the optimal portfolio
+    weights and then execute the trades at time t.
     """
 
     prices, spread, volume, rf = load_data()
@@ -62,25 +61,28 @@ def run_backtest(
     post_trade_cash = []
     post_trade_quantities = []
 
-    returns = synthetic_returns(prices).dropna()
-    means = returns.ewm(halflife=125).mean()
-    covariance_df = returns.ewm(halflife=125).cov()
+    returns = prices.pct_change().dropna()
+    means = (
+        synthetic_returns(prices).shift(-1).dropna()
+    )  # At time t includes data up to t+1
+    covariance_df = returns.ewm(halflife=125).cov()  # At time t includes data up to t
     days = returns.index
     covariances = {}
     for day in days:
         covariances[day] = covariance_df.loc[day]
 
-    for t in range(lookback, len(prices)):
+    for t in range(lookback, len(prices) - 1):
         day = prices.index[t]
 
         if verbose:
             print(f"Day {t} of {len(prices)-1}, {day}")
 
-        prices_t = prices.iloc[t - lookback : t]  # Up to t-1
-        spread_t = spread.iloc[t - lookback : t]
-        volume_t = volume.iloc[t - lookback : t]
-        mean_t = means.loc[day]
-        covariance_t = covariances[day]
+        prices_t = prices.iloc[t - lookback : t + 1]  # Up to t
+        spread_t = spread.iloc[t - lookback : t + 1]
+        volume_t = volume.iloc[t - lookback : t + 1]
+
+        mean_t = means.loc[day]  # Forecast for return t to t+1
+        covariance_t = covariances[day]  # Forecast for covariance t to t+1
 
         inputs_t = OptimizationInput(
             prices_t,
@@ -108,9 +110,9 @@ def run_backtest(
         post_trade_cash.append(cash)
         post_trade_quantities.append(quantities)
 
-    post_trade_cash = pd.Series(post_trade_cash, index=prices.index[lookback:])
+    post_trade_cash = pd.Series(post_trade_cash, index=prices.index[lookback:-1])
     post_trade_quantities = pd.DataFrame(
-        post_trade_quantities, index=prices.index[lookback:], columns=prices.columns
+        post_trade_quantities, index=prices.index[lookback:-1], columns=prices.columns
     )
     return BacktestResult(post_trade_cash, post_trade_quantities, risk_target)
 
@@ -236,4 +238,13 @@ def load(path: Path) -> "BacktestResult":
     n_assets = load_data()[0].shape[1]
     w_targets = np.ones(n_assets) / (n_assets + 1)
     c_target = 1 / (n_assets + 1)
-    run_backtest(lambda _inputs: (w_targets, c_target), risk_target=0.0, verbose=True)
+    result = run_backtest(
+        lambda _inputs: (w_targets, c_target), risk_target=0.0, verbose=True
+    )
+    print(
+        f"Mean return: {result.mean_return:.2%},\n"
+        f"Volatility: {result.volatility:.2%},\n"
+        f"Sharpe: {result.sharpe:.2f},\n"
+        f"Turnover: {result.turnover:.2f},\n"
+        f"Max leverage: {result.max_leverage:.2f}"
+    )
diff --git a/experiments/utils.py b/experiments/utils.py
@@ -1,8 +1,28 @@
 import numpy as np
+import pandas as pd
 
 
-def synthetic_returns(prices, sigma_r=0.02236, sigma_eps=0.14142):
+def synthetic_returns(
+    prices: pd.DataFrame, var_r: float = 0.0005, var_eps: float = 0.02
+) -> pd.DataFrame:
+    """
+    param prices: a DataFrame of prices
+    param var_r: the Gaussian variance of the returns
+    param var_eps: the Gaussian variance of the noise term
+
+    returns: a DataFrame of "synthetic return predictions" computed as
+    alpha*(returns+noise), where alpha=var_r / (var_r + var_eps); this is the
+    coefficient that minimizes the variance of the prediction error under the
+    above model.
+
+    var_r = 0.0005 and var_eps = 0.02 correspond to an information ratio
+    sqrt(alpha) of about 0.16.
+    """
     returns = prices.pct_change()
 
-    alpha = sigma_r**2 / (sigma_r**2 + sigma_eps**2)
-    return alpha * (returns + np.random.normal(size=returns.shape) * sigma_eps)
+    alpha = var_r / (var_r + var_eps)
+    sigma_eps = np.sqrt(var_eps)
+    synthetic_returns = alpha * (
+        returns + np.random.normal(size=returns.shape) * sigma_eps
+    )
+    return synthetic_returns