From e9e15319b6e804c024fe192b4d6112e94ea4b649 Mon Sep 17 00:00:00 2001 From: phschiele Date: Wed, 18 Oct 2023 00:07:16 +0200 Subject: [PATCH 1/2] Typing --- experiments/backtest.py | 6 +++--- experiments/taming.py | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/experiments/backtest.py b/experiments/backtest.py index 76408ee..d74a712 100644 --- a/experiments/backtest.py +++ b/experiments/backtest.py @@ -59,7 +59,7 @@ def run_backtest(strategy: Callable, risk_target: float, verbose: bool = False) if verbose: print(f"Day {day} of {len(prices)-1}, {prices.index[day]}") - prices_t = prices.iloc[day-lookback:day] # Up to t-1 + prices_t = prices.iloc[day-lookback:day] # Up to t-1 spread_t = spread.iloc[day-lookback:day] volume_t = volume.iloc[day-lookback:day] @@ -117,12 +117,12 @@ class BacktestResult: risk_target: float @property - def valuations(self): + def valuations(self) -> pd.DataFrame: prices = load_data()[0].loc[self.history] return self.quantities * prices @property - def portfolio_value(self): + def portfolio_value(self) -> pd.Series: return self.cash + self.valuations.sum(axis=1) @property diff --git a/experiments/taming.py b/experiments/taming.py index cbf59ea..af59cbb 100644 --- a/experiments/taming.py +++ b/experiments/taming.py @@ -1,5 +1,3 @@ -import dataclasses -import multiprocessing import os import numpy as np import pandas as pd From f0b60e0b9275f995e7c2d3a0e96e5ab41cead487 Mon Sep 17 00:00:00 2001 From: phschiele Date: Wed, 18 Oct 2023 00:11:50 +0200 Subject: [PATCH 2/2] Fix linters --- README.md | 2 +- experiments/backtest.py | 63 +++++++++------ experiments/taming.py | 172 +++++++++++++++++++++++++--------------- requirements.txt | 2 +- 4 files changed, 149 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index facf7b4..a8cb1cf 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Markowitz Reference Implementation This repository accompanies the paper \cite{X}. -It contains a reference implementation of the Markowitz portfolio optimization +It contains a reference implementation of the Markowitz portfolio optimization problem and the data used in the paper. Please note that the tickers of the stocks have been obfuscated to comply with the data provider's terms of use. diff --git a/experiments/backtest.py b/experiments/backtest.py index d74a712..8ea7f73 100644 --- a/experiments/backtest.py +++ b/experiments/backtest.py @@ -1,6 +1,5 @@ from dataclasses import dataclass from functools import lru_cache -import os from pathlib import Path import pickle import sys @@ -10,24 +9,28 @@ # hack to allow importing from parent directory without having a package sys.path.append(str(Path(__file__).parent.parent)) -from markowitz import Data, Parameters + def data_folder(): - return Path(__file__).parent.parent / "data" + return Path(__file__).parent.parent / "data" @lru_cache(maxsize=1) def load_data() -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: prices = pd.read_csv(data_folder() / "prices.csv", index_col=0, parse_dates=True) - spread = pd.read_csv(data_folder() / "spreads.csv", index_col=0, parse_dates=True).fillna(0.10) + spread = pd.read_csv( + data_folder() / "spreads.csv", index_col=0, parse_dates=True + ).fillna(0.10) volume = pd.read_csv(data_folder() / "volumes.csv", index_col=0, parse_dates=True) return prices, spread, volume + @dataclass class OptimizationInput: """ At time t, we have data from t-lookback to t-1. """ + prices: pd.DataFrame spread: pd.DataFrame volume: pd.DataFrame @@ -36,10 +39,12 @@ class OptimizationInput: risk_target: float -def run_backtest(strategy: Callable, risk_target: float, verbose: bool = False) -> tuple[pd.Series, pd.DataFrame]: +def run_backtest( + strategy: Callable, risk_target: float, verbose: bool = False +) -> tuple[pd.Series, pd.DataFrame]: """ Run a simplified backtest for a given strategy. - At time t we use data from t-500 to t-1 to forecast the data and + At time t we use data from t-500 to t-1 to forecast the data and compute the optimal portfolio weights and cash holdings. We then trade to these weights at time t. """ @@ -59,12 +64,13 @@ def run_backtest(strategy: Callable, risk_target: float, verbose: bool = False) if verbose: print(f"Day {day} of {len(prices)-1}, {prices.index[day]}") - prices_t = prices.iloc[day-lookback:day] # Up to t-1 - spread_t = spread.iloc[day-lookback:day] - volume_t = volume.iloc[day-lookback:day] + prices_t = prices.iloc[day - lookback : day] # Up to t-1 + spread_t = spread.iloc[day - lookback : day] + volume_t = volume.iloc[day - lookback : day] - inputs_t = OptimizationInput(prices_t, spread_t, volume_t, - quantities, cash, risk_target) + inputs_t = OptimizationInput( + prices_t, spread_t, volume_t, quantities, cash, risk_target + ) w, _ = strategy(inputs_t) latest_prices = prices.iloc[day] # At t @@ -77,9 +83,11 @@ def run_backtest(strategy: Callable, risk_target: float, verbose: bool = False) post_trade_cash.append(cash) post_trade_quantities.append(quantities) - + post_trade_cash = pd.Series(post_trade_cash, index=prices.index[lookback:]) - post_trade_quantities = pd.DataFrame(post_trade_quantities, index=prices.index[lookback:], columns=prices.columns) + post_trade_quantities = pd.DataFrame( + post_trade_quantities, index=prices.index[lookback:], columns=prices.columns + ) return BacktestResult(post_trade_cash, post_trade_quantities, risk_target) @@ -103,8 +111,9 @@ def execute_orders(latest_prices, trade_quantities, latest_spread) -> float: sell_receipt = -sell_order_quantities @ sell_order_prices buy_payment = buy_order_quantities @ buy_order_prices - return sell_receipt - buy_payment - + return sell_receipt - buy_payment + + def interest_and_fees(cash, quantities) -> float: # TODO: add risk free rate, borrow rate, shorting fees return 0 @@ -120,11 +129,11 @@ class BacktestResult: def valuations(self) -> pd.DataFrame: prices = load_data()[0].loc[self.history] return self.quantities * prices - + @property def portfolio_value(self) -> pd.Series: return self.cash + self.valuations.sum(axis=1) - + @property def portfolio_returns(self): return self.portfolio_value.pct_change().dropna() @@ -136,23 +145,25 @@ def periods_per_year(self): @property def history(self): return self.cash.index - + @property def cash_weight(self): return self.cash / self.portfolio_value - + @property def asset_weights(self): return self.valuations.div(self.portfolio_value, axis=0) @property def turnover(self) -> float: - return self.asset_weights.diff().abs().sum(axis=1).mean() * self.periods_per_year - + return ( + self.asset_weights.diff().abs().sum(axis=1).mean() * self.periods_per_year + ) + @property def mean_return(self) -> float: return self.portfolio_returns.mean() * self.periods_per_year - + @property def volatility(self) -> float: return self.portfolio_returns.std() * np.sqrt(self.periods_per_year) @@ -160,15 +171,15 @@ def volatility(self) -> float: @property def max_drawdown(self) -> float: return self.portfolio_value.div(self.portfolio_value.cummax()).sub(1).min() - + @property def max_leverage(self) -> float: return self.asset_weights.abs().sum(axis=1).max() @property def sharpe(self) -> float: - return self.mean_return / self.volatility # TODO: risk free rate - + return self.mean_return / self.volatility # TODO: risk free rate + def save(self, path: Path): with open(path, "wb") as f: pickle.dump(self, f, pickle.HIGHEST_PROTOCOL) @@ -184,4 +195,4 @@ def load(path: Path) -> "BacktestResult": n_assets = load_data()[0].shape[1] w_targets = np.ones(n_assets) / (n_assets + 1) c_target = 1 / (n_assets + 1) - run_backtest(lambda _inputs: (w_targets, c_target), risk_target=0.0, verbose=True) \ No newline at end of file + run_backtest(lambda _inputs: (w_targets, c_target), risk_target=0.0, verbose=True) diff --git a/experiments/taming.py b/experiments/taming.py index af59cbb..43cc7bc 100644 --- a/experiments/taming.py +++ b/experiments/taming.py @@ -2,7 +2,7 @@ import numpy as np import pandas as pd import cvxpy as cp -from backtest import BacktestResult, OptimizationInput, run_backtest, load_data +from backtest import BacktestResult, OptimizationInput, run_backtest from markowitz import Data, Parameters import matplotlib.pyplot as plt @@ -12,35 +12,37 @@ def unconstrained_markowitz(inputs: OptimizationInput) -> np.ndarray: n_assets = inputs.prices.shape[1] mu, Sigma = ewma_mean_covariance(inputs.prices) - w = cp.Variable(n_assets) + w = cp.Variable(n_assets) c = cp.Variable() objective = mu @ w constraints = [ - cp.sum(w) + c == 1, - cp.quad_form(w, Sigma, assume_PSD=True) <= inputs.risk_target ** 2 + cp.sum(w) + c == 1, + cp.quad_form(w, Sigma, assume_PSD=True) <= inputs.risk_target**2, ] problem = cp.Problem(cp.Maximize(objective), constraints) problem.solve(get_solver()) assert problem.status in {cp.OPTIMAL, cp.OPTIMAL_INACCURATE} return w.value, c.value + def long_only_markowitz(inputs: OptimizationInput) -> np.ndarray: """Compute the long-only Markowitz portfolio weights.""" n_assets = inputs.prices.shape[1] mu, Sigma = ewma_mean_covariance(inputs.prices) - w = cp.Variable(n_assets, nonneg=True) + w = cp.Variable(n_assets, nonneg=True) c = cp.Variable(nonneg=True) objective = mu @ w constraints = [ - cp.sum(w) + c == 1, - cp.quad_form(w, Sigma, assume_PSD=True) <= inputs.risk_target ** 2 + cp.sum(w) + c == 1, + cp.quad_form(w, Sigma, assume_PSD=True) <= inputs.risk_target**2, ] problem = cp.Problem(cp.Maximize(objective), constraints) problem.solve(get_solver()) assert problem.status in {cp.OPTIMAL, cp.OPTIMAL_INACCURATE} return w.value, c.value + def equal_weights(inputs: OptimizationInput) -> np.ndarray: """Compute the equal weights portfolio.""" n_assets = inputs.prices.shape[1] @@ -48,7 +50,10 @@ def equal_weights(inputs: OptimizationInput) -> np.ndarray: c = 1 / (n_assets + 1) return w, c -def ewma_mean_covariance(prices: pd.DataFrame, lamb: float = 0.94) -> tuple[np.ndarray, np.ndarray]: + +def ewma_mean_covariance( + prices: pd.DataFrame, lamb: float = 0.94 +) -> tuple[np.ndarray, np.ndarray]: returns = prices.pct_change().dropna() n_assets = returns.shape[1] alpha = 1 - lamb @@ -58,8 +63,12 @@ def ewma_mean_covariance(prices: pd.DataFrame, lamb: float = 0.94) -> tuple[np.n def prepare_data( - prices: pd.DataFrame, spread: pd.DataFrame, volume: pd.DataFrame, quantities: np.ndarray, cash: float - ) -> Data: + prices: pd.DataFrame, + spread: pd.DataFrame, + volume: pd.DataFrame, + quantities: np.ndarray, + cash: float, +) -> Data: n_assets = prices.shape[1] latest_prices = prices.iloc[-1] portfolio_value = cash + quantities @ latest_prices @@ -67,50 +76,51 @@ def prepare_data( mu, Sigma = ewma_mean_covariance(prices) return Data( - w_prev = quantities * latest_prices / portfolio_value, - c_prev = cash / portfolio_value, - idio_mean = mu, - factor_mean = np.zeros(n_assets), - risk_free = 0, - factor_covariance_chol = np.linalg.cholesky(Sigma), - idio_volas = np.sqrt(np.diag(Sigma)), - F = np.eye(n_assets), - kappa_short = np.zeros(n_assets), - kappa_borrow = 0.0, - kappa_spread = np.zeros(n_assets), - kappa_impact = np.zeros(n_assets), + w_prev=quantities * latest_prices / portfolio_value, + c_prev=cash / portfolio_value, + idio_mean=mu, + factor_mean=np.zeros(n_assets), + risk_free=0, + factor_covariance_chol=np.linalg.cholesky(Sigma), + idio_volas=np.sqrt(np.diag(Sigma)), + F=np.eye(n_assets), + kappa_short=np.zeros(n_assets), + kappa_borrow=0.0, + kappa_spread=np.zeros(n_assets), + kappa_impact=np.zeros(n_assets), ) def get_parameters(data, risk_target): return Parameters( - w_lower = np.zeros(data.n_assets), - w_upper = np.ones(data.n_assets), - c_lower = 0.0, - c_upper = 1.0, - z_lower = -np.ones(data.n_assets), - z_upper = np.ones(data.n_assets), - T_max = 0.1, - L_max = 1.5, - rho_mean = np.zeros(data.n_assets), - rho_covariance = 0.0, - gamma_hold = 0.0, - gamma_trade = 0.0, - gamma_turn = 0.0, - gamma_risk = 0.0, - risk_target = risk_target, + w_lower=np.zeros(data.n_assets), + w_upper=np.ones(data.n_assets), + c_lower=0.0, + c_upper=1.0, + z_lower=-np.ones(data.n_assets), + z_upper=np.ones(data.n_assets), + T_max=0.1, + L_max=1.5, + rho_mean=np.zeros(data.n_assets), + rho_covariance=0.0, + gamma_hold=0.0, + gamma_trade=0.0, + gamma_turn=0.0, + gamma_risk=0.0, + risk_target=risk_target, ) def main(from_checkpoint: bool = False): - if from_checkpoint: unconstrained_results = [] - for f in [f for f in os.listdir("checkpoints") if f.startswith("unconstrained")]: + for f in [ + f for f in os.listdir("checkpoints") if f.startswith("unconstrained") + ]: unconstrained_results.append(BacktestResult.load(f"checkpoints/{f}")) equal_weights_results = BacktestResult.load("checkpoints/equal_weights.pickle") else: - equal_weights_results = run_backtest(equal_weights, 0.0, verbose=True) + equal_weights_results = run_backtest(equal_weights, 0.0, verbose=True) equal_weights_results.save("checkpoints/equal_weights.pickle") adjustment_factor = np.sqrt(equal_weights_results.periods_per_year) @@ -118,64 +128,101 @@ def main(from_checkpoint: bool = False): unconstrained_results = [] for sigma_target in sigma_targets: result = run_backtest(unconstrained_markowitz, sigma_target, verbose=True) - result.save(f"checkpoints/unconstrained_{result.risk_target * adjustment_factor:.2f}.pickle") + result.save( + f"checkpoints/unconstrained_{result.risk_target * adjustment_factor:.2f}.pickle" + ) unconstrained_results.append(result) long_only_results = [] for sigma_target in sigma_targets: result = run_backtest(long_only_markowitz, sigma_target, verbose=True) - result.save(f"checkpoints/long_only_{result.risk_target * adjustment_factor:.2f}.pickle") + result.save( + f"checkpoints/long_only_{result.risk_target * adjustment_factor:.2f}.pickle" + ) long_only_results.append(result) generate_table(equal_weights_results, unconstrained_results, long_only_results) plot_results(equal_weights_results, unconstrained_results, long_only_results) + def generate_table( - equal_weights_results: BacktestResult, - unconstrained_results: list[BacktestResult], - long_only_results: list[BacktestResult], - ) -> None: + equal_weights_results: BacktestResult, + unconstrained_results: list[BacktestResult], + long_only_results: list[BacktestResult], +) -> None: # Table 1 df = pd.DataFrame( - index=["Equal weights"] + [f"$\sigma^\\text{{tar}} = {result.risk_target:.2f}$" for result in unconstrained_results], + index=["Equal weights"] + + [ + f"$\\sigma^\\text{{tar}} = {result.risk_target:.2f}$" + for result in unconstrained_results + ], columns=["Mean return", "Volatility", "Sharpe", "Turnover", "Max leverage"], ) - df["Mean return"] = [equal_weights_results.mean_return] + [result.mean_return for result in unconstrained_results] - df["Volatility"] = [equal_weights_results.volatility] + [result.volatility for result in unconstrained_results] - df["Sharpe"] = [equal_weights_results.sharpe] + [result.sharpe for result in unconstrained_results] - df["Turnover"] = [equal_weights_results.turnover] + [result.turnover for result in unconstrained_results] - df["Max leverage"] = [equal_weights_results.asset_weights.abs().sum(axis=1).max()] + [result.asset_weights.abs().sum(axis=1).max() for result in unconstrained_results] + df["Mean return"] = [equal_weights_results.mean_return] + [ + result.mean_return for result in unconstrained_results + ] + df["Volatility"] = [equal_weights_results.volatility] + [ + result.volatility for result in unconstrained_results + ] + df["Sharpe"] = [equal_weights_results.sharpe] + [ + result.sharpe for result in unconstrained_results + ] + df["Turnover"] = [equal_weights_results.turnover] + [ + result.turnover for result in unconstrained_results + ] + df["Max leverage"] = [ + equal_weights_results.asset_weights.abs().sum(axis=1).max() + ] + [ + result.asset_weights.abs().sum(axis=1).max() for result in unconstrained_results + ] print(df.to_latex(float_format="%.2f")) # Table 2 df = pd.DataFrame( - index=[f"$\sigma^\\text{{tar}} = {result.risk_target:.2f}$" for result in long_only_results], + index=[ + f"$\\sigma^\\text{{tar}} = {result.risk_target:.2f}$" + for result in long_only_results + ], columns=["Mean return", "Volatility", "Sharpe", "Turnover", "Max leverage"], ) df["Mean return"] = [result.mean_return for result in long_only_results] df["Volatility"] = [result.volatility for result in long_only_results] df["Sharpe"] = [result.sharpe for result in long_only_results] df["Turnover"] = [result.turnover for result in long_only_results] - df["Max leverage"] = [result.asset_weights.abs().sum(axis=1).max() for result in long_only_results] + df["Max leverage"] = [ + result.asset_weights.abs().sum(axis=1).max() for result in long_only_results + ] print(df.to_latex(float_format="%.2f")) def plot_results( - equal_weights_results: BacktestResult, - unconstrained_results: list[BacktestResult], - long_only_results: list[BacktestResult], - ) -> None: - + equal_weights_results: BacktestResult, + unconstrained_results: list[BacktestResult], + long_only_results: list[BacktestResult], +) -> None: # E-V plot plt.figure() - + # Single star for equal weights - plt.scatter(equal_weights_results.volatility, equal_weights_results.mean_return, marker="*", s=200, c="r", label="Equal weights") + plt.scatter( + equal_weights_results.volatility, + equal_weights_results.mean_return, + marker="*", + s=200, + c="r", + label="Equal weights", + ) # Circle for unconstrained Markowitz frontier as line with circles unconstrained_volatility = [result.volatility for result in unconstrained_results] unconstrained_mean_return = [result.mean_return for result in unconstrained_results] - plt.plot(unconstrained_volatility, unconstrained_mean_return, "o-", label="Unconstrained Markowitz") + plt.plot( + unconstrained_volatility, + unconstrained_mean_return, + "o-", + label="Unconstrained Markowitz", + ) plt.xlabel("Volatility") plt.ylabel("Mean return") @@ -186,5 +233,6 @@ def plot_results( def get_solver(): return cp.MOSEK if cp.MOSEK in cp.installed_solvers() else cp.CLARABEL + if __name__ == "__main__": main() diff --git a/requirements.txt b/requirements.txt index 27e14a4..ff3c8a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ cvxpy numpy pandas -matplotlib \ No newline at end of file +matplotlib