added download_data.py script and normalized symbols
ErikBjare committed Apr 24, 2018
1 parent 5c1def8 commit 911fa8e
Showing 4 changed files with 82 additions and 3 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -8,3 +8,4 @@


data_private/*
tmp/*
67 changes: 67 additions & 0 deletions download_data.py
@@ -0,0 +1,67 @@
import pickle
from pathlib import Path
from datetime import datetime, date

import requests
from bs4 import BeautifulSoup


_coinmarketcap_data_filename = "tmp/{}-coinmarketcap-data.pickle"


def get_data_from_coinmarketcap(currency):
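    # Download the CoinMarketCap historical-data page and pickle the whole Response object under tmp/.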
    r = requests.get(f"https://coinmarketcap.com/currencies/{currency}/historical-data/?start=20140101&end=20180423")
    with open(_coinmarketcap_data_filename.format(currency), "wb") as f:
        pickle.dump(r, f, pickle.HIGHEST_PROTOCOL)


def load_data(currency):
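    # Return the cached Response for this currency, downloading it first on a cache miss.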
    filename = _coinmarketcap_data_filename.format(currency)
    if not Path(filename).exists():
        print(f"Didn't find data file for {currency}, downloading...")
        get_data_from_coinmarketcap(currency)
    with open(filename, "rb") as f:
        return pickle.load(f)


def parse_table(doc):
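    # Parse the first HTML table on the page into {date: row_dict}, where each
    # row_dict maps lowercased column headers to cell text.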
    soup = BeautifulSoup(doc, 'html.parser')
    tables = soup.find_all("table")

    headers = [el.text.lower() for el in tables[0].find_all("th")]
    rows = []

    for row in tables[0].find_all("tr"):
        cells = [el.text for el in row.find_all("td")]
        if len(cells) == len(headers):
            rows.append({k: v for k, v in zip(headers, cells)})
        elif cells:
            print(f"Incomplete row: {cells}")

    d = {datetime.strptime(r["date"], "%b %d, %Y").date(): r for r in rows}
    return d


def test_everything():
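    # Smoke test, runnable via "pytest download_data.py": BTC and ETH tables both parse and differ.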
    data = load_data("bitcoin")
    tablebtc = parse_table(data.text)

    assert all(k in tablebtc[date(2017, 1, 1)] for k in ["open", "high", "low", "close"])

    data = load_data("ethereum")
    tableeth = parse_table(data.text)

    assert tablebtc[date(2017, 1, 1)]["open"] != tableeth[date(2017, 1, 1)]["open"]


if __name__ == "__main__":
    # get_data("bitcoin")
    data = load_data("bitcoin")
    table = parse_table(data.text)

    assert table[date(2017, 1, 1)]["open"]
    assert table[date(2017, 1, 1)]["high"]
    assert table[date(2017, 1, 1)]["low"]
    assert table[date(2017, 1, 1)]["close"]

    # print(data.text)
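For reference, a minimal usage sketch of how these pieces compose, assuming the script is importable as download_data and that the scraped table carries the "open"/"high"/"low"/"close" columns the script's own asserts expect:

    from datetime import date
    from download_data import load_data, parse_table

    response = load_data("bitcoin")     # cached requests.Response from tmp/ (downloads on first run)
    table = parse_table(response.text)  # {date: {"open": ..., "close": ..., ...}}
    print(table[date(2017, 1, 1)]["close"])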
17 changes: 14 additions & 3 deletions main.py
@@ -46,13 +46,24 @@ def _load_incoming_balances() -> List[Dict[str, Any]]:
    return []


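# Map asset codes (including "XXBTC", the previous normalization of XBT) to canonical X-prefixed Kraken symbols.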
symbolmap = {
    "XXBTC": "XXBT",
    "XBT": "XXBT",
    "XXDG": "XXDG",
    "ETH": "XETH",
    "BCH": "XBCH",
    "GNO": "XGNO",
    "EOS": "XEOS",
}


def _format_csv_from_kraken(trades_csv):
    "Format a CSV from a particular source into a canonical data format"
    for trade in trades_csv:
        # Kraken has really weird pair formatting...
        pairlen = int(len(trade["pair"]) / 2)
        trade["pair"] = (trade["pair"][:pairlen], trade["pair"][pairlen:])
-       trade["pair"] = tuple(map(lambda asset: "XXBTC" if asset == "XBT" else asset, trade["pair"]))
+       trade["pair"] = tuple(map(lambda symbol: symbolmap[symbol] if symbol in symbolmap else symbol, trade["pair"]))

        trade["time"] = dateutil.parser.parse(trade["time"])
        trade["price"] = float(trade["price"])
@@ -231,7 +242,7 @@ def _print_trade_header():


def _print_trade(t):
-   print(f"{str(t['pair']).ljust(16)} {t['type'].ljust(5)} {str(round(t['vol'], 3)).ljust(10)} ${t['cost_usd']}")
+   print(f"{' / '.join(t['pair']).ljust(16)} {t['type'].ljust(5)} {str(round(t['vol'], 3)).ljust(10)} ${t['cost_usd']}")


def main():
@@ -247,7 +258,7 @@ def main():
    print("\n# Cost basis per asset")
    _cost_basis_per_asset(trades)

-   for year in range(2017, 2019):
+   for year in range(2015, 2019):
        balances = defaultdict(lambda: 0)  # type: Dict[str, int]
        trades_for_year = _filter_trades_by_time(trades, year)
        _calculate_inout_balances(balances, trades_for_year)
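To illustrate the normalization above: Kraken reports a trade's pair as one concatenated string, which the loop splits in half before mapping each leg through symbolmap (written here with the equivalent dict.get form). A minimal sketch, with an abridged map and illustrative pair strings that are assumptions rather than values from the commit:

    symbolmap = {"XXBTC": "XXBT", "XBT": "XXBT", "ETH": "XETH"}

    def normalize_pair(pair: str) -> tuple:
        # Split the concatenated pair string in half, then canonicalize each leg.
        half = len(pair) // 2
        return tuple(symbolmap.get(leg, leg) for leg in (pair[:half], pair[half:]))

    print(normalize_pair("XBTZUSD"))   # ('XXBT', 'ZUSD'): the odd length splits 3/4
    print(normalize_pair("XXBTZEUR"))  # ('XXBT', 'ZEUR')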
Empty file added tmp/.empty
