Skip to content

Commit

Permalink
add Methods FileIO.append
Browse files Browse the repository at this point in the history
  • Loading branch information
bh2smith committed Nov 18, 2022
1 parent 9db50bb commit 0794cc3
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ dist
_version.py
.idea/
venv/
tmp/
87 changes: 81 additions & 6 deletions dune_client/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import logging
import os.path
from enum import Enum
from os.path import exists
from pathlib import Path
from typing import TextIO
from typing import TextIO, Callable, List, Tuple

# ndjson missing types: https://github.com/rhgrant10/ndjson/issues/10
import ndjson # type: ignore
Expand Down Expand Up @@ -60,14 +61,17 @@ def load(self, file: TextIO) -> list[DuneRecord]:
return list(ndjson.reader(file))
raise ValueError(f"Unrecognized FileType {self} for {file.name}")

def write(self, out_file: TextIO, data: list[DuneRecord]) -> None:
def write(
self, out_file: TextIO, data: list[DuneRecord], skip_headers: bool = False
) -> None:
"""Writes `data` to `out_file`"""
logger.debug(f"writing results to file {out_file.name}")
if self == FileType.CSV:
headers = data[0].keys()
data_tuple = [tuple(rec.values()) for rec in data]
dict_writer = csv.DictWriter(out_file, headers, lineterminator="\n")
dict_writer.writeheader()
if not skip_headers:
dict_writer.writeheader()
writer = csv.writer(out_file, lineterminator="\n")
writer.writerows(data_tuple)

Expand All @@ -82,6 +86,18 @@ def write(self, out_file: TextIO, data: list[DuneRecord]) -> None:
raise ValueError(f"Unrecognized FileType {self} for {out_file.name}")


# def skip_empty(func):
# """Decorator used on write methods"""
#
# def write_wrapper(self, data, name, ftype):
# if len(data) == 0:
# logger.info(f"Nothing to write to {name}... skipping")
# return
# func(self, data, name, ftype)
#
# return write_wrapper


class FileIO:
"""
CSV is a more compact file type,
Expand All @@ -106,13 +122,69 @@ def _filepath(self, name: str, ftype: FileType) -> str:
"""Internal method for building absolute path."""
return os.path.join(self.path, name + str(ftype))

def _write(self, data: list[DuneRecord], name: str, ftype: FileType) -> None:
# TODO - use @skip_empty decorator here. Couldn't get the types to work.
def _write(self, data: List[DuneRecord], name: str, ftype: FileType) -> None:
# The following three lines are duplicated in _append, due to python version compatibility
# https://github.com/cowprotocol/dune-client/issues/37#issuecomment-1319901120
# We will continue to support python < 3.10 until ~3.13, this issue will remain open.
if len(data) == 0:
logger.info(f"Nothing to write to {name}... skipping")
return
return None
with open(self._filepath(name, ftype), "w", encoding=self.encoding) as out_file:
ftype.write(out_file, data)
return None

def _assert_matching_keys(
self, keys: Tuple[str, ...], fname: str, ftype: FileType
) -> None:
with open(fname, "r", encoding=self.encoding) as file:
if ftype == FileType.CSV:
# Check matching headers.
headers = file.readline()
# The skip empty decorator ensures existence of data[0]
existing_keys = tuple(headers.strip().split(","))
elif ftype == FileType.JSON:
single_object = json.loads(file.readline())[0]
existing_keys = tuple(single_object.keys())
elif ftype == FileType.NDJSON:
single_object = json.loads(file.readline())
existing_keys = tuple(single_object.keys())
assert keys == existing_keys, f"{keys} != {existing_keys}"

def _append(self, data: List[DuneRecord], name: str, ftype: FileType) -> None:
if len(data) == 0:
logger.info(f"Nothing to write to {name}... skipping")
return None
fname = self._filepath(name, ftype)
if not exists(fname):
logger.warning(
f"File {fname} does not exist, using write instead of append!"
)
return self._write(data, name, ftype)

# validate that the incoming content to be appended has the same schema
# The skip empty decorator ensures existence of data[0]
self._assert_matching_keys(tuple(data[0].keys()), fname, ftype)
if ftype == FileType.JSON:
with open(fname, "r", encoding=self.encoding) as existing_file:
existing_data = ftype.load(existing_file)
return self._write(existing_data + data, name, ftype)

with open(fname, "a+", encoding=self.encoding) as out_file:
return ftype.write(out_file, data, skip_headers=True)

def append_csv(self, data: list[DuneRecord], name: str) -> None:
"""Appends `data` to csv file `name`"""
# This is a special case because we want to skip headers when the file already exists
# Additionally, we may want to validate that the headers actually coincide.
self._append(data, name, FileType.CSV)

def append_json(self, data: list[DuneRecord], name: str) -> None:
"""Appends `data` to json file `name`"""
self._append(data, name, FileType.JSON)

def append_ndjson(self, data: list[DuneRecord], name: str) -> None:
"""Appends `data` to ndjson file `name`"""
self._append(data, name, FileType.NDJSON)

def write_csv(self, data: list[DuneRecord], name: str) -> None:
"""Writes `data` to csv file `name`"""
Expand Down Expand Up @@ -154,3 +226,6 @@ def load_singleton(
) -> DuneRecord:
"""Loads and returns single entry by index (default 0)"""
return self._load(name, self._parse_ftype(ftype))[index]


WriteLikeSignature = Callable[[FileIO, List[DuneRecord], str, FileType], None]
14 changes: 13 additions & 1 deletion tests/unit/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def cleanup():


def cleanup_files(func):
"""This decorator can be used for testing methods outside of this class"""
"""This decorator can be used for testing methods outside this class"""

def wrapped_func(self):
func(self)
Expand Down Expand Up @@ -48,6 +48,18 @@ def test_invertible_write_and_load(self):
f"Assert invertible failed on {ftype}",
)

def test_append(self):
for ftype in FileType:
self.file_manager._write(self.dune_records, TEST_FILE, ftype)
self.file_manager._append(self.dune_records, TEST_FILE, ftype)
loaded_records = self.file_manager._load(TEST_FILE, ftype)
expected = self.dune_records + self.dune_records
self.assertEqual(
expected,
loaded_records,
f"test_append failed on {ftype}",
)

def test_load_singleton(self):
for file_type in FileType:
self.file_manager._write(self.dune_records, TEST_FILE, file_type)
Expand Down

0 comments on commit 0794cc3

Please sign in to comment.