Skip to content

Commit

Permalink
feat: stream (#3)
Browse files Browse the repository at this point in the history
* feat: add cut

* feat: from stream

* test: datetime

* doc: performance implications

* docs: update readme
  • Loading branch information
bdura authored Apr 2, 2024
1 parent 2e85824 commit 2297f5d
Show file tree
Hide file tree
Showing 7 changed files with 186 additions and 6 deletions.
49 changes: 49 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,52 @@ For those coming from Parsy, here are some notable differences:
- `Err` is its own error: it inherits from `Exception` and can be raised.
- Persil introduces the `Stream` class, a wrapper around the input that applies parsers sequentially
  and takes care of the index book-keeping for you.

## Performance tips

Since Persil takes a functional approach, every transformation on a parser produces a new parser.
With that in mind, the way you define/use/combine parsers may substantially affect performance.

Consider the following example:

```python
from datetime import datetime

from persil import Stream, from_stream, regex, string


@from_stream
def datetime_parser(stream: Stream[str]) -> datetime:
year = stream.apply(regex(r"\d{4}").map(int))
stream.apply(string("/"))
month = stream.apply(regex(r"\d{2}").map(int))
stream.apply(string("/"))
day = stream.apply(regex(r"\d{2}").map(int))
return datetime(year, month, day)
```

The resulting `datetime_parser` will build three regex parsers (plus their `.map` wrappers and two `string` parsers) **every time** it is run.

A much better alternative:

```python
from datetime import datetime

from persil import Stream, from_stream, regex, string


year_parser = regex(r"\d{4}").map(int)
day_month_parser = regex(r"\d{2}").map(int)
slash_parser = string("/")

@from_stream
def datetime_parser(stream: Stream[str]) -> datetime:
year = stream.apply(year_parser)
stream.apply(slash_parser)
month = stream.apply(day_month_parser)
stream.apply(slash_parser)
day = stream.apply(day_month_parser)
return datetime(year, month, day)
```

That way, the parsers are built only once, when the module is loaded, and are reused on every run of `datetime_parser`.
3 changes: 3 additions & 0 deletions persil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
tag,
whitespace,
)
from .stream import Stream, from_stream

__all__ = [
"Parser",
Expand All @@ -24,4 +25,6 @@
"tag",
"whitespace",
"string",
"Stream",
"from_stream",
]
12 changes: 12 additions & 0 deletions persil/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ def __init__(
def __call__(self, stream: Input, index: int) -> Result[Output]:
return self.wrapped_fn(stream, index)

def cut(self) -> "Parser[Input, Output]":
    """
    Build a parser that commits to the current branch.

    The returned parser runs this parser and calls ``ok_or_raise`` on
    its result, so an error result is raised instead of being returned.
    """

    @Parser
    def cut_parser(stream: Input, index: int) -> Result[Output]:
        # Delegate to the wrapped parser, then escalate an error result.
        return self(stream, index).ok_or_raise()

    return cut_parser

def then(self, other: "Parser[In, T]") -> "Parser[Input, T]":
"""
Returns a parser which, if the initial parser succeeds, will
Expand Down
11 changes: 5 additions & 6 deletions persil/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,13 @@ class Ok(Generic[T]):
value: T
index: int

def cut(self):
pass
def ok_or_raise(self) -> "Ok[T]":
    """Return this successful result unchanged; there is nothing to raise."""
    return self

def map(self, map_function: Callable[[T], T2]) -> "Ok[T2]":
    """Return a new ``Ok`` holding ``map_function(value)`` at the same index."""
    mapped = map_function(self.value)
    return Ok(value=mapped, index=self.index)

def aggregate(self, other: "Result[T]") -> "Result[T]":
return self


@dataclass
class Err(Exception):
Expand All @@ -36,7 +34,8 @@ def __str__(self) -> str:
else:
return f"expected one of {', '.join(self.expected)} at {li}"

def cut(self):
def ok_or_raise(self):
    """Raise this error instead of handing it back as a result."""
    raise self

def map(self, map_function: Callable) -> "Err":
Expand Down
61 changes: 61 additions & 0 deletions persil/stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from functools import wraps
from typing import Callable, Generic, Sequence, TypeVar, overload

from .parser import Parser
from .result import Err, Ok, Result

In = TypeVar("In", bound=Sequence)
Out = TypeVar("Out")


class SoftError(Exception):
    """
    Carrier exception used to unwind a stream-based parser on failure.

    It wraps the error result returned by a parser so that the code
    catching the exception can read it back from ``inner``.

    Args:
        inner: the error result that caused the failure.
    """

    # Quoted so that the annotation is never evaluated at import time.
    inner: "Err"

    def __init__(self, inner: "Err") -> None:
        # A bare class-level annotation does not create an attribute, so
        # the wrapped error must be stored explicitly; otherwise reading
        # ``.inner`` on a caught exception raises AttributeError.
        super().__init__(inner)
        self.inner = inner


class Stream(Generic[In]):
    """
    Cursor over an input sequence.

    Holds the input (``inner``) and the current position (``index``).
    ``apply`` runs a parser at that position and, on success, adopts the
    index reported by the parser's result.
    """

    inner: In
    index: int

    def __init__(self, inner: In, index: int = 0):
        self.index = index
        self.inner = inner

    def apply(self, parser: Parser[In, Out]) -> Out:
        """
        Run ``parser`` at the current position and return its value.

        Raises:
            SoftError: if the parser returns an ``Err``; the stored index
                is left untouched in that case.
        """
        outcome = parser(self.inner, self.index)

        if not isinstance(outcome, Err):
            # Success: continue from where the parser's result says it stopped.
            self.index = outcome.index
            return outcome.value

        raise SoftError(outcome)


def _from_stream(func: Callable[[Stream[In]], Out]) -> Parser[In, Out]:
    """
    Turn a function that consumes a ``Stream`` into a ``Parser``.

    The produced parser creates a fresh ``Stream`` at the requested index,
    calls ``func`` with it, and wraps the return value in ``Ok`` together
    with the stream's final index. A ``SoftError`` raised while ``func``
    runs is caught and its ``inner`` value is returned as the result.
    """

    @Parser
    @wraps(func)
    def stream_parser(stream: In, index: int) -> Result[Out]:
        cursor = Stream(inner=stream, index=index)
        try:
            value = func(cursor)
        except SoftError as soft:
            return soft.inner
        else:
            return Ok(value, cursor.index)

    return stream_parser


@overload
def from_stream(func: Callable[[Stream[In]], Out]) -> Parser[In, Out]: ...
@overload
def from_stream(
    func: str,
) -> Callable[[Callable[[Stream[In]], Out]], Parser[In, Out]]: ...


def from_stream(func: str | Callable[[Stream[In]], Out]):
    """
    Decorator that builds a parser from a stream-consuming function.

    Two call styles are supported:

    - ``@from_stream``: ``func`` is the function itself and the parser
      is returned directly.
    - ``@from_stream("description")``: ``func`` is a string; a decorator
      is returned that builds the parser and applies ``.desc`` with that
      string.
    """
    if not isinstance(func, str):
        return _from_stream(func)

    description = func

    def decorator(f):
        return _from_stream(f).desc(description)

    return decorator
26 changes: 26 additions & 0 deletions tests/test_datetime_from_stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from datetime import datetime

import pytest

from persil import Stream, from_stream, regex, string


@from_stream
def datetime_parser(stream: Stream[str]) -> datetime:
    """Parse a ``YYYY/MM/DD`` string into a ``datetime``."""
    year_digits = regex(r"\d{4}").map(int)
    yyyy = stream.apply(year_digits)

    stream.apply(string("/"))

    month_digits = regex(r"\d{2}").map(int)
    mm = stream.apply(month_digits)

    stream.apply(string("/"))

    day_digits = regex(r"\d{2}").map(int)
    dd = stream.apply(day_digits)

    return datetime(year=yyyy, month=mm, day=dd)


# (input text, expected parse result) pairs fed to the parametrized test.
EXAMPLES = [
    ("2024/10/01", datetime(year=2024, month=10, day=1)),
]


@pytest.mark.parametrize("message,expected", EXAMPLES)
def test_datetime_from_stream(message: str, expected: datetime):
    """Each sample string must parse to its expected ``datetime``."""
    parsed = datetime_parser.parse(message)
    assert parsed == expected
30 changes: 30 additions & 0 deletions tests/test_from_stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pytest
from pydantic import BaseModel

from persil import regex
from persil.stream import Stream, from_stream


class Flight(BaseModel):
    # Value object produced by `flight_parser` in this test module.
    # Documented with comments rather than a docstring so that nothing is
    # added to the pydantic model itself.

    # Carrier code; the parser below fills it from the `[A-Z]{2}` match.
    carrier: str
    # Flight number; the parser below fills it from the `\d{2,4}` match,
    # converted with `int`.
    flight_number: int


@from_stream("Flight parser")
def flight_parser(stream: Stream[str]) -> Flight:
    """Parse a carrier code followed by a flight number into a ``Flight``."""
    carrier_code = stream.apply(regex(r"[A-Z]{2}"))

    number_parser = regex(r"\d{2,4}").map(int)
    number = stream.apply(number_parser)

    return Flight(flight_number=number, carrier=carrier_code)


# (input text, expected parse result) pairs fed to the parametrized test.
EXAMPLES = [
    ("AF071", Flight(flight_number=71, carrier="AF")),
    ("LY180", Flight(flight_number=180, carrier="LY")),
]


@pytest.mark.parametrize("message,expected", EXAMPLES)
def test_generate(message: str, expected: Flight):
    """Each sample string must parse to its expected ``Flight``."""
    parsed = flight_parser.parse(message)
    assert parsed == expected

0 comments on commit 2297f5d

Please sign in to comment.