Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP]chore: streaming sink #193

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 31 additions & 9 deletions pynumaflow/proto/sinker/sink.proto
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
syntax = "proto3";

import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

Expand All @@ -7,7 +8,7 @@ package sink.v1;

service Sink {
// SinkFn writes the request to a user defined sink.
rpc SinkFn(stream SinkRequest) returns (SinkResponse);
rpc SinkFn(stream SinkRequest) returns (stream SinkResponse);

// IsReady is the heartbeat endpoint for gRPC.
rpc IsReady(google.protobuf.Empty) returns (ReadyResponse);
Expand All @@ -17,12 +18,32 @@ service Sink {
* SinkRequest represents a request element.
*/
message SinkRequest {
repeated string keys = 1;
bytes value = 2;
google.protobuf.Timestamp event_time = 3;
google.protobuf.Timestamp watermark = 4;
string id = 5;
map<string, string> headers = 6;
message Request {
repeated string keys = 1;
bytes value = 2;
google.protobuf.Timestamp event_time = 3;
google.protobuf.Timestamp watermark = 4;
string id = 5;
map<string, string> headers = 6;
}
message Status {
bool eot = 1;
}
// Required field indicating the request.
Request request = 1;
// Required field indicating the status of the request.
// If eot is set to true, it indicates the end of transmission.
Status status = 2;
// Optional field indicating the handshake message.
optional Handshake handshake = 3;
}

/*
* Handshake message between client and server to indicate the start of transmission.
*/
message Handshake {
// Required field indicating the start of transmission.
bool sot = 1;
}

/**
Expand Down Expand Up @@ -53,5 +74,6 @@ message SinkResponse {
// err_msg is the error message, set it if success is set to false.
string err_msg = 3;
}
repeated Result results = 1;
}
Result result = 1;
optional Handshake handshake = 2;
}
38 changes: 22 additions & 16 deletions pynumaflow/proto/sinker/sink_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

89 changes: 60 additions & 29 deletions pynumaflow/proto/sinker/sink_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -25,45 +25,72 @@ FAILURE: Status
FALLBACK: Status

class SinkRequest(_message.Message):
__slots__ = ("keys", "value", "event_time", "watermark", "id", "headers")
__slots__ = ("request", "status", "handshake")

class HeadersEntry(_message.Message):
__slots__ = ("key", "value")
KEY_FIELD_NUMBER: _ClassVar[int]
class Request(_message.Message):
__slots__ = ("keys", "value", "event_time", "watermark", "id", "headers")

class HeadersEntry(_message.Message):
__slots__ = ("key", "value")
KEY_FIELD_NUMBER: _ClassVar[int]
VALUE_FIELD_NUMBER: _ClassVar[int]
key: str
value: str
def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
KEYS_FIELD_NUMBER: _ClassVar[int]
VALUE_FIELD_NUMBER: _ClassVar[int]
key: str
value: str
def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
KEYS_FIELD_NUMBER: _ClassVar[int]
VALUE_FIELD_NUMBER: _ClassVar[int]
EVENT_TIME_FIELD_NUMBER: _ClassVar[int]
WATERMARK_FIELD_NUMBER: _ClassVar[int]
ID_FIELD_NUMBER: _ClassVar[int]
HEADERS_FIELD_NUMBER: _ClassVar[int]
keys: _containers.RepeatedScalarFieldContainer[str]
value: bytes
event_time: _timestamp_pb2.Timestamp
watermark: _timestamp_pb2.Timestamp
id: str
headers: _containers.ScalarMap[str, str]
EVENT_TIME_FIELD_NUMBER: _ClassVar[int]
WATERMARK_FIELD_NUMBER: _ClassVar[int]
ID_FIELD_NUMBER: _ClassVar[int]
HEADERS_FIELD_NUMBER: _ClassVar[int]
keys: _containers.RepeatedScalarFieldContainer[str]
value: bytes
event_time: _timestamp_pb2.Timestamp
watermark: _timestamp_pb2.Timestamp
id: str
headers: _containers.ScalarMap[str, str]
def __init__(
self,
keys: _Optional[_Iterable[str]] = ...,
value: _Optional[bytes] = ...,
event_time: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
watermark: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
id: _Optional[str] = ...,
headers: _Optional[_Mapping[str, str]] = ...,
) -> None: ...

class Status(_message.Message):
__slots__ = ("eot",)
EOT_FIELD_NUMBER: _ClassVar[int]
eot: bool
def __init__(self, eot: bool = ...) -> None: ...
REQUEST_FIELD_NUMBER: _ClassVar[int]
STATUS_FIELD_NUMBER: _ClassVar[int]
HANDSHAKE_FIELD_NUMBER: _ClassVar[int]
request: SinkRequest.Request
status: SinkRequest.Status
handshake: Handshake
def __init__(
self,
keys: _Optional[_Iterable[str]] = ...,
value: _Optional[bytes] = ...,
event_time: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
watermark: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
id: _Optional[str] = ...,
headers: _Optional[_Mapping[str, str]] = ...,
request: _Optional[_Union[SinkRequest.Request, _Mapping]] = ...,
status: _Optional[_Union[SinkRequest.Status, _Mapping]] = ...,
handshake: _Optional[_Union[Handshake, _Mapping]] = ...,
) -> None: ...

class Handshake(_message.Message):
__slots__ = ("sot",)
SOT_FIELD_NUMBER: _ClassVar[int]
sot: bool
def __init__(self, sot: bool = ...) -> None: ...

class ReadyResponse(_message.Message):
__slots__ = ("ready",)
READY_FIELD_NUMBER: _ClassVar[int]
ready: bool
def __init__(self, ready: bool = ...) -> None: ...

class SinkResponse(_message.Message):
__slots__ = ("results",)
__slots__ = ("result", "handshake")

class Result(_message.Message):
__slots__ = ("id", "status", "err_msg")
Expand All @@ -79,8 +106,12 @@ class SinkResponse(_message.Message):
status: _Optional[_Union[Status, str]] = ...,
err_msg: _Optional[str] = ...,
) -> None: ...
RESULTS_FIELD_NUMBER: _ClassVar[int]
results: _containers.RepeatedCompositeFieldContainer[SinkResponse.Result]
RESULT_FIELD_NUMBER: _ClassVar[int]
HANDSHAKE_FIELD_NUMBER: _ClassVar[int]
result: SinkResponse.Result
handshake: Handshake
def __init__(
self, results: _Optional[_Iterable[_Union[SinkResponse.Result, _Mapping]]] = ...
self,
result: _Optional[_Union[SinkResponse.Result, _Mapping]] = ...,
handshake: _Optional[_Union[Handshake, _Mapping]] = ...,
) -> None: ...
6 changes: 3 additions & 3 deletions pynumaflow/proto/sinker/sink_pb2_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def __init__(self, channel):
Args:
channel: A grpc.Channel.
"""
self.SinkFn = channel.stream_unary(
self.SinkFn = channel.stream_stream(
"/sink.v1.Sink/SinkFn",
request_serializer=sink__pb2.SinkRequest.SerializeToString,
response_deserializer=sink__pb2.SinkResponse.FromString,
Expand Down Expand Up @@ -45,7 +45,7 @@ def IsReady(self, request, context):

def add_SinkServicer_to_server(servicer, server):
rpc_method_handlers = {
"SinkFn": grpc.stream_unary_rpc_method_handler(
"SinkFn": grpc.stream_stream_rpc_method_handler(
servicer.SinkFn,
request_deserializer=sink__pb2.SinkRequest.FromString,
response_serializer=sink__pb2.SinkResponse.SerializeToString,
Expand Down Expand Up @@ -77,7 +77,7 @@ def SinkFn(
timeout=None,
metadata=None,
):
return grpc.experimental.stream_unary(
return grpc.experimental.stream_stream(
request_iterator,
target,
"/sink.v1.Sink/SinkFn",
Expand Down
4 changes: 2 additions & 2 deletions pynumaflow/shared/asynciter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ class NonBlockingIterator:

__slots__ = "_queue"

def __init__(self):
self._queue = asyncio.Queue()
def __init__(self, size=0):
self._queue = asyncio.Queue(maxsize=size)

async def read_iterator(self):
item = await self._queue.get()
Expand Down
10 changes: 10 additions & 0 deletions pynumaflow/shared/server.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
import contextlib
import io
import multiprocessing
Expand Down Expand Up @@ -278,3 +279,12 @@ def get_exception_traceback_str(exc) -> str:
file = io.StringIO()
traceback.print_exception(exc, value=exc, tb=exc.__traceback__, file=file)
return file.getvalue().rstrip()


async def handle_exception(context, exception):
    """
    Handle a servicer exception: report it, abort the RPC, then exit.

    Runs three steps in order: ``handle_error`` on the context, an awaited
    ``context.abort`` with status ``UNKNOWN`` and the exception's ``repr`` as
    the details, and finally ``exit_on_error`` with ``parent=False`` and
    ``update_context=False``.

    Args:
        context: Call context exposing an awaitable ``abort(code, details=...)``.
        exception: The exception that triggered the failure.
    """
    handle_error(context, exception)

    details = repr(exception)
    abort_call = context.abort(grpc.StatusCode.UNKNOWN, details=details)
    # return_exceptions=True makes gather hand back anything raised by abort()
    # as a result instead of propagating it, so the exit step below still runs.
    await asyncio.gather(abort_call, return_exceptions=True)

    exit_on_error(err=details, parent=False, context=context, update_context=False)
3 changes: 3 additions & 0 deletions pynumaflow/shared/servicer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
def is_valid_handshake(req):
    """
    Check whether a request carries a valid start-of-transmission handshake.

    Args:
        req: Object with a ``handshake`` attribute that in turn exposes ``sot``.

    Returns:
        bool: True only when ``req.handshake`` is truthy and its ``sot`` flag
        is truthy; False otherwise.
    """
    handshake = req.handshake
    # Coerce explicitly: a bare ``and`` would return the operand itself
    # (e.g. None or the handshake object) rather than a real boolean.
    return bool(handshake and handshake.sot)
5 changes: 3 additions & 2 deletions pynumaflow/sinker/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pynumaflow.sinker.async_server import SinkAsyncServer
from pynumaflow.sinker.server import SinkServer

# from pynumaflow.sinker.server import SinkServer

from pynumaflow.sinker._dtypes import Response, Responses, Datum, Sinker

__all__ = ["Response", "Responses", "Datum", "Sinker", "SinkServer", "SinkAsyncServer"]
__all__ = ["Response", "Responses", "Datum", "Sinker", "SinkAsyncServer"]
20 changes: 20 additions & 0 deletions pynumaflow/sinker/_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,26 @@
pass


@dataclass
class EndOfStreamTransmission:
    """
    Marker object carrying a single end-of-stream flag.

    Args:
        eos: The flag value; ``as_completed`` builds it as True and
            ``as_failure`` builds it as False.
    """

    eos: bool
    __slots__ = "eos"

    @classmethod
    def as_completed(cls):
        """Build an instance with ``eos=True``."""
        # Construct through ``cls`` so a subclass gets an instance of its own type.
        return cls(eos=True)

    @classmethod
    def as_failure(cls):
        """Build an instance with ``eos=False``."""
        return cls(eos=False)

Check warning on line 235 in pynumaflow/sinker/_dtypes.py

View check run for this annotation

Codecov / codecov/patch

pynumaflow/sinker/_dtypes.py#L235

Added line #L235 was not covered by tests


# SyncSinkCallable is a callable which can be used as a handler for the Synchronous UDSink.
SinkHandlerCallable = Callable[[Iterator[Datum]], Responses]
SyncSinkCallable = Union[Sinker, SinkHandlerCallable]
Expand Down
Loading
Loading