Return an incremental id instead of just pinging

Each response of the best-model stream now carries an id. The id is incremented by one each time a new best model is available. A client can use the id to tell whether an action was performed with an outdated model: if the current id is greater than the one the client last saw, a newer best model exists.
ilastik · Dec 21, 2024 · b10d4f9 · b10d4f9
1 parent 0516dd2
commit b10d4f9
Show file tree

Hide file tree

Showing 6 changed files with 20 additions and 19 deletions.
diff --git a/proto/training.proto b/proto/training.proto
@@ -21,7 +21,7 @@ service Training {
 
     rpc GetLogs(ModelSession) returns (GetLogsResponse) {}
 
-    rpc IsBestModel(ModelSession) returns (stream Empty) {}
+    rpc GetBestModelIdx(ModelSession) returns (stream GetBestModelIdxResponse) {}
 
     rpc Save(SaveRequest) returns (Empty) {}
 

diff --git a/tests/test_server/test_grpc/test_training_servicer.py b/tests/test_server/test_grpc/test_training_servicer.py
@@ -637,13 +637,13 @@ def test_best_model_ping(self, grpc_stub):
 
         grpc_stub.Start(training_session_id)
 
-        responses = grpc_stub.IsBestModel(training_session_id)
+        responses = grpc_stub.GetBestModelIdx(training_session_id)
         received_updates = 0
         for response in responses:
-            assert isinstance(response, utils_pb2.Empty)
+            assert isinstance(response, training_pb2.GetBestModelIdxResponse)
+            assert response.id is not None
             received_updates += 1
-
-            if received_updates >= 3:
+            if received_updates >= 2:
                 break
 
     def test_close_session(self, grpc_stub):

diff --git a/tiktorch/proto/training_pb2.py b/tiktorch/proto/training_pb2.py
diff --git a/tiktorch/proto/training_pb2_grpc.py b/tiktorch/proto/training_pb2_grpc.py
@@ -50,10 +50,10 @@ def __init__(self, channel):
                 request_serializer=utils__pb2.ModelSession.SerializeToString,
                 response_deserializer=training__pb2.GetLogsResponse.FromString,
                 )
-        self.IsBestModel = channel.unary_stream(
-                '/training.Training/IsBestModel',
+        self.GetBestModelIdx = channel.unary_stream(
+                '/training.Training/GetBestModelIdx',
                 request_serializer=utils__pb2.ModelSession.SerializeToString,
-                response_deserializer=utils__pb2.Empty.FromString,
+                response_deserializer=training__pb2.GetBestModelIdxResponse.FromString,
                 )
         self.Save = channel.unary_unary(
                 '/training.Training/Save',
@@ -127,7 +127,7 @@ def GetLogs(self, request, context):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')
 
-    def IsBestModel(self, request, context):
+    def GetBestModelIdx(self, request, context):
         """Missing associated documentation comment in .proto file."""
         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
         context.set_details('Method not implemented!')
@@ -201,10 +201,10 @@ def add_TrainingServicer_to_server(servicer, server):
                     request_deserializer=utils__pb2.ModelSession.FromString,
                     response_serializer=training__pb2.GetLogsResponse.SerializeToString,
             ),
-            'IsBestModel': grpc.unary_stream_rpc_method_handler(
-                    servicer.IsBestModel,
+            'GetBestModelIdx': grpc.unary_stream_rpc_method_handler(
+                    servicer.GetBestModelIdx,
                     request_deserializer=utils__pb2.ModelSession.FromString,
-                    response_serializer=utils__pb2.Empty.SerializeToString,
+                    response_serializer=training__pb2.GetBestModelIdxResponse.SerializeToString,
             ),
             'Save': grpc.unary_unary_rpc_method_handler(
                     servicer.Save,
@@ -361,7 +361,7 @@ def GetLogs(request,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
 
     @staticmethod
-    def IsBestModel(request,
+    def GetBestModelIdx(request,
             target,
             options=(),
             channel_credentials=None,
@@ -371,9 +371,9 @@ def IsBestModel(request,
             wait_for_ready=None,
             timeout=None,
             metadata=None):
-        return grpc.experimental.unary_stream(request, target, '/training.Training/IsBestModel',
+        return grpc.experimental.unary_stream(request, target, '/training.Training/GetBestModelIdx',
             utils__pb2.ModelSession.SerializeToString,
-            utils__pb2.Empty.FromString,
+            training__pb2.GetBestModelIdxResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
 

diff --git a/tiktorch/server/grpc/training_servicer.py b/tiktorch/server/grpc/training_servicer.py
@@ -98,14 +98,14 @@ def StreamUpdates(self, request: utils_pb2.ModelSession, context):
     def GetLogs(self, request: utils_pb2.ModelSession, context):
         raise NotImplementedError
 
-    def IsBestModel(self, request, context):
+    def GetBestModelIdx(self, request, context):
         session = self._getTrainerSession(context, request)
         prev_best_model_idx = None
         while context.is_active():
             current_best_model_idx = session.client.get_best_model_idx()
             if current_best_model_idx != prev_best_model_idx:
                 prev_best_model_idx = current_best_model_idx
-                yield utils_pb2.Empty()
+                yield training_pb2.GetBestModelIdxResponse(id=str(current_best_model_idx))
             time.sleep(1)
         logger.info("Client disconnected. Stopping stream.")
 

diff --git a/tiktorch/trainer.py b/tiktorch/trainer.py
@@ -315,6 +315,7 @@ def export(self, file_to_save: Path):
                     architecture=ArchitectureFromLibraryDescr(
                         import_from=f"{self.get_model_import_file_path()}",
                         callable=Identifier(f"{self.model.__class__.__name__}"),
+                        kwargs={"in_channels": self._in_channels, "out_channels": self._out_channels},
                     ),
                     pytorch_version=Version("1.1.1"),
                 )