diff --git a/dff/script/core/context.py b/dff/script/core/context.py index 6658c346f..78ee18072 100644 --- a/dff/script/core/context.py +++ b/dff/script/core/context.py @@ -32,7 +32,7 @@ def get_last_index(dictionary: dict) -> int: """ - Obtaining the last index from the `dictionary`. Functions returns `-1` if the `dict` is empty. + Obtain the last index from the `dictionary`. Return `-1` if the `dict` is empty. :param dictionary: Dictionary with unsorted keys. :return: Last index from the `dictionary`. @@ -44,6 +44,9 @@ def get_last_index(dictionary: dict) -> int: class Context(BaseModel): """ A structure that is used to store data about the context of a dialog. + + Avoid storing unserializable data in the fields of this class in order for + context storages to work. """ id: Union[UUID, int, str] = Field(default_factory=uuid4) @@ -77,13 +80,15 @@ class Context(BaseModel): `misc` stores any custom data. The scripting doesn't use this dictionary by default, so storage of any data won't reflect on the work on the internal Dialog Flow Scripting functions. + Avoid storing unserializable data in order for context storages to work. + - key - Arbitrary data name. - value - Arbitrary data. """ validation: bool = False """ - `validation` is a flag that signals that :py:class:`~dff.script.Pipeline`, - while being initialized, checks the :py:class:`~dff.script.Script`. + `validation` is a flag that signals that :py:class:`~dff.pipeline.pipeline.pipeline.Pipeline`, + while being initialized, checks the :py:class:`~dff.script.core.script.Script`. The functions that can give not valid data while being validated must use this flag to take the validation mode into account. Otherwise the validation will not be passed. @@ -91,12 +96,12 @@ class Context(BaseModel): framework_states: Dict[ModuleName, Dict[str, Any]] = {} """ `framework_states` is used for addons states or for - :py:class:`~dff.script.Pipeline`'s states. 
- :py:class:`~dff.script.Pipeline` + :py:class:`~dff.pipeline.pipeline.pipeline.Pipeline`'s states. + :py:class:`~dff.pipeline.pipeline.pipeline.Pipeline` records all its intermediate conditions into the `framework_states`. - After :py:class:`~dff.script.Context` processing is finished, - :py:class:`~dff.script.Pipeline` resets `framework_states` and - returns :py:class:`~dff.script.Context`. + After :py:class:`~.Context` processing is finished, + :py:class:`~dff.pipeline.pipeline.pipeline.Pipeline` resets `framework_states` and + returns :py:class:`~.Context`. - key - Temporary variable name. - value - Temporary variable data. @@ -106,7 +111,7 @@ class Context(BaseModel): @classmethod def sort_dict_keys(cls, dictionary: dict) -> dict: """ - Sorting the keys in the `dictionary`. This needs to be done after deserialization, + Sort the keys in the `dictionary`. This needs to be done after deserialization, since the keys are deserialized in a random order. :param dictionary: Dictionary with unsorted keys. @@ -117,16 +122,15 @@ def sort_dict_keys(cls, dictionary: dict) -> dict: @classmethod def cast(cls, ctx: Optional[Union["Context", dict, str]] = None, *args, **kwargs) -> "Context": """ - Transforms different data types to the objects of - :py:class:`~dff.script.Context` class. - Returns an object of :py:class:`~dff.script.Context` + Transform different data types to the objects of the + :py:class:`~.Context` class. + Return an object of the :py:class:`~.Context` type that is initialized by the input data. - :param ctx: Different data types, that are used to initialize object of - :py:class:`~dff.script.Context` type. - The empty object of :py:class:`~dff.script.Context` - type is created if no data are given. - :return: Object of :py:class:`~dff.script.Context` + :param ctx: Data that is used to initialize an object of the + :py:class:`~.Context` type. + An empty :py:class:`~.Context` object is returned if no data is given. 
+ :return: Object of the :py:class:`~.Context` type that is initialized by the input data. """ if not ctx: @@ -137,14 +141,15 @@ def cast(cls, ctx: Optional[Union["Context", dict, str]] = None, *args, **kwargs ctx = Context.model_validate_json(ctx) elif not issubclass(type(ctx), Context): raise ValueError( - f"context expected as sub class of Context class or object of dict/str(json) type, but got {ctx}" + f"Context expected to be an instance of the Context class " + f"or an instance of the dict/str(json) type. Got: {type(ctx)}" ) return ctx def add_request(self, request: Message): """ - Adds to the context the next `request` corresponding to the next turn. - The addition takes place in the `requests` and `new_index = last_index + 1`. + Add a new `request` to the context. + The new `request` is added with the index of `last_index + 1`. :param request: `request` to be added to the context. """ @@ -154,8 +159,8 @@ def add_request(self, request: Message): def add_response(self, response: Message): """ - Adds to the context the next `response` corresponding to the next turn. - The addition takes place in the `responses`, and `new_index = last_index + 1`. + Add a new `response` to the context. + The new `response` is added with the index of `last_index + 1`. :param response: `response` to be added to the context. """ @@ -165,9 +170,8 @@ def add_response(self, response: Message): def add_label(self, label: NodeLabel2Type): """ - Adds to the context the next :py:const:`label `, - corresponding to the next turn. - The addition takes place in the `labels`, and `new_index = last_index + 1`. + Add a new :py:data:`~.NodeLabel2Type` to the context. + The new `label` is added with the index of `last_index + 1`. :param label: `label` that we need to add to the context. 
""" @@ -180,12 +184,12 @@ def clear( field_names: Union[Set[str], List[str]] = {"requests", "responses", "labels"}, ): """ - Deletes all recordings from the `requests`/`responses`/`labels` except for + Delete all records from the `requests`/`responses`/`labels` except for the last `hold_last_n_indices` turns. If `field_names` contains `misc` field, `misc` field is fully cleared. - :param hold_last_n_indices: Number of last turns that remain under clearing. - :param field_names: Properties of :py:class:`~dff.script.Context` we need to clear. + :param hold_last_n_indices: Number of last turns to keep. + :param field_names: Properties of :py:class:`~.Context` to clear. Defaults to {"requests", "responses", "labels"} """ field_names = field_names if isinstance(field_names, set) else set(field_names) @@ -206,9 +210,12 @@ def clear( @property def last_label(self) -> Optional[NodeLabel2Type]: """ - Returns the last :py:const:`~dff.script.NodeLabel2Type` of - the :py:class:`~dff.script.Context`. - Returns `None` if `labels` is empty. + Return the last :py:data:`~.NodeLabel2Type` of + the :py:class:`~.Context`. + Return `None` if `labels` is empty. + + Since `start_label` is not added to the `labels` field, + empty `labels` usually indicates that the current node is the `start_node`. """ last_index = get_last_index(self.labels) return self.labels.get(last_index) @@ -216,8 +223,8 @@ def last_label(self) -> Optional[NodeLabel2Type]: @property def last_response(self) -> Optional[Message]: """ - Returns the last `response` of the current :py:class:`~dff.script.Context`. - Returns `None` if `responses` is empty. + Return the last `response` of the current :py:class:`~.Context`. + Return `None` if `responses` is empty. 
""" last_index = get_last_index(self.responses) return self.responses.get(last_index) @@ -225,7 +232,7 @@ def last_response(self) -> Optional[Message]: @last_response.setter def last_response(self, response: Optional[Message]): """ - Sets the last `response` of the current :py:class:`~dff.core.engine.core.context.Context`. + Set the last `response` of the current :py:class:`~.Context`. Required for use with various response wrappers. """ last_index = get_last_index(self.responses) @@ -234,8 +241,8 @@ def last_response(self, response: Optional[Message]): @property def last_request(self) -> Optional[Message]: """ - Returns the last `request` of the current :py:class:`~dff.script.Context`. - Returns `None` if `requests` is empty. + Return the last `request` of the current :py:class:`~.Context`. + Return `None` if `requests` is empty. """ last_index = get_last_index(self.requests) return self.requests.get(last_index) @@ -243,7 +250,7 @@ def last_request(self) -> Optional[Message]: @last_request.setter def last_request(self, request: Optional[Message]): """ - Sets the last `request` of the current :py:class:`~dff.core.engine.core.context.Context`. + Set the last `request` of the current :py:class:`~.Context`. Required for use with various request wrappers. """ last_index = get_last_index(self.requests) @@ -252,7 +259,7 @@ def last_request(self, request: Optional[Message]): @property def current_node(self) -> Optional[Node]: """ - Returns current :py:class:`~dff.script.Node`. + Return current :py:class:`~dff.script.core.script.Node`. 
""" actor = self.framework_states.get("actor", {}) node = ( @@ -264,17 +271,21 @@ def current_node(self) -> Optional[Node]: ) if node is None: logger.warning( - "The `current_node` exists when an actor is running between `ActorStage.GET_PREVIOUS_NODE`" - " and `ActorStage.FINISH_TURN`" + "The `current_node` method should be called " + "when an actor is running between the " + "`ActorStage.GET_PREVIOUS_NODE` and `ActorStage.FINISH_TURN` stages." ) return node def overwrite_current_node_in_processing(self, processed_node: Node): """ - Overwrites the current node with a processed node. This method only works in processing functions. + Set the current node to be `processed_node`. + This method only works in processing functions (pre-response and pre-transition). + + The actual current node is not changed. - :param processed_node: `node` that we need to overwrite current node. + :param processed_node: `node` to set as the current node. """ is_processing = self.framework_states.get("actor", {}).get("processed_node") if is_processing: @@ -282,7 +293,7 @@ def overwrite_current_node_in_processing(self, processed_node: Node): else: logger.warning( f"The `{self.overwrite_current_node_in_processing.__name__}` " - "function can only be run during processing functions." + "method can only be called from processing functions (either pre-response or pre-transition)." ) diff --git a/docs/source/user_guides.rst b/docs/source/user_guides.rst index 8724a1489..fa274db88 100644 --- a/docs/source/user_guides.rst +++ b/docs/source/user_guides.rst @@ -9,6 +9,12 @@ those include but are not limited to: dialog graph creation, specifying start an setting transitions and conditions, using ``Context`` object in order to receive information about current script execution. 
+:doc:`Context guide <./user_guides/context_guide>` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``context guide`` walks you through the details of working with the +``Context`` object, the backbone of the DFF API, including most of the relevant fields and methods. + :doc:`Superset guide <./user_guides/superset_guide>` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -22,4 +28,5 @@ Superset dashboard shipped with DFF. :hidden: user_guides/basic_conceptions + user_guides/context_guide user_guides/superset_guide diff --git a/docs/source/user_guides/basic_conceptions.rst b/docs/source/user_guides/basic_conceptions.rst index 9f81a8610..6cd4d2964 100644 --- a/docs/source/user_guides/basic_conceptions.rst +++ b/docs/source/user_guides/basic_conceptions.rst @@ -241,7 +241,7 @@ That's what we've changed: .. note:: - See `documentation of Context object`_. + See `guide on Context objects`_. * Transitions were changed: transitions to next, previous and current node were replaced with special standard transitions. @@ -268,7 +268,7 @@ For example: * You can serialize context (available on every transition and response) to json or dictionary in order to debug it or extract some values. - See `tutorial on context serialization`_. + See `guide on context serialization`_. * You can alter user input and modify generated responses. User input can be altered with ``PRE_RESPONSE_PROCESSING`` and will happen **before** response generation. @@ -293,11 +293,11 @@ Happy building! .. _tutorial on basic dialog structure: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.1_basics.html .. _tutorial on response functions: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.3_responses.html -.. _documentation of Context object: https://deeppavlov.github.io/dialog_flow_framework/apiref/dff.script.core.context.html +.. _guide on Context objects: ../user_guides/context_guide.html .. 
_tutorial on transitions: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.4_transitions.html .. _tutorial on conditions: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.2_conditions.html .. _tutorial on global transitions: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.5_global_transitions.html -.. _tutorial on context serialization: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.6_context_serialization.html +.. _guide on context serialization: ../user_guides/context_guide.html#serialization .. _tutorial on pre-response processing: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.7_pre_response_processing.html .. _tutorial on pre-transition processing: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.9_pre_transitions_processing.html .. _tutorial on script MISC: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.8_misc.html diff --git a/docs/source/user_guides/context_guide.rst b/docs/source/user_guides/context_guide.rst new file mode 100644 index 000000000..1dc47712c --- /dev/null +++ b/docs/source/user_guides/context_guide.rst @@ -0,0 +1,247 @@ +Context guide +-------------- + +Introduction +~~~~~~~~~~~~ + +The ``Context`` class is a backbone component of the DFF API. +Like the name suggests, this data structure is used to store information +about the current state, or context, of a particular conversation. +Each individual user has their own ``Context`` instance and can be identified by it. + +``Context`` is used to keep track of the user's requests, bot's replies, +user-related and request-related annotations, and any other information +that is relevant to the conversation with the user. + +.. 
note:: + + Since most callback functions used in DFF script and DFF pipeline (see the `basic guide <./basic_conceptions.rst>`__) + need to either read or update the current dialog state, + the framework-level convention is that all functions of this kind + use ``Context`` as their first parameter. This dependency is being + injected by the pipeline during its run. + Thus, understanding the ``Context`` class is essential for developing custom conversation logic + which is mostly made up by the said functions. + +As a callback parameter, ``Context`` provides a convenient interface for working with data, +allowing developers to easily add, retrieve, +and manipulate data as the conversation progresses. + +Let's consider some of the built-in callback instances to see how the context can be leveraged: + +.. code-block:: python + :linenos: + + pattern = re.compile("[a-zA-Z]+") + + def regexp_condition_handler( + ctx: Context, pipeline: Pipeline, *args, **kwargs + ) -> bool: + # retrieve the current request + request = ctx.last_request + if request.text is None: + return False + return bool(pattern.search(request.text)) + +The code above is a condition function (see the `basic guide <./basic_conceptions.rst>`__) +that belongs to the ``TRANSITIONS`` section of the script and returns `True` or `False` +depending on whether the current user request matches the given pattern. +As can be seen from the code block, the current +request (``last_request``) can be easily retrieved as one of the attributes of the ``Context`` object. +Likewise, the ``last_response`` (bot's current reply) or the ``last_label`` +(the name of the currently visited node) attributes can be used in the same manner. + +Another common use case is leveraging the ``misc`` field (see below for a detailed description): +pipeline functions or ``PROCESSING`` callbacks can write arbitrary values to the misc field, +making those available for other context-dependent functions. + +.. 
code-block:: python + :linenos: + + import urllib.request + import urllib.error + + def ping_example_com( + ctx: Context, *_, **__ + ): + try: + with urllib.request.urlopen("https://example.com/") as webpage: + web_content = webpage.read().decode( + webpage.headers.get_content_charset() + ) + result = "Example Domain" in web_content + except urllib.error.URLError: + result = False + ctx.misc["can_ping_example_com"] = result + +.. + todo: link to the user defined functions tutorial + + .. note:: + For more information about user-defined functions see the `user functions guide <./user_functions.rst>`__. + +API +~~~ + +This section describes the API of the ``Context`` class. + +For more information, such as method signatures, see +`API reference <../apiref/dff.script.core.context.html#dff.script.core.context.Context>`__. + +Attributes +========== + +* **id**: This attribute represents the unique context identifier. By default, it is randomly generated using uuid4. + In most cases, this attribute will be used to identify a user. + +* **labels**: The labels attribute stores the history of all passed labels within the conversation. + It maps turn IDs to labels. The collection is ordered, so getting the last item of the mapping + always shows the last visited node. + + Note that `labels` only stores the nodes that were transitioned to + so `start_label` will not be in this attribute. + +* **requests**: The requests attribute maintains the history of all requests received by the agent. + It also maps turn IDs to requests. Like labels, it stores the requests in-order. + +* **responses**: This attribute keeps a record of all agent responses, mapping turn IDs to responses. + Stores the responses in-order. + +* **misc**: The misc attribute is a dictionary for storing custom data. This field is not used by any of the + built-in DFF classes or functions, so the values that you write there are guaranteed to persist + throughout the lifetime of the ``Context`` object.
+ +* **framework_states**: This attribute is used for storing addon or pipeline states. + Each turn, the DFF pipeline records the intermediary states of its components into this field, + and clears it at the end of the turn. For this reason, developers are discouraged from storing + their own data in this field. + +Methods +======= + +The methods of the ``Context`` class can be divided into two categories: + +* Public methods that get called manually in custom callbacks and in functions that depend on the context. +* Methods that are not designed for manual calls and get called automatically during pipeline runs, + i.e. quasi-private methods. You may still need them when developing extensions or heavily modifying DFF. + +Public methods +^^^^^^^^^^^^^^ + +* **last_request**: Return the last request of the context, or `None` if the ``requests`` field is empty. + + Note that a request is added right after the context is created/retrieved from db, + so an empty ``requests`` field usually indicates an issue with the messenger interface. + +* **last_response**: Return the last response of the context, or `None` if the ``responses`` field is empty. + + Responses are added at the end of each turn, so an empty ``responses`` field is a case your code should always account for. + +* **last_label**: Return the last label of the context, or `None` if the ``labels`` field is empty. + Last label is always the name of the current node but not vice versa: + + Since ``start_label`` is not added to the ``labels`` field, + empty ``labels`` usually indicates that the current node is the `start_node`. + After a transition is made from the `start_node` + the label of that transition is added to the field. + +* **clear**: Clear all items from context fields, optionally keeping the data from `hold_last_n_indices` turns. + You can specify which fields to clear using the `field_names` parameter. This method is designed for cases + when contexts are shared over high latency networks. + +.. 
note:: + + See the `preprocessing tutorial <../tutorials/tutorials.script.core.7_pre_response_processing.py>`__. + +Private methods +^^^^^^^^^^^^^^^ + +* **set_last_response, set_last_request**: These methods allow you to set the last response or request for the current context. + This functionality can prove useful if you want to create a middleware component that overrides the pipeline functionality. + +* **add_request**: Add a request to the context. + It updates the `requests` dictionary. This method is called by the `Pipeline` component + before any of the `pipeline services <../tutorials/tutorials.pipeline.3_pipeline_dict_with_services_basic.py>`__ are executed, + including `Actor <../apiref/dff.pipeline.pipeline.actor.html>`__. + +* **add_response**: Add a response to the context. + It updates the `responses` dictionary. This function is run by the `Actor <../apiref/dff.pipeline.pipeline.actor.html>`__ pipeline component at the end of the turn, after it has run + the `PRE_RESPONSE_PROCESSING <../tutorials/tutorials.script.core.7_pre_response_processing.py>`__ functions. + + To be more precise, this method is called between the ``CREATE_RESPONSE`` and ``FINISH_TURN`` stages. + For more information about stages, see `ActorStages <../apiref/dff.script.core.types.html#dff.script.core.types.ActorStage>`__. + +* **add_label**: Add a label to the context. + It updates the `labels` field. This method is called by the `Actor <../apiref/dff.pipeline.pipeline.actor.html>`_ component when transition conditions + have been resolved, and when `PRE_TRANSITIONS_PROCESSING <../tutorials/tutorials.script.core.9_pre_transitions_processing.py>`__ callbacks have been run. + + To be more precise, this method is called between the ``GET_NEXT_NODE`` and ``REWRITE_NEXT_NODE`` stages. + For more information about stages, see `ActorStages <../apiref/dff.script.core.types.html#dff.script.core.types.ActorStage>`__. + +* **current_node**: Return the current node of the context. 
This is particularly useful for tracking the node during the conversation flow. + This method only returns a node inside ``PROCESSING`` callbacks, yielding ``None`` in other contexts. + +Context storages +~~~~~~~~~~~~~~~~ + +Since context instances contain all the information relevant for a particular user, there needs to be a way +to persistently store that information and to make it accessible in different user sessions. +This functionality is implemented by the ``context storages`` module that provides +the uniform ``DBContextStorage`` interface as well as child classes thereof that integrate +various database types (see the +`api reference <../apiref/dff.context_storages.database.html#dff.context_storages.database.DBContextStorage>`_). + +The supported storage options are as follows: + +* `JSON <https://www.json.org/json-en.html>`_ +* `pickle <https://docs.python.org/3/library/pickle.html>`_ +* `shelve <https://docs.python.org/3/library/shelve.html>`_ +* `SQLite <https://www.sqlite.org/index.html>`_ +* `PostgreSQL <https://www.postgresql.org/>`_ +* `MySQL <https://www.mysql.com/>`_ +* `MongoDB <https://www.mongodb.com/>`_ +* `Redis <https://redis.io/>`_ +* `Yandex DataBase <https://ydb.tech/>`_ + +``DBContextStorage`` instances can be uniformly constructed using the ``context_storage_factory`` function. +The function's only parameter is a connection string that specifies both the database type +and the connection parameters, for example, *mongodb://admin:pass@localhost:27016/admin*. +(`see the reference <../apiref/dff.context_storages.database.html#dff.context_storages.database.context_storage_factory>`_) + +The GitHub-based distribution of DFF includes Docker images for each of the supported database types. +Therefore, the easiest way to deploy your service together with a database is to clone the GitHub +distribution and to take advantage of the packaged +`docker-compose file <https://github.com/deeppavlov/dialog_flow_framework/blob/master/docker-compose.yml>`_. + +.. code-block:: shell + :linenos: + + git clone https://github.com/deeppavlov/dialog_flow_framework.git + cd dialog_flow_framework + # assuming we need to deploy mongodb + docker-compose up mongo + +The images can be configured using the docker-compose file or the +`environment file <https://github.com/deeppavlov/dialog_flow_framework/blob/master/.env_file>`_, +also available in the distribution. 
Consult these files for more options. + +.. warning:: + + The data transmission protocols require the data to be JSON-serializable. DFF tackles this problem + through utilization of ``pydantic`` as described in the next section. + +Serialization +~~~~~~~~~~~~~ + +The fact that the ``Context`` class is a Pydantic model makes it easily convertible to other data formats, +such as JSON. For instance, as a developer, you don't need to implement instructions on how datetime fields +need to be marshalled, since this functionality is provided by Pydantic out of the box. +As a result, working with web interfaces and databases that require the transmitted data to be serialized +becomes as easy as calling the `model_dump_json` method: + +.. code-block:: python + + context = Context() + serialized_context = context.model_dump_json() + +Knowing that, you can easily extend DFF to work with storages like Memcache or web APIs of your liking. \ No newline at end of file diff --git a/tutorials/script/core/7_pre_response_processing.py b/tutorials/script/core/7_pre_response_processing.py index d43e9c17d..233d62f75 100644 --- a/tutorials/script/core/7_pre_response_processing.py +++ b/tutorials/script/core/7_pre_response_processing.py @@ -37,13 +37,6 @@ # %% -def add_label_processing(ctx: Context, _: Pipeline, *args, **kwargs) -> Context: - processed_node = ctx.current_node - processed_node.response = Message(text=f"{ctx.last_label}: {processed_node.response.text}") - ctx.overwrite_current_node_in_processing(processed_node) - return ctx - - def add_prefix(prefix): def add_prefix_processing(ctx: Context, _: Pipeline, *args, **kwargs) -> Context: processed_node = ctx.current_node