From 16918842bf86de1b493229310b0f6fc593d7a686 Mon Sep 17 00:00:00 2001
From: Erick Friis <erick@langchain.dev>
Date: Tue, 19 Nov 2024 14:46:26 -0800
Subject: [PATCH] docs: add conceptual testing docs (#28205)

---
 docs/docs/concepts/index.mdx                  |  4 +
 docs/docs/concepts/testing.mdx                | 81 +++++++++++++++++++
 .../how_to/integrations/standard_tests.ipynb  | 18 ++---
 3 files changed, 91 insertions(+), 12 deletions(-)
 create mode 100644 docs/docs/concepts/testing.mdx

diff --git a/docs/docs/concepts/index.mdx b/docs/docs/concepts/index.mdx
index 9b7272643b49e..6a9d8d0db8c69 100644
--- a/docs/docs/concepts/index.mdx
+++ b/docs/docs/concepts/index.mdx
@@ -40,6 +40,7 @@ The conceptual guide does not cover step-by-step instructions or specific implem
 - **[Callbacks](/docs/concepts/callbacks)**: Callbacks enable the execution of custom auxiliary code in built-in components. Callbacks are used to stream outputs from LLMs in LangChain, trace the intermediate steps of an application, and more.
 - **[Tracing](/docs/concepts/tracing)**: The process of recording the steps that an application takes to go from input to output. Tracing is essential for debugging and diagnosing issues in complex applications.
 - **[Evaluation](/docs/concepts/evaluation)**: The process of assessing the performance and effectiveness of AI applications. This involves testing the model's responses against a set of predefined criteria or benchmarks to ensure it meets the desired quality standards and fulfills the intended purpose. This process is vital for building reliable applications.
+- **[Testing](/docs/concepts/testing)**: The process of verifying that a component of an integration or application works as expected. Testing is essential for ensuring that the application behaves correctly and that changes to the codebase do not introduce new bugs.
 
 ## Glossary
 
@@ -62,6 +63,7 @@ The conceptual guide does not cover step-by-step instructions or specific implem
 - **[InjectedToolArg](/docs/concepts/tools#injectedtoolarg)**: Mechanism to inject arguments into tool functions.
 - **[input and output types](/docs/concepts/runnables#input-and-output-types)**: Types used for input and output in Runnables.
 - **[Integration packages](/docs/concepts/architecture/#integration-packages)**: Third-party packages that integrate with LangChain.
+- **[Integration tests](/docs/concepts/testing#integration-tests)**: Tests that verify the correctness of the interaction between components, usually run with access to the underlying API that powers an integration.
 - **[invoke](/docs/concepts/runnables)**: A standard method to invoke a Runnable.
 - **[JSON mode](/docs/concepts/structured_outputs#json-mode)**: Returning responses in JSON format.
 - **[langchain-community](/docs/concepts/architecture#langchain-community)**: Community-driven components for LangChain.
@@ -78,6 +80,7 @@ The conceptual guide does not cover step-by-step instructions or specific implem
 - **[role](/docs/concepts/messages#role)**: Represents the role (e.g., user, assistant) of a chat message.
 - **[RunnableConfig](/docs/concepts/runnables/#runnableconfig)**: Use to pass run time information to Runnables (e.g., `run_name`, `run_id`, `tags`, `metadata`, `max_concurrency`, `recursion_limit`, `configurable`).
 - **[Standard parameters for chat models](/docs/concepts/chat_models#standard-parameters)**: Parameters such as API key, `temperature`, and `max_tokens`,
+- **[Standard tests](/docs/concepts/testing#standard-tests)**: A defined set of unit and integration tests that all integrations must pass.
 - **[stream](/docs/concepts/streaming)**: Use to stream output from a Runnable or a graph.
 - **[Tokenization](/docs/concepts/tokens)**: The process of converting data into tokens and vice versa.
 - **[Tokens](/docs/concepts/tokens)**: The basic unit that a language model reads, processes, and generates under the hood.
@@ -86,6 +89,7 @@ The conceptual guide does not cover step-by-step instructions or specific implem
 - **[@tool](/docs/concepts/tools/#create-tools-using-the-tool-decorator)**: Decorator for creating tools in LangChain.
 - **[Toolkits](/docs/concepts/tools#toolkits)**: A collection of tools that can be used together.
 - **[ToolMessage](/docs/concepts/messages#toolmessage)**: Represents a message that contains the results of a tool execution.
+- **[Unit tests](/docs/concepts/testing#unit-tests)**: Tests that verify the correctness of individual components, run in isolation without access to the Internet.
 - **[Vector stores](/docs/concepts/vectorstores)**: Datastores specialized for storing and efficiently searching vector embeddings.
 - **[with_structured_output](/docs/concepts/structured_outputs/#structured-output-method)**: A helper method for chat models that natively support [tool calling](/docs/concepts/tool_calling) to get structured output matching a given schema specified via Pydantic, JSON schema or a function.
 - **[with_types](/docs/concepts/runnables#with_types)**: Method to overwrite the input and output types of a runnable. Useful when working with complex LCEL chains and deploying with LangServe.
diff --git a/docs/docs/concepts/testing.mdx b/docs/docs/concepts/testing.mdx
new file mode 100644
index 0000000000000..cd0114f31e1b3
--- /dev/null
+++ b/docs/docs/concepts/testing.mdx
@@ -0,0 +1,81 @@
+# Testing
+<span data-heading-keywords="tests,testing,unit,integration"></span>
+
+Testing is a critical part of the development process that ensures your code works as expected and meets the desired quality standards.
+
+In the LangChain ecosystem, we have 2 main types of tests: **unit tests** and **integration tests**.
+
+For integrations that implement standard LangChain abstractions, we have a set of **standard tests** (both unit and integration) that help maintain compatibility between different components and ensure reliability of high-usage ones.
+
+## Unit Tests
+
+**Definition**: Unit tests are designed to validate the smallest parts of your code—individual functions or methods—ensuring they work as expected in isolation. They do not rely on external systems or integrations.
+
+**Example**: Testing the `convert_to_openai_messages` function to confirm it correctly converts an AI message to an OpenAI-format dictionary:
+
+```python
+from langchain_core.messages import AIMessage, ToolCall, convert_to_openai_messages
+
+def test_convert_to_openai_messages():
+    ai_message = AIMessage(
+        content="Let me call that tool for you!",
+        tool_calls=[
+            ToolCall(name='parrot_multiply_tool', id='1', args={'a': 2, 'b': 3}),
+        ]
+    )
+    
+    result = convert_to_openai_messages(ai_message)
+    
+    expected = {
+        "role": "assistant",
+        "tool_calls": [
+            {
+                "type": "function",
+                "id": "1",
+                "function": {
+                    "name": "parrot_multiply_tool",
+                    "arguments": '{"a": 2, "b": 3}',
+                },
+            }
+        ],
+        "content": "Let me call that tool for you!",
+    }
+    assert result == expected  # Ensure conversion matches expected output
+```
+
+---
+
+## Integration Tests
+
+**Definition**: Integration tests validate that multiple components or systems work together as expected. For tools or integrations relying on external services, these tests often ensure end-to-end functionality.
+
+**Example**: Testing `ParrotMultiplyTool` with access to an API service that multiplies two numbers and adds 80:
+
+```python
+def test_integration_with_service():
+    tool = ParrotMultiplyTool()
+    result = tool.invoke({"a": 2, "b": 3})
+    assert result == 86
+```
+
+---
+
+## Standard Tests
+
+**Definition**: Standard tests are pre-defined tests provided by LangChain to ensure consistency and reliability across all tools and integrations. They include both unit and integration test templates tailored for LangChain components.
+
+**Example**: Subclassing LangChain's `ToolsUnitTests` or `ToolsIntegrationTests` to automatically run standard tests:
+
+```python
+from langchain_tests.unit_tests import ToolsUnitTests
+
+class TestParrotMultiplyToolUnit(ToolsUnitTests):
+    @property
+    def tool_constructor(self):
+        return ParrotMultiplyTool
+
+    def tool_invoke_params_example(self):
+        return {"a": 2, "b": 3}
+```
+
+To learn more, check out our guide on [how to add standard tests to an integration](../../contributing/how_to/integrations/standard_tests).
diff --git a/docs/docs/contributing/how_to/integrations/standard_tests.ipynb b/docs/docs/contributing/how_to/integrations/standard_tests.ipynb
index 4607fc83c2147..f98a9ef7471d5 100644
--- a/docs/docs/contributing/how_to/integrations/standard_tests.ipynb
+++ b/docs/docs/contributing/how_to/integrations/standard_tests.ipynb
@@ -83,14 +83,8 @@
     "\n",
     "There are 2 namespaces in the `langchain-tests` package: \n",
     "\n",
-    "- unit tests (`langchain_tests.unit_tests`): designed to be used to test the tool in isolation and without access to external services\n",
-    "- integration tests (`langchain_tests.integration_tests`): designed to be used to test the tool with access to external services (in particular, the external service that the tool is designed to interact with).\n",
-    "\n",
-    ":::note\n",
-    "\n",
-    "Integration tests can also be run without access to external services, **if** they are properly mocked.\n",
-    "\n",
-    ":::\n",
+    "- [unit tests](../../../concepts/testing.mdx#unit-tests) (`langchain_tests.unit_tests`): designed to be used to test the tool in isolation and without access to external services\n",
+    "- [integration tests](../../../concepts/testing.mdx#integration-tests) (`langchain_tests.integration_tests`): designed to be used to test the tool with access to external services (in particular, the external service that the tool is designed to interact with).\n",
     "\n",
     "Both types of tests are implemented as [`pytest` class-based test suites](https://docs.pytest.org/en/7.1.x/getting-started.html#group-multiple-tests-in-a-class).\n",
     "\n",
@@ -264,7 +258,7 @@
     "from typing import Tuple, Type\n",
     "\n",
     "from langchain_parrot_link.embeddings import ParrotLinkEmbeddings\n",
-    "from langchain_standard_tests.unit_tests import EmbeddingsUnitTests\n",
+    "from langchain_tests.unit_tests import EmbeddingsUnitTests\n",
     "\n",
     "\n",
     "class TestParrotLinkEmbeddingsUnit(EmbeddingsUnitTests):\n",
@@ -287,7 +281,7 @@
     "from typing import Type\n",
     "\n",
     "from langchain_parrot_link.embeddings import ParrotLinkEmbeddings\n",
-    "from langchain_standard_tests.integration_tests import EmbeddingsIntegrationTests\n",
+    "from langchain_tests.integration_tests import EmbeddingsIntegrationTests\n",
     "\n",
     "\n",
     "class TestParrotLinkEmbeddingsIntegration(EmbeddingsIntegrationTests):\n",
@@ -320,7 +314,7 @@
     "from typing import Type\n",
     "\n",
     "from langchain_parrot_link.tools import ParrotMultiplyTool\n",
-    "from langchain_standard_tests.unit_tests import ToolsUnitTests\n",
+    "from langchain_tests.unit_tests import ToolsUnitTests\n",
     "\n",
     "\n",
     "class TestParrotMultiplyToolUnit(ToolsUnitTests):\n",
@@ -354,7 +348,7 @@
     "from typing import Type\n",
     "\n",
     "from langchain_parrot_link.tools import ParrotMultiplyTool\n",
-    "from langchain_standard_tests.integration_tests import ToolsIntegrationTests\n",
+    "from langchain_tests.integration_tests import ToolsIntegrationTests\n",
     "\n",
     "\n",
     "class TestParrotMultiplyToolIntegration(ToolsIntegrationTests):\n",