diff --git a/docs/content/_navigation.json b/docs/content/_navigation.json index 0c9347440d959..115581fc3ecba 100644 --- a/docs/content/_navigation.json +++ b/docs/content/_navigation.json @@ -836,6 +836,10 @@ } ] }, + { + "title": "OpenAI", + "path": "/integrations/openai" + }, { "title": "Pandas", "path": "/integrations/pandas" diff --git a/docs/content/api/modules.json.gz b/docs/content/api/modules.json.gz index e44c646c2e877..8ba3c9cdd1870 100644 Binary files a/docs/content/api/modules.json.gz and b/docs/content/api/modules.json.gz differ diff --git a/docs/content/api/searchindex.json.gz b/docs/content/api/searchindex.json.gz index 92066a5a2859e..2b2e36c93bf1c 100644 Binary files a/docs/content/api/searchindex.json.gz and b/docs/content/api/searchindex.json.gz differ diff --git a/docs/content/api/sections.json.gz b/docs/content/api/sections.json.gz index b02c8e60691cc..976e4acb85aba 100644 Binary files a/docs/content/api/sections.json.gz and b/docs/content/api/sections.json.gz differ diff --git a/docs/content/integrations.mdx b/docs/content/integrations.mdx index a8906c3e72b1d..ed81c5d6ef68a 100644 --- a/docs/content/integrations.mdx +++ b/docs/content/integrations.mdx @@ -54,6 +54,7 @@ Using our integration guides and libraries, you can extend Dagster to interopera title="Google BigQuery" href="/integrations/bigquery" > + + + This feature is considered experimental. + + +The `dagster-openai` library allows you to build OpenAI pipelines with Dagster and log OpenAI API usage metadata in [Dagster Insights](/dagster-cloud/insights). + +Using this library's <PyObject module="dagster_openai" object="OpenAIResource" />, you can easily interact with the [OpenAI REST API](https://platform.openai.com/docs/introduction) via the [OpenAI Python API](https://github.com/openai/openai-python). + +When used with Dagster's [Software-defined Assets](/concepts/assets/software-defined-assets), the resource automatically logs OpenAI usage metadata in asset metadata. See the [Relevant APIs](#relevant-apis) section for more information.
+ +--- + +## Getting started + +Before you get started with the `dagster-openai` library, we recommend familiarizing yourself with the [OpenAI Python API library](https://github.com/openai/openai-python), which this integration uses to interact with the [OpenAI REST API](https://platform.openai.com/docs/introduction). + +--- + +## Prerequisites + +To get started, install the `dagster` and `dagster-openai` Python packages: + +```bash +pip install dagster dagster-openai +``` + +Note that you will need an OpenAI [API key](https://platform.openai.com/api-keys) to use the resource, which can be generated in your OpenAI account. + +--- + +## Connecting to OpenAI + +The first step in using OpenAI with Dagster is to tell Dagster how to connect to an OpenAI client using an OpenAI [resource](/concepts/resources). This resource contains the credentials needed to interact with the OpenAI API. + +We will supply our credentials as environment variables by adding them to a `.env` file. For more information on setting environment variables in a production setting, see [Using environment variables and secrets](/guides/dagster/using-environment-variables-and-secrets). + +```bash +# .env + +OPENAI_API_KEY=... +``` + +Then, we can instruct Dagster to authorize the OpenAI resource using the environment variables: + +```python startafter=start_example endbefore=end_example file=/integrations/openai/resource.py +from dagster_openai import OpenAIResource + +from dagster import EnvVar + +# Pull API key from environment variables +openai = OpenAIResource( + api_key=EnvVar("OPENAI_API_KEY"), +) +``` + +--- + +## Using the OpenAI resource with assets + +The OpenAI resource can be used in assets in order to interact with the OpenAI API. Note that in this example, we supply our credentials as environment variables directly when instantiating the object.
+ +```python startafter=start_example endbefore=end_example file=/integrations/openai/assets.py +from dagster_openai import OpenAIResource + +from dagster import ( + AssetExecutionContext, + Definitions, + EnvVar, + asset, + define_asset_job, +) + + +@asset(compute_kind="OpenAI") +def openai_asset(context: AssetExecutionContext, openai: OpenAIResource): + with openai.get_client(context) as client: + client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Say this is a test."}], + ) + + +openai_asset_job = define_asset_job(name="openai_asset_job", selection="openai_asset") + +defs = Definitions( + assets=[openai_asset], + jobs=[openai_asset_job], + resources={ + "openai": OpenAIResource(api_key=EnvVar("OPENAI_API_KEY")), + }, +) +``` + +After materializing your asset, your OpenAI API usage metadata will be available in the **Events** and **Plots** tabs of your asset in the Dagster UI. If you are using [Dagster Cloud](/dagster-cloud), your usage metadata will also be available in [Dagster Insights](/dagster-cloud/insights). Refer to the [Viewing and materializing assets in the UI guide](https://docs.dagster.io/concepts/assets/software-defined-assets#viewing-and-materializing-assets-in-the-ui) for more information. + +--- + +## Using the OpenAI resource with ops + +The OpenAI resource can also be used in ops. **Note**: Currently, the OpenAI resource doesn't (out-of-the-box) log OpenAI usage metadata when used in ops. 
+ +```python startafter=start_example endbefore=end_example file=/integrations/openai/ops.py +from dagster_openai import OpenAIResource + +from dagster import ( + Definitions, + EnvVar, + GraphDefinition, + OpExecutionContext, + op, +) + + +@op +def openai_op(context: OpExecutionContext, openai: OpenAIResource): + with openai.get_client(context) as client: + client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Say this is a test"}], + ) + + +openai_op_job = GraphDefinition(name="openai_op_job", node_defs=[openai_op]).to_job() + +defs = Definitions( + jobs=[openai_op_job], + resources={ + "openai": OpenAIResource(api_key=EnvVar("OPENAI_API_KEY")), + }, +) +``` + +--- + +## Relevant APIs + +| Name | Description | +| ----------------------------------------------------------------- | ------------------------------------------------------------------------------------- | +| <PyObject module="dagster_openai" object="OpenAIResource" /> | The OpenAI resource used for handling the client | +| <PyObject module="dagster_openai" object="with_usage_metadata" /> | The function wrapper used on OpenAI API endpoint methods to log OpenAI usage metadata | diff --git a/docs/next/public/objects.inv b/docs/next/public/objects.inv index b882580229ed4..38bb9eaa818dc 100644 Binary files a/docs/next/public/objects.inv and b/docs/next/public/objects.inv differ diff --git a/docs/sphinx/conf.py b/docs/sphinx/conf.py index 91c8e0f40d416..352006a5263da 100644 --- a/docs/sphinx/conf.py +++ b/docs/sphinx/conf.py @@ -41,6 +41,7 @@ "../../python_modules/libraries/dagster-mlflow", "../../python_modules/libraries/dagster-msteams", "../../python_modules/libraries/dagster-mysql", + "../../python_modules/libraries/dagster-openai", "../../python_modules/libraries/dagster-pagerduty", "../../python_modules/libraries/dagster-pandas", "../../python_modules/libraries/dagster-pandera", diff --git a/docs/sphinx/index.rst b/docs/sphinx/index.rst index a147677c31562..07ac5af7fb6da 100644 --- a/docs/sphinx/index.rst +++ b/docs/sphinx/index.rst @@ -59,6 +59,7 @@
sections/api/apidocs/libraries/dagster-mlflow sections/api/apidocs/libraries/dagster-msteams sections/api/apidocs/libraries/dagster-mysql + sections/api/apidocs/libraries/dagster-openai sections/api/apidocs/libraries/dagster-pagerduty sections/api/apidocs/libraries/dagster-pandas sections/api/apidocs/libraries/dagster-pandera diff --git a/docs/sphinx/sections/api/apidocs/libraries/dagster-openai.rst b/docs/sphinx/sections/api/apidocs/libraries/dagster-openai.rst new file mode 100644 index 0000000000000..e932e61bd7f6a --- /dev/null +++ b/docs/sphinx/sections/api/apidocs/libraries/dagster-openai.rst @@ -0,0 +1,14 @@ +OpenAI (dagster-openai) +------------------------ + +The `dagster_openai` library provides utilities for using OpenAI with Dagster. +A good place to start with `dagster_openai` is `the guide <https://docs.dagster.io/integrations/openai>`_. + + +.. currentmodule:: dagster_openai + +.. autofunction:: with_usage_metadata + +.. autoclass:: OpenAIResource + :members: get_client, get_client_for_asset + diff --git a/docs/tox.ini b/docs/tox.ini index 3973067863a14..6e8cd0faff2db 100644 --- a/docs/tox.ini +++ b/docs/tox.ini @@ -38,6 +38,7 @@ deps = -e ../python_modules/libraries/dagster-deltalake -e ../python_modules/libraries/dagster-deltalake-pandas -e ../python_modules/libraries/dagster-deltalake-polars + -e ../python_modules/libraries/dagster-openai commands = make --directory=sphinx clean diff --git a/examples/docs_snippets/docs_snippets/integrations/openai/__init__.py b/examples/docs_snippets/docs_snippets/integrations/openai/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/docs_snippets/docs_snippets/integrations/openai/assets.py b/examples/docs_snippets/docs_snippets/integrations/openai/assets.py new file mode 100644 index 0000000000000..3a4cf2e1b3e27 --- /dev/null +++ b/examples/docs_snippets/docs_snippets/integrations/openai/assets.py @@ -0,0 +1,31 @@ +# start_example +from dagster_openai import OpenAIResource + +from dagster import ( +
AssetExecutionContext, + Definitions, + EnvVar, + asset, + define_asset_job, +) + + +@asset(compute_kind="OpenAI") +def openai_asset(context: AssetExecutionContext, openai: OpenAIResource): + with openai.get_client(context) as client: + client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Say this is a test."}], + ) + + +openai_asset_job = define_asset_job(name="openai_asset_job", selection="openai_asset") + +defs = Definitions( + assets=[openai_asset], + jobs=[openai_asset_job], + resources={ + "openai": OpenAIResource(api_key=EnvVar("OPENAI_API_KEY")), + }, +) +# end_example diff --git a/examples/docs_snippets/docs_snippets/integrations/openai/ops.py b/examples/docs_snippets/docs_snippets/integrations/openai/ops.py new file mode 100644 index 0000000000000..ceebce6b5d0c3 --- /dev/null +++ b/examples/docs_snippets/docs_snippets/integrations/openai/ops.py @@ -0,0 +1,30 @@ +# start_example +from dagster_openai import OpenAIResource + +from dagster import ( + Definitions, + EnvVar, + GraphDefinition, + OpExecutionContext, + op, +) + + +@op +def openai_op(context: OpExecutionContext, openai: OpenAIResource): + with openai.get_client(context) as client: + client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Say this is a test"}], + ) + + +openai_op_job = GraphDefinition(name="openai_op_job", node_defs=[openai_op]).to_job() + +defs = Definitions( + jobs=[openai_op_job], + resources={ + "openai": OpenAIResource(api_key=EnvVar("OPENAI_API_KEY")), + }, +) +# end_example diff --git a/examples/docs_snippets/docs_snippets/integrations/openai/resource.py b/examples/docs_snippets/docs_snippets/integrations/openai/resource.py new file mode 100644 index 0000000000000..bda1f756c7c50 --- /dev/null +++ b/examples/docs_snippets/docs_snippets/integrations/openai/resource.py @@ -0,0 +1,10 @@ +# start_example +from dagster_openai import OpenAIResource + +from dagster import EnvVar + +# Pull API key 
from environment variables +openai = OpenAIResource( + api_key=EnvVar("OPENAI_API_KEY"), +) +# end_example diff --git a/python_modules/libraries/dagster-openai/README.md b/python_modules/libraries/dagster-openai/README.md index 42af42c36bbd8..81653e9785602 100644 --- a/python_modules/libraries/dagster-openai/README.md +++ b/python_modules/libraries/dagster-openai/README.md @@ -1 +1,4 @@ # dagster-openai + +The docs for `dagster-openai` can be found +[here](https://docs.dagster.io/_apidocs/libraries/dagster-openai). diff --git a/python_modules/libraries/dagster-openai/dagster_openai/resources.py b/python_modules/libraries/dagster-openai/dagster_openai/resources.py index 518e9a8530469..53be130477503 100644 --- a/python_modules/libraries/dagster-openai/dagster_openai/resources.py +++ b/python_modules/libraries/dagster-openai/dagster_openai/resources.py @@ -12,7 +12,7 @@ InitResourceContext, OpExecutionContext, ) -from dagster._annotations import experimental +from dagster._annotations import experimental, public from dagster._core.errors import ( DagsterInvariantViolationError, ) @@ -49,6 +49,7 @@ def _add_to_asset_metadata( context.add_output_metadata(dict(counters), output_name) +@public @experimental def with_usage_metadata(context: AssetExecutionContext, output_name: Optional[str], func): """This wrapper can be used on any endpoint of the @@ -141,6 +142,7 @@ def wrapper(*args, **kwargs): return wrapper +@public @experimental class OpenAIResource(ConfigurableResource): """This resource is wrapper over the @@ -212,6 +214,7 @@ def setup_for_execution(self, context: InitResourceContext) -> None: # Set up an OpenAI client based on the API key. 
self._client = Client(api_key=self.api_key) + @public @contextmanager def get_client( self, context: Union[AssetExecutionContext, OpExecutionContext] @@ -274,6 +277,7 @@ def openai_asset(context: AssetExecutionContext, openai: OpenAIResource): """ yield from self._get_client(context=context, asset_key=None) + @public @contextmanager def get_client_for_asset( self, context: AssetExecutionContext, asset_key: AssetKey @@ -288,7 +292,7 @@ def get_client_for_asset( allowing to log the API usage metadata in the asset metadata. This method can only be called when working with assets, - i.e. the provided ``context`` must be of type ``AssetExecutionContext. + i.e. the provided ``context`` must be of type ``AssetExecutionContext``. :param context: The ``context`` object for computing the asset in which ``get_client`` is called. :param asset_key: the ``asset_key`` of the asset for which a materialization should include the metadata.