From b2a5cda0b4c3aca9dbfdc3700f95a312469e96c2 Mon Sep 17 00:00:00 2001 From: noamlevy81 Date: Wed, 4 Dec 2024 13:16:22 +0200 Subject: [PATCH 1/2] Update action to support output rail --- docs/user-guides/community/active-fence.md | 4 +- docs/user-guides/guardrails-library.md | 5 +- nemoguardrails/library/activefence/actions.py | 7 ++- ...input_rail.py => test_activefence_rail.py} | 46 ++++++++++++++++++- 4 files changed, 56 insertions(+), 6 deletions(-) rename tests/{test_active_fence_input_rail.py => test_activefence_rail.py} (70%) diff --git a/docs/user-guides/community/active-fence.md b/docs/user-guides/community/active-fence.md index 688216f29..61d66a80a 100644 --- a/docs/user-guides/community/active-fence.md +++ b/docs/user-guides/community/active-fence.md @@ -1,6 +1,6 @@ # ActiveFence Integration -NeMo Guardrails supports using the [ActiveFence ActiveScore API](https://docs.activefence.com/index.html) as an input rail out-of-the-box (you need to have the `ACTIVEFENCE_API_KEY` environment variable set). +NeMo Guardrails supports using the [ActiveFence ActiveScore API](https://docs.activefence.com/index.html) as an input and output rail out-of-the-box (you need to have the `ACTIVEFENCE_API_KEY` environment variable set). ```yaml rails: @@ -13,7 +13,7 @@ rails: # - activefence moderation detailed ``` -The `activefence moderation` flow uses the maximum risk score with an 0.85 threshold to decide if the input should be allowed or not (i.e., if the risk score is above the threshold, it is considered a violation). The `activefence moderation detailed` has individual scores per category of violation. +The `activefence moderation` flow uses the maximum risk score with a 0.85 threshold to decide if the text should be allowed or not (i.e., if the risk score is above the threshold, it is considered a violation). The `activefence moderation detailed` flow has individual scores per category of violation. 
To customize the scores, you have to overwrite the [default flows](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/library/activefence/flows.co) in your config. For example, to change the threshold for `activefence moderation` you can add the following flow to your config: diff --git a/docs/user-guides/guardrails-library.md b/docs/user-guides/guardrails-library.md index 14b84fe44..eb9176bbc 100644 --- a/docs/user-guides/guardrails-library.md +++ b/docs/user-guides/guardrails-library.md @@ -611,7 +611,7 @@ This category of rails relies on 3rd party APIs for various guardrailing tasks. ### ActiveFence -NeMo Guardrails supports using the [ActiveFence ActiveScore API](https://docs.activefence.com/index.html) as an input rail out-of-the-box (you need to have the `ACTIVEFENCE_API_KEY` environment variable set). +NeMo Guardrails supports using the [ActiveFence ActiveScore API](https://docs.activefence.com/index.html) as an input and output rail out-of-the-box (you need to have the `ACTIVEFENCE_API_KEY` environment variable set). #### Example usage @@ -620,6 +620,9 @@ rails: input: flows: - activefence moderation + output: + flows: + - activefence moderation ``` For more details, check out the [ActiveFence Integration](./community/active-fence.md) page. 
diff --git a/nemoguardrails/library/activefence/actions.py b/nemoguardrails/library/activefence/actions.py index 2b01974ad..e8eb095d9 100644 --- a/nemoguardrails/library/activefence/actions.py +++ b/nemoguardrails/library/activefence/actions.py @@ -32,12 +32,15 @@ async def call_activefence_api(context: Optional[dict] = None): if api_key is None: raise ValueError("ACTIVEFENCE_API_KEY environment variable not set.") - user_message = context.get("user_message") + if context.get("triggered_input_rail"): + text = context.get("user_message") + else: + text = context.get("bot_message") url = "https://apis.activefence.com/sync/v3/content/text" headers = {"af-api-key": api_key, "af-source": "nemo-guardrails"} data = { - "text": user_message, + "text": text, "content_id": "ng-" + new_uuid(), } diff --git a/tests/test_active_fence_input_rail.py b/tests/test_activefence_rail.py similarity index 70% rename from tests/test_active_fence_input_rail.py rename to tests/test_activefence_rail.py index 41a76047c..1148000cf 100644 --- a/tests/test_active_fence_input_rail.py +++ b/tests/test_activefence_rail.py @@ -19,7 +19,7 @@ from tests.utils import TestChat -def test_1(monkeypatch): +def test_input(monkeypatch): monkeypatch.setenv("ACTIVEFENCE_API_KEY", "xxx") config = RailsConfig.from_content( @@ -88,3 +88,47 @@ def test_1(monkeypatch): chat >> "you are stupid!" chat << "I'm sorry, I can't respond to that." 
+ + +def test_output(monkeypatch): + monkeypatch.setenv("ACTIVEFENCE_API_KEY", "xxx") + + config = RailsConfig.from_content( + yaml_content=""" + models: + - type: main + engine: openai + model: gpt-3.5-turbo-instruct + + rails: + output: + flows: + - activefence moderation + """, + ) + chat = TestChat( + config, + llm_completions=[ + " You are stupid!", + ], + ) + + with aioresponses() as m: + m.post( + "https://apis.activefence.com/sync/v3/content/text", + payload={ + "response_id": "36f76a43-ddbe-4308-bc86-1a2b068a00ea", + "entity_id": "59fe8fe0-5036-494f-970c-8e28305a3716", + "entity_type": "content", + "violations": [ + { + "violation_type": "abusive_or_harmful.profanity", + "risk_score": 0.95, + } + ], + "errors": [], + }, + ) + + chat >> "Hello!" + chat << "I'm sorry, I can't respond to that." From fba5cf8ee992ec9dda5449151aabd5f60d52d8ef Mon Sep 17 00:00:00 2001 From: noamlevy81 Date: Thu, 5 Dec 2024 08:41:24 +0200 Subject: [PATCH 2/2] Update action to support output rail --- nemoguardrails/library/activefence/actions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemoguardrails/library/activefence/actions.py b/nemoguardrails/library/activefence/actions.py index e8eb095d9..cc5456d56 100644 --- a/nemoguardrails/library/activefence/actions.py +++ b/nemoguardrails/library/activefence/actions.py @@ -33,9 +33,9 @@ async def call_activefence_api(context: Optional[dict] = None): raise ValueError("ACTIVEFENCE_API_KEY environment variable not set.") if context.get("triggered_input_rail"): - text = context.get("user_message") + text = context["user_message"] else: - text = context.get("bot_message") + text = context["bot_message"] url = "https://apis.activefence.com/sync/v3/content/text" headers = {"af-api-key": api_key, "af-source": "nemo-guardrails"}