diff --git a/docs/user-guides/community/active-fence.md b/docs/user-guides/community/active-fence.md index 688216f29..61d66a80a 100644 --- a/docs/user-guides/community/active-fence.md +++ b/docs/user-guides/community/active-fence.md @@ -1,6 +1,6 @@ # ActiveFence Integration -NeMo Guardrails supports using the [ActiveFence ActiveScore API](https://docs.activefence.com/index.html) as an input rail out-of-the-box (you need to have the `ACTIVEFENCE_API_KEY` environment variable set). +NeMo Guardrails supports using the [ActiveFence ActiveScore API](https://docs.activefence.com/index.html) as an input and output rail out-of-the-box (you need to have the `ACTIVEFENCE_API_KEY` environment variable set). ```yaml rails: @@ -13,7 +13,7 @@ rails: # - activefence moderation detailed ``` -The `activefence moderation` flow uses the maximum risk score with an 0.85 threshold to decide if the input should be allowed or not (i.e., if the risk score is above the threshold, it is considered a violation). The `activefence moderation detailed` has individual scores per category of violation. +The `activefence moderation` flow uses the maximum risk score with a 0.85 threshold to decide if the text should be allowed or not (i.e., if the risk score is above the threshold, it is considered a violation). The `activefence moderation detailed` flow has individual scores per category of violation. To customize the scores, you have to overwrite the [default flows](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/library/activefence/flows.co) in your config. For example, to change the threshold for `activefence moderation` you can add the following flow to your config: diff --git a/docs/user-guides/guardrails-library.md b/docs/user-guides/guardrails-library.md index 14b84fe44..eb9176bbc 100644 --- a/docs/user-guides/guardrails-library.md +++ b/docs/user-guides/guardrails-library.md @@ -611,7 +611,7 @@ This category of rails relies on 3rd party APIs for various guardrailing tasks. 
### ActiveFence -NeMo Guardrails supports using the [ActiveFence ActiveScore API](https://docs.activefence.com/index.html) as an input rail out-of-the-box (you need to have the `ACTIVEFENCE_API_KEY` environment variable set). +NeMo Guardrails supports using the [ActiveFence ActiveScore API](https://docs.activefence.com/index.html) as an input and output rail out-of-the-box (you need to have the `ACTIVEFENCE_API_KEY` environment variable set). #### Example usage @@ -620,6 +620,9 @@ rails: input: flows: - activefence moderation + output: + flows: + - activefence moderation ``` For more details, check out the [ActiveFence Integration](./community/active-fence.md) page. diff --git a/nemoguardrails/library/activefence/actions.py b/nemoguardrails/library/activefence/actions.py index 2b01974ad..cc5456d56 100644 --- a/nemoguardrails/library/activefence/actions.py +++ b/nemoguardrails/library/activefence/actions.py @@ -32,12 +32,15 @@ async def call_activefence_api(context: Optional[dict] = None): if api_key is None: raise ValueError("ACTIVEFENCE_API_KEY environment variable not set.") - user_message = context.get("user_message") + if context.get("triggered_input_rail"): + text = context["user_message"] + else: + text = context["bot_message"] url = "https://apis.activefence.com/sync/v3/content/text" headers = {"af-api-key": api_key, "af-source": "nemo-guardrails"} data = { - "text": user_message, + "text": text, "content_id": "ng-" + new_uuid(), } diff --git a/tests/test_active_fence_input_rail.py b/tests/test_activefence_rail.py similarity index 70% rename from tests/test_active_fence_input_rail.py rename to tests/test_activefence_rail.py index 41a76047c..1148000cf 100644 --- a/tests/test_active_fence_input_rail.py +++ b/tests/test_activefence_rail.py @@ -19,7 +19,7 @@ from tests.utils import TestChat -def test_1(monkeypatch): +def test_input(monkeypatch): monkeypatch.setenv("ACTIVEFENCE_API_KEY", "xxx") config = RailsConfig.from_content( @@ -88,3 +88,47 @@ def 
test_1(monkeypatch): chat >> "you are stupid!" chat << "I'm sorry, I can't respond to that." + + +def test_output(monkeypatch): + monkeypatch.setenv("ACTIVEFENCE_API_KEY", "xxx") + + config = RailsConfig.from_content( + yaml_content=""" + models: + - type: main + engine: openai + model: gpt-3.5-turbo-instruct + + rails: + output: + flows: + - activefence moderation + """, + ) + chat = TestChat( + config, + llm_completions=[ + " You are stupid!", + ], + ) + + with aioresponses() as m: + m.post( + "https://apis.activefence.com/sync/v3/content/text", + payload={ + "response_id": "36f76a43-ddbe-4308-bc86-1a2b068a00ea", + "entity_id": "59fe8fe0-5036-494f-970c-8e28305a3716", + "entity_type": "content", + "violations": [ + { + "violation_type": "abusive_or_harmful.profanity", + "risk_score": 0.95, + } + ], + "errors": [], + }, + ) + + chat >> "Hello!" + chat << "I'm sorry, I can't respond to that."