NVIDIA · noamlevy81 · Dec 4, 2024 · Dec 5, 2024
diff --git a/docs/user-guides/community/active-fence.md b/docs/user-guides/community/active-fence.md
@@ -1,6 +1,6 @@
 # ActiveFence Integration
 
-NeMo Guardrails supports using the [ActiveFence ActiveScore API](https://docs.activefence.com/index.html) as an input rail out-of-the-box (you need to have the `ACTIVEFENCE_API_KEY` environment variable set).
+NeMo Guardrails supports using the [ActiveFence ActiveScore API](https://docs.activefence.com/index.html) as an input and output rail out-of-the-box (you need to have the `ACTIVEFENCE_API_KEY` environment variable set).
 
 ```yaml
 rails:
@@ -13,7 +13,7 @@ rails:
       # - activefence moderation detailed
 ```
 
-The `activefence moderation` flow uses the maximum risk score with an 0.85 threshold to decide if the input should be allowed or not (i.e., if the risk score is above the threshold, it is considered a violation). The `activefence moderation detailed` has individual scores per category of violation.
+The `activefence moderation` flow uses the maximum risk score with a 0.85 threshold to decide if the text should be allowed or not (i.e., if the risk score is above the threshold, it is considered a violation). The `activefence moderation detailed` flow has individual scores per category of violation.
 
 To customize the scores, you have to overwrite the [default flows](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/library/activefence/flows.co) in your config. For example, to change the threshold for `activefence moderation` you can add the following flow to your config:
 

diff --git a/docs/user-guides/guardrails-library.md b/docs/user-guides/guardrails-library.md
@@ -611,7 +611,7 @@ This category of rails relies on 3rd party APIs for various guardrailing tasks.
 
 ### ActiveFence
 
-NeMo Guardrails supports using the [ActiveFence ActiveScore API](https://docs.activefence.com/index.html) as an input rail out-of-the-box (you need to have the `ACTIVEFENCE_API_KEY` environment variable set).
+NeMo Guardrails supports using the [ActiveFence ActiveScore API](https://docs.activefence.com/index.html) as an input and output rail out-of-the-box (you need to have the `ACTIVEFENCE_API_KEY` environment variable set).
 
 #### Example usage
 
@@ -620,6 +620,9 @@ rails:
   input:
     flows:
       - activefence moderation
+  output:
+    flows:
+      - activefence moderation
 ```
 
 For more details, check out the [ActiveFence Integration](./community/active-fence.md) page.

diff --git a/nemoguardrails/library/activefence/actions.py b/nemoguardrails/library/activefence/actions.py
@@ -32,12 +32,15 @@ async def call_activefence_api(context: Optional[dict] = None):
     if api_key is None:
         raise ValueError("ACTIVEFENCE_API_KEY environment variable not set.")
 
-    user_message = context.get("user_message")
+    if context.get("triggered_input_rail"):
+        text = context["user_message"]
+    else:
+        text = context["bot_message"]
 
     url = "https://apis.activefence.com/sync/v3/content/text"
     headers = {"af-api-key": api_key, "af-source": "nemo-guardrails"}
     data = {
-        "text": user_message,
+        "text": text,
         "content_id": "ng-" + new_uuid(),
     }
 

diff --git a/tests/test_active_fence_input_rail.py → tests/test_activefence_rail.py b/tests/test_active_fence_input_rail.py → tests/test_activefence_rail.py
@@ -19,7 +19,7 @@
 from tests.utils import TestChat
 
 
-def test_1(monkeypatch):
+def test_input(monkeypatch):
     monkeypatch.setenv("ACTIVEFENCE_API_KEY", "xxx")
 
     config = RailsConfig.from_content(
@@ -88,3 +88,47 @@ def test_1(monkeypatch):
 
         chat >> "you are stupid!"
         chat << "I'm sorry, I can't respond to that."
+
+
+def test_output(monkeypatch):
+    monkeypatch.setenv("ACTIVEFENCE_API_KEY", "xxx")
+
+    config = RailsConfig.from_content(
+        yaml_content="""
+            models:
+              - type: main
+                engine: openai
+                model: gpt-3.5-turbo-instruct
+
+            rails:
+              output:
+                flows:
+                  - activefence moderation
+        """,
+    )
+    chat = TestChat(
+        config,
+        llm_completions=[
+            " You are stupid!",
+        ],
+    )
+
+    with aioresponses() as m:
+        m.post(
+            "https://apis.activefence.com/sync/v3/content/text",
+            payload={
+                "response_id": "36f76a43-ddbe-4308-bc86-1a2b068a00ea",
+                "entity_id": "59fe8fe0-5036-494f-970c-8e28305a3716",
+                "entity_type": "content",
+                "violations": [
+                    {
+                        "violation_type": "abusive_or_harmful.profanity",
+                        "risk_score": 0.95,
+                    }
+                ],
+                "errors": [],
+            },
+        )
+
+        chat >> "Hello!"
+        chat << "I'm sorry, I can't respond to that."