From a79419c9b1b04822e0df7221c76fe4616987dc28 Mon Sep 17 00:00:00 2001
From: Sam Arbid
Date: Thu, 30 Nov 2023 11:13:55 +0100
Subject: [PATCH] schema: extend content accepted HTML tags & attrs

---
Review note: the blob ids on the "index" lines below still refer to the
originally submitted revision of this patch.

 invenio_pages/config.py           | 13 ++++++
 invenio_pages/services/schemas.py | 53 +++++++++++++++++++++-
 tests/services/test_services.py   | 73 +++++++++++++++++++++++++++++++
 3 files changed, 138 insertions(+), 1 deletion(-)

diff --git a/invenio_pages/config.py b/invenio_pages/config.py
index 03c9368..7979012 100644
--- a/invenio_pages/config.py
+++ b/invenio_pages/config.py
@@ -2,6 +2,7 @@
 #
 # This file is part of Invenio.
 # Copyright (C) 2015-2022 CERN.
+# Copyright (C) 2023 KTH Royal Institute of Technology.
 #
 # Invenio is free software; you can redistribute it and/or modify it
 # under the terms of the MIT License; see LICENSE file for more details.
@@ -52,3 +53,15 @@
 
 PAGES_FACETS = {}
 """Available facets defined for this module."""
+
+
+PAGES_ALLOWED_EXTRA_HTML_TAGS = ["img", "button"]
+"""Extend allowed HTML tags list for static pages content."""
+
+# NOTE(review): "onclick" lets page content run inline JavaScript; keep it
+# only while every page author is trusted.
+PAGES_ALLOWED_EXTRA_HTML_ATTRS = {
+    "img": ["src", "alt", "title", "width", "height", "loading"],
+    "button": ["type", "name", "value", "disabled", "onclick"],
+}
+"""Extend allowed HTML attrs list, per tag, for static pages content."""
diff --git a/invenio_pages/services/schemas.py b/invenio_pages/services/schemas.py
index 2fc310e..0cede5b 100644
--- a/invenio_pages/services/schemas.py
+++ b/invenio_pages/services/schemas.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (C) 2023 CERN.
+# Copyright (C) 2023 KTH Royal Institute of Technology.
 #
 # Invenio-Pages is free software; you can redistribute it and/or modify it
 # under the terms of the MIT License; see LICENSE file for more details.
@@ -9,17 +10,67 @@
 
 from datetime import timezone
 
+from flask import current_app
 from marshmallow import Schema, fields
 from marshmallow_utils.fields import SanitizedHTML, TZDateTime
 
 
+class DynamicSanitizedHTML(SanitizedHTML):
+    """HTML field whose allowed tags and attributes come from the app config.
+
+    The allow-lists are read from the Flask application config every time a
+    value is deserialized rather than when the field is declared.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Initialize the field with no static ``tags``/``attrs``.
+
+        Both are filled in from the application config by ``_deserialize``.
+        """
+        super().__init__(*args, tags=None, attrs=None, **kwargs)
+
+    @staticmethod
+    def _merge_attrs(base, extra):
+        """Return ``base`` extended with ``extra``, merging per-tag lists.
+
+        Assumes both map a tag name to a list of attribute names. A tag
+        present in both keeps the attributes of both, duplicates dropped;
+        ``dict(**base, **extra)`` would raise ``TypeError`` for such a tag.
+        """
+        merged = {tag: list(attrs) for tag, attrs in base.items()}
+        for tag, attrs in extra.items():
+            known = merged.setdefault(tag, [])
+            for attr in attrs:
+                if attr not in known:
+                    known.append(attr)
+        return merged
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        """Sanitize ``value`` using the allow-lists from the current app config.
+
+        Must run inside a Flask application context.
+        """
+        config = current_app.config
+        # Unpack instead of ``+`` so list and tuple settings can be combined.
+        self.tags = [
+            *config.get("ALLOWED_HTML_TAGS", []),
+            *config["PAGES_ALLOWED_EXTRA_HTML_TAGS"],
+        ]
+        self.attrs = self._merge_attrs(
+            config.get("ALLOWED_HTML_ATTRS", {}),
+            config["PAGES_ALLOWED_EXTRA_HTML_ATTRS"],
+        )
+
+        return super()._deserialize(value, attr, data, **kwargs)
+
+
 class PageSchema(Schema):
     """Schema for page."""
 
     id = fields.String()
     url = fields.String(metadata={"create_only": True})
     title = fields.String()
-    content = SanitizedHTML()
+    content = DynamicSanitizedHTML()
     description = fields.String()
     template_name = fields.String()
     created = TZDateTime(timezone=timezone.utc, format="iso", dump_only=True)
diff --git a/tests/services/test_services.py b/tests/services/test_services.py
index affd06d..1b30b67 100644
--- a/tests/services/test_services.py
+++ b/tests/services/test_services.py
@@ -2,15 +2,23 @@
 #
 # This file is part of Invenio.
 # Copyright (C) 2022 CERN.
+# Copyright (C) 2023 KTH Royal Institute of Technology.
 #
 # Invenio is free software; you can redistribute it and/or modify it
 # under the terms of the MIT License; see LICENSE file for more details.
 
 import pytest
+from flask import Flask, current_app
 from invenio_records_resources.services.errors import PermissionDeniedError
 
+from invenio_pages import InvenioPages
+from invenio_pages.config import (
+    PAGES_ALLOWED_EXTRA_HTML_ATTRS,
+    PAGES_ALLOWED_EXTRA_HTML_TAGS,
+)
 from invenio_pages.proxies import current_pages_service
 from invenio_pages.records.errors import PageNotCreatedError, PageNotFoundError
+from invenio_pages.services.schemas import DynamicSanitizedHTML
 
 
 def test_page_read(module_scoped_pages_fixture, simple_user_identity):
@@ -179,3 +187,68 @@ def test_delete_all(module_scoped_pages_fixture, superuser_identity):
     current_pages_service.delete_all(superuser_identity)
     pages = current_pages_service.search(superuser_identity)
     assert pages.total == 0
+
+
+def test_extra_allowed_html_tags():
+    """Test that the extension sets the defaults and keeps overrides."""
+    app = Flask("testapp")
+    InvenioPages(app)
+
+    assert (
+        app.config["PAGES_ALLOWED_EXTRA_HTML_ATTRS"] == PAGES_ALLOWED_EXTRA_HTML_ATTRS
+    )
+    assert app.config["PAGES_ALLOWED_EXTRA_HTML_TAGS"] == PAGES_ALLOWED_EXTRA_HTML_TAGS
+
+    app.config["PAGES_ALLOWED_EXTRA_HTML_ATTRS"] = {"a": ["href"]}
+    app.config["PAGES_ALLOWED_EXTRA_HTML_TAGS"] = ["a"]
+    InvenioPages(app)
+    assert app.config["PAGES_ALLOWED_EXTRA_HTML_ATTRS"] == {"a": ["href"]}
+    assert app.config["PAGES_ALLOWED_EXTRA_HTML_TAGS"] == ["a"]
+
+
+def test_dynamic_sanitized_html_initialization():
+    """Test that a new field has no static allow-lists.
+
+    ``tags`` and ``attrs`` stay ``None`` until a value is deserialized.
+    """
+    html_sanitizer = DynamicSanitizedHTML()
+    assert html_sanitizer.tags is None
+    assert html_sanitizer.attrs is None
+
+
+def test_dynamic_sanitized_html(app, monkeypatch):
+    """Test that tags and attributes allowed via config survive sanitization."""
+    with app.app_context():
+        # monkeypatch restores the config so other tests are unaffected.
+        monkeypatch.setitem(
+            current_app.config, "PAGES_ALLOWED_EXTRA_HTML_TAGS", ["customtag"]
+        )
+        monkeypatch.setitem(
+            current_app.config,
+            "PAGES_ALLOWED_EXTRA_HTML_ATTRS",
+            {"customtag": ["data-custom"]},
+        )
+
+        sanitizer = DynamicSanitizedHTML()
+        sample_html = '<customtag data-custom="value">Test</customtag>'
+        result = sanitizer._deserialize(sample_html, None, None)
+
+        assert sample_html in result
+
+
+def test_dynamic_sanitized_html_merges_shared_tags(app, monkeypatch):
+    """Test that a tag configured in both settings keeps both attribute lists."""
+    with app.app_context():
+        config = current_app.config
+        monkeypatch.setitem(config, "ALLOWED_HTML_TAGS", ["a"])
+        monkeypatch.setitem(config, "ALLOWED_HTML_ATTRS", {"a": ["href"]})
+        monkeypatch.setitem(config, "PAGES_ALLOWED_EXTRA_HTML_TAGS", ["img"])
+        monkeypatch.setitem(
+            config, "PAGES_ALLOWED_EXTRA_HTML_ATTRS", {"a": ["target"]}
+        )
+
+        sanitizer = DynamicSanitizedHTML()
+        sanitizer._deserialize("Test", None, None)
+
+        assert sanitizer.tags == ["a", "img"]
+        assert sanitizer.attrs == {"a": ["href", "target"]}