diff --git a/.github/workflows/pythontest.yml b/.github/workflows/pythontest.yml
index 443e445b4e..a4862cc657 100644
--- a/.github/workflows/pythontest.yml
+++ b/.github/workflows/pythontest.yml
@@ -65,11 +65,10 @@ jobs:
       - name: Set up minio
         run: |
           docker run -d -p 9000:9000 --name minio \
-            -e "MINIO_ACCESS_KEY=development" \
-            -e "MINIO_SECRET_KEY=development" \
-            -v /tmp/minio_data:/data \
-            -v /tmp/minio_config:/root/.minio \
-            minio/minio server /data
+            -e "MINIO_ROOT_USER=development" \
+            -e "MINIO_ROOT_PASSWORD=development" \
+            -e "MINIO_DEFAULT_BUCKETS=content:public" \
+            bitnami/minio:2024.5.28
       - name: Set up Python 3.10
         uses: actions/setup-python@v5
         with:
diff --git a/README.md b/README.md
index 5f8b9f8427..362093df9b 100644
--- a/README.md
+++ b/README.md
@@ -41,3 +41,9 @@ As soon as you open a pull request, it may take us a week or two to review it as
 
 ---
 *Thank you for your interest in contributing! Learning Equality was founded by volunteers dedicated to helping make educational materials more accessible to those in need, and every contribution makes a difference.*
+
+
+## Licensing
+Kolibri Studio is licensed under the MIT license. See [LICENSE](./LICENSE) for more details.
+
+Other tools and libraries used in Kolibri Studio are licensed under their respective licenses, and some are only used during development and are not intended for distribution or use in production environments.
diff --git a/contentcuration/contentcuration/apps.py b/contentcuration/contentcuration/apps.py
index 2466492feb..6f344aa63d 100644
--- a/contentcuration/contentcuration/apps.py
+++ b/contentcuration/contentcuration/apps.py
@@ -1,7 +1,4 @@
 from django.apps import AppConfig
-from django.conf import settings
-
-from contentcuration.utils.storage_common import is_gcs_backend
 
 
 class ContentConfig(AppConfig):
@@ -10,7 +7,3 @@ class ContentConfig(AppConfig):
     def ready(self):
         # Import signals
         import contentcuration.signals  # noqa
-
-        if settings.AWS_AUTO_CREATE_BUCKET and not is_gcs_backend():
-            from contentcuration.utils.minio_utils import ensure_storage_bucket_public
-            ensure_storage_bucket_public()
diff --git a/contentcuration/contentcuration/management/commands/setup.py b/contentcuration/contentcuration/management/commands/setup.py
index 3284349ebe..305f9318ff 100644
--- a/contentcuration/contentcuration/management/commands/setup.py
+++ b/contentcuration/contentcuration/management/commands/setup.py
@@ -24,7 +24,6 @@
 from contentcuration.utils.db_tools import create_user
 from contentcuration.utils.files import duplicate_file
 from contentcuration.utils.publish import publish_channel
-from contentcuration.utils.storage_common import is_gcs_backend
 
 
 logging = logmodule.getLogger(__name__)
@@ -54,11 +53,6 @@ def handle(self, *args, **options):
             print("{} is not a valid email".format(email))
             sys.exit()
 
-        # create the minio bucket
-        if not is_gcs_backend():
-            from contentcuration.utils.minio_utils import ensure_storage_bucket_public
-            ensure_storage_bucket_public()
-
         # create the cache table
         try:
             call_command("createcachetable")
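With `ensure_storage_bucket_public()` gone from `apps.py` and `setup.py`, bucket creation and the public-read policy are delegated entirely to the bitnami image via `MINIO_DEFAULT_BUCKETS=content:public`. A minimal sketch for sanity-checking that from the dev environment — assuming boto3 is available and using the docker-compose credentials/endpoint shown in this patch; this snippet is illustrative and not part of the diff:

```python
# Sketch: verify the bitnami image auto-created the public "content" bucket.
# Assumes boto3 and the development credentials from docker-compose.yml.
import boto3

s3 = boto3.client(
    "s3",
    endpoint_url="http://minio:9000",     # AWS_S3_ENDPOINT_URL
    aws_access_key_id="development",      # MINIO_ROOT_USER
    aws_secret_access_key="development",  # MINIO_ROOT_PASSWORD
)
s3.head_bucket(Bucket="content")  # raises botocore.exceptions.ClientError if missing
print(s3.get_bucket_policy(Bucket="content")["Policy"])  # policy set by "content:public"
```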
diff --git a/contentcuration/contentcuration/tests/base.py b/contentcuration/contentcuration/tests/base.py
index 3820d308e7..ad110c7302 100644
--- a/contentcuration/contentcuration/tests/base.py
+++ b/contentcuration/contentcuration/tests/base.py
@@ -18,47 +18,9 @@
 from . import testdata
 from contentcuration.models import User
-from contentcuration.utils import minio_utils
 
 
-class BucketTestClassMixin(object):
-    @classmethod
-    def create_bucket(cls):
-        minio_utils.ensure_storage_bucket_public(will_sleep=False)
-
-    @classmethod
-    def delete_bucket(cls):
-        minio_utils.ensure_bucket_deleted()
-
-
-class BucketTestMixin:
-    """
-    Handles bucket setup and tear down for test classes. If you want your entire TestCase to share the same bucket,
-    call create_bucket in setUpClass and then set persist_bucket to True, then make sure you call self.delete_bucket()
-    in tearDownClass.
-    """
-
-    persist_bucket = False
-
-    @classmethod
-    def create_bucket(cls):
-        minio_utils.ensure_storage_bucket_public(will_sleep=False)
-
-    @classmethod
-    def delete_bucket(cls):
-        minio_utils.ensure_bucket_deleted()
-
-    def setUp(self):
-        raise Exception("Called?")
-        if not self.persist_bucket:
-            self.create_bucket()
-
-    def tearDown(self):
-        if not self.persist_bucket:
-            self.delete_bucket()
-
-
-class StudioTestCase(TestCase, BucketTestMixin):
+class StudioTestCase(TestCase):
 
     @classmethod
     def setUpClass(cls):
         super(StudioTestCase, cls).setUpClass()
@@ -67,22 +29,12 @@ def setUpClass(cls):
             "big_shot", "bigshot@reallybigcompany.com", "password"
         )
 
-    def setUp(self):
-        if not self.persist_bucket:
-            self.create_bucket()
-
     def setUpBase(self):
-        if not self.persist_bucket:
-            self.create_bucket()
         self.channel = testdata.channel()
         self.user = testdata.user()
         self.channel.editors.add(self.user)
         self.channel.main_tree.refresh_from_db()
 
-    def tearDown(self):
-        if not self.persist_bucket:
-            self.delete_bucket()
-
     def admin_client(self):
         client = APIClient()
         client.force_authenticate(self.admin_user)
@@ -115,20 +67,12 @@ def get(self, url, data=None, follow=False, secure=False):
         )
 
 
-class StudioAPITestCase(APITestCase, BucketTestMixin):
+class StudioAPITestCase(APITestCase):
     @classmethod
     def setUpClass(cls):
         super(StudioAPITestCase, cls).setUpClass()
         call_command("loadconstants")
 
-    def setUp(self):
-        if not self.persist_bucket:
-            self.create_bucket()
-
-    def tearDown(self):
-        if not self.persist_bucket:
-            self.delete_bucket()
-
     def sign_in(self, user=None):
         if not user:
             user = self.user
diff --git a/contentcuration/contentcuration/tests/viewsets/test_clipboard.py b/contentcuration/contentcuration/tests/viewsets/test_clipboard.py
index 6621e3ed6a..59113c0532 100644
--- a/contentcuration/contentcuration/tests/viewsets/test_clipboard.py
+++ b/contentcuration/contentcuration/tests/viewsets/test_clipboard.py
@@ -17,16 +17,6 @@
 
 
 class SyncTestCase(SyncTestMixin, StudioAPITestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.create_bucket()
-        super(SyncTestCase, cls).setUpClass()
-
-    @classmethod
-    def tearDownClass(cls):
-        super(SyncTestCase, cls).tearDownClass()
-        cls.create_bucket()
-
     @classmethod
     def setUpTestData(cls):
         call_command("loadconstants")
@@ -214,16 +204,6 @@ def test_delete_clipboards(self):
 
 
 class CRUDTestCase(StudioAPITestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.create_bucket()
-        super(CRUDTestCase, cls).setUpClass()
-
-    @classmethod
-    def tearDownClass(cls):
-        super(CRUDTestCase, cls).tearDownClass()
-        cls.create_bucket()
-
    @classmethod
    def setUpTestData(cls):
        call_command("loadconstants")
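Because the bucket is now guaranteed to exist before the app starts, test cases no longer need per-test bucket setup or teardown, which is why `BucketTestClassMixin` and `BucketTestMixin` could be dropped wholesale. A hypothetical test illustrating what the slimmed-down base classes now support (sketch only; the class name and path are invented, not part of the patch):

```python
# Hypothetical example: storage round-trips now work without bucket fixtures.
from django.core.files.base import ContentFile
from django.core.files.storage import default_storage

from contentcuration.tests.base import StudioTestCase


class StorageRoundTripTestCase(StudioTestCase):
    def test_save_and_read(self):
        # the "content" bucket is pre-created by the environment, not the test
        path = default_storage.save("temp/example.txt", ContentFile(b"hello"))
        try:
            with default_storage.open(path) as f:
                self.assertEqual(f.read(), b"hello")
        finally:
            default_storage.delete(path)
```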
diff --git a/contentcuration/contentcuration/tests/viewsets/test_contentnode.py b/contentcuration/contentcuration/tests/viewsets/test_contentnode.py
index 180d8b0e72..cf2a6fe3d5 100644
--- a/contentcuration/contentcuration/tests/viewsets/test_contentnode.py
+++ b/contentcuration/contentcuration/tests/viewsets/test_contentnode.py
@@ -22,7 +22,6 @@
 from contentcuration import models
 from contentcuration.tests import testdata
-from contentcuration.tests.base import BucketTestMixin
 from contentcuration.tests.base import StudioAPITestCase
 from contentcuration.tests.viewsets.base import generate_copy_event
 from contentcuration.tests.viewsets.base import generate_create_event
@@ -68,12 +67,10 @@ def rebuild_tree(tree_id):
     models.ContentNode.objects.partial_rebuild(tree_id)
 
 
-@pytest.mark.skipif(True, reason="Concurrent processes overload Travis VM")
-class ConcurrencyTestCase(TransactionTestCase, BucketTestMixin):
+@pytest.mark.skipif(True, reason="Concurrent processes overload CI")
+class ConcurrencyTestCase(TransactionTestCase):
     def setUp(self):
         super(ConcurrencyTestCase, self).setUp()
-        if not self.persist_bucket:
-            self.create_bucket()
         call_command("loadconstants")
         self.channel = testdata.channel()
         self.user = testdata.user()
@@ -85,8 +82,6 @@ def setUp(self):
     def tearDown(self):
         call_command("flush", interactive=False)
         super(ConcurrencyTestCase, self).tearDown()
-        if not self.persist_bucket:
-            self.delete_bucket()
 
     def test_create_contentnodes_concurrently(self):
         results = call_concurrently(
diff --git a/contentcuration/contentcuration/utils/minio_utils.py b/contentcuration/contentcuration/utils/minio_utils.py
deleted file mode 100644
index b7d0a7d62b..0000000000
--- a/contentcuration/contentcuration/utils/minio_utils.py
+++ /dev/null
@@ -1,105 +0,0 @@
-from future import standard_library
-
-standard_library.install_aliases()
-
-import logging
-import time
-import json
-from urllib.parse import urlparse
-
-import minio
-from django.conf import settings
-
-from contentcuration.utils.storage_common import is_gcs_backend
-
-
-logger = logging.getLogger(__name__)
-
-
-def ensure_storage_bucket_public(bucket=None, will_sleep=True):
-    # GCS' S3 compatibility is broken, especially in bucket operations;
-    # skip bucket creation there and just bug Aron to create buckets with
-    # public-read access for you
-    if is_gcs_backend():
-        logging.info("Skipping storage creation on googleapis")
-        return
-
-    # If true, sleep for 5 seconds to wait for minio to start
-    if will_sleep:
-        time.sleep(5)
-
-    if not bucket:
-        bucketname = settings.AWS_S3_BUCKET_NAME
-    else:
-        bucketname = bucket
-
-    host = urlparse(settings.AWS_S3_ENDPOINT_URL).netloc
-    c = minio.Minio(
-        host,
-        access_key=settings.AWS_ACCESS_KEY_ID,
-        secret_key=settings.AWS_SECRET_ACCESS_KEY,
-        secure=False,
-    )
-
-    READ_ONLY_POLICY = {
-        "Version": "2012-10-17",
-        "Statement": [
-            {
-                "Effect": "Allow",
-                "Principal": {"AWS": "*"},
-                "Action": ["s3:GetBucketLocation", "s3:ListBucket"],
-                "Resource": "arn:aws:s3:::{bucketname}".format(bucketname=bucketname),
-            },
-            {
-                "Effect": "Allow",
-                "Principal": {"AWS": "*"},
-                "Action": "s3:GetObject",
-                "Resource": "arn:aws:s3:::{bucketname}/*".format(bucketname=bucketname),
-            },
-        ],
-    }
-
-    if not c.bucket_exists(bucketname):
-        c.make_bucket(bucketname)
-
-    c.set_bucket_policy(
-        bucketname,
-        json.dumps(READ_ONLY_POLICY),
-    )
-
-
-def ensure_bucket_deleted(bucket=None):
-
-    if not bucket:
-        bucketname = settings.AWS_S3_BUCKET_NAME
-    else:
-        bucketname = bucket
-
-    host = urlparse(settings.AWS_S3_ENDPOINT_URL).netloc
-
-    # GCS' S3 compatibility is broken, especially in bucket operations;
-    # skip bucket creation there and just bug Aron to create buckets with
-    # public-read access for you
-    if is_gcs_backend():
-        logging.info(
-            "Skipping storage deletion on googleapis; that sounds like a production bucket!"
-        )
-        return
-
-    minio_client = minio.Minio(
-        host,
-        access_key=settings.AWS_ACCESS_KEY_ID,
-        secret_key=settings.AWS_SECRET_ACCESS_KEY,
-        secure=False,
-    )
-
-    if minio_client.bucket_exists(bucketname):
-        # We need to delete all objects first, before we can actually delete the bucket.
-        objs_name = (
-            o.object_name for o in minio_client.list_objects(bucketname, recursive=True)
-        )
-
-        for o in objs_name:
-            minio_client.remove_object(bucketname, o)
-
-        minio_client.remove_bucket(bucketname)
diff --git a/contentcuration/contentcuration/utils/storage_common.py b/contentcuration/contentcuration/utils/storage_common.py
index 9ce747fe1e..b41b018511 100644
--- a/contentcuration/contentcuration/utils/storage_common.py
+++ b/contentcuration/contentcuration/utils/storage_common.py
@@ -1,7 +1,6 @@
 import mimetypes
 import os
 from datetime import timedelta
-from urllib.parse import urlparse
 
 from django.conf import settings
 from django.core.files.storage import default_storage
@@ -20,15 +19,6 @@ class UnknownStorageBackendError(Exception):
     pass
 
 
-def is_gcs_backend():
-    """
-    Determines if storage is GCS backend, which if not we can assume it is minio
-    :return: A bool
-    """
-    host = urlparse(settings.AWS_S3_ENDPOINT_URL).netloc
-    return "storage.googleapis.com" in host
-
-
 def determine_content_type(filename):
     """
     Guesses the content type of a filename. Returns the mimetype of a file.
diff --git a/deploy/includes/README.md b/deploy/includes/README.md
new file mode 100644
index 0000000000..021180c837
--- /dev/null
+++ b/deploy/includes/README.md
@@ -0,0 +1 @@
+This directory contains nginx configuration files that are included in the main configuration file via the `include` directive. This entire directory is copied to `/etc/nginx/includes` in the image.
diff --git a/deploy/includes/content/_proxy.conf b/deploy/includes/content/_proxy.conf
new file mode 100644
index 0000000000..5490c72742
--- /dev/null
+++ b/deploy/includes/content/_proxy.conf
@@ -0,0 +1,18 @@
+# location {} settings for /content proxy
+# used by files in this directory, via `include` directive
+
+limit_except GET HEAD OPTIONS {
+    deny all;
+}
+
+proxy_http_version 1.1;
+proxy_set_header Host $proxy_host;
+proxy_set_header Accept-Encoding Identity;
+proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+proxy_redirect off;
+proxy_buffering off;
+proxy_cache off;
+proxy_read_timeout 100s;
+proxy_ssl_server_name on;
+
+gzip off;
diff --git a/deploy/includes/content/default.conf b/deploy/includes/content/default.conf
new file mode 100644
index 0000000000..404bd64075
--- /dev/null
+++ b/deploy/includes/content/default.conf
@@ -0,0 +1,45 @@
+# DO NOT RENAME: referenced by k8s/images/nginx/entrypoint.sh
+
+# assume development
+location @emulator {
+    include /etc/nginx/includes/content/_proxy.conf;
+    # for the storage emulator, we actually want `/content` in the path
+    # because it's used as the bucket name
+
+    # this is the magic that allows us to intercept errors and try the next location
+    proxy_intercept_errors on;
+    recursive_error_pages on;
+    error_page 404 = @hotfixes;
+
+    proxy_pass http://minio:9000;
+}
+
+location @hotfixes {
+    include /etc/nginx/includes/content/_proxy.conf;
+
+    # this is the magic that allows us to intercept errors and try the next location
+    proxy_intercept_errors on;
+    recursive_error_pages on;
+    error_page 404 = @production;
+
+    # use LE domain to ensure Cloudflare is in between
+    proxy_pass https://hotfixes.studio.learningequality.org;
+}
+
+location @production {
+    include /etc/nginx/includes/content/_proxy.conf;
+
+    # use LE domain to ensure Cloudflare is in between
+    proxy_pass https://studio.learningequality.org;
+}
+
+location @nowhere {
+    return 404;
+}
+
+location /content/ {
+    # check the emulator bucket first, then the cloud development bucket, then fall back to production
+    # try_files will only use one named route, and it uses the last one. Note that we can't just
+    # pass a single named route, because that fails.
+    try_files @nowhere @emulator;
+}
diff --git a/deploy/includes/content/develop-studio-content.conf b/deploy/includes/content/develop-studio-content.conf
new file mode 100644
index 0000000000..5a1c2ed181
--- /dev/null
+++ b/deploy/includes/content/develop-studio-content.conf
@@ -0,0 +1,32 @@
+# DO NOT RENAME: this file is named after the primary bucket it proxies to
+
+location @hotfixes {
+    include /etc/nginx/includes/content/_proxy.conf;
+
+    # this is the magic that allows us to intercept errors and try the next location
+    proxy_intercept_errors on;
+    recursive_error_pages on;
+    error_page 404 = @production;
+
+    proxy_pass https://develop-studio-content.storage.googleapis.com;
+}
+
+location @production {
+    include /etc/nginx/includes/content/_proxy.conf;
+
+    proxy_pass https://studio-content.storage.googleapis.com;
+}
+
+location @nowhere {
+    return 404;
+}
+
+location /content/ {
+    # ensure that the /content/ prefix is stripped from the request
+    rewrite ^/content/(.*)$ /$1 break;
+
+    # check the cloud development bucket first, then fall back to production
+    # try_files will only use one named route, and it uses the last one. Note that we can't just
+    # pass a single named route, because that fails.
+    try_files @nowhere @hotfixes;
+}
diff --git a/deploy/includes/content/studio-content.conf b/deploy/includes/content/studio-content.conf
new file mode 100644
index 0000000000..f59650f659
--- /dev/null
+++ b/deploy/includes/content/studio-content.conf
@@ -0,0 +1,11 @@
+# DO NOT RENAME: this file is named after the primary bucket it proxies to
+
+location /content/ {
+    include /etc/nginx/includes/content/_proxy.conf;
+
+    # ensure that the /content/ prefix is stripped from the request
+    rewrite ^/content/(.*)$ /$1 break;
+
+    # just direct proxy to the bucket
+    proxy_pass https://studio-content.storage.googleapis.com;
+}
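The cascade in `default.conf` leans on `proxy_intercept_errors` plus `error_page 404 = @next`: each named location retries the request against the next upstream whenever the previous one returns 404. For intuition, here is the same fallback written as a client-side loop — a sketch only (`requests` assumed, upstream list copied from `default.conf`), not code that ships anywhere:

```python
# Sketch: the nginx 404 cascade from default.conf, expressed imperatively.
import requests

UPSTREAMS = [
    "http://minio:9000",                             # @emulator (bucket name stays in path)
    "https://hotfixes.studio.learningequality.org",  # @hotfixes
    "https://studio.learningequality.org",           # @production
]


def fetch_content(path):
    """Try each upstream in order, falling through on 404 like error_page does."""
    response = None
    for base in UPSTREAMS:
        response = requests.get(base + path, timeout=100)  # mirrors proxy_read_timeout 100s
        if response.status_code != 404:
            break
    return response  # if every upstream 404s, the final 404 is what the client sees


# e.g. fetch_content("/content/storage/a/b/abc123.mp4")
```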
diff --git a/deploy/nginx.conf.jinja2 b/deploy/nginx.conf
similarity index 83%
rename from deploy/nginx.conf.jinja2
rename to deploy/nginx.conf
index 5dcc5862f9..b024589d14 100644
--- a/deploy/nginx.conf.jinja2
+++ b/deploy/nginx.conf
@@ -57,21 +57,9 @@ http {
         proxy_cache off;
     }
 
 
-    location /content/ {
-        limit_except GET HEAD OPTIONS {
-            deny all;
-        }
-        proxy_http_version 1.1;
-        proxy_pass {{ $aws_s3_endpoint_url }}/{{ $aws_s3_bucket_name }}/;
-        proxy_set_header Host $proxy_host;
-        proxy_set_header Accept-Encoding Identity;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_redirect off;
-        proxy_buffering off;
-        proxy_cache off;
-        proxy_read_timeout 100s;
-        gzip off;
-    }
+    # dynamically provisioned in the image's entrypoint script
+    # see deploy/includes/content/*
+    include /etc/nginx/includes/content.conf;
 
     # We cache the following expensive API endpoints.
diff --git a/docker-compose.alt.yml b/docker-compose.alt.yml
index 5573b723ff..acf01a97e3 100644
--- a/docker-compose.alt.yml
+++ b/docker-compose.alt.yml
@@ -7,10 +7,6 @@ version: '3.4'
 # docker-compose -f docker-compose.yml -f docker-compose.alt.yml up minio postgres redis
 
 services:
-  minio:
-    ports:
-      - "9000:9000"
-
   postgres:
     ports:
       - "5432:5432"
diff --git a/docker-compose.yml b/docker-compose.yml
index 037f1e5a03..946dd76f12 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -5,6 +5,7 @@ x-studio-environment:
   MPLBACKEND: ps
   SHELL: /bin/bash
   AWS_S3_ENDPOINT_URL: http://minio:9000
+  AWS_BUCKET_NAME: content
   DATA_DB_HOST: postgres
   DJANGO_SETTINGS_MODULE: contentcuration.dev_settings
   RUN_MODE: docker-compose
@@ -53,15 +54,17 @@ services:
     <<: *studio-worker
     command: make prodceleryworkers
 
+  # this service is for development emulation only
   minio:
-    image: minio/minio:RELEASE.2020-06-22T03-12-50Z
-    entrypoint: minio server /data
+    image: 'bitnami/minio:2024.5.28'
     environment:
-      MINIO_ACCESS_KEY: development
-      MINIO_SECRET_KEY: development
-      MINIO_API_CORS_ALLOW_ORIGIN: 'http://localhost:8080,http://127.0.0.1:8080'
+      - "MINIO_DEFAULT_BUCKETS=content:public"
+      - MINIO_ROOT_USER=development
+      - MINIO_ROOT_PASSWORD=development
+    ports:
+      - "9000:9000"
     volumes:
-      - .docker/minio:/data
+      - minio:/bitnami/minio/data
 
   postgres:
     image: ghcr.io/learningequality/postgres
@@ -93,5 +96,5 @@
 
 
 volumes:
-  minio_data:
+  minio:
   pgdata:
diff --git a/k8s/images/nginx/Dockerfile b/k8s/images/nginx/Dockerfile
index d0f41afaa5..ab38a1118a 100644
--- a/k8s/images/nginx/Dockerfile
+++ b/k8s/images/nginx/Dockerfile
@@ -1,18 +1,8 @@
-FROM byrnedo/alpine-curl
-
-# download all extra deps we need for the production container
-# templating executable
-COPY /k8s/images/nginx/download_sigil.sh /tmp/download_sigil.sh
-RUN chmod +x /tmp/download_sigil.sh
-RUN /tmp/download_sigil.sh
-
 FROM nginx:1.25
 
 RUN rm /etc/nginx/conf.d/* # if there's stuff here, nginx won't read sites-enabled
-ADD deploy/nginx.conf.jinja2 /etc/nginx/nginx.conf.jinja2
-ADD k8s/images/nginx/entrypoint.sh /usr/bin
-
-# install the templating binary
-COPY --from=0 /tmp/sigil /usr/bin/
+COPY deploy/nginx.conf /etc/nginx/nginx.conf
+COPY deploy/includes /etc/nginx/includes
+COPY k8s/images/nginx/entrypoint.sh /usr/bin
 
-CMD entrypoint.sh
+CMD ["entrypoint.sh"]
diff --git a/k8s/images/nginx/download_sigil.sh b/k8s/images/nginx/download_sigil.sh
deleted file mode 100755
index 41c5b24a61..0000000000
--- a/k8s/images/nginx/download_sigil.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-set -eou pipefail
-
-export SIGIL_VERSION=0.10.1
-export OS=`sh -c "uname -s | tr '[:upper:]' '[:lower:]'"`
-export ARCH=`sh -c "uname -m | tr '[:upper:]' '[:lower:]' | sed 's/aarch64/arm64/' | sed 's/x86_64/amd64/'"`
-
-
-curl -L "https://github.com/gliderlabs/sigil/releases/download/v${SIGIL_VERSION}/gliderlabs-sigil_${SIGIL_VERSION}_${OS}_${ARCH}.tgz" | tar -zxC /tmp
-mv /tmp/gliderlabs-sigil-${ARCH} /tmp/sigil
diff --git a/k8s/images/nginx/entrypoint.sh b/k8s/images/nginx/entrypoint.sh
index b5a7c1fc5d..71f35a3927 100755
--- a/k8s/images/nginx/entrypoint.sh
+++ b/k8s/images/nginx/entrypoint.sh
@@ -1,6 +1,18 @@
 #!/bin/sh
 
-# Run yasha (a cli jinja templating engine) to generate the real nginx.conf file
-sigil -f /etc/nginx/nginx.conf.jinja2 aws_s3_bucket_name=$AWS_BUCKET_NAME aws_s3_endpoint_url=$AWS_S3_ENDPOINT_URL > /etc/nginx/nginx.conf
+if [ -z "$AWS_BUCKET_NAME" ]; then
+  echo "AWS_BUCKET_NAME is not set. Exiting..."
+  exit 1
+fi
 
-nginx -c /etc/nginx/nginx.conf
\ No newline at end of file
+CONTENT_CONFIG="/etc/nginx/includes/content/$AWS_BUCKET_NAME.conf"
+
+# if content proxy config with the same name as the bucket does not exist, use the default one
+if [ ! -f "$CONTENT_CONFIG" ]; then
+  CONTENT_CONFIG="/etc/nginx/includes/content/default.conf"
+fi
+
+echo "Using content proxy config: $CONTENT_CONFIG"
+cp "$CONTENT_CONFIG" /etc/nginx/includes/content.conf
+
+nginx -c /etc/nginx/nginx.conf
diff --git a/requirements-dev.in b/requirements-dev.in
index 7a403fffcd..140f4e8731 100644
--- a/requirements-dev.in
+++ b/requirements-dev.in
@@ -23,4 +23,3 @@ pypandoc
 git+https://github.com/someshchaturvedi/customizable-django-profiler.git#customizable-django-profiler
 tabulate==0.9.0
 fonttools
-minio==7.1.1
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 206a049a46..6ac4b5bea6 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -22,7 +22,6 @@ certifi==2020.12.5
     # via
     #   -c requirements.txt
     #   geventhttpclient
-    #   minio
     #   requests
 cfgv==3.3.1
     # via pre-commit
@@ -124,8 +123,6 @@ markupsafe==2.1.2
     #   werkzeug
 mccabe==0.6.1
     # via flake8
-minio==7.1.1
-    # via -r requirements-dev.in
 mixer==6.1.3
     # via -r requirements-dev.in
 mock==4.0.3
@@ -253,7 +250,6 @@ uritemplate==3.0.1
 urllib3==1.26.18
     # via
     #   -c requirements.txt
-    #   minio
     #   requests
 virtualenv==20.14.1
     # via pre-commit
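For clarity, the config-selection rule the new `entrypoint.sh` implements — use `/etc/nginx/includes/content/$AWS_BUCKET_NAME.conf` when it exists, otherwise fall back to `default.conf` — can be expressed in Python as follows. This is an illustrative mirror of the shell script, not code that ships:

```python
# Sketch: mirrors k8s/images/nginx/entrypoint.sh for easier reasoning.
import os
import shutil
import sys

INCLUDES = "/etc/nginx/includes"


def resolve_content_config(bucket_name):
    """Pick the per-bucket include if present, else the default proxy config."""
    if not bucket_name:
        sys.exit("AWS_BUCKET_NAME is not set. Exiting...")
    candidate = os.path.join(INCLUDES, "content", bucket_name + ".conf")
    if not os.path.isfile(candidate):
        candidate = os.path.join(INCLUDES, "content", "default.conf")
    return candidate


if __name__ == "__main__":
    config = resolve_content_config(os.environ.get("AWS_BUCKET_NAME", ""))
    print("Using content proxy config: %s" % config)
    shutil.copyfile(config, os.path.join(INCLUDES, "content.conf"))
    # entrypoint.sh then execs: nginx -c /etc/nginx/nginx.conf
```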