diff --git a/.env b/.env index 7b257f9d..d3f0a6d5 100644 --- a/.env +++ b/.env @@ -1,5 +1,8 @@ PYTHONPATH=/app +# REST API +AIOD_REST_PORT=8000 + #MYSQL MYSQL_ROOT_PASSWORD=ok @@ -10,6 +13,9 @@ KEYCLOAK_ADMIN_PASSWORD=password KEYCLOAK_CLIENT_SECRET="QJiOGn09eCEfnqAmcPP2l4vMU8grlmVQ" REDIRECT_URIS=http://${HOSTNAME}/docs/oauth2-redirect POST_LOGOUT_REDIRECT_URIS=http://${HOSTNAME}/aiod-auth/realms/aiod/protocol/openid-connect/logout +AIOD_KEYCLOAK_PORT=8080 + +EGICHECKINALIAS= #ELASTICSEARCH ES_USER=elastic @@ -17,10 +23,18 @@ ES_PASSWORD=changeme ES_DISCOVERY_TYPE=single-node ES_ROLE="edit_aiod_resources" ES_JAVA_OPTS="-Xmx256m -Xms256m" +AIOD_ES_HTTP_PORT=9200 +AIOD_ES_TRANSPORT_PORT=9300 #LOGSTASH LS_JAVA_OPTS="-Xmx256m -Xms256m" +AIOD_LOGSTASH_BEATS_PORT=5044 +AIOD_LOGSTASH_PORT=5000 +AIOD_LOGSTASH_API_PORT=9600 + +#NGINX +AIOD_NGINX_PORT=80 #DATA STORAGE DATA_PATH=./data -BACKUP_PATH=./data/backups \ No newline at end of file +BACKUP_PATH=./data/backups diff --git a/README.md b/README.md index bf6e16a4..d2bd4efe 100644 --- a/README.md +++ b/README.md @@ -287,4 +287,9 @@ To create a new release, release branch. Look at all closed PRs and create a changelog 6. Create a PR from release branch to master 7. After that's merged, create a PR from master to develop -8. Notify everyone (e.g., in the API channel in Slack) and update the code on the server(s). +8. Deploy on the server(s): + - Check which services currently work (before the update). This gives a baseline to compare against if a service _doesn't_ work later. + - Update the code on the server by checking out the release + - Merge configurations as necessary + - Make sure the latest database migrations are applied: see ["Schema Migrations"](alembic/README.md#update-the-database) +9. Notify everyone (e.g., in the API channel in Slack). 
diff --git a/alembic/Dockerfile b/alembic/Dockerfile new file mode 100644 index 00000000..99a96388 --- /dev/null +++ b/alembic/Dockerfile @@ -0,0 +1,5 @@ +FROM aiod_metadata_catalogue +RUN python -m pip install alembic +ENV PYTHONPATH="$PYTHONPATH:/app" +WORKDIR /alembic +ENTRYPOINT ["alembic", "upgrade", "head"] diff --git a/alembic/README.md b/alembic/README.md new file mode 100644 index 00000000..c0960285 --- /dev/null +++ b/alembic/README.md @@ -0,0 +1,32 @@ +# Database Schema Migrations + +We use [Alembic](https://alembic.sqlalchemy.org/en/latest/tutorial.html#running-our-first-migration) to automate database schema migrations +(e.g., adding a table, altering a column, and so on). +Please refer to the Alembic documentation for more information. + +## Usage +Commands below assume that the root directory of the project is your current working directory. + +Build the image with: +```commandline +docker build -f alembic/Dockerfile . -t aiod-migration +``` + +With the sqlserver container running, you can migrate to the latest schema with: + +```commandline +docker run -v $(pwd)/alembic:/alembic:ro -v $(pwd)/src:/app -it --network aiod_default aiod-migration +``` +Make sure that the specified `--network` is the docker network that has the `sqlserver` container. +The alembic directory is mounted to ensure the latest migrations are available, +the src directory is mounted so the migration scripts can use defined classes and variables from the project. + +## Update the Database +> [!Caution] +> Database migrations may be irreversible. Always make sure there is a backup of the old database. + +Following the usage commands above, on a new release we should run alembic to ensure the latest schema changes are applied. +The default entrypoint of the container specifies to upgrade the database to the latest schema. 
+ +## TODO + - set up support for auto-generating migration scripts: https://alembic.sqlalchemy.org/en/latest/autogenerate.html diff --git a/alembic/alembic.ini b/alembic/alembic.ini new file mode 100644 index 00000000..f8e5a0c9 --- /dev/null +++ b/alembic/alembic.ini @@ -0,0 +1,116 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +# Use forward slashes (/) also on windows to provide an os agnostic path +script_location = alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. +# Any required deps can be installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. 
+# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = '' + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. 
See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/alembic/README b/alembic/alembic/README new file mode 100644 index 00000000..98e4f9c4 --- /dev/null +++ b/alembic/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/alembic/alembic/env.py b/alembic/alembic/env.py new file mode 100644 index 00000000..3320285a --- /dev/null +++ b/alembic/alembic/env.py @@ -0,0 +1,72 @@ +from logging.config import fileConfig + +from alembic import context + +# Assumes /src is in the Python path, so we can re-use logic for constructing db connections +from database.session import db_url +from database.session import EngineSingleton + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. 
+if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = None + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = db_url() + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = EngineSingleton().engine + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/alembic/script.py.mako b/alembic/alembic/script.py.mako new file mode 100644 index 00000000..fbc4b07d --- /dev/null +++ b/alembic/alembic/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
+revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/alembic/alembic/versions/0a23b40cc09c_extend_max_length_of_text_in_note.py b/alembic/alembic/versions/0a23b40cc09c_extend_max_length_of_text_in_note.py new file mode 100644 index 00000000..a1e4ef16 --- /dev/null +++ b/alembic/alembic/versions/0a23b40cc09c_extend_max_length_of_text_in_note.py @@ -0,0 +1,48 @@ +"""Extend max length of text in note + +Revision ID: 0a23b40cc09c +Revises: +Create Date: 2024-08-29 11:37:20.827291 + +""" +from typing import Sequence, Union + +from alembic import op +from sqlalchemy import String + +from database.model.field_length import VERY_LONG + +# revision identifiers, used by Alembic. 
+revision: str = "0a23b40cc09c" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # All models that derive from AIResourceBase + for table in [ + "news", + "team", + "person", + "organisation", + "event", + "project", + "service", + "dataset", + "case_study", + "publication", + "computational_asset", + "ml_model", + "experiment", + "educational_resource", + ]: + op.alter_column( + f"note_{table}", + "value", + type_=String(VERY_LONG), + ) + + +def downgrade() -> None: + pass diff --git a/data/keycloak/import/realm-export.json b/data/keycloak/data/import/realm-export.json similarity index 99% rename from data/keycloak/import/realm-export.json rename to data/keycloak/data/import/realm-export.json index cc0972ae..da01070f 100644 --- a/data/keycloak/import/realm-export.json +++ b/data/keycloak/data/import/realm-export.json @@ -1527,6 +1527,7 @@ "strictTransportSecurity": "max-age=31536000; includeSubDomains" }, "smtpServer": {}, + "loginTheme" : "aiod", "eventsEnabled": false, "eventsListeners": [ "jboss-logging" diff --git a/data/keycloak/themes/aiod/login/login.ftl b/data/keycloak/themes/aiod/login/login.ftl new file mode 100755 index 00000000..94020948 --- /dev/null +++ b/data/keycloak/themes/aiod/login/login.ftl @@ -0,0 +1,123 @@ +<#import "template.ftl" as layout> +<@layout.registrationLayout displayMessage=!messagesPerField.existsError('username','password') displayInfo=realm.password && realm.registrationAllowed && !registrationDisabled??; section> + <#if section = "header"> + ${msg("loginAccountTitle")} + <#elseif section = "form"> +
+
+ <#if social.providers??> +

${msg("identity-provider-login-label")}

+
+ <#else> +
+ + <#if realm.password> +
+ <#if !usernameHidden??> +
+ + + + + <#if messagesPerField.existsError('username','password')> + + ${kcSanitize(messagesPerField.getFirstError('username','password'))?no_esc} + + + +
+ + +
+ + +
+ + +
+ + <#if usernameHidden?? && messagesPerField.existsError('username','password')> + + ${kcSanitize(messagesPerField.getFirstError('username','password'))?no_esc} + + + +
+ +
+
+ <#if realm.rememberMe && !usernameHidden??> +
+ +
+ +
+
+ <#if realm.resetPasswordAllowed> + ${msg("doForgotPassword")} + +
+ +
+ +
+ value="${auth.selectedCredential}"/> + +
+
+ +
+
+ + <#elseif section = "info" > + <#if realm.password && realm.registrationAllowed && !registrationDisabled??> +
+
+ ${msg("noAccount")} ${msg("doRegister")} +
+
+ + <#elseif section = "socialProviders" > + <#if realm.password && social?? && social.providers?has_content> +
+ +
+ + + + diff --git a/data/keycloak/themes/aiod/login/messages/messages_en.properties b/data/keycloak/themes/aiod/login/messages/messages_en.properties new file mode 100644 index 00000000..cff63e19 --- /dev/null +++ b/data/keycloak/themes/aiod/login/messages/messages_en.properties @@ -0,0 +1 @@ +identity-provider-login-label=Or sign in with your local account diff --git a/data/keycloak/themes/aiod/login/resources/css/styles.css b/data/keycloak/themes/aiod/login/resources/css/styles.css new file mode 100644 index 00000000..b8c45139 --- /dev/null +++ b/data/keycloak/themes/aiod/login/resources/css/styles.css @@ -0,0 +1,80 @@ +body { + color: white; +} + +login-pf { + background: none; +} + +.form-group { + font-size: small; +} + +.btn-group-lg { + font-size: small; +} + +.login-pf body { + background: #000613 none; +} + +#kc-header-wrapper { + font-size: 0px; + padding: 124px 10px 20px; + background: url("../img/logo.svg") no-repeat center; + background-size: contain; +} + +.card-pf { + background: transparent; +} + +.kc-form-toggle { + color: white; + text-align: center; +} + +.kc-form-toggle::after { + content: '▶︎'; + display: inline-block; + transform: rotate(0deg); + transition: transform 0.3s ease; +} + +.kc-form-toggle.up::after { + transform: rotate(90deg); +} + +.kc-form-wrapper { + display: none; +} + +h1#kc-page-title { + color: white; +} + +#social-egicheckin { + display: inline-block; + width: auto; + padding: 20px 30px 20px 80px; + border: 2px solid #005faa; + border-radius: 100vw; + background-color: #005faa; + background-image: url(../img/egi-logo-white.svg); + background-position: 30px 43%; + background-size: 36px; + background-repeat: no-repeat; + -webkit-transition: all 200ms ease-in-out; + transition: all 200ms ease-in-out; + font-family: 'DM Sans', sans-serif; + color: #fff; + font-size: 18px; + font-weight: 700; + --pf-c-button--after--BorderColor: transparent; +} + +#social-egicheckin:hover { + background-color: #fff; + background-image: 
url(../img/egi-logo-color.svg); + color: #005faa; +} \ No newline at end of file diff --git a/data/keycloak/themes/aiod/login/resources/img/egi-logo-color.svg b/data/keycloak/themes/aiod/login/resources/img/egi-logo-color.svg new file mode 100644 index 00000000..1250fe1c --- /dev/null +++ b/data/keycloak/themes/aiod/login/resources/img/egi-logo-color.svg @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/data/keycloak/themes/aiod/login/resources/img/egi-logo-white.svg b/data/keycloak/themes/aiod/login/resources/img/egi-logo-white.svg new file mode 100644 index 00000000..17798c69 --- /dev/null +++ b/data/keycloak/themes/aiod/login/resources/img/egi-logo-white.svg @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/data/keycloak/themes/aiod/login/resources/img/logo.svg b/data/keycloak/themes/aiod/login/resources/img/logo.svg new file mode 100644 index 00000000..c25c6329 --- /dev/null +++ b/data/keycloak/themes/aiod/login/resources/img/logo.svg @@ -0,0 +1,87 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/data/keycloak/themes/aiod/login/resources/js/script.js b/data/keycloak/themes/aiod/login/resources/js/script.js new file mode 100644 index 00000000..06923895 --- /dev/null +++ b/data/keycloak/themes/aiod/login/resources/js/script.js @@ -0,0 +1,20 @@ +document.addEventListener('DOMContentLoaded', function() { + const toggleButton = document.getElementById('kc-form-toggle'); + const kcFormWrapper = document.getElementById('kc-form-wrapper'); + + function toggleVisibility() { + kcFormWrapper.style.display = kcFormWrapper.style.display === 'block' ? 
'none' : 'block'; + toggleButton.classList.toggle('up'); + } + + // Add event listener for the click event + if (toggleButton) { + toggleButton.addEventListener('click', toggleVisibility); + } + const inputError = document.getElementById('input-error'); + if (inputError) { + // Call the function once when the content loads if the specific ID is present + toggleVisibility(); + } +}); + diff --git a/data/keycloak/themes/aiod/login/template.ftl b/data/keycloak/themes/aiod/login/template.ftl new file mode 100644 index 00000000..84fd3e43 --- /dev/null +++ b/data/keycloak/themes/aiod/login/template.ftl @@ -0,0 +1,173 @@ +<#macro registrationLayout bodyClass="" displayInfo=false displayMessage=true displayRequiredFields=false> + + lang="${locale.currentLanguageTag}"> + + + + + + + <#if properties.meta?has_content> + <#list properties.meta?split(' ') as meta> + + + + ${msg("loginTitle",(realm.displayName!''))} + + <#if properties.stylesCommon?has_content> + <#list properties.stylesCommon?split(' ') as style> + + + + <#if properties.styles?has_content> + <#list properties.styles?split(' ') as style> + + + + <#if properties.scripts?has_content> + <#list properties.scripts?split(' ') as script> + + + + + + <#if scripts??> + <#list scripts as script> + + + + + + + +
+
+
${kcSanitize(msg("loginTitleHtml",(realm.displayNameHtml!'')))?no_esc}
+
+
+
+ <#if realm.internationalizationEnabled && locale.supported?size gt 1> +
+
+ +
+
+ + <#if !(auth?has_content && auth.showUsername() && !auth.showResetCredentials())> + <#if displayRequiredFields> +
+
+ * ${msg("requiredFields")} +
+
+

<#nested "header">

+
+
+ <#else> +

<#nested "header">

+ + <#else> + <#if displayRequiredFields> +
+
+ * ${msg("requiredFields")} +
+
+ <#nested "show-username"> +
+ + + + +
+
+
+ <#else> + <#nested "show-username"> +
+ + + + +
+ + +
+
+
+ + <#-- App-initiated actions should not see warning messages about the need to complete the action --> + <#-- during login. --> + <#if displayMessage && message?has_content && (message.type != 'warning' || !isAppInitiatedAction??)> +
+
+ <#if message.type = 'success'> + <#if message.type = 'warning'> + <#if message.type = 'error'> + <#if message.type = 'info'> +
+ ${kcSanitize(message.summary)?no_esc} +
+ + + <#nested "socialProviders"> + + <#nested "form"> + + <#if auth?has_content && auth.showTryAnotherWayLink()> +
+ +
+ + + + + <#if displayInfo> +
+
+ <#nested "info"> +
+
+ +
+
+ +
+
+ + + diff --git a/data/keycloak/themes/aiod/login/theme.properties b/data/keycloak/themes/aiod/login/theme.properties new file mode 100644 index 00000000..b766740f --- /dev/null +++ b/data/keycloak/themes/aiod/login/theme.properties @@ -0,0 +1,6 @@ +parent=keycloak +import=common/keycloak +styles=css/login.css css/styles.css +scripts=js/script.js + +egi_checkin_alias=${env.EGICHECKINALIAS} diff --git a/docker-compose.yaml b/docker-compose.yaml index 3b6e9c43..2c467670 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -12,7 +12,7 @@ services: environment: - KEYCLOAK_CLIENT_SECRET=$KEYCLOAK_CLIENT_SECRET ports: - - 8000:8000 + - ${AIOD_REST_PORT}:8000 volumes: - ./src:/app:ro command: > @@ -144,9 +144,10 @@ services: - REDIRECT_URIS=$REDIRECT_URIS - POST_LOGOUT_REDIRECT_URIS=$POST_LOGOUT_REDIRECT_URIS ports: - - 8080:8080 + - ${AIOD_KEYCLOAK_PORT}:8080 volumes: - - ${DATA_PATH}/keycloak:/opt/keycloak/data + - ${DATA_PATH}/keycloak/data:/opt/keycloak/data + - ${DATA_PATH}/keycloak/themes:/opt/keycloak/themes command: > start --hostname-url http://${HOSTNAME}/aiod-auth @@ -163,7 +164,7 @@ services: volumes: - ./nginx:/etc/nginx/conf.d:ro ports: - - 80:80 + - ${AIOD_NGINX_PORT}:80 depends_on: app: condition: service_healthy @@ -178,8 +179,8 @@ services: - ELASTIC_PASSWORD=$ES_PASSWORD - discovery.type=$ES_DISCOVERY_TYPE ports: - - 9200:9200 - - 9300:9300 + - ${AIOD_ES_HTTP_PORT}:9200 + - ${AIOD_ES_TRANSPORT_PORT}:9300 volumes: - type: bind source: ./es/elasticsearch.yml @@ -221,10 +222,10 @@ services: environment: - LS_JAVA_OPTS=$LS_JAVA_OPTS ports: - - 5044:5044 - - 5000:5000/tcp - - 5000:5000/udp - - 9600:9600 + - ${AIOD_LOGSTASH_BEATS_PORT}:5044 + - ${AIOD_LOGSTASH_PORT}:5000/tcp + - ${AIOD_LOGSTASH_PORT}:5000/udp + - ${AIOD_LOGSTASH_API_PORT}:9600 volumes: - ./logstash/config/config:/usr/share/logstash/config:ro - ./logstash/config/pipeline:/usr/share/logstash/pipeline:ro diff --git a/pyproject.toml b/pyproject.toml index d1ca2ab8..7f592888 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "aiod_metadata_catalogue" description = "A Metadata Catalogue for AI on Demand " -version = "1.3.20240619" +version = "1.3.20240909" requires-python = ">=3.11" authors = [ { name = "Adrián Alcolea" }, @@ -16,11 +16,11 @@ authors = [ dependencies = [ "urllib3== 2.1.0", "bibtexparser==1.4.1", - "huggingface_hub==0.20.3", + "huggingface_hub==0.23.4", "fastapi==0.109.2", "uvicorn==0.25.0", "requests==2.31.0", - "mysqlclient==2.2.1", + "mysqlclient==2.2.4", "oic==1.6.0", "python-keycloak==3.7.0", "python-dotenv==1.0.0", @@ -34,7 +34,7 @@ dependencies = [ "sickle==0.7.0", "xmltodict==0.13.0", "python-multipart==0.0.6", - "mysql-connector-python==8.2.0", + "mysql-connector-python==9.0.0", "elasticsearch==8.11.1", "jinja2==3.1.3", ] @@ -42,7 +42,7 @@ readme = "README.md" [project.optional-dependencies] dev = [ - "types-python-dateutil==2.8.19.14", + "types-python-dateutil==2.9.0.20240316", "pytest==7.4.3", "pytest-asyncio==0.23.2", "pytest-dotenv==0.5.2", diff --git a/src/connectors/huggingface/huggingface_dataset_connector.py b/src/connectors/huggingface/huggingface_dataset_connector.py index 3e25218c..bf18d8f0 100644 --- a/src/connectors/huggingface/huggingface_dataset_connector.py +++ b/src/connectors/huggingface/huggingface_dataset_connector.py @@ -1,4 +1,5 @@ import logging +import math import typing import bibtexparser @@ -40,9 +41,7 @@ def _get(url: str, dataset_id: str) -> typing.List[typing.Dict[str, typing.Any]] response_json = response.json() if not response.ok: msg = response_json["error"] - logging.error( - f"Error while fetching parquet info for dataset {dataset_id}: " f"'{msg}'" - ) + logging.warning(f"Unable to retrieve parquet info for dataset '{dataset_id}': '{msg}'") return [] return response_json["parquet_files"] @@ -80,7 +79,7 @@ def fetch_dataset( description=f"{pq_file['dataset']}. Config: {pq_file['config']}. 
Split: " f"{pq_file['split']}", content_url=pq_file["url"], - content_size_kb=pq_file["size"], + content_size_kb=math.ceil(pq_file["size"] / 1000), ) for pq_file in parquet_info ] @@ -110,9 +109,9 @@ def fetch_dataset( related_resources["creator"] = [pydantic_class_contact(name=dataset.author)] description = getattr(dataset, "description", None) - if description and len(description) > field_length.LONG: + if description and len(description) > field_length.MAX_TEXT: text_break = " [...]" - description = description[: field_length.LONG - len(text_break)] + text_break + description = description[: field_length.MAX_TEXT - len(text_break)] + text_break if description: description = Text(plain=description) @@ -127,7 +126,7 @@ def fetch_dataset( date_published=dataset.created_at if hasattr(dataset, "created_at") else None, license=ds_license, distribution=distributions, - is_accessible_for_free=True, + is_accessible_for_free=not dataset.private, keyword=dataset.tags, ), resource_ORM_class=Dataset, diff --git a/src/database/model/ai_asset/distribution.py b/src/database/model/ai_asset/distribution.py index 11ffa074..78455b14 100644 --- a/src/database/model/ai_asset/distribution.py +++ b/src/database/model/ai_asset/distribution.py @@ -24,7 +24,7 @@ class DistributionBase(AIoDConceptBase): schema_extra={"example": "2010-2020 Example Company. 
All rights reserved."}, ) content_url: str = Field( - max_length=NORMAL, + max_length=LONG, schema_extra={"example": "https://www.example.com/dataset/file.csv"}, ) content_size_kb: int | None = Field(schema_extra={"example": 10000}) diff --git a/src/database/model/ai_resource/note.py b/src/database/model/ai_resource/note.py index 276428a2..6ed1f514 100644 --- a/src/database/model/ai_resource/note.py +++ b/src/database/model/ai_resource/note.py @@ -3,7 +3,7 @@ from sqlalchemy import Column, Integer, ForeignKey from sqlmodel import Field, SQLModel -from database.model.field_length import LONG +from database.model.field_length import VERY_LONG class NoteBase(SQLModel): @@ -11,7 +11,7 @@ class NoteBase(SQLModel): index=False, unique=False, description="The string value", - max_length=LONG, + max_length=VERY_LONG, schema_extra={"example": "A brief record of points or ideas about this AI resource."}, ) diff --git a/src/database/model/field_length.py b/src/database/model/field_length.py index 4bc0115e..47ab7736 100644 --- a/src/database/model/field_length.py +++ b/src/database/model/field_length.py @@ -7,4 +7,5 @@ SHORT = 64 NORMAL = 256 LONG = 1800 # an A4s full of text +VERY_LONG = 8000 # Cut off for out-of-row storage MAX_TEXT = 65535 # max length for Mysql text