diff --git a/.dockerignore b/.dockerignore index 5d1d149e0..558da504b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,3 +2,4 @@ data/ .github/ .ipynb_checkpoints/ .gitignore +.idea/ diff --git a/.env b/.env index 69a217df0..d03f9c703 100644 --- a/.env +++ b/.env @@ -30,7 +30,7 @@ TELEGRAM_PORT=443 # Docker Volume Names DOCKER_DB_VOL=4cat_4cat_db DOCKER_DATA_VOL=4cat_4cat_data -DOCKER_CONFIG_VOL=4cat_4cat_share +DOCKER_CONFIG_VOL=4cat_4cat_config DOCKER_LOGS_VOL=4cat_4cat_logs # Gunicorn settings @@ -39,4 +39,3 @@ workers=4 threads=4 worker_class=gthread log_level=debug - diff --git a/.zenodo.json b/.zenodo.json index 3ab05ca45..fd261019f 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -3,7 +3,7 @@ "license": "MPL-2.0", "title": "4CAT Capture and Analysis Toolkit", "upload_type": "software", - "version": "v1.45", + "version": "v1.46", "keywords": [ "webmining", "scraping", diff --git a/VERSION b/VERSION index 6245ec1a2..fa2cb2583 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ -1.45 +1.46 This file should not be modified. It is used by 4CAT to determine whether it needs to run migration scripts to e.g. update the database structure to a more diff --git a/backend/database.sql b/backend/database.sql index 33f0ea393..1f372a697 100644 --- a/backend/database.sql +++ b/backend/database.sql @@ -56,6 +56,7 @@ CREATE TABLE IF NOT EXISTS datasets ( is_private boolean DEFAULT TRUE, software_version text, software_file text DEFAULT '', + software_source text DEFAULT '', annotation_fields text DEFAULT '' ); diff --git a/backend/lib/processor.py b/backend/lib/processor.py index c67fa7a9d..0ed4cb6a3 100644 --- a/backend/lib/processor.py +++ b/backend/lib/processor.py @@ -164,7 +164,7 @@ def work(self): # start log file self.dataset.update_status("Processing data") - self.dataset.update_version(get_software_commit()) + self.dataset.update_version(get_software_commit(self)) # get parameters # if possible, fill defaults where parameters are not provided @@ -628,7 +628,7 @@ def write_csv_items_and_finish(self, data): self.dataset.update_status("Finished") self.dataset.finish(len(data)) - def write_archive_and_finish(self, files, num_items=None, compression=zipfile.ZIP_STORED): + def write_archive_and_finish(self, files, num_items=None, compression=zipfile.ZIP_STORED, finish=True): """ Archive a bunch of files into a zip archive and finish processing @@ -639,6 +639,7 @@ def write_archive_and_finish(self, files, num_items=None, compression=zipfile.ZI files added to the archive will be used. :param int compression: Type of compression to use. By default, files are not compressed, to speed up unarchiving. + :param bool finish: Finish the dataset/job afterwards or not? 
""" is_folder = False if issubclass(type(files), PurePath): @@ -665,7 +666,8 @@ def write_archive_and_finish(self, files, num_items=None, compression=zipfile.ZI if num_items is None: num_items = done - self.dataset.finish(num_items) + if finish: + self.dataset.finish(num_items) def create_standalone(self): """ diff --git a/backend/lib/search.py b/backend/lib/search.py index cdcd08115..15b3982d6 100644 --- a/backend/lib/search.py +++ b/backend/lib/search.py @@ -1,16 +1,16 @@ import hashlib +import zipfile import secrets -import shutil import random import json import math import csv +import os from pathlib import Path from abc import ABC, abstractmethod from common.config_manager import config -from common.lib.dataset import DataSet from backend.lib.processor import BasicProcessor from common.lib.helpers import strip_tags, dict_search_and_update, remove_nuls, HashCache from common.lib.exceptions import WorkerInterruptedException, ProcessorInterruptedException, MapItemException @@ -71,7 +71,6 @@ def process(self): items = self.import_from_file(query_parameters.get("file")) else: items = self.search(query_parameters) - except WorkerInterruptedException: raise ProcessorInterruptedException("Interrupted while collecting data, trying again later.") @@ -79,10 +78,12 @@ def process(self): num_items = 0 if items: self.dataset.update_status("Writing collected data to dataset file") - if results_file.suffix == ".ndjson": - num_items = self.items_to_ndjson(items, results_file) - elif results_file.suffix == ".csv": + if self.extension == "csv": num_items = self.items_to_csv(items, results_file) + elif self.extension == "ndjson": + num_items = self.items_to_ndjson(items, results_file) + elif self.extension == "zip": + num_items = self.items_to_archive(items, results_file) else: raise NotImplementedError("Datasource query cannot be saved as %s file" % results_file.suffix) @@ -361,6 +362,22 @@ def items_to_ndjson(self, items, filepath): return processed + def items_to_archive(self, items, filepath): + """ + Save retrieved items as an archive + + Assumes that items is an iterable with one item, a Path object + referring to a folder containing files to be archived. The folder will + be removed afterwards. + + :param items: + :param filepath: Where to store the archive + :return int: Number of items + """ + num_items = len(os.listdir(items)) + self.write_archive_and_finish(items, None, zipfile.ZIP_STORED, False) + return num_items + class SearchWithScope(Search, ABC): """ @@ -404,7 +421,7 @@ def search(self, query): # proportion of items matches # first, get amount of items for all threads in which matching # items occur and that are long enough - thread_ids = tuple([post["thread_id"] for post in items]) + thread_ids = tuple([item["thread_id"] for item in items]) self.dataset.update_status("Retrieving thread metadata for %i threads" % len(thread_ids)) try: min_length = int(query.get("scope_length", 30)) diff --git a/backend/lib/worker.py b/backend/lib/worker.py index 3fe19e067..a5695e673 100644 --- a/backend/lib/worker.py +++ b/backend/lib/worker.py @@ -133,6 +133,17 @@ def run(self): location = "->".join(frames) self.log.error("Worker %s raised exception %s and will abort: %s at %s" % (self.type, e.__class__.__name__, str(e), location)) + # Clean up after work successfully completed or terminates + self.clean_up() + + def clean_up(self): + """ + Clean up after a processor runs successfully or results in error. 
+ Workers should override this method to implement any procedures + to run to clean up a worker; by default this does nothing. + """ + pass + def abort(self): """ Called when the application shuts down diff --git a/common/config_manager.py index 40bce67a6..86faf9060 100644 --- a/common/config_manager.py +++ b/common/config_manager.py @@ -44,9 +44,9 @@ def with_db(self, db=None): # Replace w/ db if provided else only initialise if not already self.db = db if db else Database(logger=None, dbname=self.get("DB_NAME"), user=self.get("DB_USER"), password=self.get("DB_PASSWORD"), host=self.get("DB_HOST"), - port=self.get("DB_PORT"), appname="config-reader") if not db else db + port=self.get("DB_PORT"), appname="config-reader") else: - # self.db already initialized + # self.db already initialized and no db provided pass def load_user_settings(self): diff --git a/common/lib/config_definition.py index 0507d0993..d1af7b95d 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -165,20 +165,10 @@ "help": "Can view worker status", "tooltip": "Controls whether users can view worker status via the Control Panel" }, - # The following two options should be set to ensure that every analysis step can + # The following option should be set to ensure that every analysis step can # be traced to a specific version of 4CAT. This allows for reproducible - # research. You can however leave them empty with no ill effect. The version ID - # should be a commit hash, which will be combined with the Github URL to offer - # links to the exact version of 4CAT code that produced an analysis result. - # If no version file is available, the output of "git show" in PATH_ROOT will be used - # to determine the version, if possible. - "path.versionfile": { - "type": UserInput.OPTION_TEXT, - "default": ".git-checked-out", - "help": "Version file", - "tooltip": "Path to file containing GitHub commit hash. File containing a commit ID (everything after the first whitespace found is ignored)", - "global": True - }, + # research. The output of "git show" in PATH_ROOT will be used to determine + # the version of a processor file, if possible. "4cat.github_url": { "type": UserInput.OPTION_TEXT, "default": "https://github.com/digitalmethodsinitiative/4cat", @@ -516,6 +506,18 @@ "tooltip": "If a dataset is a JSON file but it can be mapped to a CSV file, show the CSV in the preview instead" "of the underlying JSON." }, + "ui.offer_hashing": { + "type": UserInput.OPTION_TOGGLE, + "default": True, + "help": "Offer pseudonymisation", + "tooltip": "Add a checkbox to the 'create dataset' form to allow users to toggle pseudonymisation." + }, + "ui.offer_private": { + "type": UserInput.OPTION_TOGGLE, + "default": True, + "help": "Offer create as private", + "tooltip": "Add a checkbox to the 'create dataset' form to allow users to make a dataset private."
+ }, "ui.option_email": { "type": UserInput.OPTION_CHOICE, "options": { diff --git a/common/lib/dataset.py b/common/lib/dataset.py index 8510a5adb..2e75912a1 100644 --- a/common/lib/dataset.py +++ b/common/lib/dataset.py @@ -114,6 +114,9 @@ def __init__(self, parameters=None, key=None, job=None, data=None, db=None, pare self.parameters = json.loads(self.data["parameters"]) self.is_new = False else: + self.data = {"type": type} # get_own_processor needs this + own_processor = self.get_own_processor() + version = get_software_commit(own_processor) self.data = { "key": self.key, "query": self.get_label(parameters, default=type), @@ -125,7 +128,8 @@ def __init__(self, parameters=None, key=None, job=None, data=None, db=None, pare "timestamp": int(time.time()), "is_finished": False, "is_private": is_private, - "software_version": get_software_commit(), + "software_version": version[0], + "software_source": version[1], "software_file": "", "num_rows": 0, "progress": 0.0, @@ -139,7 +143,6 @@ def __init__(self, parameters=None, key=None, job=None, data=None, db=None, pare # Find desired extension from processor if not explicitly set if extension is None: - own_processor = self.get_own_processor() if own_processor: extension = own_processor.get_extension(parent_dataset=DataSet(key=parent, db=db) if parent else None) # Still no extension, default to 'csv' @@ -865,10 +868,12 @@ def get_label(self, parameters=None, default="Query"): elif parameters.get("subject_match") and parameters["subject_match"] != "empty": return parameters["subject_match"] elif parameters.get("query"): - label = parameters["query"] if len(parameters["query"]) < 30 else parameters["query"][:25] + "..." + label = parameters["query"] # Some legacy datasets have lists as query data if isinstance(label, list): label = ", ".join(label) + + label = label if len(label) < 30 else label[:25] + "..." 
label = label.strip().replace("\n", ", ") return label elif parameters.get("country_flag") and parameters["country_flag"] != "all": @@ -1116,7 +1121,8 @@ def update_version(self, version): processor_path = "" updated = self.db.update("datasets", where={"key": self.data["key"]}, data={ - "software_version": version, + "software_version": version[0], + "software_source": version[1], "software_file": processor_path }) @@ -1151,10 +1157,15 @@ def get_version_url(self, file): :param file: File to link within the repository :return: URL, or an empty string """ - if not self.data["software_version"] or not config.get("4cat.github_url"): + if not self.data["software_source"]: return "" - return config.get("4cat.github_url") + "/blob/" + self.data["software_version"] + self.data.get("software_file", "") + filepath = self.data.get("software_file", "") + if filepath.startswith("/extensions/"): + # go to root of extension + filepath = "/" + "/".join(filepath.split("/")[3:]) + + return self.data["software_source"] + "/blob/" + self.data["software_version"] + filepath def top_parent(self): """ diff --git a/common/lib/helpers.py b/common/lib/helpers.py index f6767c929..d98fc8ed6 100644 --- a/common/lib/helpers.py +++ b/common/lib/helpers.py @@ -1,6 +1,7 @@ """ Miscellaneous helper functions for the 4CAT backend """ +import hashlib import subprocess import requests import datetime @@ -16,9 +17,10 @@ import os import io +from pathlib import Path from collections.abc import MutableMapping from html.parser import HTMLParser -from pathlib import Path +from urllib.parse import urlparse, urlunparse from calendar import monthrange from packaging import version @@ -40,7 +42,6 @@ def init_datasource(database, logger, queue, name): """ pass - def strip_tags(html, convert_newlines=True): """ Strip HTML from a string @@ -120,12 +121,9 @@ def get_git_branch(): return "" -def get_software_commit(): +def get_software_commit(worker=None): """ - Get current 4CAT commit hash - - Reads a given version file and returns the first string found in there - (up until the first space). On failure, return an empty string. + Get current 4CAT git commit hash Use `get_software_version()` instead if you need the release version number rather than the precise commit hash. @@ -134,34 +132,58 @@ def get_software_commit(): repository in the 4CAT root folder, and if so, what commit is currently checked out in it. - :return str: 4CAT git commit hash - """ - versionpath = config.get('PATH_ROOT').joinpath(config.get('path.versionfile')) + For extensions, get the repository information for that extension, or if + the extension is not a git repository, return empty data. - if versionpath.exists() and not versionpath.is_file(): - return "" + :param BasicWorker processor: Worker to get commit for. If not given, get + version information for the main 4CAT installation. 
- if not versionpath.exists(): - # try git command line within the 4CAT root folder - # if it is a checked-out git repository, it will tell us the hash of - # the currently checked-out commit - try: - cwd = os.getcwd() - os.chdir(config.get('PATH_ROOT')) - show = subprocess.run(["git", "show"], stderr=subprocess.PIPE, stdout=subprocess.PIPE) - os.chdir(cwd) - if show.returncode != 0: - raise ValueError() - return show.stdout.decode("utf-8").split("\n")[0].split(" ")[1] - except (subprocess.SubprocessError, IndexError, TypeError, ValueError, FileNotFoundError): - return "" + :return tuple: 4CAT git commit hash, repository name + """ + # try git command line within the 4CAT root folder + # if it is a checked-out git repository, it will tell us the hash of + # the currently checked-out commit + cwd = os.getcwd() + # path has no Path.relative()... + relative_filepath = Path(re.sub(r"^[/\\]+", "", worker.filepath)).parent try: - with open(versionpath, "r", encoding="utf-8", errors="ignore") as versionfile: - version = versionfile.readline().split(" ")[0] - return version - except OSError: - return "" + # if extension, go to the extension file's path + # we will run git here - if it is not its own repository, we have no + # useful version info (since the extension is by definition not in the + # main 4CAT repository) and will return an empty value + if worker and worker.is_extension: + extension_dir = config.get("PATH_ROOT").joinpath(relative_filepath) + os.chdir(extension_dir) + # check if we are in the extensions' own repo or 4CAT's + repo_level = subprocess.run(["git", "rev-parse", "--show-toplevel"], stderr=subprocess.PIPE, stdout=subprocess.PIPE) + if Path(repo_level.stdout.decode("utf-8")) == config.get("PATH_ROOT"): + # not its own repository + return ("", "") + + else: + os.chdir(config.get("PATH_ROOT")) + + show = subprocess.run(["git", "show"], stderr=subprocess.PIPE, stdout=subprocess.PIPE) + if show.returncode != 0: + raise ValueError() + commit = show.stdout.decode("utf-8").split("\n")[0].split(" ")[1] + + # now get the repository the commit belongs to, if we can + origin = subprocess.run(["git", "config", "--get", "remote.origin.url"], stderr=subprocess.PIPE, stdout=subprocess.PIPE) + if origin.returncode != 0 or not origin.stdout: + raise ValueError() + repository = origin.stdout.decode("utf-8").strip() + if repository.endswith(".git"): + repository = repository[:-4] + + except (subprocess.SubprocessError, IndexError, TypeError, ValueError, FileNotFoundError) as e: + return ("", "") + + finally: + os.chdir(cwd) + + return (commit, repository) def get_software_version(): """ @@ -174,7 +196,7 @@ def get_software_version(): :return str: Software version, for example `1.37`. """ - current_version_file = Path(config.get("PATH_ROOT"), "config/.current-version") + current_version_file = config.get("PATH_ROOT").joinpath("config/.current-version") if not current_version_file.exists(): return "" @@ -887,6 +909,37 @@ def _sets_to_lists_gen(d): return dict(_sets_to_lists_gen(d)) + +def url_to_hash(url, remove_scheme=True, remove_www=True): + """ + Convert a URL to a filename; some URLs are too long to be used as filenames, this keeps the domain and hashes the + rest of the URL. 
+ """ + parsed_url = urlparse(url.lower()) + if parsed_url: + if remove_scheme: + parsed_url = parsed_url._replace(scheme="") + if remove_www: + netloc = re.sub(r"^www\.", "", parsed_url.netloc) + parsed_url = parsed_url._replace(netloc=netloc) + + url = re.sub(r"[^0-9a-z]+", "_", urlunparse(parsed_url).strip("/")) + else: + # Unable to parse URL; use regex + if remove_scheme: + url = re.sub(r"^https?://", "", url) + if remove_www: + if not remove_scheme: + scheme = re.match(r"^https?://", url).group() + temp_url = re.sub(r"^https?://", "", url) + url = scheme + re.sub(r"^www\.", "", temp_url) + else: + url = re.sub(r"^www\.", "", url) + + url = re.sub(r"[^0-9a-z]+", "_", url.lower().strip("/")) + + return hashlib.blake2b(url.encode("utf-8"), digest_size=24).hexdigest() + def folder_size(path='.'): """ Get the size of a folder using os.scandir for efficiency diff --git a/common/lib/logger.py b/common/lib/logger.py index c1a015ca6..bbd30c444 100644 --- a/common/lib/logger.py +++ b/common/lib/logger.py @@ -163,7 +163,7 @@ class Logger: } alert_level = "FATAL" - def __init__(self, output=False, filename='4cat.log', log_level="INFO"): + def __init__(self, logger_name='4cat-backend', output=False, filename='4cat.log', log_level="INFO"): """ Set up log handler @@ -181,7 +181,7 @@ def __init__(self, output=False, filename='4cat.log', log_level="INFO"): self.log_path = log_folder.joinpath(filename) self.previous_report = time.time() - self.logger = logging.getLogger("4cat-backend") + self.logger = logging.getLogger(logger_name) self.logger.setLevel(log_level) # this handler manages the text log files diff --git a/common/lib/module_loader.py b/common/lib/module_loader.py index 84e5d951e..6d169d912 100644 --- a/common/lib/module_loader.py +++ b/common/lib/module_loader.py @@ -7,6 +7,7 @@ import pickle import sys import re +import os from common.config_manager import config @@ -69,14 +70,11 @@ def is_4cat_class(object, only_processors=False): """ Determine if a module member is a worker class we can use """ - # it would be super cool to just use issubclass() here! - # but that requires importing the classes themselves, which leads to - # circular imports if inspect.isclass(object): if object.__name__ in("BasicProcessor", "BasicWorker") or inspect.isabstract(object): # ignore abstract and base classes return False - + if hasattr(object, "is_4cat_class"): if only_processors: if hasattr(object, "is_4cat_processor"): @@ -85,7 +83,7 @@ def is_4cat_class(object, only_processors=False): return False else: return object.is_4cat_class() - + return False def load_modules(self): @@ -99,14 +97,17 @@ def load_modules(self): """ # look for workers and processors in pre-defined folders and datasources - paths = [Path(config.get('PATH_ROOT'), "processors"), Path(config.get('PATH_ROOT'), "backend", "workers"), - *[self.datasources[datasource]["path"] for datasource in self.datasources]] + paths = [Path(config.get('PATH_ROOT'), "processors"), + Path(config.get('PATH_ROOT'), "backend", "workers"), + Path(config.get('PATH_ROOT'), "extensions"), + *[self.datasources[datasource]["path"] for datasource in self.datasources]] # extension datasources will be here and the above line... 
root_match = re.compile(r"^%s" % re.escape(str(config.get('PATH_ROOT')))) root_path = Path(config.get('PATH_ROOT')) for folder in paths: # loop through folders, and files in those folders, recursively + is_extension = folder.is_relative_to(Path(config.get("PATH_ROOT"), "extensions")) for file in folder.rglob("*.py"): # determine module name for file # reduce path to be relative to 4CAT root @@ -147,6 +148,7 @@ def load_modules(self): self.workers[component[1].type] = component[1] self.workers[component[1].type].filepath = relative_path + self.workers[component[1].type].is_extension = is_extension # we can't use issubclass() because for that we would need # to import BasicProcessor, which would lead to a circular @@ -169,8 +171,7 @@ def load_modules(self): for missing_module, processor_list in self.missing_modules.items(): warning += "\t%s (for %s)\n" % (missing_module, ", ".join(processor_list)) - self.log_buffer = warning - + self.log_buffer += warning self.processors = categorised_processors @@ -183,19 +184,21 @@ def load_datasources(self): `DATASOURCE` constant. The latter is taken as the ID for this datasource. """ - for subdirectory in Path(config.get('PATH_ROOT'), "datasources").iterdir(): - # folder name, also the name used in config.py - folder_name = subdirectory.parts[-1] - - # determine module name - module_name = "datasources." + folder_name + def _load_datasource(subdirectory): + """ + Load a single datasource + """ + # determine module name (path relative to 4CAT root w/ periods) + module_name = ".".join(subdirectory.relative_to(Path(config.get("PATH_ROOT"))).parts) try: datasource = importlib.import_module(module_name) except ImportError as e: - continue + self.log_buffer += "Could not import %s: %s\n" % (module_name, e) + return if not hasattr(datasource, "init_datasource") or not hasattr(datasource, "DATASOURCE"): - continue + self.log_buffer += "Could not load datasource %s: missing init_datasource or DATASOURCE\n" % subdirectory + return datasource_id = datasource.DATASOURCE @@ -208,6 +211,19 @@ def load_datasources(self): "config": {} if not hasattr(datasource, "config") else datasource.config } + # Load 4CAT core datasources + for subdirectory in Path(config.get('PATH_ROOT'), "datasources").iterdir(): + if subdirectory.is_dir(): + _load_datasource(subdirectory) + + # Load extension datasources + # os.walk is used to allow for the possibility of multiple extensions, with nested "datasources" folders + for root, dirs, files in os.walk(Path(config.get('PATH_ROOT'), "extensions"), followlinks=True): + if "datasources" in dirs: + for subdirectory in Path(root, "datasources").iterdir(): + if subdirectory.is_dir(): + _load_datasource(subdirectory) + sorted_datasources = {datasource_id: self.datasources[datasource_id] for datasource_id in sorted(self.datasources, key=lambda id: self.datasources[id]["name"])} self.datasources = sorted_datasources diff --git a/docker-compose_build.yml b/docker-compose_build.yml index b1c1fa1af..7466e8ba8 100644 --- a/docker-compose_build.yml +++ b/docker-compose_build.yml @@ -9,7 +9,6 @@ services: - POSTGRES_HOST_AUTH_METHOD=${POSTGRES_HOST_AUTH_METHOD} volumes: - ./data/postgres/:/var/lib/postgresql/data/ -# - 4cat_db:/var/lib/postgresql/data/ healthcheck: test: [ "CMD-SHELL", "pg_isready -U $${POSTGRES_USER}" ] interval: 5s @@ -33,10 +32,6 @@ services: - ./data/datasets/:/usr/src/app/data/ - ./data/config/:/usr/src/app/config/ - ./data/logs/:/usr/src/app/logs/ -# - 4cat_data:/usr/src/app/data/ -# - 4cat_config:/usr/src/app/config/ -# - 
4cat_logs:/usr/src/app/logs/ - entrypoint: docker/docker-entrypoint.sh frontend: @@ -54,9 +49,6 @@ - ./data/datasets/:/usr/src/app/data/ - ./data/config/:/usr/src/app/config/ - ./data/logs/:/usr/src/app/logs/ -# - 4cat_data:/usr/src/app/data/ -# - 4cat_config:/usr/src/app/config/ -# - 4cat_logs:/usr/src/app/logs/ command: ["docker/wait-for-backend.sh"] volumes: diff --git a/docker/Dockerfile index 709d68893..046b39cba 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -24,6 +24,7 @@ ENV PYTHONUNBUFFERED=1 # Install dependencies RUN pip3 install --upgrade pip COPY ./requirements.txt /usr/src/app/requirements.txt +COPY ./extensions /usr/src/app/extensions COPY ./setup.py /usr/src/app/setup.py COPY ./VERSION /usr/src/app/VERSION COPY ./README.md /usr/src/app/README.md diff --git a/docs/datasource.rst index 56ae2189c..c4731a3e1 100644 --- a/docs/datasource.rst +++ b/docs/datasource.rst @@ -59,7 +59,7 @@ needs to function (e.g. queueing any recurring workers). A default implementatio ------------------ The `Search` class ------------------ -.. autoclass:: backend.abstract.search.Search +.. autoclass:: backend.lib.search.Search :members: :undoc-members: :show-inheritance: @@ -67,7 +67,7 @@ The `Search` class --------------------------- The `SearchWithScope` class --------------------------- -.. autoclass:: backend.abstract.search.SearchWithScope +.. autoclass:: backend.lib.search.SearchWithScope :members: :undoc-members: :show-inheritance: \ No newline at end of file diff --git a/docs/processor.rst index 1bc3c5191..3073b3a09 100644 --- a/docs/processor.rst +++ b/docs/processor.rst @@ -27,7 +27,7 @@ A minimal example of a processor could look like this: """ A minimal example 4CAT processor """ - from backend.abstract.processor import BasicProcessor + from backend.lib.processor import BasicProcessor class ExampleProcessor(BasicProcessor): """ @@ -57,7 +57,7 @@ But there is more you can do. The full API looks like this: The `BasicProcessor` class -------------------------- -.. autoclass:: backend.abstract.processor.BasicProcessor +.. autoclass:: backend.lib.processor.BasicProcessor :members: :undoc-members: :show-inheritance: \ No newline at end of file diff --git a/docs/worker.rst index bc122f7e9..6eafd5f5a 100644 --- a/docs/worker.rst +++ b/docs/worker.rst @@ -8,7 +8,7 @@ TBD The `BasicWorker` class ----------------------- -.. autoclass:: backend.abstract.worker.BasicWorker +.. autoclass:: backend.lib.worker.BasicWorker :members: :undoc-members: :show-inheritance: \ No newline at end of file diff --git a/extensions/.gitignore b/extensions/.gitignore new file mode 100644 index 000000000..d7e401301 --- /dev/null +++ b/extensions/.gitignore @@ -0,0 +1,5 @@ +# Ignore everything in this directory +* +# Except these files +!.gitignore +!README.md diff --git a/extensions/README.md b/extensions/README.md new file mode 100644 index 000000000..f594bc152 --- /dev/null +++ b/extensions/README.md @@ -0,0 +1,39 @@ +This folder contains 4CAT extensions. + +Extensions are processors or data sources that are not part of the main 4CAT codebase, but are otherwise compatible +with it. For example, a processor that interfaces with a closed API would not be useful to most 4CAT users, but if you +have access to it, you could add such a processor to 4CAT as an extension. + + +## Installation +Extensions are simply folders within this 'extensions' folder in which Python files containing the relevant code are +contained.
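For illustration, a minimal extension processor placed in such a folder could look like the sketch below. It is only a sketch following the processor API shown in the 4CAT developer documentation; the file name, class name, `type` identifier and output values are hypothetical and not shipped with 4CAT.

```python
# extensions/my_extension/my_processor.py -- hypothetical example
from backend.lib.processor import BasicProcessor


class ExampleExtensionProcessor(BasicProcessor):
    """
    Example processor loaded from the extensions folder
    """
    type = "example-extension-counter"  # unique processor/job type ID
    category = "Examples"  # category under which it is listed in the interface
    title = "Count items (example extension)"
    description = "Counts the number of items in the parent dataset."
    extension = "csv"  # file extension of the result

    @classmethod
    def is_compatible_with(cls, module=None, user=None):
        # run on any csv or ndjson dataset
        return module.get_extension() in ("csv", "ndjson")

    def process(self):
        # iterate over the parent dataset and count its items
        num_items = 0
        for item in self.source_dataset.iterate_items(self):
            num_items += 1

        # write a one-row result file and mark the dataset as finished
        self.write_csv_items_and_finish([{"items": num_items}])
```

The module loader picks up processor files like this from the extensions folder (and data sources from any `datasources` sub-folder within it) in the same way it picks them up from the core `processors` and `datasources` folders.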
It is strongly recommended that you use git for version control of these folders. Simply commit the code to +a repository somewhere, then clone it into this folder like so: + +```shell +cd [4cat root] +cd extensions +git clone [repository URL] +``` + +This ensures that any dataset created with processors in your extension will be aware of the version of the code they +were created with. This helps debugging and doing reproducible and traceable research. + +## Structure +Processors can simply be .py files in the extension folder. Data sources should be sub-folders in a "datasources" +folder. An extension containing both processors and a data source could look like this: + +``` +[4CAT root]/ +├─ extensions/ +│ ├─ my_extension/ +│ ├─ my_processor.py +│ ├─ my_other_processor.py +│ ├─ datasources/ +│ ├─ my_datasource/ +│ ├─ __init__.py +│ ├─ DESCRIPTION.md +│ ├─ search_my_datasource.py +``` + +In this scenario, `my_extension` would be a git repository within which all other files are contained. \ No newline at end of file diff --git a/helper-scripts/migrate.py b/helper-scripts/migrate.py index fb85772ae..25071afe4 100644 --- a/helper-scripts/migrate.py +++ b/helper-scripts/migrate.py @@ -79,8 +79,39 @@ def check_for_nltk(): nltk.download("omw-1.4", quiet=True) +def install_extensions(no_pip=True): + """ + Check for extensions and run any installation scripts found. -def finish(args, logger): + Note: requirements texts are handled by setup.py + """ + # Check for extension packages + if os.path.isdir("extensions"): + for root, dirs, files in os.walk("extensions"): + for file in files: + if file == "fourcat_install.py": + command = [interpreter, os.path.join(root, file)] + if args.component == "frontend": + command.append("--component=frontend") + elif args.component == "backend": + command.append("--component=backend") + elif args.component == "both": + command.append("--component=both") + + if no_pip: + command.append("--no-pip") + + print(f"Installing extension: {os.path.join(root, file)}") + result = subprocess.run(command, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if result.returncode != 0: + print("Error while running extension installation script: " + os.path.join(root, file)) + + print(result.stdout.decode("utf-8")) if result.stdout else None + print(result.stderr.decode("utf-8")) if result.stderr else None + + +def finish(args, logger, no_pip=True): """ Finish migration @@ -89,6 +120,7 @@ def finish(args, logger): wrap up and exit. """ check_for_nltk() + install_extensions(no_pip=no_pip) logger.info("\nMigration finished. You can now safely restart 4CAT.\n") if args.restart: @@ -115,7 +147,7 @@ def finish(args, logger): cli.add_argument("--no-migrate", "-m", default=False, action="store_true", help="Do not run scripts to upgrade between minor versions. Use if you only want to use migrate to e.g. upgrade dependencies.") cli.add_argument("--current-version", "-v", default="config/.current-version", help="File path to .current-version file, relative to the 4CAT root") cli.add_argument("--output", "-o", default="", help="By default migrate.py will send output to stdout. If this argument is set, it will write to the given path instead.") -cli.add_argument("--component", "-c", default="both", help="Which component of 4CAT to migrate. Currently only skips check for if 4CAT is running when set to 'frontend'") +cli.add_argument("--component", "-c", default="both", help="Which component of 4CAT to migrate ('both', 'backend', 'frontend'). 
Skips check for if 4CAT is running when set to 'frontend'. Also used by extensions w/ fourcat_install.py") cli.add_argument("--branch", "-b", default=False, help="Which branch to check out from GitHub. By default, check out the latest release.") args = cli.parse_args() @@ -125,6 +157,9 @@ def finish(args, logger): print("This script needs to be run from the same folder as 4cat-daemon.py\n") exit(1) +# track pip +pip_ran = False + # set up logging logger = logging.getLogger("migrate") logger.setLevel(logging.INFO) @@ -145,6 +180,7 @@ def finish(args, logger): logger.info("Restart after migration: " + ("yes" if args.restart else "no")) logger.info("Repository URL: " + args.repository) logger.info(".current-version path: " + args.current_version) +logger.info(f"Current Datetime: {time.strftime('%Y-%m-%d %H:%M:%S')}") # --------------------------------------------- # Ensure existence of current version file @@ -221,7 +257,7 @@ def finish(args, logger): logger.info(" ...latest release available from GitHub (%s) is older than or equivalent to currently checked out version " "(%s)." % (tag_version, current_version_c)) logger.info(" ...upgrade not necessary, skipping.") - finish(args, logger) + finish(args, logger, no_pip=pip_ran) logger.info(" ...ensuring repository %s is a known remote" % args.repository) remote = subprocess.run(shlex.split("git remote add 4cat_migrate %s" % args.repository), stdout=subprocess.PIPE, @@ -297,7 +333,7 @@ def finish(args, logger): if current_version == target_version: logger.info(" ...already up to date.") - finish(args, logger) + finish(args, logger, no_pip=pip_ran) if current_version_c[0:3] != target_version_c[0:3]: logger.info(" ...cannot migrate between different major versions.") @@ -365,6 +401,7 @@ def log_pip_output(logger, output): pip = subprocess.run([interpreter, "-m", "pip", "install", "-r", "requirements.txt", "--upgrade", "--upgrade-strategy", "eager"], stderr=subprocess.STDOUT, stdout=subprocess.PIPE, check=True, cwd=cwd) log_pip_output(logger, pip.stdout) + pip_ran = True except subprocess.CalledProcessError as e: log_pip_output(logger, e.output) logger.info(f"\n Error running pip: {e}") @@ -410,4 +447,4 @@ def log_pip_output(logger, output): # --------------------------------------------- # Done! 
Wrap up and finish # --------------------------------------------- -finish(args, logger) +finish(args, logger, no_pip=pip_ran) diff --git a/helper-scripts/migrate/migrate-1.45-1.46.py b/helper-scripts/migrate/migrate-1.45-1.46.py new file mode 100644 index 000000000..8bf5d0683 --- /dev/null +++ b/helper-scripts/migrate/migrate-1.45-1.46.py @@ -0,0 +1,33 @@ +# Ensure unique metrics index exists +import json +import sys +import os + +from pathlib import Path + +sys.path.insert(0, os.path.join(os.path.abspath(os.path.dirname(__file__)), "../..")) +from common.lib.database import Database +from common.lib.logger import Logger + +log = Logger(output=True) + +import configparser + +ini = configparser.ConfigParser() +ini.read(Path(__file__).parent.parent.parent.resolve().joinpath("config/config.ini")) +db_config = ini["DATABASE"] + +db = Database(logger=log, dbname=db_config["db_name"], user=db_config["db_user"], password=db_config["db_password"], + host=db_config["db_host"], port=db_config["db_port"], appname="4cat-migrate") + +print(" Checking if datasets table has a column 'software_source'...") +has_column = db.fetchone( + "SELECT COUNT(*) AS num FROM information_schema.columns WHERE table_name = 'datasets' AND column_name = 'software_source'") +if has_column["num"] == 0: + print(" ...No, adding.") + current_source = db.fetchone("SELECT value FROM settings WHERE name = '4cat.github_url' AND tag = ''") + current_source = json.loads(current_source["value"]) if current_source is not None else "" + db.execute("ALTER TABLE datasets ADD COLUMN software_source TEXT DEFAULT %s", (current_source,)) + db.commit() +else: + print(" ...Yes, nothing to update.") \ No newline at end of file diff --git a/processors/filtering/column_filter.py b/processors/filtering/column_filter.py index 01c7fa88f..2dc73b63e 100644 --- a/processors/filtering/column_filter.py +++ b/processors/filtering/column_filter.py @@ -75,7 +75,7 @@ class ColumnFilter(BaseFilter): @classmethod def is_compatible_with(cls, module=None, user=None): """ - Allow processor on top datasets. + Allow processor on top datasets that are CSV or NDJSON. :param module: Module to determine compatibility with """ @@ -262,11 +262,11 @@ class ColumnProcessorFilter(ColumnFilter): @classmethod def is_compatible_with(cls, module=None, user=None): """ - Allow processor on top datasets. 
+ Allow on child datasets and do not create a standalone dataset :param module: Dataset or processor to determine compatibility with """ - return module.get_extension() in ("csv", "ndjson") and not module.is_top_dataset() + return not module.is_top_dataset() and module.get_extension() in ("csv", "ndjson") @classmethod def is_filter(cls): diff --git a/processors/metrics/rank_attribute.py b/processors/metrics/rank_attribute.py index adffe824a..0e38757c6 100644 --- a/processors/metrics/rank_attribute.py +++ b/processors/metrics/rank_attribute.py @@ -110,11 +110,12 @@ class AttributeRanker(BasicProcessor): @classmethod def is_compatible_with(cls, module=None, user=None): """ - Allow processor on top image rankings + Allow processor to run on all csv and NDJSON datasets :param module: Module to determine compatibility with """ - return module.get_extension() in ["csv", "ndjson"] + + return module.get_extension() in ("csv", "ndjson") def process(self): """ @@ -134,7 +135,7 @@ def process(self): weighby = self.parameters.get("weigh") to_lowercase = self.parameters.get("to-lowercase", True) self.include_missing_data = self.parameters.get("count_missing") - + try: if self.parameters.get("filter"): filter = re.compile(".*" + self.parameters.get("filter") + ".*") @@ -203,7 +204,7 @@ def missing_value_placeholder(data, field_name): for value in values: if to_lowercase: value = value.lower() - + if rank_style == "overall" and value not in overall_top: continue @@ -340,4 +341,4 @@ def get_options(cls, parent_dataset=None, user=None): options["columns"]["options"] = {v: v for v in columns} options["columns"]["default"] = ["body"] - return options \ No newline at end of file + return options diff --git a/processors/networks/wikipedia_network.py b/processors/networks/wikipedia_network.py index 00e141fc7..0426c97d2 100644 --- a/processors/networks/wikipedia_network.py +++ b/processors/networks/wikipedia_network.py @@ -3,19 +3,20 @@ """ import re import requests - -from backend.lib.processor import BasicProcessor from lxml import etree from lxml.cssselect import CSSSelector as css from io import StringIO - import networkx as nx +from backend.lib.processor import BasicProcessor +from common.lib.exceptions import ProcessorInterruptedException + __author__ = "Stijn Peeters" __credits__ = ["Stijn Peeters", "Sal Hagen"] __maintainer__ = "Stijn Peeters" __email__ = "4cat@oilab.eu" + class WikiURLCoLinker(BasicProcessor): """ Generate URL co-link network diff --git a/processors/presets/neologisms.py b/processors/presets/neologisms.py index 26684e4d0..1cf258503 100644 --- a/processors/presets/neologisms.py +++ b/processors/presets/neologisms.py @@ -19,17 +19,6 @@ class NeologismExtractor(ProcessorPreset): references = ["Van Soest, Jeroen. 2019. 'Language Innovation Tracker: Detecting language innovation in online discussion fora.' (MA thesis), Beuls, K. (Promotor), Van Eecke, P. 
(Advisor).'"] - @staticmethod - def is_compatible_with(module=None, user=None): - """ - Determine compatibility - - This preset is compatible with any dataset that has columns - - :param Dataset module: Module ID to determine compatibility with - :return bool: - """ - return module.is_top_dataset() and module.get_extension() in ("csv", "ndjson") @classmethod def get_options(cls, parent_dataset=None, user=None): @@ -60,6 +49,16 @@ def get_options(cls, parent_dataset=None, user=None): return options + @classmethod + def is_compatible_with(cls, module=None, user=None): + """ + Allow processor to run on all csv and NDJSON datasets + + :param module: Dataset or processor to determine compatibility with + """ + + return module.get_extension() in ("csv", "ndjson") + def get_processor_pipeline(self): """ This queues a series of post-processors to extract neologisms from a diff --git a/processors/text-analysis/split_sentences.py b/processors/text-analysis/split_sentences.py index c5cce2477..dd2be7c2f 100644 --- a/processors/text-analysis/split_sentences.py +++ b/processors/text-analysis/split_sentences.py @@ -86,8 +86,11 @@ def get_options(cls, parent_dataset=None, user=None): @classmethod def is_compatible_with(cls, module=None, user=None): """ - Allow CSV and NDJSON datasets + Allow processor to run on all csv and NDJSON datasets + + :param module: Dataset or processor to determine compatibility with """ + return module.get_extension() in ("csv", "ndjson") def process(self): diff --git a/processors/text-analysis/tokenise.py b/processors/text-analysis/tokenise.py index fb1b89cbd..a104306f1 100644 --- a/processors/text-analysis/tokenise.py +++ b/processors/text-analysis/tokenise.py @@ -50,8 +50,11 @@ class Tokenise(BasicProcessor): @classmethod def is_compatible_with(cls, module=None, user=None): """ - Allow CSV and NDJSON datasets + Allow processor to run on all csv and NDJSON datasets + + :param module: Dataset or processor to determine compatibility with """ + return module.get_extension() in ("csv", "ndjson") @classmethod diff --git a/processors/visualisation/download_videos.py b/processors/visualisation/download_videos.py index aa24a724b..2b385ffe7 100644 --- a/processors/visualisation/download_videos.py +++ b/processors/visualisation/download_videos.py @@ -234,7 +234,7 @@ def is_compatible_with(cls, module=None, user=None): in principle, but any links to videos are likely to come from the top dataset anyway. 
- :param str module: Module ID to determine compatibility with + :param module: Module to determine compatibility with :return bool: """ return ((module.type.endswith("-search") or module.is_from_collector()) @@ -645,6 +645,9 @@ def collect_video_urls(self): if not value: continue + if value is not str: + value = str(value) + video_links = self.identify_video_urls_in_string(value) if video_links: item_urls |= set(video_links) @@ -667,7 +670,6 @@ def identify_video_urls_in_string(self, text): :param str text: string that may contain URLs :return list: list containing validated URLs to videos """ - text = str(text) split_comma = self.parameters.get("split-comma", True) if split_comma: texts = text.split(",") diff --git a/processors/visualisation/image_category_wall.py b/processors/visualisation/image_category_wall.py index fee1fb7b0..d74d28e40 100644 --- a/processors/visualisation/image_category_wall.py +++ b/processors/visualisation/image_category_wall.py @@ -61,13 +61,14 @@ class ImageCategoryWallGenerator(BasicProcessor): def is_compatible_with(cls, module=None, user=None): """ Allow processor on CLIP dataset only - + :param module: Dataset or processor to determine compatibility with """ return module.type.startswith("image-to-categories") or \ module.type.startswith("image-downloader") or \ module.type.startswith("video-hasher-1") or \ - module.type.startswith("video-hash-similarity-matrix") + module.type.startswith("video-hash-similarity-matrix") and \ + not module.type not in ["image-downloader-screenshots-search"] @classmethod def get_options(cls, parent_dataset=None, user=None): @@ -170,7 +171,7 @@ def process(self): self.dataset.log(f"Found {image_dataset.type} w/ {image_dataset.num_rows} images and {category_dataset.type} w/ {category_dataset.num_rows} items") category_column = self.parameters.get("category") - if category_column is None: + if not category_column: self.dataset.finish_with_error("No category provided.") return @@ -427,6 +428,3 @@ def process(self): canvas.save(pretty=True) self.dataset.log("Saved to " + str(self.dataset.get_results_path())) return self.dataset.finish(len(category_widths)) - - - diff --git a/processors/visualisation/word-trees.py b/processors/visualisation/word-trees.py index 6446372e8..f7783bcc1 100644 --- a/processors/visualisation/word-trees.py +++ b/processors/visualisation/word-trees.py @@ -104,6 +104,16 @@ class MakeWordtree(BasicProcessor): } } + @classmethod + def is_compatible_with(cls, module=None, user=None): + """ + Allow processor to run on all csv and NDJSON datasets + + :param module: Dataset or processor to determine compatibility with + """ + + return module.get_extension() in ("csv", "ndjson") + # determines how close the nodes are displayed to each other (min. 
1) whitespace = 2 @@ -126,13 +136,6 @@ class MakeWordtree(BasicProcessor): # methods limit = 1 - @classmethod - def is_compatible_with(cls, module=None, user=None): - """ - Allow CSV and NDJSON datasets - """ - return module.is_top_dataset() and module.get_extension() in ("csv", "ndjson") - def process(self): """ This takes a 4CAT results file as input, and outputs a plain text file diff --git a/setup.py b/setup.py index 17079a887..56f5acd16 100644 --- a/setup.py +++ b/setup.py @@ -8,10 +8,10 @@ version = versionfile.readline().strip() # Universal packages -packages = [ +packages = set([ "anytree~=2.8.0", "bcrypt~=3.2.0", - "beautifulsoup4~=4.11.0", + "beautifulsoup4",#~=4.11.0", "clarifai-grpc~=9.0", "cryptography>=39.0.1", "cssselect~=1.1.0", @@ -22,7 +22,7 @@ "Flask~=2.2", "Flask_Limiter==1.0.1", "Flask_Login~=0.6", - "gensim>=4.1.0, <4.2", + "gensim>=4.1.0", "google_api_python_client==2.0.2", "html2text==2020.*", "ImageHash>4.2.0", @@ -31,7 +31,7 @@ "lxml~=4.9.0", "markdown==3.0.1", "markdown2==2.4.2", - "nltk==3.9.1", + "nltk~=3.9.1", "networkx~=2.8.0", "numpy>=1.19.2", "opencv-python>=4.6.0.66", @@ -48,6 +48,7 @@ "razdel~=0.5", "requests~=2.27", "requests_futures", + "scikit_learn", "scenedetect==0.6.0.3", "scikit-learn", "scipy==1.10.1", @@ -64,15 +65,28 @@ "videohash @ git+https://github.com/dale-wahl/videohash@main", "vk_api", "yt-dlp" -] +]) + +# Check for extension packages +if os.path.isdir("extensions"): + extension_packages = set() + for root, dirs, files in os.walk("extensions"): + for file in files: + if file == "requirements.txt": + with open(os.path.join(root, file)) as extension_requirements: + for line in extension_requirements.readlines(): + extension_packages.add(line.strip()) + if extension_packages: + print("Found extensions, installing additional packages: " + str(extension_packages)) + packages = packages.union(extension_packages) # Some packages don't run on Windows -unix_packages = [ +unix_packages = set([ "python-daemon==2.3.2" -] +]) if os.name != "nt": - packages = packages + unix_packages + packages = packages.union(unix_packages) setup( name='fourcat', @@ -85,5 +99,5 @@ url="https://oilab.eu", packages=['backend', 'webtool', 'datasources'], python_requires='>=3.7', - install_requires=packages, + install_requires=list(packages), ) diff --git a/webtool/__init__.py b/webtool/__init__.py index 7becd1239..0fd3ecf5d 100644 --- a/webtool/__init__.py +++ b/webtool/__init__.py @@ -107,10 +107,8 @@ import webtool.views.views_admin import webtool.views.views_restart import webtool.views.views_user - import webtool.views.views_dataset import webtool.views.views_misc - import webtool.views.api_explorer import webtool.views.api_standalone import webtool.views.api_tool diff --git a/webtool/lib/helpers.py b/webtool/lib/helpers.py index d06f4435c..6cc91eba1 100644 --- a/webtool/lib/helpers.py +++ b/webtool/lib/helpers.py @@ -23,7 +23,7 @@ class Pagination(object): Provide pagination """ - def __init__(self, page, per_page, total_count, route="show_results"): + def __init__(self, page, per_page, total_count, route="show_results", route_args=None): """ Set up pagination object @@ -36,6 +36,7 @@ def __init__(self, page, per_page, total_count, route="show_results"): self.per_page = per_page self.total_count = total_count self.route = route + self.route_args = route_args if route_args else {} @property def pages(self): diff --git a/webtool/lib/template_filters.py b/webtool/lib/template_filters.py index c50caca26..6ac9272ba 100644 --- a/webtool/lib/template_filters.py +++ 
b/webtool/lib/template_filters.py @@ -139,9 +139,12 @@ def _jinja2_filter_add_ahref(content): return content -@app.template_filter('markdown') -def _jinja2_filter_markdown(text): +@app.template_filter('markdown',) +def _jinja2_filter_markdown(text, trim_container=False): val = markdown.markdown(text) + if trim_container: + val = re.sub(r"^<p>", "", val) + val = re.sub(r"</p>
$", "", val) return val @app.template_filter('isbool') @@ -262,7 +265,7 @@ def _jinja2_filter_post_field(field, post): formatted_field = field field = str(field) - + for key in re.findall(r"\{\{(.*?)\}\}", field): original_key = key @@ -296,7 +299,7 @@ def _jinja2_filter_post_field(field, post): # We see 0 as a valid value - e.g. '0 retweets'. if not val and val != 0: return "" - + # Support some basic string slicing if string_slice: field = field.replace("[" + string_slice + "]", "") @@ -317,7 +320,7 @@ def _jinja2_filter_post_field(field, post): # Apply further filters, if present (e.g. lower) for extra_filter in extra_filters: - + extra_filter = extra_filter.strip() # We're going to parse possible parameters to pass to the filter @@ -328,7 +331,7 @@ def _jinja2_filter_post_field(field, post): extra_filter = extra_filter.split("(")[0] params = [p.strip() for p in params.split(",")] params = [post[param] for param in params] - + val = app.jinja_env.filters[extra_filter](val, *params) if string_slice: @@ -388,3 +391,7 @@ def uniqid(): "__version": version, "uniqid": uniqid } + +@app.template_filter('log') +def _jinja2_filter_log(text): + app.logger.info(text) \ No newline at end of file diff --git a/webtool/static/js/fourcat.js b/webtool/static/js/fourcat.js index e622505b2..7e0058fc3 100644 --- a/webtool/static/js/fourcat.js +++ b/webtool/static/js/fourcat.js @@ -630,17 +630,17 @@ const query = { for (let i = 0; i < json.length; i += 1) { search_queue_length += json[i]['count']; - search_queue_notice += " " + json[i]['jobtype'].replace('-search', '') + ' (' + json[i]['count'] + ')' + '' + search_queue_notice += " " + json[i]['processor_name'] + ' (' + json[i]['count'] + ')' + '' } if (search_queue_length == 0) { search_queue_box.html('Search queue is empty.'); search_queue_list.html(''); } else if (search_queue_length == 1) { - search_queue_box.html('Currently processing 1 search query: '); + search_queue_box.html('Currently collecting 1 dataset: '); search_queue_list.html(search_queue_notice); } else { - search_queue_box.html('Currently processing ' + search_queue_length + ' search queries: '); + search_queue_box.html('Currently collecting ' + search_queue_length + ' datasets: '); search_queue_list.html(search_queue_notice); } }, @@ -1993,4 +1993,4 @@ function find_parent(element, selector) { } return null; -} \ No newline at end of file +} diff --git a/webtool/templates/components/datasource-option.html b/webtool/templates/components/datasource-option.html index 5eff77e00..4ee4ba16e 100644 --- a/webtool/templates/components/datasource-option.html +++ b/webtool/templates/components/datasource-option.html @@ -9,7 +9,7 @@ {% if settings.type == "toggle" %} - {% if "tooltip" in settings %}

{{ settings.tooltip }}

{% endif %} + {% if "tooltip" in settings %}

{{ settings.tooltip|markdown(True)|safe }}

{% endif %} {% elif settings.type == "file" %} {% if "tooltip" in settings %} diff --git a/webtool/templates/components/pagination.html b/webtool/templates/components/pagination.html index 607844157..91ea859c8 100644 --- a/webtool/templates/components/pagination.html +++ b/webtool/templates/components/pagination.html @@ -2,12 +2,12 @@ \ No newline at end of file diff --git a/webtool/templates/components/result-details.html b/webtool/templates/components/result-details.html index 9cf51a2a6..ebe8f64ec 100644 --- a/webtool/templates/components/result-details.html +++ b/webtool/templates/components/result-details.html @@ -163,7 +163,7 @@