From 9271877caac4b1d1e0f59383ba53b871d643b7b3 Mon Sep 17 00:00:00 2001 From: Thomas Schaffter Date: Thu, 26 Oct 2023 23:01:50 +0000 Subject: [PATCH 1/8] Create Bedrock client --- .../notebooks/openai-challenge-headline.ipynb | 37 +- .../notebook/notebooks/utils/__init__.py | 0 .../notebook/notebooks/utils/bedrock.py | 80 ++++ .../notebook/notebooks/utils/print_ww.py | 21 + apps/openchallenges/notebook/poetry.lock | 425 +++++++++++++++++- apps/openchallenges/notebook/pyproject.toml | 2 + 6 files changed, 555 insertions(+), 10 deletions(-) create mode 100644 apps/openchallenges/notebook/notebooks/utils/__init__.py create mode 100644 apps/openchallenges/notebook/notebooks/utils/bedrock.py create mode 100644 apps/openchallenges/notebook/notebooks/utils/print_ww.py diff --git a/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb b/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb index 6c1811e02e..dddc61a970 100644 --- a/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb +++ b/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb @@ -253,12 +253,47 @@ "print(json_str)" ] }, + { + "cell_type": "markdown", + "id": "14ba8e14", + "metadata": {}, + "source": [ + "## Generating challenge headlines with AWS LLM" + ] + }, + { + "cell_type": "markdown", + "id": "8ba9a632", + "metadata": {}, + "source": [ + "### Configure Bedrock client" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "20da8b0e", "metadata": {}, "outputs": [], + "source": [ + "import json\n", + "import os\n", + "import sys\n", + "\n", + "import boto3\n", + "import botocore\n", + "\n", + "module_path = \"..\"\n", + "sys.path.append(os.path.abspath(module_path))\n", + "from utils import bedrock, print_ww" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5bbcd90f", + "metadata": {}, + "outputs": [], "source": [] } ], diff --git a/apps/openchallenges/notebook/notebooks/utils/__init__.py b/apps/openchallenges/notebook/notebooks/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/openchallenges/notebook/notebooks/utils/bedrock.py b/apps/openchallenges/notebook/notebooks/utils/bedrock.py new file mode 100644 index 0000000000..b959e1009a --- /dev/null +++ b/apps/openchallenges/notebook/notebooks/utils/bedrock.py @@ -0,0 +1,80 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 +"""Helper utilities for working with Amazon Bedrock from Python notebooks""" +# Python Built-Ins: +import os +from typing import Optional + +# External Dependencies: +import boto3 +from botocore.config import Config + + +def get_bedrock_client( + assumed_role: Optional[str] = None, + region: Optional[str] = None, + runtime: Optional[bool] = True, +): + """Create a boto3 client for Amazon Bedrock, with optional configuration overrides + + Parameters + ---------- + assumed_role : + Optional ARN of an AWS IAM role to assume for calling the Bedrock service. If not + specified, the current active credentials will be used. + region : + Optional name of the AWS Region in which the service should be called (e.g. "us-east-1"). + If not specified, AWS_REGION or AWS_DEFAULT_REGION environment variable will be used. + runtime : + Optional choice of getting different client to perform operations with the Amazon Bedrock service. + """ + if region is None: + target_region = os.environ.get( + "AWS_REGION", os.environ.get("AWS_DEFAULT_REGION") + ) + else: + target_region = region + + print(f"Create new client\n Using region: {target_region}") + session_kwargs = {"region_name": target_region} + client_kwargs = {**session_kwargs} + + profile_name = os.environ.get("AWS_PROFILE") + if profile_name: + print(f" Using profile: {profile_name}") + session_kwargs["profile_name"] = profile_name + + retry_config = Config( + region_name=target_region, + retries={ + "max_attempts": 10, + "mode": "standard", + }, + ) + session = boto3.Session(**session_kwargs) + + if assumed_role: + print(f" Using role: {assumed_role}", end="") + sts = session.client("sts") + response = sts.assume_role( + RoleArn=str(assumed_role), RoleSessionName="langchain-llm-1" + ) + print(" ... successful!") + client_kwargs["aws_access_key_id"] = response["Credentials"]["AccessKeyId"] + client_kwargs["aws_secret_access_key"] = response["Credentials"][ + "SecretAccessKey" + ] + client_kwargs["aws_session_token"] = response["Credentials"]["SessionToken"] + + if runtime: + service_name = "bedrock-runtime" + else: + service_name = "bedrock" + + bedrock_client = session.client( + service_name=service_name, config=retry_config, **client_kwargs + ) + + print("boto3 Bedrock client successfully created!") + print(bedrock_client._endpoint) + return bedrock_client diff --git a/apps/openchallenges/notebook/notebooks/utils/print_ww.py b/apps/openchallenges/notebook/notebooks/utils/print_ww.py new file mode 100644 index 0000000000..b03ad2c10a --- /dev/null +++ b/apps/openchallenges/notebook/notebooks/utils/print_ww.py @@ -0,0 +1,21 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 +"""General helper utilities the workshop notebooks""" +# Python Built-Ins: +from io import StringIO +import sys +import textwrap + + +def print_ww(*args, width: int = 100, **kwargs): + """Like print(), but wraps output to `width` characters (default 100)""" + buffer = StringIO() + try: + _stdout = sys.stdout + sys.stdout = buffer + print(*args, **kwargs) + output = buffer.getvalue() + finally: + sys.stdout = _stdout + for line in output.splitlines(): + print("\n".join(textwrap.wrap(line, width=width))) diff --git a/apps/openchallenges/notebook/poetry.lock b/apps/openchallenges/notebook/poetry.lock index 84ff397b55..80ab2c34f7 100644 --- a/apps/openchallenges/notebook/poetry.lock +++ b/apps/openchallenges/notebook/poetry.lock @@ -136,24 +136,24 @@ frozenlist = ">=1.1.0" [[package]] name = "anyio" -version = "4.0.0" +version = "3.7.1" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false -python-versions = ">=3.8" +python-versions = ">=3.7" files = [ - {file = "anyio-4.0.0-py3-none-any.whl", hash = "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f"}, - {file = "anyio-4.0.0.tar.gz", hash = "sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a"}, + {file = "anyio-3.7.1-py3-none-any.whl", hash = "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5"}, + {file = "anyio-3.7.1.tar.gz", hash = "sha256:44a3c9aba0f5defa43261a8b3efb97891f2bd7d804e0e1f56419befa1adfc780"}, ] [package.dependencies] -exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} +exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} idna = ">=2.8" sniffio = ">=1.1" [package.extras] -doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] -trio = ["trio (>=0.22)"] +doc = ["Sphinx", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-jquery"] +test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (<0.22)"] [[package]] name = "appnope" @@ -349,6 +349,44 @@ webencodings = "*" [package.extras] css = ["tinycss2 (>=1.1.0,<1.3)"] +[[package]] +name = "boto3" +version = "1.28.72" +description = "The AWS SDK for Python" +optional = false +python-versions = ">= 3.7" +files = [ + {file = "boto3-1.28.72-py3-none-any.whl", hash = "sha256:73e2b56dcbe1b856274185c908dcb5973b007d62c74b646e6bd2ac09a303983a"}, + {file = "boto3-1.28.72.tar.gz", hash = "sha256:cc7d939cdbb4ad3980274c96c589e95f028f740ac49a3547c8f4aaec6a7a6409"}, +] + +[package.dependencies] +botocore = ">=1.31.72,<1.32.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.7.0,<0.8.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.31.72" +description = "Low-level, data-driven core of boto 3." +optional = false +python-versions = ">= 3.7" +files = [ + {file = "botocore-1.31.72-py3-none-any.whl", hash = "sha256:1a0f5063a9a5e1ef40907c67778625cf14a8c47c8e09cd2b3f9dc53a8adb52cf"}, + {file = "botocore-1.31.72.tar.gz", hash = "sha256:37763f40c2fcc48114015fe43bbd75ec1aabef48b418e1a5e28cb1c350967260"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""} + +[package.extras] +crt = ["awscrt (==0.16.26)"] + [[package]] name = "certifi" version = "2023.7.22" @@ -553,6 +591,21 @@ lint = ["black (>=22.6.0)", "mdformat (>0.7)", "mdformat-gfm (>=0.3.5)", "ruff ( test = ["pytest"] typing = ["mypy (>=0.990)"] +[[package]] +name = "dataclasses-json" +version = "0.6.1" +description = "Easily serialize dataclasses to and from JSON." +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "dataclasses_json-0.6.1-py3-none-any.whl", hash = "sha256:1bd8418a61fe3d588bb0079214d7fb71d44937da40742b787256fd53b26b6c80"}, + {file = "dataclasses_json-0.6.1.tar.gz", hash = "sha256:a53c220c35134ce08211a1057fd0e5bf76dc5331627c6b241cacbc570a89faae"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + [[package]] name = "debugpy" version = "1.8.0" @@ -736,6 +789,76 @@ files = [ {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, ] +[[package]] +name = "greenlet" +version = "3.0.1" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=3.7" +files = [ + {file = "greenlet-3.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f89e21afe925fcfa655965ca8ea10f24773a1791400989ff32f467badfe4a064"}, + {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28e89e232c7593d33cac35425b58950789962011cc274aa43ef8865f2e11f46d"}, + {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8ba29306c5de7717b5761b9ea74f9c72b9e2b834e24aa984da99cbfc70157fd"}, + {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19bbdf1cce0346ef7341705d71e2ecf6f41a35c311137f29b8a2dc2341374565"}, + {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599daf06ea59bfedbec564b1692b0166a0045f32b6f0933b0dd4df59a854caf2"}, + {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b641161c302efbb860ae6b081f406839a8b7d5573f20a455539823802c655f63"}, + {file = "greenlet-3.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d57e20ba591727da0c230ab2c3f200ac9d6d333860d85348816e1dca4cc4792e"}, + {file = "greenlet-3.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5805e71e5b570d490938d55552f5a9e10f477c19400c38bf1d5190d760691846"}, + {file = "greenlet-3.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:52e93b28db27ae7d208748f45d2db8a7b6a380e0d703f099c949d0f0d80b70e9"}, + {file = "greenlet-3.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f7bfb769f7efa0eefcd039dd19d843a4fbfbac52f1878b1da2ed5793ec9b1a65"}, + {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91e6c7db42638dc45cf2e13c73be16bf83179f7859b07cfc139518941320be96"}, + {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1757936efea16e3f03db20efd0cd50a1c86b06734f9f7338a90c4ba85ec2ad5a"}, + {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19075157a10055759066854a973b3d1325d964d498a805bb68a1f9af4aaef8ec"}, + {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9d21aaa84557d64209af04ff48e0ad5e28c5cca67ce43444e939579d085da72"}, + {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2847e5d7beedb8d614186962c3d774d40d3374d580d2cbdab7f184580a39d234"}, + {file = "greenlet-3.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:97e7ac860d64e2dcba5c5944cfc8fa9ea185cd84061c623536154d5a89237884"}, + {file = "greenlet-3.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b2c02d2ad98116e914d4f3155ffc905fd0c025d901ead3f6ed07385e19122c94"}, + {file = "greenlet-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:22f79120a24aeeae2b4471c711dcf4f8c736a2bb2fabad2a67ac9a55ea72523c"}, + {file = "greenlet-3.0.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:100f78a29707ca1525ea47388cec8a049405147719f47ebf3895e7509c6446aa"}, + {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60d5772e8195f4e9ebf74046a9121bbb90090f6550f81d8956a05387ba139353"}, + {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:daa7197b43c707462f06d2c693ffdbb5991cbb8b80b5b984007de431493a319c"}, + {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea6b8aa9e08eea388c5f7a276fabb1d4b6b9d6e4ceb12cc477c3d352001768a9"}, + {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d11ebbd679e927593978aa44c10fc2092bc454b7d13fdc958d3e9d508aba7d0"}, + {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dbd4c177afb8a8d9ba348d925b0b67246147af806f0b104af4d24f144d461cd5"}, + {file = "greenlet-3.0.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20107edf7c2c3644c67c12205dc60b1bb11d26b2610b276f97d666110d1b511d"}, + {file = "greenlet-3.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8bef097455dea90ffe855286926ae02d8faa335ed8e4067326257cb571fc1445"}, + {file = "greenlet-3.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:b2d3337dcfaa99698aa2377c81c9ca72fcd89c07e7eb62ece3f23a3fe89b2ce4"}, + {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80ac992f25d10aaebe1ee15df45ca0d7571d0f70b645c08ec68733fb7a020206"}, + {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:337322096d92808f76ad26061a8f5fccb22b0809bea39212cd6c406f6a7060d2"}, + {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b9934adbd0f6e476f0ecff3c94626529f344f57b38c9a541f87098710b18af0a"}, + {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc4d815b794fd8868c4d67602692c21bf5293a75e4b607bb92a11e821e2b859a"}, + {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41bdeeb552d814bcd7fb52172b304898a35818107cc8778b5101423c9017b3de"}, + {file = "greenlet-3.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:6e6061bf1e9565c29002e3c601cf68569c450be7fc3f7336671af7ddb4657166"}, + {file = "greenlet-3.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:fa24255ae3c0ab67e613556375a4341af04a084bd58764731972bcbc8baeba36"}, + {file = "greenlet-3.0.1-cp37-cp37m-win32.whl", hash = "sha256:b489c36d1327868d207002391f662a1d163bdc8daf10ab2e5f6e41b9b96de3b1"}, + {file = "greenlet-3.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:f33f3258aae89da191c6ebaa3bc517c6c4cbc9b9f689e5d8452f7aedbb913fa8"}, + {file = "greenlet-3.0.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:d2905ce1df400360463c772b55d8e2518d0e488a87cdea13dd2c71dcb2a1fa16"}, + {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a02d259510b3630f330c86557331a3b0e0c79dac3d166e449a39363beaae174"}, + {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:55d62807f1c5a1682075c62436702aaba941daa316e9161e4b6ccebbbf38bda3"}, + {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3fcc780ae8edbb1d050d920ab44790201f027d59fdbd21362340a85c79066a74"}, + {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4eddd98afc726f8aee1948858aed9e6feeb1758889dfd869072d4465973f6bfd"}, + {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eabe7090db68c981fca689299c2d116400b553f4b713266b130cfc9e2aa9c5a9"}, + {file = "greenlet-3.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f2f6d303f3dee132b322a14cd8765287b8f86cdc10d2cb6a6fae234ea488888e"}, + {file = "greenlet-3.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d923ff276f1c1f9680d32832f8d6c040fe9306cbfb5d161b0911e9634be9ef0a"}, + {file = "greenlet-3.0.1-cp38-cp38-win32.whl", hash = "sha256:0b6f9f8ca7093fd4433472fd99b5650f8a26dcd8ba410e14094c1e44cd3ceddd"}, + {file = "greenlet-3.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:990066bff27c4fcf3b69382b86f4c99b3652bab2a7e685d968cd4d0cfc6f67c6"}, + {file = "greenlet-3.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ce85c43ae54845272f6f9cd8320d034d7a946e9773c693b27d620edec825e376"}, + {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89ee2e967bd7ff85d84a2de09df10e021c9b38c7d91dead95b406ed6350c6997"}, + {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:87c8ceb0cf8a5a51b8008b643844b7f4a8264a2c13fcbcd8a8316161725383fe"}, + {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d6a8c9d4f8692917a3dc7eb25a6fb337bff86909febe2f793ec1928cd97bedfc"}, + {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fbc5b8f3dfe24784cee8ce0be3da2d8a79e46a276593db6868382d9c50d97b1"}, + {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85d2b77e7c9382f004b41d9c72c85537fac834fb141b0296942d52bf03fe4a3d"}, + {file = "greenlet-3.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:696d8e7d82398e810f2b3622b24e87906763b6ebfd90e361e88eb85b0e554dc8"}, + {file = "greenlet-3.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:329c5a2e5a0ee942f2992c5e3ff40be03e75f745f48847f118a3cfece7a28546"}, + {file = "greenlet-3.0.1-cp39-cp39-win32.whl", hash = "sha256:cf868e08690cb89360eebc73ba4be7fb461cfbc6168dd88e2fbbe6f31812cd57"}, + {file = "greenlet-3.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:ac4a39d1abae48184d420aa8e5e63efd1b75c8444dd95daa3e03f6c6310e9619"}, + {file = "greenlet-3.0.1.tar.gz", hash = "sha256:816bd9488a94cba78d93e1abb58000e8266fa9cc2aa9ccdd6eb0696acb24005b"}, +] + +[package.extras] +docs = ["Sphinx"] +test = ["objgraph", "psutil"] + [[package]] name = "idna" version = "3.4" @@ -880,6 +1003,17 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + [[package]] name = "json5" version = "0.9.14" @@ -894,6 +1028,20 @@ files = [ [package.extras] dev = ["hypothesis"] +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] + +[package.dependencies] +jsonpointer = ">=1.9" + [[package]] name = "jsonpointer" version = "2.4" @@ -1153,6 +1301,61 @@ six = ">=1.10" tqdm = "*" urllib3 = "*" +[[package]] +name = "langchain" +version = "0.0.324" +description = "Building applications with LLMs through composability" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "langchain-0.0.324-py3-none-any.whl", hash = "sha256:9be84d14e264567d52b93d0d2ba1e8cbf38c6e50a3914be02dbd9ea0fabaafd9"}, + {file = "langchain-0.0.324.tar.gz", hash = "sha256:d8dc589aa57699d51eeef8ce0507cd3faac4465ad0ff08dfb0a19e5661c3af44"}, +] + +[package.dependencies] +aiohttp = ">=3.8.3,<4.0.0" +anyio = "<4.0" +async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""} +dataclasses-json = ">=0.5.7,<0.7" +jsonpatch = ">=1.33,<2.0" +langsmith = ">=0.0.52,<0.1.0" +numpy = ">=1,<2" +pydantic = ">=1,<3" +PyYAML = ">=5.3" +requests = ">=2,<3" +SQLAlchemy = ">=1.4,<3" +tenacity = ">=8.1.0,<9.0.0" + +[package.extras] +all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.10.1,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] +azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (>=0,<1)"] +clarifai = ["clarifai (>=9.1.0)"] +cli = ["typer (>=0.9.0,<0.10.0)"] +cohere = ["cohere (>=4,<5)"] +docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] +embeddings = ["sentence-transformers (>=2,<3)"] +extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "amazon-textract-caller (<2)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "dashvector (>=1.0.1,<2.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (>=0,<1)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] +javascript = ["esprima (>=4.0.1,<5.0.0)"] +llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] +openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.6.0)"] +qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"] +text-helpers = ["chardet (>=5.1.0,<6.0.0)"] + +[[package]] +name = "langsmith" +version = "0.0.52" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "langsmith-0.0.52-py3-none-any.whl", hash = "sha256:d02a0ade5a53b36143084e57003ed38ccbdf5fc15a5a0eb14f8989ceaee0b807"}, + {file = "langsmith-0.0.52.tar.gz", hash = "sha256:1dc29082d257deea1859cb22c53d9481ca5c4a37f3af40c0f9d300fb8adc91db"}, +] + +[package.dependencies] +pydantic = ">=1,<3" +requests = ">=2,<3" + [[package]] name = "markupsafe" version = "2.1.3" @@ -1222,6 +1425,26 @@ files = [ {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, ] +[[package]] +name = "marshmallow" +version = "3.20.1" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +optional = false +python-versions = ">=3.8" +files = [ + {file = "marshmallow-3.20.1-py3-none-any.whl", hash = "sha256:684939db93e80ad3561392f47be0230743131560a41c5110684c16e21ade0a5c"}, + {file = "marshmallow-3.20.1.tar.gz", hash = "sha256:5d2371bbe42000f2b3fb5eaa065224df7d8f8597bc19a1bbfa5bfe7fba8da889"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)", "pytest", "pytz", "simplejson", "tox"] +docs = ["alabaster (==0.7.13)", "autodocsumm (==0.2.11)", "sphinx (==7.0.1)", "sphinx-issues (==3.0.1)", "sphinx-version-warning (==1.1.2)"] +lint = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)"] +tests = ["pytest", "pytz", "simplejson"] + [[package]] name = "matplotlib-inline" version = "0.1.6" @@ -1330,6 +1553,17 @@ files = [ {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, ] +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + [[package]] name = "nbclassic" version = "1.0.0" @@ -1507,6 +1741,47 @@ jupyter-server = ">=1.8,<3" [package.extras] test = ["pytest", "pytest-console-scripts", "pytest-jupyter", "pytest-tornasync"] +[[package]] +name = "numpy" +version = "1.26.1" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = "<3.13,>=3.9" +files = [ + {file = "numpy-1.26.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82e871307a6331b5f09efda3c22e03c095d957f04bf6bc1804f30048d0e5e7af"}, + {file = "numpy-1.26.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cdd9ec98f0063d93baeb01aad472a1a0840dee302842a2746a7a8e92968f9575"}, + {file = "numpy-1.26.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d78f269e0c4fd365fc2992c00353e4530d274ba68f15e968d8bc3c69ce5f5244"}, + {file = "numpy-1.26.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ab9163ca8aeb7fd32fe93866490654d2f7dda4e61bc6297bf72ce07fdc02f67"}, + {file = "numpy-1.26.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:78ca54b2f9daffa5f323f34cdf21e1d9779a54073f0018a3094ab907938331a2"}, + {file = "numpy-1.26.1-cp310-cp310-win32.whl", hash = "sha256:d1cfc92db6af1fd37a7bb58e55c8383b4aa1ba23d012bdbba26b4bcca45ac297"}, + {file = "numpy-1.26.1-cp310-cp310-win_amd64.whl", hash = "sha256:d2984cb6caaf05294b8466966627e80bf6c7afd273279077679cb010acb0e5ab"}, + {file = "numpy-1.26.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cd7837b2b734ca72959a1caf3309457a318c934abef7a43a14bb984e574bbb9a"}, + {file = "numpy-1.26.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1c59c046c31a43310ad0199d6299e59f57a289e22f0f36951ced1c9eac3665b9"}, + {file = "numpy-1.26.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d58e8c51a7cf43090d124d5073bc29ab2755822181fcad978b12e144e5e5a4b3"}, + {file = "numpy-1.26.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6081aed64714a18c72b168a9276095ef9155dd7888b9e74b5987808f0dd0a974"}, + {file = "numpy-1.26.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:97e5d6a9f0702c2863aaabf19f0d1b6c2628fbe476438ce0b5ce06e83085064c"}, + {file = "numpy-1.26.1-cp311-cp311-win32.whl", hash = "sha256:b9d45d1dbb9de84894cc50efece5b09939752a2d75aab3a8b0cef6f3a35ecd6b"}, + {file = "numpy-1.26.1-cp311-cp311-win_amd64.whl", hash = "sha256:3649d566e2fc067597125428db15d60eb42a4e0897fc48d28cb75dc2e0454e53"}, + {file = "numpy-1.26.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:1d1bd82d539607951cac963388534da3b7ea0e18b149a53cf883d8f699178c0f"}, + {file = "numpy-1.26.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:afd5ced4e5a96dac6725daeb5242a35494243f2239244fad10a90ce58b071d24"}, + {file = "numpy-1.26.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a03fb25610ef560a6201ff06df4f8105292ba56e7cdd196ea350d123fc32e24e"}, + {file = "numpy-1.26.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dcfaf015b79d1f9f9c9fd0731a907407dc3e45769262d657d754c3a028586124"}, + {file = "numpy-1.26.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e509cbc488c735b43b5ffea175235cec24bbc57b227ef1acc691725beb230d1c"}, + {file = "numpy-1.26.1-cp312-cp312-win32.whl", hash = "sha256:af22f3d8e228d84d1c0c44c1fbdeb80f97a15a0abe4f080960393a00db733b66"}, + {file = "numpy-1.26.1-cp312-cp312-win_amd64.whl", hash = "sha256:9f42284ebf91bdf32fafac29d29d4c07e5e9d1af862ea73686581773ef9e73a7"}, + {file = "numpy-1.26.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bb894accfd16b867d8643fc2ba6c8617c78ba2828051e9a69511644ce86ce83e"}, + {file = "numpy-1.26.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e44ccb93f30c75dfc0c3aa3ce38f33486a75ec9abadabd4e59f114994a9c4617"}, + {file = "numpy-1.26.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9696aa2e35cc41e398a6d42d147cf326f8f9d81befcb399bc1ed7ffea339b64e"}, + {file = "numpy-1.26.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5b411040beead47a228bde3b2241100454a6abde9df139ed087bd73fc0a4908"}, + {file = "numpy-1.26.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1e11668d6f756ca5ef534b5be8653d16c5352cbb210a5c2a79ff288e937010d5"}, + {file = "numpy-1.26.1-cp39-cp39-win32.whl", hash = "sha256:d1d2c6b7dd618c41e202c59c1413ef9b2c8e8a15f5039e344af64195459e3104"}, + {file = "numpy-1.26.1-cp39-cp39-win_amd64.whl", hash = "sha256:59227c981d43425ca5e5c01094d59eb14e8772ce6975d4b2fc1e106a833d5ae2"}, + {file = "numpy-1.26.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:06934e1a22c54636a059215d6da99e23286424f316fddd979f5071093b648668"}, + {file = "numpy-1.26.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76ff661a867d9272cd2a99eed002470f46dbe0943a5ffd140f49be84f68ffc42"}, + {file = "numpy-1.26.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:6965888d65d2848e8768824ca8288db0a81263c1efccec881cb35a0d805fcd2f"}, + {file = "numpy-1.26.1.tar.gz", hash = "sha256:c8c6c72d4a9f831f328efb1312642a1cafafaa88981d9ab76368d50d07d93cbe"}, +] + [[package]] name = "openai" version = "0.28.1" @@ -2214,6 +2489,23 @@ files = [ {file = "rpds_py-0.10.6.tar.gz", hash = "sha256:4ce5a708d65a8dbf3748d2474b580d606b1b9f91b5c6ab2a316e0b0cf7a4ba50"}, ] +[[package]] +name = "s3transfer" +version = "0.7.0" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">= 3.7" +files = [ + {file = "s3transfer-0.7.0-py3-none-any.whl", hash = "sha256:10d6923c6359175f264811ef4bf6161a3156ce8e350e705396a7557d6293c33a"}, + {file = "s3transfer-0.7.0.tar.gz", hash = "sha256:fd3889a66f5fe17299fe75b82eae6cf722554edca744ca5d5fe308b104883d2e"}, +] + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + [[package]] name = "send2trash" version = "1.8.2" @@ -2263,6 +2555,92 @@ files = [ {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, ] +[[package]] +name = "sqlalchemy" +version = "2.0.22" +description = "Database Abstraction Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f146c61ae128ab43ea3a0955de1af7e1633942c2b2b4985ac51cc292daf33222"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:875de9414393e778b655a3d97d60465eb3fae7c919e88b70cc10b40b9f56042d"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13790cb42f917c45c9c850b39b9941539ca8ee7917dacf099cc0b569f3d40da7"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e04ab55cf49daf1aeb8c622c54d23fa4bec91cb051a43cc24351ba97e1dd09f5"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a42c9fa3abcda0dcfad053e49c4f752eef71ecd8c155221e18b99d4224621176"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:14cd3bcbb853379fef2cd01e7c64a5d6f1d005406d877ed9509afb7a05ff40a5"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-win32.whl", hash = "sha256:d143c5a9dada696bcfdb96ba2de4a47d5a89168e71d05a076e88a01386872f97"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-win_amd64.whl", hash = "sha256:ccd87c25e4c8559e1b918d46b4fa90b37f459c9b4566f1dfbce0eb8122571547"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4f6ff392b27a743c1ad346d215655503cec64405d3b694228b3454878bf21590"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f776c2c30f0e5f4db45c3ee11a5f2a8d9de68e81eb73ec4237de1e32e04ae81c"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8f1792d20d2f4e875ce7a113f43c3561ad12b34ff796b84002a256f37ce9437"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d80eeb5189d7d4b1af519fc3f148fe7521b9dfce8f4d6a0820e8f5769b005051"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:69fd9e41cf9368afa034e1c81f3570afb96f30fcd2eb1ef29cb4d9371c6eece2"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:54bcceaf4eebef07dadfde424f5c26b491e4a64e61761dea9459103ecd6ccc95"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-win32.whl", hash = "sha256:7ee7ccf47aa503033b6afd57efbac6b9e05180f492aeed9fcf70752556f95624"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-win_amd64.whl", hash = "sha256:b560f075c151900587ade06706b0c51d04b3277c111151997ea0813455378ae0"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2c9bac865ee06d27a1533471405ad240a6f5d83195eca481f9fc4a71d8b87df8"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:625b72d77ac8ac23da3b1622e2da88c4aedaee14df47c8432bf8f6495e655de2"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b39a6e21110204a8c08d40ff56a73ba542ec60bab701c36ce721e7990df49fb9"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53a766cb0b468223cafdf63e2d37f14a4757476157927b09300c8c5832d88560"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0e1ce8ebd2e040357dde01a3fb7d30d9b5736b3e54a94002641dfd0aa12ae6ce"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:505f503763a767556fa4deae5194b2be056b64ecca72ac65224381a0acab7ebe"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-win32.whl", hash = "sha256:154a32f3c7b00de3d090bc60ec8006a78149e221f1182e3edcf0376016be9396"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-win_amd64.whl", hash = "sha256:129415f89744b05741c6f0b04a84525f37fbabe5dc3774f7edf100e7458c48cd"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3940677d341f2b685a999bffe7078697b5848a40b5f6952794ffcf3af150c301"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55914d45a631b81a8a2cb1a54f03eea265cf1783241ac55396ec6d735be14883"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2096d6b018d242a2bcc9e451618166f860bb0304f590d205173d317b69986c95"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:19c6986cf2fb4bc8e0e846f97f4135a8e753b57d2aaaa87c50f9acbe606bd1db"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6ac28bd6888fe3c81fbe97584eb0b96804bd7032d6100b9701255d9441373ec1"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-win32.whl", hash = "sha256:cb9a758ad973e795267da334a92dd82bb7555cb36a0960dcabcf724d26299db8"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-win_amd64.whl", hash = "sha256:40b1206a0d923e73aa54f0a6bd61419a96b914f1cd19900b6c8226899d9742ad"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3aa1472bf44f61dd27987cd051f1c893b7d3b17238bff8c23fceaef4f1133868"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:56a7e2bb639df9263bf6418231bc2a92a773f57886d371ddb7a869a24919face"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ccca778c0737a773a1ad86b68bda52a71ad5950b25e120b6eb1330f0df54c3d0"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c6c3e9350f9fb16de5b5e5fbf17b578811a52d71bb784cc5ff71acb7de2a7f9"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:564e9f9e4e6466273dbfab0e0a2e5fe819eec480c57b53a2cdee8e4fdae3ad5f"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:af66001d7b76a3fab0d5e4c1ec9339ac45748bc4a399cbc2baa48c1980d3c1f4"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-win32.whl", hash = "sha256:9e55dff5ec115316dd7a083cdc1a52de63693695aecf72bc53a8e1468ce429e5"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-win_amd64.whl", hash = "sha256:4e869a8ff7ee7a833b74868a0887e8462445ec462432d8cbeff5e85f475186da"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9886a72c8e6371280cb247c5d32c9c8fa141dc560124348762db8a8b236f8692"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a571bc8ac092a3175a1d994794a8e7a1f2f651e7c744de24a19b4f740fe95034"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8db5ba8b7da759b727faebc4289a9e6a51edadc7fc32207a30f7c6203a181592"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b0b3f2686c3f162123adba3cb8b626ed7e9b8433ab528e36ed270b4f70d1cdb"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0c1fea8c0abcb070ffe15311853abfda4e55bf7dc1d4889497b3403629f3bf00"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4bb062784f37b2d75fd9b074c8ec360ad5df71f933f927e9e95c50eb8e05323c"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-win32.whl", hash = "sha256:58a3aba1bfb32ae7af68da3f277ed91d9f57620cf7ce651db96636790a78b736"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-win_amd64.whl", hash = "sha256:92e512a6af769e4725fa5b25981ba790335d42c5977e94ded07db7d641490a85"}, + {file = "SQLAlchemy-2.0.22-py3-none-any.whl", hash = "sha256:3076740335e4aaadd7deb3fe6dcb96b3015f1613bd190a4e1634e1b99b02ec86"}, + {file = "SQLAlchemy-2.0.22.tar.gz", hash = "sha256:5434cc601aa17570d79e5377f5fd45ff92f9379e2abed0be5e8c2fba8d353d2b"}, +] + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +typing-extensions = ">=4.2.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx-oracle (>=7)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3-binary"] + [[package]] name = "stack-data" version = "0.6.3" @@ -2282,6 +2660,20 @@ pure-eval = "*" [package.extras] tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] +[[package]] +name = "tenacity" +version = "8.2.3" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, + {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, +] + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + [[package]] name = "terminado" version = "0.17.1" @@ -2419,6 +2811,21 @@ files = [ {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." +optional = false +python-versions = "*" +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + [[package]] name = "uri-template" version = "1.3.0" @@ -2593,4 +3000,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "3.10.12" -content-hash = "28451b147686a3f8c798377e6e179bfe265074e5b4ff0e57d69d9ea3ec818464" +content-hash = "e66f4feea268c70e9e5d76167e67c932142d0ce606b5c661d0579d5d354280a4" diff --git a/apps/openchallenges/notebook/pyproject.toml b/apps/openchallenges/notebook/pyproject.toml index 49a8783be2..6c9fe74a8c 100644 --- a/apps/openchallenges/notebook/pyproject.toml +++ b/apps/openchallenges/notebook/pyproject.toml @@ -12,6 +12,8 @@ kaggle = "1.5.12" openchallenges-client = {path = "../../../libs/openchallenges/api-client-python"} openai = "0.28.1" python-dotenv = "1.0.0" +boto3 = "1.28.72" +langchain = "0.0.324" [tool.poetry.group.dev.dependencies] jupyterlab = "3.5.2" From d02bc5d33b7ceaa73df4354a9be812c44ede1e67 Mon Sep 17 00:00:00 2001 From: Thomas Schaffter Date: Thu, 26 Oct 2023 23:15:30 +0000 Subject: [PATCH 2/8] Can query Bedrock --- .../notebooks/openai-challenge-headline.ipynb | 104 +++- apps/openchallenges/notebook/poetry.lock | 571 +++++++++++++++++- apps/openchallenges/notebook/pyproject.toml | 2 + 3 files changed, 671 insertions(+), 6 deletions(-) diff --git a/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb b/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb index dddc61a970..4f1f99ccf6 100644 --- a/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb +++ b/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb @@ -271,10 +271,22 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 27, "id": "20da8b0e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Create new client\n", + " Using region: us-east-1\n", + " Using profile: cnb\n", + "boto3 Bedrock client successfully created!\n", + "bedrock-runtime(https://bedrock-runtime.us-east-1.amazonaws.com)\n" + ] + } + ], "source": [ "import json\n", "import os\n", @@ -285,16 +297,98 @@ "\n", "module_path = \"..\"\n", "sys.path.append(os.path.abspath(module_path))\n", - "from utils import bedrock, print_ww" + "from utils import bedrock, print_ww\n", + "\n", + "os.environ[\"AWS_DEFAULT_REGION\"] = \"us-east-1\"\n", + "os.environ[\"AWS_PROFILE\"] = \"cnb\"\n", + "\n", + "boto3_bedrock = bedrock.get_bedrock_client(\n", + " assumed_role=os.environ.get(\"BEDROCK_ASSUME_ROLE\", None),\n", + " region=os.environ.get(\"AWS_DEFAULT_REGION\", None)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0b1ef15b", + "metadata": {}, + "source": [ + "### Configure base model options" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "5bbcd90f", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from langchain.llms.bedrock import Bedrock\n", + "\n", + "inference_modifier = {'max_tokens_to_sample':6000, \n", + " \"temperature\":0.6,\n", + " \"top_k\":250,\n", + " \"top_p\":1,\n", + " \"stop_sequences\": [\"\\n\\nHuman\"]\n", + " }\n", + "\n", + "textgen_llm = Bedrock(model_id = \"anthropic.claude-v2\",\n", + " client = boto3_bedrock, \n", + " model_kwargs = inference_modifier \n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "id": "83d704d1", + "metadata": {}, + "source": [ + "Call API and output results" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "1f4b90df", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of text tokens: 7\n" + ] + } + ], + "source": [ + "prompt = 'How much is 2 plus 2?'\n", + "print(f'Number of text tokens: {textgen_llm.get_num_tokens(prompt)}')" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "b0c4c192", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of output tokens: 5\n", + " 2 + 2 = 4\n" + ] + } + ], + "source": [ + "response = textgen_llm(prompt)\n", + "\n", + "# summary = response[response.index('\\n')+1:]\n", + "summary = response\n", + "\n", + "print(f'Number of output tokens: {textgen_llm.get_num_tokens(summary)}')\n", + "print(summary)" + ] } ], "metadata": { diff --git a/apps/openchallenges/notebook/poetry.lock b/apps/openchallenges/notebook/poetry.lock index 80ab2c34f7..85c84f1c62 100644 --- a/apps/openchallenges/notebook/poetry.lock +++ b/apps/openchallenges/notebook/poetry.lock @@ -134,6 +134,25 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "anthropic" +version = "0.5.0" +description = "Client library for the anthropic API" +optional = false +python-versions = ">=3.7" +files = [ + {file = "anthropic-0.5.0-py3-none-any.whl", hash = "sha256:61a774b57252831bff80636f351a74ef0e8a727a70a46dcddab9a62e3b00ecb2"}, + {file = "anthropic-0.5.0.tar.gz", hash = "sha256:b7961cf3ff930698d6c1e11f3ad5d193c0623d3b4c607fbf5f23bcf9a17fa6a6"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<4" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +pydantic = ">=1.9.0,<3" +tokenizers = ">=0.13.0" +typing-extensions = ">=4.5,<5" + [[package]] name = "anyio" version = "3.7.1" @@ -655,6 +674,17 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "distro" +version = "1.8.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.8.0-py3-none-any.whl", hash = "sha256:99522ca3e365cac527b44bde033f64c6945d90eb9f769703caaec52b09bbd3ff"}, + {file = "distro-1.8.0.tar.gz", hash = "sha256:02e111d1dc6a50abb8eed6bf31c3e48ed8b0830d1ea2a1b78c61765c2513fdd8"}, +] + [[package]] name = "entrypoints" version = "0.4" @@ -708,6 +738,22 @@ files = [ [package.extras] devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"] +[[package]] +name = "filelock" +version = "3.12.4" +description = "A platform independent file lock." +optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.12.4-py3-none-any.whl", hash = "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4"}, + {file = "filelock-3.12.4.tar.gz", hash = "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd"}, +] + +[package.extras] +docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"] +typing = ["typing-extensions (>=4.7.1)"] + [[package]] name = "fqdn" version = "1.5.1" @@ -789,6 +835,41 @@ files = [ {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, ] +[[package]] +name = "fsspec" +version = "2023.10.0" +description = "File-system specification" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fsspec-2023.10.0-py3-none-any.whl", hash = "sha256:346a8f024efeb749d2a5fca7ba8854474b1ff9af7c3faaf636a4548781136529"}, + {file = "fsspec-2023.10.0.tar.gz", hash = "sha256:330c66757591df346ad3091a53bd907e15348c2ba17d63fd54f5c39c4457d2a5"}, +] + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +arrow = ["pyarrow (>=1)"] +dask = ["dask", "distributed"] +devel = ["pytest", "pytest-cov"] +dropbox = ["dropbox", "dropboxdrivefs", "requests"] +full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] +fuse = ["fusepy"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +gui = ["panel"] +hdfs = ["pyarrow (>=1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] +libarchive = ["libarchive-c"] +oci = ["ocifs"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] +tqdm = ["tqdm"] + [[package]] name = "greenlet" version = "3.0.1" @@ -859,6 +940,94 @@ files = [ docs = ["Sphinx"] test = ["objgraph", "psutil"] +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "httpcore" +version = "0.18.0" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpcore-0.18.0-py3-none-any.whl", hash = "sha256:adc5398ee0a476567bf87467063ee63584a8bce86078bf748e48754f60202ced"}, + {file = "httpcore-0.18.0.tar.gz", hash = "sha256:13b5e5cd1dca1a6636a6aaea212b19f4f85cd88c366a2b82304181b769aab3c9"}, +] + +[package.dependencies] +anyio = ">=3.0,<5.0" +certifi = "*" +h11 = ">=0.13,<0.15" +sniffio = "==1.*" + +[package.extras] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] + +[[package]] +name = "httpx" +version = "0.25.0" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-0.25.0-py3-none-any.whl", hash = "sha256:181ea7f8ba3a82578be86ef4171554dd45fec26a02556a744db029a0a27b7100"}, + {file = "httpx-0.25.0.tar.gz", hash = "sha256:47ecda285389cb32bb2691cc6e069e3ab0205956f681c5b2ad2325719751d875"}, +] + +[package.dependencies] +certifi = "*" +httpcore = ">=0.18.0,<0.19.0" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] + +[[package]] +name = "huggingface-hub" +version = "0.17.3" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "huggingface_hub-0.17.3-py3-none-any.whl", hash = "sha256:545eb3665f6ac587add946e73984148f2ea5c7877eac2e845549730570c1933a"}, + {file = "huggingface_hub-0.17.3.tar.gz", hash = "sha256:40439632b211311f788964602bf8b0d9d6b7a2314fba4e8d67b2ce3ecea0e3fd"}, +] + +[package.dependencies] +filelock = "*" +fsspec = "*" +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +docs = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "hf-doc-builder", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)", "watchdog"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +inference = ["aiohttp", "pydantic (<2.0)"] +quality = ["black (==23.7)", "mypy (==1.5.1)", "ruff (>=0.0.241)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["torch"] +typing = ["pydantic (<2.0)", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] + [[package]] name = "idna" version = "3.4" @@ -2335,6 +2504,103 @@ files = [ attrs = ">=22.2.0" rpds-py = ">=0.7.0" +[[package]] +name = "regex" +version = "2023.10.3" +description = "Alternative regular expression module, to replace re." +optional = false +python-versions = ">=3.7" +files = [ + {file = "regex-2023.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c34d4f73ea738223a094d8e0ffd6d2c1a1b4c175da34d6b0de3d8d69bee6bcc"}, + {file = "regex-2023.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8f4e49fc3ce020f65411432183e6775f24e02dff617281094ba6ab079ef0915"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cd1bccf99d3ef1ab6ba835308ad85be040e6a11b0977ef7ea8c8005f01a3c29"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:81dce2ddc9f6e8f543d94b05d56e70d03a0774d32f6cca53e978dc01e4fc75b8"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c6b4d23c04831e3ab61717a707a5d763b300213db49ca680edf8bf13ab5d91b"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c15ad0aee158a15e17e0495e1e18741573d04eb6da06d8b84af726cfc1ed02ee"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6239d4e2e0b52c8bd38c51b760cd870069f0bdf99700a62cd509d7a031749a55"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4a8bf76e3182797c6b1afa5b822d1d5802ff30284abe4599e1247be4fd6b03be"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9c727bbcf0065cbb20f39d2b4f932f8fa1631c3e01fcedc979bd4f51fe051c5"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3ccf2716add72f80714b9a63899b67fa711b654be3fcdd34fa391d2d274ce767"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:107ac60d1bfdc3edb53be75e2a52aff7481b92817cfdddd9b4519ccf0e54a6ff"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:00ba3c9818e33f1fa974693fb55d24cdc8ebafcb2e4207680669d8f8d7cca79a"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f0a47efb1dbef13af9c9a54a94a0b814902e547b7f21acb29434504d18f36e3a"}, + {file = "regex-2023.10.3-cp310-cp310-win32.whl", hash = "sha256:36362386b813fa6c9146da6149a001b7bd063dabc4d49522a1f7aa65b725c7ec"}, + {file = "regex-2023.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:c65a3b5330b54103e7d21cac3f6bf3900d46f6d50138d73343d9e5b2900b2353"}, + {file = "regex-2023.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90a79bce019c442604662d17bf69df99090e24cdc6ad95b18b6725c2988a490e"}, + {file = "regex-2023.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c7964c2183c3e6cce3f497e3a9f49d182e969f2dc3aeeadfa18945ff7bdd7051"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ef80829117a8061f974b2fda8ec799717242353bff55f8a29411794d635d964"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5addc9d0209a9afca5fc070f93b726bf7003bd63a427f65ef797a931782e7edc"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c148bec483cc4b421562b4bcedb8e28a3b84fcc8f0aa4418e10898f3c2c0eb9b"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d1f21af4c1539051049796a0f50aa342f9a27cde57318f2fc41ed50b0dbc4ac"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b9ac09853b2a3e0d0082104036579809679e7715671cfbf89d83c1cb2a30f58"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ebedc192abbc7fd13c5ee800e83a6df252bec691eb2c4bedc9f8b2e2903f5e2a"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d8a993c0a0ffd5f2d3bda23d0cd75e7086736f8f8268de8a82fbc4bd0ac6791e"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:be6b7b8d42d3090b6c80793524fa66c57ad7ee3fe9722b258aec6d0672543fd0"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4023e2efc35a30e66e938de5aef42b520c20e7eda7bb5fb12c35e5d09a4c43f6"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0d47840dc05e0ba04fe2e26f15126de7c755496d5a8aae4a08bda4dd8d646c54"}, + {file = "regex-2023.10.3-cp311-cp311-win32.whl", hash = "sha256:9145f092b5d1977ec8c0ab46e7b3381b2fd069957b9862a43bd383e5c01d18c2"}, + {file = "regex-2023.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:b6104f9a46bd8743e4f738afef69b153c4b8b592d35ae46db07fc28ae3d5fb7c"}, + {file = "regex-2023.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bff507ae210371d4b1fe316d03433ac099f184d570a1a611e541923f78f05037"}, + {file = "regex-2023.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be5e22bbb67924dea15039c3282fa4cc6cdfbe0cbbd1c0515f9223186fc2ec5f"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a992f702c9be9c72fa46f01ca6e18d131906a7180950958f766c2aa294d4b41"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7434a61b158be563c1362d9071358f8ab91b8d928728cd2882af060481244c9e"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2169b2dcabf4e608416f7f9468737583ce5f0a6e8677c4efbf795ce81109d7c"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9e908ef5889cda4de038892b9accc36d33d72fb3e12c747e2799a0e806ec841"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12bd4bc2c632742c7ce20db48e0d99afdc05e03f0b4c1af90542e05b809a03d9"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bc72c231f5449d86d6c7d9cc7cd819b6eb30134bb770b8cfdc0765e48ef9c420"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bce8814b076f0ce5766dc87d5a056b0e9437b8e0cd351b9a6c4e1134a7dfbda9"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:ba7cd6dc4d585ea544c1412019921570ebd8a597fabf475acc4528210d7c4a6f"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b0c7d2f698e83f15228ba41c135501cfe7d5740181d5903e250e47f617eb4292"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5a8f91c64f390ecee09ff793319f30a0f32492e99f5dc1c72bc361f23ccd0a9a"}, + {file = "regex-2023.10.3-cp312-cp312-win32.whl", hash = "sha256:ad08a69728ff3c79866d729b095872afe1e0557251da4abb2c5faff15a91d19a"}, + {file = "regex-2023.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:39cdf8d141d6d44e8d5a12a8569d5a227f645c87df4f92179bd06e2e2705e76b"}, + {file = "regex-2023.10.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4a3ee019a9befe84fa3e917a2dd378807e423d013377a884c1970a3c2792d293"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76066d7ff61ba6bf3cb5efe2428fc82aac91802844c022d849a1f0f53820502d"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe50b61bab1b1ec260fa7cd91106fa9fece57e6beba05630afe27c71259c59b"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fd88f373cb71e6b59b7fa597e47e518282455c2734fd4306a05ca219a1991b0"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3ab05a182c7937fb374f7e946f04fb23a0c0699c0450e9fb02ef567412d2fa3"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dac37cf08fcf2094159922edc7a2784cfcc5c70f8354469f79ed085f0328ebdf"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e54ddd0bb8fb626aa1f9ba7b36629564544954fff9669b15da3610c22b9a0991"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3367007ad1951fde612bf65b0dffc8fd681a4ab98ac86957d16491400d661302"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:16f8740eb6dbacc7113e3097b0a36065a02e37b47c936b551805d40340fb9971"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:f4f2ca6df64cbdd27f27b34f35adb640b5d2d77264228554e68deda54456eb11"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:39807cbcbe406efca2a233884e169d056c35aa7e9f343d4e78665246a332f597"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7eece6fbd3eae4a92d7c748ae825cbc1ee41a89bb1c3db05b5578ed3cfcfd7cb"}, + {file = "regex-2023.10.3-cp37-cp37m-win32.whl", hash = "sha256:ce615c92d90df8373d9e13acddd154152645c0dc060871abf6bd43809673d20a"}, + {file = "regex-2023.10.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0f649fa32fe734c4abdfd4edbb8381c74abf5f34bc0b3271ce687b23729299ed"}, + {file = "regex-2023.10.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9b98b7681a9437262947f41c7fac567c7e1f6eddd94b0483596d320092004533"}, + {file = "regex-2023.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:91dc1d531f80c862441d7b66c4505cd6ea9d312f01fb2f4654f40c6fdf5cc37a"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82fcc1f1cc3ff1ab8a57ba619b149b907072e750815c5ba63e7aa2e1163384a4"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7979b834ec7a33aafae34a90aad9f914c41fd6eaa8474e66953f3f6f7cbd4368"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef71561f82a89af6cfcbee47f0fabfdb6e63788a9258e913955d89fdd96902ab"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd829712de97753367153ed84f2de752b86cd1f7a88b55a3a775eb52eafe8a94"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00e871d83a45eee2f8688d7e6849609c2ca2a04a6d48fba3dff4deef35d14f07"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:706e7b739fdd17cb89e1fbf712d9dc21311fc2333f6d435eac2d4ee81985098c"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cc3f1c053b73f20c7ad88b0d1d23be7e7b3901229ce89f5000a8399746a6e039"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6f85739e80d13644b981a88f529d79c5bdf646b460ba190bffcaf6d57b2a9863"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:741ba2f511cc9626b7561a440f87d658aabb3d6b744a86a3c025f866b4d19e7f"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e77c90ab5997e85901da85131fd36acd0ed2221368199b65f0d11bca44549711"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:979c24cbefaf2420c4e377ecd1f165ea08cc3d1fbb44bdc51bccbbf7c66a2cb4"}, + {file = "regex-2023.10.3-cp38-cp38-win32.whl", hash = "sha256:58837f9d221744d4c92d2cf7201c6acd19623b50c643b56992cbd2b745485d3d"}, + {file = "regex-2023.10.3-cp38-cp38-win_amd64.whl", hash = "sha256:c55853684fe08d4897c37dfc5faeff70607a5f1806c8be148f1695be4a63414b"}, + {file = "regex-2023.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2c54e23836650bdf2c18222c87f6f840d4943944146ca479858404fedeb9f9af"}, + {file = "regex-2023.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69c0771ca5653c7d4b65203cbfc5e66db9375f1078689459fe196fe08b7b4930"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ac965a998e1388e6ff2e9781f499ad1eaa41e962a40d11c7823c9952c77123e"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c0e8fae5b27caa34177bdfa5a960c46ff2f78ee2d45c6db15ae3f64ecadde14"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c56c3d47da04f921b73ff9415fbaa939f684d47293f071aa9cbb13c94afc17d"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ef1e014eed78ab650bef9a6a9cbe50b052c0aebe553fb2881e0453717573f52"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d29338556a59423d9ff7b6eb0cb89ead2b0875e08fe522f3e068b955c3e7b59b"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9c6d0ced3c06d0f183b73d3c5920727268d2201aa0fe6d55c60d68c792ff3588"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:994645a46c6a740ee8ce8df7911d4aee458d9b1bc5639bc968226763d07f00fa"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:66e2fe786ef28da2b28e222c89502b2af984858091675044d93cb50e6f46d7af"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:11175910f62b2b8c055f2b089e0fedd694fe2be3941b3e2633653bc51064c528"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:06e9abc0e4c9ab4779c74ad99c3fc10d3967d03114449acc2c2762ad4472b8ca"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fb02e4257376ae25c6dd95a5aec377f9b18c09be6ebdefa7ad209b9137b73d48"}, + {file = "regex-2023.10.3-cp39-cp39-win32.whl", hash = "sha256:3b2c3502603fab52d7619b882c25a6850b766ebd1b18de3df23b2f939360e1bd"}, + {file = "regex-2023.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:adbccd17dcaff65704c856bd29951c58a1bd4b2b0f8ad6b826dbd543fe740988"}, + {file = "regex-2023.10.3.tar.gz", hash = "sha256:3fef4f844d2290ee0ba57addcec17eec9e3df73f10a2748485dfd6a3a188cc0f"}, +] + [[package]] name = "requests" version = "2.31.0" @@ -2506,6 +2772,125 @@ botocore = ">=1.12.36,<2.0a.0" [package.extras] crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] +[[package]] +name = "safetensors" +version = "0.4.0" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "safetensors-0.4.0-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:2289ae6dbe6d027ecee016b28ced13a2e21a0b3a3a757a23033a2d1c0b1bad55"}, + {file = "safetensors-0.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bf6458959f310f551cbbeef2255527ade5f783f952738e73e4d0136198cc3bfe"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b6b60a58a8f7cc7aed3b5b73dce1f5259a53c83d9ba43a76a874e6ad868c1b4d"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:491b3477e4d0d4599bb75d79da4b75af2e6ed9b1f6ec2b715991f0bc927bf09a"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59d2e10b7e0cd18bb73ed7c17c624a5957b003b81345e18159591771c26ee428"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f667a4c12fb593f5f66ce966cb1b14a7148898b2b1a7f79e0761040ae1e3c51"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f9909512bcb6f712bdd04c296cdfb0d8ff73d258ffc5af884bb62ea02d221e0"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d33d29e846821f0e4f92614022949b09ccf063cb36fe2f9fe099cde1efbfbb87"}, + {file = "safetensors-0.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4d512525a8e05a045ce6698066ba0c5378c174a83e0b3720a8c7799dc1bb06f3"}, + {file = "safetensors-0.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0219cea445177f6ad1f9acd3a8d025440c8ff436d70a4a7c7ba9c36066aa9474"}, + {file = "safetensors-0.4.0-cp310-none-win32.whl", hash = "sha256:67ab171eeaad6972d3971c53d29d53353c67f6743284c6d637b59fa3e54c8a94"}, + {file = "safetensors-0.4.0-cp310-none-win_amd64.whl", hash = "sha256:7ffc736039f08a9ca1f09816a7481b8e4469c06e8f8a5ffa8cb67ddd79e6d77f"}, + {file = "safetensors-0.4.0-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:4fe9e3737b30de458225a23926219ca30b902ee779b6a3df96eaab2b6d625ec2"}, + {file = "safetensors-0.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e7916e814a90008de767b1c164a1d83803693c661ffe9af5a697b22e2752edb0"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbc4a4da01143472323c145f3c289e5f6fabde0ac0a3414dabf912a21692fff4"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a54c21654a47669b38e359e8f852af754b786c9da884bb61ad5e9af12bd71ccb"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:25cd407955bad5340ba17f9f8ac789a0d751601a311e2f7b2733f9384478c95e"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82e8fc4e3503cd738fd40718a430fe0e5ce6e7ff91a73d6ce628bbb89c41e8ce"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48b92059b1a4ad163024d4f526e0e73ebe2bb3ae70537e15e347820b4de5dc27"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5daa05058f7dce85b5f9f60c4eab483ed7859d63978f08a76e52e78859ff20ca"}, + {file = "safetensors-0.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a86565a5c112dd855909e20144947b4f53abb78c4de207f36ca71ee63ba5b90d"}, + {file = "safetensors-0.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38032078ed9fea52d06584e441bccc73fb475c4581600c6d6166de2fe2deb3d1"}, + {file = "safetensors-0.4.0-cp311-none-win32.whl", hash = "sha256:2f99d90c91b7c76b40a862acd9085bc77f7974a27dee7cfcebe46149af5a99a1"}, + {file = "safetensors-0.4.0-cp311-none-win_amd64.whl", hash = "sha256:74e2a448ffe19be188b457b130168190ee73b5a75e45ba96796320c1f5ae35d2"}, + {file = "safetensors-0.4.0-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:1e2f9c69b41d03b4826ffb96b29e07444bb6b34a78a7bafd0b88d59e8ec75b8a"}, + {file = "safetensors-0.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3910fb5bf747413b59f1a34e6d2a993b589fa7d919709518823c70efaaa350bd"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8fdca709b2470a35a59b1e6dffea75cbe1214b22612b5dd4c93947697aea8b"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f27b8ef814c5fb43456caeb7f3cbb889b76115180aad1f42402839c14a47c5b"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7b2d6101eccc43c7be0cb052f13ceda64288b3d8b344b988ed08d7133cbce2f3"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdc34027b545a69be3d4220c140b276129523e4e46db06ad1a0b60d6a4cf9214"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db7bb48ca9e90bb9526c71b388d38d8de160c0354f4c5126df23e8701a870dcb"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a78ffc0795d3595cd9e4d453502e35f764276c49e434b25556a15a337db4dafc"}, + {file = "safetensors-0.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8e735b0f79090f6855b55e205e820b7b595502ffca0009a5c13eef3661ce465b"}, + {file = "safetensors-0.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f8d2416734e850d5392afffbcb2b8985ea29fb171f1cb197e2ae51b8e35d6438"}, + {file = "safetensors-0.4.0-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:e853e189ba7d47eaf561094586692ba2bbdd258c096f1755805cac098de0e6ab"}, + {file = "safetensors-0.4.0-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:4b2aa57b5a4d576f3d1dd6e56980026340f156f8a13c13016bfac4e25295b53f"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b6c1316ffde6cb4bf22c7445bc9fd224b4d1b9dd7320695f5611c89e802e4b6"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:003077ec85261d00061058fa12e3c1d2055366b02ce8f2938929359ffbaff2b8"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bd63d83a92f1437a8b0431779320376030ae43ace980bea5686d515de0784100"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2077801800b4b13301d8d6290c7fb5bd60737320001717153ebc4371776643b5"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7abe0e157a49a75aeeccfbc4f3dac38d8f98512d3cdb35c200f8e628dc5773cf"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3bfed574f6b1e7e7fe1f17213278875ef6c6e8b1582ab6eda93947db1178cae6"}, + {file = "safetensors-0.4.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:964ef166a286ce3b023d0d0bd0e21d440a1c8028981c8abdb136bc7872ba9b3d"}, + {file = "safetensors-0.4.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:44f84373e42183bd56a13a1f2d8acb1db7fedaeffbd83e79cec861477eee1af4"}, + {file = "safetensors-0.4.0-cp37-none-win32.whl", hash = "sha256:c68132727dd86fb641102e494d445f705efe402f4d5e24b278183a15499ab400"}, + {file = "safetensors-0.4.0-cp37-none-win_amd64.whl", hash = "sha256:1db87155454c168aef118d5657a403aee48a4cb08d8851a981157f07351ea317"}, + {file = "safetensors-0.4.0-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:9e583fa68e5a07cc859c4e13c1ebff12029904aa2e27185cf04a1f57fe9a81c4"}, + {file = "safetensors-0.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73e7696dcf3f72f99545eb1abe6106ad65ff1f62381d6ce4b34be3272552897a"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4936096a57c62e84e200f92620a536be067fc5effe46ecc7f230ebb496ecd579"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87b328ee1591adac332543e1f5fc2c2d7f149b745ebb0d58d7850818ff9cee27"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b69554c143336256260eceff1d3c0969172a641b54d4668489a711b05f92a2c0"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ebf6bcece5d5d1bd6416472f94604d2c834ca752ac60ed42dba7157e595a990"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6686ce01b8602d55a7d9903c90d4a6e6f90aeb6ddced7cf4605892d0ba94bcb8"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9b8fd6cc2f3bda444a048b541c843c7b7fefc89c4120d7898ea7d5b026e93891"}, + {file = "safetensors-0.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8a6abfe67692f81b8bdb99c837f28351c17e624ebf136970c850ee989c720446"}, + {file = "safetensors-0.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:27a24ca8822c469ee452db4c13418ba983315a0d863c018a9af15f2305eac38c"}, + {file = "safetensors-0.4.0-cp38-none-win32.whl", hash = "sha256:c4a0a47c8640167792d8261ee21b26430bbc39130a7edaad7f4c0bc05669d00e"}, + {file = "safetensors-0.4.0-cp38-none-win_amd64.whl", hash = "sha256:a738970a367f39249e2abb900d9441a8a86d7ff50083e5eaa6e7760a9f216014"}, + {file = "safetensors-0.4.0-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:806379f37e1abd5d302288c4b2f4186dd7ea7143d4c7811f90a8077f0ae8967b"}, + {file = "safetensors-0.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2b9b94133ed2ae9dda0e95dcace7b7556eba023ffa4c4ae6df8f99377f571d6a"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b563a14c43614815a6b524d2e4edeaace50b717f7e7487bb227dd5b68350f5a"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00a9b157be660fb7ba88fa2eedd05ec93793a5b61e43e783e10cb0b995372802"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c8f194f45ab6aa767993c24f0aeb950af169dbc5d611b94c9021a1d13b8a1a34"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:469360b9451db10bfed3881378d5a71b347ecb1ab4f42367d77b8164a13af70b"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5f75fa97ccf32a3c7af476c6a0e851023197d3c078f6de3612008fff94735f9"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:acf0180283c2efae72f1d8c0a4a7974662091df01be3aa43b5237b1e52ed0a01"}, + {file = "safetensors-0.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cd02b495ba0814619f40bda46771bb06dbbf1d42524b66fa03b2a736c77e4515"}, + {file = "safetensors-0.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c42bdea183dbaa99e2f0e6120dc524df79cf4289a6f90f30a534444ef20f49fa"}, + {file = "safetensors-0.4.0-cp39-none-win32.whl", hash = "sha256:cef7bb5d9feae7146c3c3c7b3aef7d2c8b39ba7f5ff4252d368eb69462a47076"}, + {file = "safetensors-0.4.0-cp39-none-win_amd64.whl", hash = "sha256:79dd46fb1f19282fd12f544471efb97823ede927cedbf9cf35550d92b349fdd2"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:002301c1afa32909f83745b0c124d002e7ae07e15671f3b43cbebd0ffc5e6037"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:67762d36ae088c73d4a3c96bfc4ea8d31233554f35b6cace3a18533238d462ea"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f45230f20a206e5e4c7f7bbf9342178410c6f8b0af889843aa99045a76f7691"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f2ca939bbd8fb2f4dfa28e39a146dad03bc9325e9fc831b68f7b98f69a5a2f1"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:61a00f281391fae5ce91df70918bb61c12d2d514a493fd8056e12114be729911"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:435fd136a42492b280cb55126f9ce9535b35dd49df2c5d572a5945455a439448"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f0daa788273d683258fb1e4a5e16bef4486b2fca536451a2591bc0f4a6488895"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:0620ab0d41e390ccb1c4ea8f63dc00cb5f0b96a5cdd3cd0d64c21765720c074a"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc1fa8d067733cb67f22926689ee808f08afacf7700d2ffb44efae90a0693eb1"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dcaa40bc363edda145db75cd030f3b1822e5478d550c3500a42502ecef32c959"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b561fbc044db7beff2ece0ec219a291809d45a38d30c6b38e7cc46482582f4ba"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:79a983b09782dacf9a1adb19bb98f4a8f6c3144108939f572c047b5797e43cf5"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:10b65cd3ad79f5d0daf281523b4146bc271a34bb7430d4e03212e0de8622dab8"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:114decacc475a6a9e2f9102a00c171d113ddb5d35cb0bda0db2c0c82b2eaa9ce"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:72ddb741dd5fe42521db76a70e012f76995516a12e7e0ef26be03ea9be77802a"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c5556c2ec75f5a6134866eddd7341cb36062e6edaea343478a279591b63ddba"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed50f239b0ce7ae85b078395593b4a351ede7e6f73af25f4873e3392336f64c9"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:495dcaea8fbab70b927d2274e2547824462737acbf98ccd851a71124f779a5c6"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3f4d90c79a65ba2fe2ff0876f6140748f0a3ce6a21e27a35190f4f96321803f8"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7a524382b5c55b5fbb168e0e9d3f502450c8cf3fb81b93e880018437c206a482"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:9849ea60c7e840bfdd6030ad454d4a6ba837b3398c902f15a30460dd6961c28c"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:6c42623ae7045615d9eaa6877b9df1db4e9cc71ecc14bcc721ea1e475dddd595"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80cb8342f00f3c41b3b93b1a599b84723280d3ac90829bc62262efc03ab28793"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8c4f5ed4ede384dea8c99bae76b0718a828dbf7b2c8ced1f44e3b9b1a124475"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40d7cf03493bfe75ef62e2c716314474b28d9ba5bf4909763e4b8dd14330c01a"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:232029f0a9fa6fa1f737324eda98a700409811186888536a2333cbbf64e41741"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:9ed55f4a20c78ff3e8477efb63c8303c2152cdfb3bfea4d025a80f54d38fd628"}, + {file = "safetensors-0.4.0.tar.gz", hash = "sha256:b985953c3cf11e942eac4317ef3db3da713e274109cf7cfb6076d877054f013e"}, +] + +[package.extras] +all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"] +dev = ["safetensors[all]"] +jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors[numpy]"] +numpy = ["numpy (>=1.21.6)"] +paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"] +pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"] +quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] +tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] +testing = ["h5py (>=3.7.0)", "huggingface_hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools_rust (>=1.5.2)"] +torch = ["safetensors[numpy]", "torch (>=1.10)"] + [[package]] name = "send2trash" version = "1.8.2" @@ -2723,6 +3108,121 @@ webencodings = ">=0.4" doc = ["sphinx", "sphinx_rtd_theme"] test = ["flake8", "isort", "pytest"] +[[package]] +name = "tokenizers" +version = "0.14.1" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tokenizers-0.14.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:04ec1134a18ede355a05641cdc7700f17280e01f69f2f315769f02f7e295cf1e"}, + {file = "tokenizers-0.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:638abedb39375f0ddce2de536fc9c976639b2d1b7202d715c2e7a25f0ebfd091"}, + {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:901635098565773a44f74068639d265f19deaaca47ea77b428fd9bee13a61d87"}, + {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72e95184bf5b9a4c08153ed07c16c130ff174835c9a1e6ee2b311be758c8b3ef"}, + {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ebefbc26ccff5e96ae7d40772172e7310174f9aa3683d2870a1882313ec3a4d5"}, + {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3a6330c9f1deda22873e8b4ac849cc06d3ff33d60b3217ac0bb397b541e1509"}, + {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6cba7483ba45600346a35c466bde32327b108575022f73c35a0f7170b5a71ae2"}, + {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60fec380778d75cbb492f14ca974f11f37b41d53c057b9c8ba213315b86e1f84"}, + {file = "tokenizers-0.14.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:930c19b699dd7e1077eac98967adc2fe5f0b104bd96cc1f26778ab82b31ceb24"}, + {file = "tokenizers-0.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a1e30a13376db5329570e09b14c8eb36c017909ed7e88591ca3aa81f3c7d6f32"}, + {file = "tokenizers-0.14.1-cp310-none-win32.whl", hash = "sha256:370b5b86da9bddbe65fa08711f0e8ffdf8b0036558178d1a31dfcb44efcde72a"}, + {file = "tokenizers-0.14.1-cp310-none-win_amd64.whl", hash = "sha256:c2c659f2106b6d154f118ad1b700e68148c46c59b720f04867b1fc5f26a85060"}, + {file = "tokenizers-0.14.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:00df4c5bf25c153b432b98689609b426ae701a44f3d8074dcb619f410bc2a870"}, + {file = "tokenizers-0.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fee553657dcdb7e73df8823c49e8611457ba46e9d7026b7e9c44820c08c327c3"}, + {file = "tokenizers-0.14.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a480bd902e327dfcaa52b7dd14fdc71e7aa45d73a3d6e41e028a75891d2823cf"}, + {file = "tokenizers-0.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e448b2be0430ab839cf7954715c39d6f34ff6cf2b49393f336283b7a59f485af"}, + {file = "tokenizers-0.14.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c11444984aecd342f0cf160c3320288edeb1763871fbb560ed466654b2a7016c"}, + {file = "tokenizers-0.14.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe164a1c72c6be3c5c26753c6c412f81412f4dae0d7d06371e0b396a9cc0fc9"}, + {file = "tokenizers-0.14.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:72d9967fb1f927542cfb5347207fde01b29f25c9bb8cbc7ced280decfa015983"}, + {file = "tokenizers-0.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37cc955c84ec67c2d11183d372044399342b20a1fa447b7a33040f4889bba318"}, + {file = "tokenizers-0.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:db96cf092d86d4cb543daa9148e299011e0a40770380bb78333b9fd700586fcb"}, + {file = "tokenizers-0.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c84d3cb1349936c2b96ca6175b50f5a9518170bffd76464219ee0ea6022a64a7"}, + {file = "tokenizers-0.14.1-cp311-none-win32.whl", hash = "sha256:8db3a6f3d430ac3dc3793c53fa8e5e665c23ba359484d365a191027ad8b65a30"}, + {file = "tokenizers-0.14.1-cp311-none-win_amd64.whl", hash = "sha256:c65d76052561c60e17cb4fa289885ed00a9995d59e97019fac2138bd45142057"}, + {file = "tokenizers-0.14.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:c375161b588982be381c43eb7158c250f430793d0f708ce379a0f196164c6778"}, + {file = "tokenizers-0.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:50f03d2330a153a9114c2429061137bd323736059f384de8348d7cb1ca1baa15"}, + {file = "tokenizers-0.14.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0c8ee283b249c3c3c201c41bc23adc3be2514ae4121eacdb5c5250a461eaa8c6"}, + {file = "tokenizers-0.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9f27399b8d50c5d3f08f0aae961bcc66a1dead1cd0ae9401e4c2a43a623322a"}, + {file = "tokenizers-0.14.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:89cbeec7e9d5d8773ec4779c64e3cbcbff53d234ca6ad7b1a3736588003bba48"}, + {file = "tokenizers-0.14.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:08e55920b453c30b46d58accc68a38e8e7488d0c03babfdb29c55d3f39dd2052"}, + {file = "tokenizers-0.14.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91d32bd1056c0e83a0f90e4ffa213c25096b2d8b9f0e2d172a45f138c7d8c081"}, + {file = "tokenizers-0.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44f1748035c36c939848c935715bde41734d9249ab7b844ff9bfbe984be8952c"}, + {file = "tokenizers-0.14.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1ff516d129f01bb7a4aa95bc6aae88e4d86dd63bfc2d57db9302c2624d1be7cb"}, + {file = "tokenizers-0.14.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:acfc8db61c6e919d932448cc7985b85e330c8d745528e12fce6e62d40d268bce"}, + {file = "tokenizers-0.14.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:ba336bc9107acbc1da2ad30967df7b2db93448ca66538ad86aa1fbb91116f631"}, + {file = "tokenizers-0.14.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:f77371b5030e53f8bf92197640af437539e3bba1bc8342b97888c8e26567bfdc"}, + {file = "tokenizers-0.14.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d72d25c57a9c814240802d188ff0a808b701e2dd2bf1c64721c7088ceeeb1ed7"}, + {file = "tokenizers-0.14.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caf0df8657277e32671aa8a4d3cc05f2050ab19d9b49447f2265304168e9032c"}, + {file = "tokenizers-0.14.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb3c6bc6e599e46a26ad559ad5dec260ffdf705663cc9b894033d64a69314e86"}, + {file = "tokenizers-0.14.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8cf2fcdc2368df4317e05571e33810eeed24cd594acc9dfc9788b21dac6b3a8"}, + {file = "tokenizers-0.14.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f475d5eda41d2ed51ca775a07c80529a923dd759fcff7abf03ccdd83d9f7564e"}, + {file = "tokenizers-0.14.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cce4d1a97a7eb2253b5d3f29f4a478d8c37ba0303ea34024eb9e65506d4209f8"}, + {file = "tokenizers-0.14.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ff66577ae55114f7d0f6aa0d4d335f27cae96bf245962a745b718ec887bbe7eb"}, + {file = "tokenizers-0.14.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a687099e085f5162e5b88b3402adb6c2b41046180c015c5075c9504440b6e971"}, + {file = "tokenizers-0.14.1-cp37-none-win32.whl", hash = "sha256:49f5336b82e315a33bef1025d247ca08d95719715b29e33f0e9e8cf15ff1dfb6"}, + {file = "tokenizers-0.14.1-cp37-none-win_amd64.whl", hash = "sha256:117c8da60d1bd95a6df2692926f36de7971baa1d89ff702fae47b6689a4465ad"}, + {file = "tokenizers-0.14.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:01d2bd5935642de22a6c6778bb2307f9949cd6eaeeb5c77f9b98f0060b69f0db"}, + {file = "tokenizers-0.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b05ec04132394c20bd6bcb692d557a8eb8ab1bac1646d28e49c67c00907d17c8"}, + {file = "tokenizers-0.14.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7d9025b185465d9d18679406f6f394850347d5ed2681efc203539d800f36f459"}, + {file = "tokenizers-0.14.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2539831838ab5393f78a893d7bbf27d5c36e43baf77e91dc9992922b2b97e09d"}, + {file = "tokenizers-0.14.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ec8f46d533092d8e20bc742c47918cbe24b8641dbfbbcb83177c5de3c9d4decb"}, + {file = "tokenizers-0.14.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8b019c4810903fdea3b230f358b9d27377c0f38454778b607676c9e1b57d14b7"}, + {file = "tokenizers-0.14.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e8984114fd83ed3913d89526c992395920930c9620a2feee61faf035f41d7b9a"}, + {file = "tokenizers-0.14.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11284b32f0036fe7ef4b8b00201dda79c00f3fcea173bc0e5c599e09c937ab0f"}, + {file = "tokenizers-0.14.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:53614f44f36917282a583180e402105bc63d61d1aca067d51cb7f051eb489901"}, + {file = "tokenizers-0.14.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e3b6082e9532309727273443c8943bb9558d52e36788b246aa278bda7c642116"}, + {file = "tokenizers-0.14.1-cp38-none-win32.whl", hash = "sha256:7560fca3e17a6bc876d20cd825d7721c101fa2b1cd0bfa0abf9a2e781e49b37b"}, + {file = "tokenizers-0.14.1-cp38-none-win_amd64.whl", hash = "sha256:c318a5acb429ca38f632577754235140bbb8c5a27faca1c51b43fbf575596e34"}, + {file = "tokenizers-0.14.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:b886e0f5c72aa4249c609c24b9610a9ca83fd963cbb5066b19302723ea505279"}, + {file = "tokenizers-0.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f522f28c88a0d5b2f9e895cf405dd594cd518e99d61905406aec74d30eb6383b"}, + {file = "tokenizers-0.14.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5bef76c4d9329913cef2fe79ce1f4dab98f77fa4887e5f0420ffc9386941de32"}, + {file = "tokenizers-0.14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59c7df2103052b30b7c76d4fa8251326c9f82689578a912698a127dc1737f43e"}, + {file = "tokenizers-0.14.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:232445e7b85255ccfe68dfd42185db8a3f3349b34ad7068404856c4a5f67c355"}, + {file = "tokenizers-0.14.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8e63781da85aa8948864970e529af10abc4084a990d30850c41bbdb5f83eee45"}, + {file = "tokenizers-0.14.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5760a831c0f3c6d3229b50ef3fafa4c164ec99d7e8c2237fe144e67a9d33b120"}, + {file = "tokenizers-0.14.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c84b456ff8525ec3ff09762e32ccc27888d036dcd0ba2883e1db491e164dd725"}, + {file = "tokenizers-0.14.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:463ee5f3afbfec29cbf5652752c9d1032bdad63daf48bb8cb9970064cc81d5f9"}, + {file = "tokenizers-0.14.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ee6b63aecf929a7bcf885bdc8a8aec96c43bc4442f63fe8c6d48f24fc992b05b"}, + {file = "tokenizers-0.14.1-cp39-none-win32.whl", hash = "sha256:aae42798ba1da3bc1572b2048fe42e61dd6bacced2b424cb0f5572c5432f79c2"}, + {file = "tokenizers-0.14.1-cp39-none-win_amd64.whl", hash = "sha256:68c4699147dded6926a3d2c2f948d435d54d027f69909e0ef3c6587933723ed2"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:5f9afdcf701a1aa3c41e0e748c152d2162434d61639a1e5d8523ecf60ae35aea"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:6859d81243cd09854be9054aca3ecab14a2dee5b3c9f6d7ef12061d478ca0c57"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7975178f9478ccedcf613332d5d6f37b67c74ef4e2e47e0c965597506b921f04"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ce2f0ff2e5f12ac5bebaa690606395725239265d7ffa35f35c243a379316297"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c7cfc3d42e81cda802f93aa9e92caf79feaa1711426e28ce620560b8aaf5e4d"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:67d3adff654dc7f7c7091dd259b3b847fe119c08d0bda61db91e2ea2b61c38c0"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:956729b7dd599020e57133fb95b777e4f81ee069ff0a70e80f6eeac82658972f"}, + {file = "tokenizers-0.14.1-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:fe2ea1177146a7ab345ab61e90a490eeea25d5f063e1cb9d4eb1425b169b64d7"}, + {file = "tokenizers-0.14.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9930f31f603ecc6ea54d5c6dfa299f926ab3e921f72f94babcb02598c32b57c6"}, + {file = "tokenizers-0.14.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d49567a2754e9991c05c2b5a7e6650b56e24365b7cab504558e58033dcf0edc4"}, + {file = "tokenizers-0.14.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3678be5db330726f19c1949d8ae1b845a02eeb2a2e1d5a8bb8eaa82087ae25c1"}, + {file = "tokenizers-0.14.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:42b180ed1bec58ab9bdc65d406577e0c0fb7241b74b8c032846073c7743c9f86"}, + {file = "tokenizers-0.14.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:319e4367596fb0d52be645b3de1616faf0fadaf28507ce1c7595bebd9b4c402c"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:2cda65b689aec63b7c76a77f43a08044fa90bbc6ad9849267cedfee9795913f3"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:ca0bfc79b27d84fcb7fa09339b2ee39077896738d9a30ff99c0332376e985072"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a7093767e070269e22e2c5f845e46510304f124c32d2cd249633c0f27eb29d86"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad759ba39cd32c2c2247864d02c84ea5883b5f6cc6a4ee0c95602a3dde52268f"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26fee36a6d8f2bd9464f3566b95e3e3fb7fd7dad723f775c500aac8204ec98c6"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d091c62cb7abbd32e527a85c41f7c8eb4526a926251891fc4ecbe5f974142ffb"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ca304402ea66d58f99c05aa3d7a6052faea61e5a8313b94f6bc36fbf27960e2d"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:102f118fa9b720b93c3217c1e239ed7bc1ae1e8dbfe9b4983a4f2d7b4ce6f2ec"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:df4f058e96e8b467b7742e5dba7564255cd482d3c1e6cf81f8cb683bb0433340"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:040ee44efc1806900de72b13c1c3036154077d9cde189c9a7e7a50bbbdcbf39f"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7618b84118ae704f7fa23c4a190bd80fc605671841a4427d5ca14b9b8d9ec1a3"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ecdfe9736c4a73343f629586016a137a10faed1a29c6dc699d8ab20c2d3cf64"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:92c34de04fec7f4ff95f7667d4eb085c4e4db46c31ef44c3d35c38df128430da"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:628b654ba555b2ba9111c0936d558b14bfc9d5f57b8c323b02fc846036b38b2f"}, + {file = "tokenizers-0.14.1.tar.gz", hash = "sha256:ea3b3f8908a9a5b9d6fc632b5f012ece7240031c44c6d4764809f33736534166"}, +] + +[package.dependencies] +huggingface_hub = ">=0.16.4,<0.18" + +[package.extras] +dev = ["tokenizers[testing]"] +docs = ["setuptools_rust", "sphinx", "sphinx_rtd_theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] + [[package]] name = "tomli" version = "2.0.1" @@ -2789,6 +3289,75 @@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["argcomplete (>=3.0.3)", "mypy (>=1.5.1)", "pre-commit", "pytest (>=7.0,<7.5)", "pytest-mock", "pytest-mypy-testing"] +[[package]] +name = "transformers" +version = "4.34.1" +description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "transformers-4.34.1-py3-none-any.whl", hash = "sha256:d06ac09151d7b845e4a4acd6b143a591d946031ee67b4cbb20693b241920ffc0"}, + {file = "transformers-4.34.1.tar.gz", hash = "sha256:1d0258d5a18063b66005bbe1e3276ec5943d9ab4ab47f020db1fd485cc40ea22"}, +] + +[package.dependencies] +filelock = "*" +huggingface-hub = ">=0.16.4,<1.0" +numpy = ">=1.17" +packaging = ">=20.0" +pyyaml = ">=5.1" +regex = "!=2019.12.17" +requests = "*" +safetensors = ">=0.3.1" +tokenizers = ">=0.14,<0.15" +tqdm = ">=4.27" + +[package.extras] +accelerate = ["accelerate (>=0.20.3)"] +agents = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.10,!=1.12.0)"] +all = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"] +audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +codecarbon = ["codecarbon (==1.2.0)"] +deepspeed = ["accelerate (>=0.20.3)", "deepspeed (>=0.9.3)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.14,<0.15)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +docs = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"] +docs-specific = ["hf-doc-builder"] +fairscale = ["fairscale (>0.3)"] +flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)"] +flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +ftfy = ["ftfy"] +integrations = ["optuna", "ray[tune]", "sigopt"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] +modelcreation = ["cookiecutter (==1.7.3)"] +natten = ["natten (>=0.14.6)"] +onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] +onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] +optuna = ["optuna"] +quality = ["GitPython (<3.1.19)", "black (>=23.1,<24.0)", "datasets (!=2.5.0)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (>=0.0.241,<=0.0.259)", "urllib3 (<2.0.0)"] +ray = ["ray[tune]"] +retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] +sagemaker = ["sagemaker (>=2.31.0)"] +sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"] +serving = ["fastapi", "pydantic (<2)", "starlette", "uvicorn"] +sigopt = ["sigopt"] +sklearn = ["scikit-learn"] +speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "timeout-decorator"] +tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx"] +tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx"] +tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +timm = ["timm"] +tokenizers = ["tokenizers (>=0.14,<0.15)"] +torch = ["accelerate (>=0.20.3)", "torch (>=1.10,!=1.12.0)"] +torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +torch-vision = ["Pillow (<10.0.0)", "torchvision"] +torchhub = ["filelock", "huggingface-hub (>=0.16.4,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "tqdm (>=4.27)"] +video = ["av (==9.2.0)", "decord (==0.6.0)"] +vision = ["Pillow (<10.0.0)"] + [[package]] name = "types-python-dateutil" version = "2.8.19.14" @@ -3000,4 +3569,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "3.10.12" -content-hash = "e66f4feea268c70e9e5d76167e67c932142d0ce606b5c661d0579d5d354280a4" +content-hash = "da9451cabbbafa708e8c48adf6048a0cddc8d0c257bac7b918552f8a6c1eaf8b" diff --git a/apps/openchallenges/notebook/pyproject.toml b/apps/openchallenges/notebook/pyproject.toml index 6c9fe74a8c..2695a2ad58 100644 --- a/apps/openchallenges/notebook/pyproject.toml +++ b/apps/openchallenges/notebook/pyproject.toml @@ -14,6 +14,8 @@ openai = "0.28.1" python-dotenv = "1.0.0" boto3 = "1.28.72" langchain = "0.0.324" +transformers = "4.34.1" +anthropic = "0.5.0" [tool.poetry.group.dev.dependencies] jupyterlab = "3.5.2" From c15513523aa15099fbc84ff5bc962bc587f9bfe4 Mon Sep 17 00:00:00 2001 From: Thomas Schaffter Date: Tue, 31 Oct 2023 18:05:09 +0000 Subject: [PATCH 3/8] Add notebook docker image --- .../notebooks/openai-challenge-headline.ipynb | 30 +++++++++++++------ apps/openchallenges/notebook/project.json | 17 +++++++++++ docker/openchallenges/serve-detach.sh | 1 + docker/openchallenges/services/notebook.yml | 17 +++++++++++ 4 files changed, 56 insertions(+), 9 deletions(-) create mode 100644 docker/openchallenges/services/notebook.yml diff --git a/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb b/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb index 4f1f99ccf6..c0a60289c9 100644 --- a/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb +++ b/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "8fc3ac4c-2ceb-4bbc-bdb8-3bb8be08dfc6", "metadata": {}, "outputs": [], @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "b2f952f5-9140-4702-8a96-3457ca4df841", "metadata": {}, "outputs": [], @@ -88,7 +88,7 @@ "with openchallenges_client.ApiClient(configuration) as api_client:\n", " api_instance = challenge_api.ChallengeApi(api_client)\n", " \n", - " query = openchallenges_client.ChallengeSearchQuery(page_number=1, page_size=1)\n", + " query = openchallenges_client.ChallengeSearchQuery(page_number=1000, page_size=1)\n", "\n", " try:\n", " # Get the first page of the list of challenges\n", @@ -100,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "6d590b17", "metadata": {}, "outputs": [ @@ -110,7 +110,7 @@ "True" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -271,7 +271,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 6, "id": "20da8b0e", "metadata": {}, "outputs": [ @@ -318,7 +318,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 7, "id": "5bbcd90f", "metadata": {}, "outputs": [], @@ -348,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 8, "id": "1f4b90df", "metadata": {}, "outputs": [ @@ -365,9 +365,21 @@ "print(f'Number of text tokens: {textgen_llm.get_num_tokens(prompt)}')" ] }, + { + "cell_type": "markdown", + "id": "903a6de3", + "metadata": {}, + "source": [ + "Authentify with AWS using the command:\n", + "\n", + "```console\n", + "aws --profile cnb sso login\n", + "```" + ] + }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 9, "id": "b0c4c192", "metadata": {}, "outputs": [ diff --git a/apps/openchallenges/notebook/project.json b/apps/openchallenges/notebook/project.json index 9d67fe6101..52070878f3 100644 --- a/apps/openchallenges/notebook/project.json +++ b/apps/openchallenges/notebook/project.json @@ -25,6 +25,23 @@ "cwd": "apps/openchallenges/notebook" }, "dependsOn": [] + }, + "build-image": { + "executor": "@nx-tools/nx-container:build", + "options": { + "context": "apps/openchallenges/notebook", + "metadata": { + "images": ["ghcr.io/sage-bionetworks/openchallenges-notebook"], + "tags": ["type=edge,branch=main", "type=raw,value=local", "type=sha"] + }, + "push": false + } + }, + "serve-detach": { + "executor": "nx:run-commands", + "options": { + "command": "docker/openchallenges/serve-detach.sh openchallenges-notebook" + } } }, "tags": ["type:app", "scope:client"], diff --git a/docker/openchallenges/serve-detach.sh b/docker/openchallenges/serve-detach.sh index 5d4bb09426..cc5ba04b61 100755 --- a/docker/openchallenges/serve-detach.sh +++ b/docker/openchallenges/serve-detach.sh @@ -14,6 +14,7 @@ args=( --file docker/openchallenges/services/kafka.yml --file docker/openchallenges/services/mariadb.yml --file docker/openchallenges/services/mysqld-exporter.yml + --file docker/openchallenges/services/notebook.yml --file docker/openchallenges/services/organization-service.yml --file docker/openchallenges/services/prometheus.yml --file docker/openchallenges/services/rstudio.yml diff --git a/docker/openchallenges/services/notebook.yml b/docker/openchallenges/services/notebook.yml new file mode 100644 index 0000000000..bf95e1e5cd --- /dev/null +++ b/docker/openchallenges/services/notebook.yml @@ -0,0 +1,17 @@ +version: '3.8' + +services: + openchallenges-notebook: + image: ghcr.io/sage-bionetworks/openchallenges-notebook:${OPENCHALLENGES_VERSION:-local} + container_name: openchallenges-notebook + restart: always + env_file: + - ../../../apps/openchallenges/notebook/.env + networks: + - openchallenges + ports: + - '8888:8888' + # deploy: + # resources: + # limits: + # memory: 500M From f7944a047dea0083fccc81a7961b88aef2364d53 Mon Sep 17 00:00:00 2001 From: Thomas Schaffter Date: Tue, 31 Oct 2023 19:40:23 +0000 Subject: [PATCH 4/8] Rename notebook --- ...enai-challenge-headline.ipynb => llm-challenge-headline.ipynb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename apps/openchallenges/notebook/notebooks/{openai-challenge-headline.ipynb => llm-challenge-headline.ipynb} (100%) diff --git a/apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb b/apps/openchallenges/notebook/notebooks/llm-challenge-headline.ipynb similarity index 100% rename from apps/openchallenges/notebook/notebooks/openai-challenge-headline.ipynb rename to apps/openchallenges/notebook/notebooks/llm-challenge-headline.ipynb From b8505c98877ac1f668102912bc9c8683e1a1ef30 Mon Sep 17 00:00:00 2001 From: Thomas Schaffter Date: Tue, 31 Oct 2023 19:50:43 +0000 Subject: [PATCH 5/8] Generate headlines with bedrock --- .vscode/settings.json | 8 +- ...ine.ipynb => challenge-headline-llm.ipynb} | 77 +++++++++++-------- 2 files changed, 50 insertions(+), 35 deletions(-) rename apps/openchallenges/notebook/notebooks/{llm-challenge-headline.ipynb => challenge-headline-llm.ipynb} (86%) diff --git a/.vscode/settings.json b/.vscode/settings.json index c2f81f9c1b..25e0c3d68e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -84,9 +84,9 @@ "editor.inlayHints.enabled": "off", "rewrap.wrappingColumn": 100, "editor.rulers": [100], - "eslint.validate": [ - "json" - ], + "eslint.validate": ["json"], "java.compile.nullAnalysis.mode": "disabled", - "typescript.tsdk": "node_modules/typescript/lib" + "typescript.tsdk": "node_modules/typescript/lib", + "python.analysis.autoImportCompletions": true, + "python.analysis.typeCheckingMode": "basic" } diff --git a/apps/openchallenges/notebook/notebooks/llm-challenge-headline.ipynb b/apps/openchallenges/notebook/notebooks/challenge-headline-llm.ipynb similarity index 86% rename from apps/openchallenges/notebook/notebooks/llm-challenge-headline.ipynb rename to apps/openchallenges/notebook/notebooks/challenge-headline-llm.ipynb index c0a60289c9..207f6b89a0 100644 --- a/apps/openchallenges/notebook/notebooks/llm-challenge-headline.ipynb +++ b/apps/openchallenges/notebook/notebooks/challenge-headline-llm.ipynb @@ -7,7 +7,7 @@ "tags": [] }, "source": [ - "# OpenChallenges REST API Example" + "# Generate Challenge Headlines" ] }, { @@ -54,7 +54,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "c41a64a1", + "id": "f37a60d9", "metadata": {}, "outputs": [], "source": [ @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "8fc3ac4c-2ceb-4bbc-bdb8-3bb8be08dfc6", "metadata": {}, "outputs": [], @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "b2f952f5-9140-4702-8a96-3457ca4df841", "metadata": {}, "outputs": [], @@ -100,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "6d590b17", "metadata": {}, "outputs": [ @@ -110,7 +110,7 @@ "True" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -271,7 +271,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "20da8b0e", "metadata": {}, "outputs": [ @@ -318,7 +318,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "5bbcd90f", "metadata": {}, "outputs": [], @@ -348,21 +348,22 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "1f4b90df", + "execution_count": 9, + "id": "451eca6a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of text tokens: 7\n" - ] - } - ], + "outputs": [], "source": [ - "prompt = 'How much is 2 plus 2?'\n", - "print(f'Number of text tokens: {textgen_llm.get_num_tokens(prompt)}')" + "def generate_challenge_headline(text):\n", + " prompt=(\n", + " \"Please generate five headlines that have a maximum ten words from the following \"\n", + " \"challenge description. The headline must summarize the goal of the challenge. \"\n", + " f\"Description: \\n{text}\"\n", + " )\n", + " response = Bedrock(model_id = \"anthropic.claude-v2\",\n", + " client = boto3_bedrock, \n", + " model_kwargs = inference_modifier \n", + " )(prompt)\n", + " return response" ] }, { @@ -379,7 +380,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "b0c4c192", "metadata": {}, "outputs": [ @@ -387,20 +388,34 @@ "name": "stdout", "output_type": "stream", "text": [ - "Number of output tokens: 5\n", - " 2 + 2 = 4\n" + "(' Here are 5 headlines with a maximum of 10 words summarizing the goal of the '\n", + " 'challenge:\\n'\n", + " '\\n'\n", + " '1. Challenge Seeks to Standardize Data for AI Discovery\\n'\n", + " '\\n'\n", + " '2. Improve Data Quality for AI Research, Says NIDDK Challenge \\n'\n", + " '\\n'\n", + " '3. NIDDK Launches Data Challenge to Boost AI Reuse\\n'\n", + " '\\n'\n", + " '4. Challenge Aims to Ready Data for AI Insights\\n'\n", + " '\\n'\n", + " '5. Data Challenge Targets Interoperability for AI')\n" ] } ], "source": [ - "response = textgen_llm(prompt)\n", - "\n", - "# summary = response[response.index('\\n')+1:]\n", - "summary = response\n", - "\n", - "print(f'Number of output tokens: {textgen_llm.get_num_tokens(summary)}')\n", - "print(summary)" + "challenge = challenges[0]\n", + "result = generate_challenge_headline(challenge.description)\n", + "pprint(result)\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12d3b54d", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 875dd92909c53e704d30d63b8a271eeae60d6781 Mon Sep 17 00:00:00 2001 From: Thomas Schaffter Date: Wed, 1 Nov 2023 03:10:21 +0000 Subject: [PATCH 6/8] Work now in Python script --- .vscode/settings.json | 2 + .../notebook/challenge_headlines.json | 28 ++ .../notebooks/challenge-headline-llm.ipynb | 312 +++++++++--------- .../src/challenge_headline/__init__.py | 0 .../challenge_headline_llm.py | 134 ++++++++ .../notebook/src/utils/__init__.py | 0 .../notebook/src/utils/bedrock.py | 80 +++++ .../notebook/src/utils/print_ww.py | 21 ++ 8 files changed, 425 insertions(+), 152 deletions(-) create mode 100644 apps/openchallenges/notebook/challenge_headlines.json create mode 100644 apps/openchallenges/notebook/src/challenge_headline/__init__.py create mode 100644 apps/openchallenges/notebook/src/challenge_headline/challenge_headline_llm.py create mode 100644 apps/openchallenges/notebook/src/utils/__init__.py create mode 100644 apps/openchallenges/notebook/src/utils/bedrock.py create mode 100644 apps/openchallenges/notebook/src/utils/print_ww.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 25e0c3d68e..03e0ffd14e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,6 +12,8 @@ "editor.defaultFormatter": "ms-python.black-formatter", "editor.formatOnSave": true, "editor.tabSize": 4 + // This settings still works even if VS Code does not recognize it. + // "notebook.formatOnSave.enabled": true }, "[json][jsonc]": { "editor.defaultFormatter": "esbenp.prettier-vscode", diff --git a/apps/openchallenges/notebook/challenge_headlines.json b/apps/openchallenges/notebook/challenge_headlines.json new file mode 100644 index 0000000000..ae0e40f347 --- /dev/null +++ b/apps/openchallenges/notebook/challenge_headlines.json @@ -0,0 +1,28 @@ +[ + { + "id": 279, + "slug": "niddk-central-repository-data-centric-challenge", + "name": "NIDDK Central Repository Data-Centric Challenge", + "headline": "Enhancing NIDDK datasets for future Artificial Intelligence (AI) applications.", + "headline_alternatives": [ + "1. Challenge Seeks AI Solutions to Standardize NIDDK Data", + "2. Data Challenge Aims to Ready NIDDK Data for AI Discovery ", + "3. Making NIDDK Data AI-Ready Via Data Standardization Challenge", + "4. Challenge Invites AI to Standardize NIDDK Data for Reuse", + "5. Data Challenge Seeks AI Tools to Improve NIDDK Data Reuse" + ] + }, + { + "id": 278, + "slug": "qbi-hackathon", + "name": "QBI hackathon", + "headline": "A 48-hour event connecting the Bay Area developer community with scientists ...", + "headline_alternatives": [ + "1. Hackathon connects developers and scientists to advance biomedical research ", + "2. 48-hour hackathon applies AI to biomedical data ", + "3. Developers and scientists collaborate on biomedical challenges", + "4. Hackathon pushes science ahead with latest algorithms ", + "5. Event connects Bay Area developers and scientists" + ] + } +] \ No newline at end of file diff --git a/apps/openchallenges/notebook/notebooks/challenge-headline-llm.ipynb b/apps/openchallenges/notebook/notebooks/challenge-headline-llm.ipynb index 207f6b89a0..3dad5722ce 100644 --- a/apps/openchallenges/notebook/notebooks/challenge-headline-llm.ipynb +++ b/apps/openchallenges/notebook/notebooks/challenge-headline-llm.ipynb @@ -7,7 +7,7 @@ "tags": [] }, "source": [ - "# Generate Challenge Headlines" + "# Generate Challenge Headlines with AWS Bedrock" ] }, { @@ -43,6 +43,45 @@ "- Access to OpenAI API" ] }, + { + "cell_type": "markdown", + "id": "2dd21cb4", + "metadata": {}, + "source": [ + "## Preparation" + ] + }, + { + "cell_type": "markdown", + "id": "c74bd100", + "metadata": {}, + "source": [ + "Load config file `.env`." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b9a13bdf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv()" + ] + }, { "cell_type": "markdown", "id": "c90710c2-f053-44ae-a3c2-610eecff9073", @@ -53,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "f37a60d9", "metadata": {}, "outputs": [], @@ -65,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "8fc3ac4c-2ceb-4bbc-bdb8-3bb8be08dfc6", "metadata": {}, "outputs": [], @@ -78,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "b2f952f5-9140-4702-8a96-3457ca4df841", "metadata": {}, "outputs": [], @@ -99,158 +138,48 @@ ] }, { - "cell_type": "code", - "execution_count": 4, - "id": "6d590b17", + "cell_type": "markdown", + "id": "1ff9d4b9", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "from dotenv import load_dotenv\n", - "\n", - "load_dotenv()" + "## Prepare the challenge descriptions" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "488632df", + "execution_count": 20, + "id": "2fcad35d", "metadata": {}, "outputs": [], "source": [ - "import openai" + "import requests\n", + "from bs4 import BeautifulSoup" ] }, { - "cell_type": "code", - "execution_count": 6, - "id": "9158b9b9", + "cell_type": "markdown", + "id": "49d20733", "metadata": {}, - "outputs": [], "source": [ - "# Source: https://medium.com/muthoni-wanyoike/implementing-text-summarization-using-openais-gpt-3-api-dcd6be4f6933\n", - "def split_text(text):\n", - " max_chunk_size = 2048\n", - " chunks = []\n", - " current_chunk = \"\"\n", - " for sentence in text.split(\".\"):\n", - " if len(current_chunk) + len(sentence) < max_chunk_size:\n", - " current_chunk += sentence + \".\"\n", - " else:\n", - " chunks.append(current_chunk.strip())\n", - " current_chunk = sentence + \".\"\n", - " if current_chunk:\n", - " chunks.append(current_chunk.strip())\n", - " return chunks" + "TODO" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "1ea1b66b", + "execution_count": null, + "id": "eb6c1ada", "metadata": {}, "outputs": [], - "source": [ - "def generate_challenge_headline(text):\n", - " prompt=(\n", - " \"Please generate five headlines that have a maximum ten words from the following \"\n", - " \"challenge description. The headline must summarize the goal of the challenge. \"\n", - " f\"Description: \\n{text}\"\n", - " )\n", - " response = openai.ChatCompletion.create(\n", - " model=\"gpt-3.5-turbo\",\n", - " messages=[\n", - " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", - " {\"role\": \"user\", \"content\": prompt},\n", - " ],\n", - " max_tokens=1024,\n", - " temperature=0.5\n", - " )\n", - " return response['choices'][0]['message']['content']" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d0c0b308-0b58-44a7-8ff6-4987dfbccb17", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('1. NIDDK Data Centric Challenge: Enhancing Repository for AI Research\\n'\n", - " '2. Unlocking Insights: NIDDK Challenge Improves Data Quality for AI\\n'\n", - " \"3. NIDDK-CR's Data Centric Challenge: Advancing AI-driven Discovery\\n\"\n", - " '4. Bridging the Gap: NIDDK Challenge Boosts Data Collaboration for AI\\n'\n", - " '5. NIDDK Repository Challenge: Making Research Data FAIR for AI')\n" - ] - } - ], "source": [ "challenge = challenges[0]\n", - "result = generate_challenge_headline(challenge.description)\n", - "pprint(result)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "f2bd77de", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"id\": 279,\n", - " \"slug\": \"niddk-central-repository-data-centric-challenge\",\n", - " \"name\": \"NIDDK Central Repository Data-Centric Challenge\",\n", - " \"headline\": \"Enhancing NIDDK datasets for future Artificial Intelligence (AI) applications.\",\n", - " \"headline_alternatives\": [\n", - " \"1. NIDDK Data Centric Challenge: Enhancing Repository for AI Research\",\n", - " \"2. Unlocking Insights: NIDDK Challenge Improves Data Quality for AI\",\n", - " \"3. NIDDK-CR's Data Centric Challenge: Advancing AI-driven Discovery\",\n", - " \"4. Bridging the Gap: NIDDK Challenge Boosts Data Collaboration for AI\",\n", - " \"5. NIDDK Repository Challenge: Making Research Data FAIR for AI\"\n", - " ]\n", - "}\n" - ] - } - ], - "source": [ - "from itertools import compress\n", - "import json\n", "\n", - "raw_headlines = result.splitlines()\n", + "response = requests.get(challenge.website_url)\n", "\n", - "def is_raw_headline(raw_headline):\n", - " prefixes = (\"1. \", \"2. \", \"3. \", \"4. \", \"5. \")\n", - " return raw_headline.startswith(prefixes)\n", "\n", - "headlines = list(compress(raw_headlines, map(is_raw_headline, raw_headlines)))\n", "\n", - "obj = {\n", - " \"id\": challenge.id,\n", - " \"slug\": challenge.slug,\n", - " \"name\": challenge.name,\n", - " \"headline\": challenge.headline,\n", - " \"headline_alternatives\": headlines\n", - "}\n", - "json_str = json.dumps(obj, indent=2)\n", "\n", - "print(json_str)" + "\n", + "pprint(response)" ] }, { @@ -258,7 +187,7 @@ "id": "14ba8e14", "metadata": {}, "source": [ - "## Generating challenge headlines with AWS LLM" + "## Generate the headlines with AWS Bedrock" ] }, { @@ -338,25 +267,17 @@ " )\n" ] }, - { - "cell_type": "markdown", - "id": "83d704d1", - "metadata": {}, - "source": [ - "Call API and output results" - ] - }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "id": "451eca6a", "metadata": {}, "outputs": [], "source": [ - "def generate_challenge_headline(text):\n", + "def generate_challenge_headlines(text, num_headlines):\n", " prompt=(\n", - " \"Please generate five headlines that have a maximum ten words from the following \"\n", - " \"challenge description. The headline must summarize the goal of the challenge. \"\n", + " f\"Please generate {num_headlines} headlines that have a maximum ten words from the \"\n", + " \"following challenge description. The headline must summarize the goal of the challenge. \"\n", " f\"Description: \\n{text}\"\n", " )\n", " response = Bedrock(model_id = \"anthropic.claude-v2\",\n", @@ -366,6 +287,21 @@ " return response" ] }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1a397957", + "metadata": {}, + "outputs": [], + "source": [ + "from itertools import compress\n", + "import json\n", + "\n", + "def is_raw_headline(raw_headline):\n", + " prefixes = (\"1. \", \"2. \", \"3. \", \"4. \", \"5. \")\n", + " return raw_headline.startswith(prefixes)" + ] + }, { "cell_type": "markdown", "id": "903a6de3", @@ -380,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "b0c4c192", "metadata": {}, "outputs": [ @@ -391,31 +327,103 @@ "(' Here are 5 headlines with a maximum of 10 words summarizing the goal of the '\n", " 'challenge:\\n'\n", " '\\n'\n", - " '1. Challenge Seeks to Standardize Data for AI Discovery\\n'\n", + " '1. Challenge Seeks to Improve Data for AI Discovery\\n'\n", " '\\n'\n", - " '2. Improve Data Quality for AI Research, Says NIDDK Challenge \\n'\n", + " '2. Data Challenge Aims to Augment Repository for AI Use \\n'\n", " '\\n'\n", - " '3. NIDDK Launches Data Challenge to Boost AI Reuse\\n'\n", + " '3. Challenge Targets Data Standardization for AI Research\\n'\n", " '\\n'\n", - " '4. Challenge Aims to Ready Data for AI Insights\\n'\n", + " '4. Competition Focuses on Making Data AI Ready \\n'\n", " '\\n'\n", - " '5. Data Challenge Targets Interoperability for AI')\n" + " '5. Challenge Works to Ready Data for AI Insights')\n" ] } ], "source": [ "challenge = challenges[0]\n", - "result = generate_challenge_headline(challenge.description)\n", - "pprint(result)\n" + "response = generate_challenge_headlines(challenge.description, 5)\n", + "\n", + "pprint(response)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": 279,\n", + " \"slug\": \"niddk-central-repository-data-centric-challenge\",\n", + " \"name\": \"NIDDK Central Repository Data-Centric Challenge\",\n", + " \"headline\": \"Enhancing NIDDK datasets for future Artificial Intelligence (AI) applications.\",\n", + " \"headline_alternatives\": [\n", + " \"1. Challenge Seeks to Improve Data for AI Discovery\",\n", + " \"2. Data Challenge Aims to Augment Repository for AI Use \",\n", + " \"3. Challenge Targets Data Standardization for AI Research\",\n", + " \"4. Competition Focuses on Making Data AI Ready \",\n", + " \"5. Challenge Works to Ready Data for AI Insights\"\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "raw_headlines = response.splitlines()\n", + "headlines = list(compress(raw_headlines, map(is_raw_headline, raw_headlines)))\n", + "\n", + "obj = {\n", + " \"id\": challenge.id,\n", + " \"slug\": challenge.slug,\n", + " \"name\": challenge.name,\n", + " \"headline\": challenge.headline,\n", + " \"headline_alternatives\": headlines\n", + "}\n", + "json_str = json.dumps(obj, indent=2)\n", + "\n", + "print(json_str)" + ] + }, + { + "cell_type": "markdown", "id": "12d3b54d", "metadata": {}, + "source": [ + "### Output challenge headlines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4496fd1d", + "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from itertools import compress\n", + "import json\n", + "\n", + "raw_headlines = result.splitlines()\n", + "\n", + "def is_raw_headline(raw_headline):\n", + " prefixes = (\"1. \", \"2. \", \"3. \", \"4. \", \"5. \")\n", + " return raw_headline.startswith(prefixes)\n", + "\n", + "headlines = list(compress(raw_headlines, map(is_raw_headline, raw_headlines)))\n", + "\n", + "obj = {\n", + " \"id\": challenge.id,\n", + " \"slug\": challenge.slug,\n", + " \"name\": challenge.name,\n", + " \"headline\": challenge.headline,\n", + " \"headline_alternatives\": headlines\n", + "}\n", + "json_str = json.dumps(obj, indent=2)\n", + "\n", + "print(json_str)" + ] } ], "metadata": { diff --git a/apps/openchallenges/notebook/src/challenge_headline/__init__.py b/apps/openchallenges/notebook/src/challenge_headline/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/openchallenges/notebook/src/challenge_headline/challenge_headline_llm.py b/apps/openchallenges/notebook/src/challenge_headline/challenge_headline_llm.py new file mode 100644 index 0000000000..9806b571ef --- /dev/null +++ b/apps/openchallenges/notebook/src/challenge_headline/challenge_headline_llm.py @@ -0,0 +1,134 @@ +# Requirements +# +# - Login with AWS: aws --profile cnb sso login + +from dotenv import load_dotenv + +import openchallenges_client +from pprint import pprint +from openchallenges_client.api import challenge_api + +load_dotenv() + +# List challenges from OC.io + +# See configuration.py for a list of all supported configuration parameters. +configuration = openchallenges_client.Configuration( + host="https://openchallenges.io/api/v1" +) + +# Enter a context with an instance of the API client +challenges = [] +with openchallenges_client.ApiClient(configuration) as api_client: + api_instance = challenge_api.ChallengeApi(api_client) + + query = openchallenges_client.ChallengeSearchQuery(page_number=1, page_size=1000) + pprint(query) + + try: + # Get the first page of the list of challenges + page = api_instance.list_challenges(query) + pprint(page.size) + challenges.extend(page.challenges) + except openchallenges_client.ApiException as e: + print("Exception when calling ChallengeApi->list_challenges: %s\n" % e) + +# Sort challenge by ID +# challenges.sort(key=lambda challenge: challenge.id, reverse=False) +# pprint(challenges[:2]) + +print(len(challenges)) + +import sys + +sys.exit() + +# GENERATE THE HEADLINES WITH AWS BEDROCK + +# Configure the Bedrock client + +import json +import os +import sys + +import boto3 +import botocore + +module_path = "src" +sys.path.append(os.path.abspath(module_path)) +from utils import bedrock, print_ww + +os.environ["AWS_DEFAULT_REGION"] = "us-east-1" +os.environ["AWS_PROFILE"] = "cnb" + +boto3_bedrock = bedrock.get_bedrock_client( + assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE", None), + region=os.environ.get("AWS_DEFAULT_REGION", None), +) + +# Configure base model options + +from langchain.llms.bedrock import Bedrock + +inference_modifier = { + "max_tokens_to_sample": 6000, + "temperature": 0.6, + "top_k": 250, + "top_p": 1, + "stop_sequences": ["\n\nHuman"], +} + +textgen_llm = Bedrock( + model_id="anthropic.claude-v2", + client=boto3_bedrock, + model_kwargs=inference_modifier, +) + + +def generate_challenge_headlines(text, num_headlines): + prompt = ( + f"Please generate {num_headlines} headlines that have a maximum ten words from the " + "following challenge description. The headline must summarize the goal of the challenge. " + f"Description: \n{text}" + ) + response = Bedrock( + model_id="anthropic.claude-v2", + client=boto3_bedrock, + model_kwargs=inference_modifier, + )(prompt) + return response + + +from itertools import compress +import json + + +def is_raw_headline(raw_headline): + prefixes = ("1. ", "2. ", "3. ", "4. ", "5. ") + return raw_headline.startswith(prefixes) + + +def process_challenge(challenge): + print(f"Processing challenge ID {challenge.id}: {challenge.name}") + response = generate_challenge_headlines(challenge.description, 5) + + raw_headlines = response.splitlines() + headlines = list(compress(raw_headlines, map(is_raw_headline, raw_headlines))) + + obj = { + "id": challenge.id, + "slug": challenge.slug, + "name": challenge.name, + "headline": challenge.headline, + "headline_alternatives": headlines, + } + return obj + + +challenge_headlines = list(map(process_challenge, challenges[:2])) + + +# SAVE OUTPUT TO FILE + +with open("challenge_headlines.json", "w") as f: + json.dump(challenge_headlines, f, indent=2) diff --git a/apps/openchallenges/notebook/src/utils/__init__.py b/apps/openchallenges/notebook/src/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/openchallenges/notebook/src/utils/bedrock.py b/apps/openchallenges/notebook/src/utils/bedrock.py new file mode 100644 index 0000000000..b959e1009a --- /dev/null +++ b/apps/openchallenges/notebook/src/utils/bedrock.py @@ -0,0 +1,80 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 +"""Helper utilities for working with Amazon Bedrock from Python notebooks""" +# Python Built-Ins: +import os +from typing import Optional + +# External Dependencies: +import boto3 +from botocore.config import Config + + +def get_bedrock_client( + assumed_role: Optional[str] = None, + region: Optional[str] = None, + runtime: Optional[bool] = True, +): + """Create a boto3 client for Amazon Bedrock, with optional configuration overrides + + Parameters + ---------- + assumed_role : + Optional ARN of an AWS IAM role to assume for calling the Bedrock service. If not + specified, the current active credentials will be used. + region : + Optional name of the AWS Region in which the service should be called (e.g. "us-east-1"). + If not specified, AWS_REGION or AWS_DEFAULT_REGION environment variable will be used. + runtime : + Optional choice of getting different client to perform operations with the Amazon Bedrock service. + """ + if region is None: + target_region = os.environ.get( + "AWS_REGION", os.environ.get("AWS_DEFAULT_REGION") + ) + else: + target_region = region + + print(f"Create new client\n Using region: {target_region}") + session_kwargs = {"region_name": target_region} + client_kwargs = {**session_kwargs} + + profile_name = os.environ.get("AWS_PROFILE") + if profile_name: + print(f" Using profile: {profile_name}") + session_kwargs["profile_name"] = profile_name + + retry_config = Config( + region_name=target_region, + retries={ + "max_attempts": 10, + "mode": "standard", + }, + ) + session = boto3.Session(**session_kwargs) + + if assumed_role: + print(f" Using role: {assumed_role}", end="") + sts = session.client("sts") + response = sts.assume_role( + RoleArn=str(assumed_role), RoleSessionName="langchain-llm-1" + ) + print(" ... successful!") + client_kwargs["aws_access_key_id"] = response["Credentials"]["AccessKeyId"] + client_kwargs["aws_secret_access_key"] = response["Credentials"][ + "SecretAccessKey" + ] + client_kwargs["aws_session_token"] = response["Credentials"]["SessionToken"] + + if runtime: + service_name = "bedrock-runtime" + else: + service_name = "bedrock" + + bedrock_client = session.client( + service_name=service_name, config=retry_config, **client_kwargs + ) + + print("boto3 Bedrock client successfully created!") + print(bedrock_client._endpoint) + return bedrock_client diff --git a/apps/openchallenges/notebook/src/utils/print_ww.py b/apps/openchallenges/notebook/src/utils/print_ww.py new file mode 100644 index 0000000000..b03ad2c10a --- /dev/null +++ b/apps/openchallenges/notebook/src/utils/print_ww.py @@ -0,0 +1,21 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 +"""General helper utilities the workshop notebooks""" +# Python Built-Ins: +from io import StringIO +import sys +import textwrap + + +def print_ww(*args, width: int = 100, **kwargs): + """Like print(), but wraps output to `width` characters (default 100)""" + buffer = StringIO() + try: + _stdout = sys.stdout + sys.stdout = buffer + print(*args, **kwargs) + output = buffer.getvalue() + finally: + sys.stdout = _stdout + for line in output.splitlines(): + print("\n".join(textwrap.wrap(line, width=width))) From 783f02477b3c3f95205d40677da90be88cfe42d7 Mon Sep 17 00:00:00 2001 From: Thomas Schaffter Date: Wed, 1 Nov 2023 04:00:46 +0000 Subject: [PATCH 7/8] Add headlines output --- .../notebook/challenge_headlines.json | 3247 ++++++++++++++++- .../challenge_headline_llm.py | 17 +- 2 files changed, 3240 insertions(+), 24 deletions(-) diff --git a/apps/openchallenges/notebook/challenge_headlines.json b/apps/openchallenges/notebook/challenge_headlines.json index ae0e40f347..b953035c13 100644 --- a/apps/openchallenges/notebook/challenge_headlines.json +++ b/apps/openchallenges/notebook/challenge_headlines.json @@ -1,17 +1,3227 @@ [ { - "id": 279, - "slug": "niddk-central-repository-data-centric-challenge", - "name": "NIDDK Central Repository Data-Centric Challenge", - "headline": "Enhancing NIDDK datasets for future Artificial Intelligence (AI) applications.", + "id": 1, + "slug": "network-topology-and-parameter-inference", + "name": "Network Topology and Parameter Inference", + "headline": "", + "headline_alternatives": [ + "1. Optimize methods to estimate biology model parameters", + "2. Develop optimization for accurate biology model predictions ", + "3. Apply optimization to estimate parameters in Systems Biology", + "4. Select experiments to optimize Systems Biology model accuracy", + "5. Perturbation prediction through optimization of Systems Biology models" + ] + }, + { + "id": 2, + "slug": "breast-cancer-prognosis", + "name": "Breast Cancer Prognosis", + "headline": "", + "headline_alternatives": [ + "1. Predict breast cancer survival from clinical and genomic data", + "2. Assess models for breast cancer prognosis using clinical and molecular data ", + "3. Evaluate computational models for breast cancer survival prediction", + "4. Benchmark algorithms to predict breast cancer prognosis ", + "5. Assess accuracy of models predicting breast cancer patient survival" + ] + }, + { + "id": 3, + "slug": "phil-bowen-als-prediction-prize4life", + "name": "Phil Bowen ALS Prediction Prize4Life", + "headline": "", + "headline_alternatives": [ + "1. Seeking treatment to halt ALS's fatal loss of motor function ", + "2. Stopping ALS from rapidly killing nerve cells controlling muscles", + "3. Finding a cure for ALS, which currently has no treatment options", + "4. Developing new ways to prolong life for ALS patients ", + "5. Discovering risk factors and treatment for ALS, a fatal disease" + ] + }, + { + "id": 4, + "slug": "drug-sensitivity-and-drug-synergy-prediction", + "name": "Drug Sensitivity and Drug Synergy Prediction", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 5, + "slug": "niehs-ncats-unc-toxicogenetics", + "name": "NIEHS-NCATS-UNC Toxicogenetics", + "headline": "", + "headline_alternatives": [ + "1. Predicting cytotoxicity from genomic and chemical data", + "2. Modeling cytotoxicity responses to chemicals ", + "3. Forecasting cytotoxic effects of compounds", + "4. Estimating cytotoxicity in cell lines via models", + "5. Cytotoxicity challenge: predict toxicity from data" + ] + }, + { + "id": 6, + "slug": "whole-cell-parameter-estimation", + "name": "Whole-Cell Parameter Estimation", + "headline": "", + "headline_alternatives": [ + "1. Seeking innovative parameter estimation methods for large models", + "2. Comparing optimization approaches for parameterizing complex simulations ", + "3. Collaborate to find best methods for estimating large model parameters", + "4. Developing new techniques to select informative experiments ", + "5. Form teams to find optimal approaches to parameterize big models" + ] + }, + { + "id": 7, + "slug": "hpn-dream-breast-cancer-network-inference", + "name": "HPN-DREAM Breast Cancer Network Inference", + "headline": "", + "headline_alternatives": [ + "1. Inferring causal signaling networks in breast cancer", + "2. Advancing network inference in breast cancer cells ", + "3. Predicting phospho-dynamics from cancer cell line data", + "4. Breast cancer challenge - infer networks from perturbations", + "5. Using perturbations to infer breast cancer networks" + ] + }, + { + "id": 8, + "slug": "rheumatoid-arthritis-responder", + "name": "Rheumatoid Arthritis Responder", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 9, + "slug": "icgc-tcga-dream-mutation-calling", + "name": "ICGC-TCGA DREAM Mutation Calling", + "headline": "", + "headline_alternatives": [ + "1. Crowdsourcing Challenge Seeks to Improve Cancer Mutation Detection", + "2. Open Challenge Aims to Advance Cancer Genomics Analysis Methods ", + "3. International Effort to Boost Cancer Mutation Identification in Genomes", + "4. Challenge Pursues Innovation in Detecting Cancer Mutations in DNA", + "5. Scientists Initiate Contest to Improve Identification of Cancer Mutations" + ] + }, + { + "id": 10, + "slug": "acute-myeloid-leukemia-outcome-prediction", + "name": "Acute Myeloid Leukemia Outcome Prediction", + "headline": "", + "headline_alternatives": [ + "1. Uncover drivers of AML using clinical and proteomic data", + "2. Predict AML outcomes with clinical and proteomic datasets ", + "3. Tailor AML therapies using challenge insights on drivers", + "4. Accelerate leukemia drug development with challenge insights", + "5. Interpret rich AML dataset to uncover disease drivers" + ] + }, + { + "id": 11, + "slug": "broad-dream-gene-essentiality-prediction", + "name": "Broad-DREAM Gene Essentiality Prediction", + "headline": "", + "headline_alternatives": [ + "1. Crowdsourcing Models to Predict Cancer Cell Gene Dependencies", + "2. Competition to Develop Models Predicting Essential Cancer Genes ", + "3. Contest to Find Biomarkers Predicting Key Cancer Genes", + "4. Crowdsourced Models to Infer Cancer Cell Gene Importance ", + "5. Competition to Develop Cancer Gene Dependency Predictors" + ] + }, + { + "id": 12, + "slug": "alzheimers-disease-big-data", + "name": "Alzheimer's Disease Big Data", + "headline": "", + "headline_alternatives": [ + "1. Seeking Accurate Predictive Biomarkers for Alzheimer's Diagnosis", + "2. Leveraging Data to Improve Alzheimer's Disease Diagnosis and Treatment ", + "3. Applying Open Science to Identify Alzheimer's Biomarkers", + "4. First in Series to Use Big Data for Alzheimer's Biomarker Discovery", + "5. Alzheimer's Data Challenge Seeks Improved Diagnostic Biomarkers" + ] + }, + { + "id": 13, + "slug": "olfaction-prediction", + "name": "Olfaction Prediction", + "headline": "", + "headline_alternatives": [ + "1. Predicting smell from molecule features", + "2. Linking molecules to odor perception", + "3. Accelerating fragrance design through smell prediction ", + "4. Modeling how molecules become smell sensations", + "5. Connecting chemical features to odor predictions" + ] + }, + { + "id": 14, + "slug": "prostate-cancer", + "name": "Prostate Cancer", + "headline": "", + "headline_alternatives": [ + "1. Predict survival of docetaxel treatment in mCRPC patients", + "2. Establish benchmarks for mCRPC prognosis modeling ", + "3. Improve predictions for docetaxel toxicity and survival ", + "4. Enhance understanding of mCRPC progression via modeling", + "5. Benchmark prognostic models for mCRPC with docetaxel" + ] + }, + { + "id": 15, + "slug": "als-stratification-prize4life", + "name": "ALS Stratification Prize4Life", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 16, + "slug": "astrazeneca-sanger-drug-combination-prediction", + "name": "AstraZeneca-Sanger Drug Combination Prediction", + "headline": "", + "headline_alternatives": [ + "1. Predict effective drug combinations using genomic data", + "2. Explore traits underlying synergistic drug combinations ", + "3. Accelerate understanding of drug synergy with genomic data", + "4. Model synergistic drug behavior using pre-treatment data", + "5. Predict drug combination efficacy from genomic profiles" + ] + }, + { + "id": 17, + "slug": "smc-dna-meta", + "name": "SMC-DNA Meta", + "headline": "", + "headline_alternatives": [ + "1. Seeking Most Accurate Somatic Mutation Detection Pipeline", + "2. Establishing State-of-the-Art for Cancer Mutation Detection ", + "3. Identifying Best Meta-Pipeline for Detecting Somatic Mutations", + "4. Challenge to Find Top Cancer Mutation Detection Algorithm", + "5. Competition to Determine Ideal Somatic Mutation Calling Process" + ] + }, + { + "id": 18, + "slug": "smc-het", + "name": "SMC-Het", + "headline": "", + "headline_alternatives": [ + "1. Crowdsourcing Challenge to Improve Tumor Subclonal Reconstruction", + "2. Open Challenge to Advance Tumor Heterogeneity Quantification ", + "3. International Effort to Progress Tumor Subclonal Profiling", + "4. Crowdsourcing Tumor Heterogeneity and Subclonal Genotyping", + "5. Advancing Subclonal Reconstruction of Tumor Heterogeneity" + ] + }, + { + "id": 19, + "slug": "respiratory-viral", + "name": "Respiratory Viral", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 20, + "slug": "disease-module-identification", + "name": "Disease Module Identification", + "headline": "", + "headline_alternatives": [ + "1. Crowdsourcing challenge to find disease modules in genomic networks", + "2. Open effort to assess module ID methods on disease networks ", + "3. Discover novel modules in genomic networks related to disease", + "4. Leverage crowd wisdom to identify disease modules in networks", + "5. Assess module identification methods on genomic networks for disease" + ] + }, + { + "id": 21, + "slug": "encode", + "name": "ENCODE", + "headline": "", + "headline_alternatives": [ + "1. Predict transcription factor binding sites from limited data", + "2. Computationally expand knowledge of transcription factor binding", + "3. Improve prediction of in vivo transcription factor binding sites ", + "4. Model transcription factor binding across cell types and conditions", + "5. Complement experimental binding data with computational prediction" + ] + }, + { + "id": 22, + "slug": "idea", + "name": "Idea", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 23, + "slug": "smc-rna", + "name": "SMC-RNA", + "headline": "", + "headline_alternatives": [ + "1. Crowdsourcing Challenge Seeks to Improve Cancer Mutation Detection from RNA Data", + "2. Open Challenge Aims to Advance Identification of Cancer Mutations with RNA Sequencing ", + "3. International Effort to Boost Detection of Cancer Mutations from RNA Sequencing Data", + "4. Dream Challenge Focuses on Improving Identification of Cancer Mutations in RNA Sequencing", + "5. ICGC and TCGA Launch Crowdsourcing Effort to Improve RNA Methods for Finding Cancer Mutations" + ] + }, + { + "id": 24, + "slug": "digital-mammography", + "name": "Digital Mammography", + "headline": "", + "headline_alternatives": [ + "1. Improve mammography prediction to detect breast cancer early", + "2. Enhance tools for decreasing recall rate in mammography screening ", + "3. Establish new methods to shift screening towards more benefit, less harm", + "4. Develop models using mammography images to predict breast cancer", + "5. Create tools to help reduce unnecessary mammography recalls" + ] + }, + { + "id": 25, + "slug": "multiple-myeloma", + "name": "Multiple Myeloma", + "headline": "", + "headline_alternatives": [ + "1. Develop precise risk model for myeloma patients", + "2. Improve patient stratification for myeloma treatment ", + "3. Benchmark analytical methods to optimize myeloma care", + "4. Integrate data to tackle myeloma risk stratification ", + "5. Seek new therapies for high-risk myeloma patients" + ] + }, + { + "id": 26, + "slug": "ga4gh-dream-workflow-execution", + "name": "GA4GH-DREAM Workflow Execution", + "headline": "", + "headline_alternatives": [ + "1. Develop technologies to enable distributed genomic data analysis", + "2. Create modular pipelines for reproducible genomic data analysis ", + "3. Build tools to run genomic analyses across distributed datasets", + "4. Design APIs and workflows to find and access genomic resources ", + "5. Leverage containers and pipelines for portable genomic data analysis" + ] + }, + { + "id": 27, + "slug": "parkinsons-disease-digital-biomarker", + "name": "Parkinson's Disease Digital Biomarker", + "headline": "", + "headline_alternatives": [ + "1. Benchmarking methods to develop Parkinson's digital signatures from sensor data", + "2. Extracting predictive features from sensor data for Parkinson's digital biomarkers ", + "3. Developing Parkinson's digital biomarkers from raw sensor time series data", + "4. Predicting Parkinson's pathology from sensor data features in DREAM challenge", + "5. First challenge to extract Parkinson's digital biomarkers from raw sensor data" + ] + }, + { + "id": 28, + "slug": "nci-cptac-proteogenomics", + "name": "NCI-CPTAC Proteogenomics", + "headline": "", + "headline_alternatives": [ + "1. Develop tools to extract insights from cancer proteomics data ", + "2. Create computational methods to analyze tumor proteomes", + "3. Build models linking genome to proteome in cancer", + "4. Advance proteomics to revolutionize cancer research", + "5. Create powerful computational tools for cancer proteomics" + ] + }, + { + "id": 29, + "slug": "multi-targeting-drug", + "name": "Multi-Targeting Drug", + "headline": "", + "headline_alternatives": [ + "1. Seeking Generalizable Methods to Predict Multi-Target Compound Binding", + "2. Develop Techniques to Forecast Binding of Compounds to Multiple Targets ", + "3. Challenge: Predict Compound Binding Across Various Targets and Anti-Targets", + "4. Wanted: Approaches to Anticipate Compound Affinity for Multiple Proteins ", + "5. Can You Devise Ways to Foresee What Compounds Will Bind to Many Targets?" + ] + }, + { + "id": 30, + "slug": "single-cell-transcriptomics", + "name": "Single Cell Transcriptomics", + "headline": "", + "headline_alternatives": [ + "1. Reconstructing Cell Locations in Drosophila Embryo from Transcripts", + "2. Mapping Single Cells in Fly Embryo Using Transcriptomics ", + "3. Locating Cells in Drosophila Embryo Via Single-Cell RNA Data", + "4. Transcriptomics to Map Single Cells in Fruit Fly Embryo ", + "5. Using Transcripts to Pinpoint Cells in Developing Fly Embryo" + ] + }, + { + "id": 31, + "slug": "idg-drug-kinase-binding", + "name": "IDG Drug-Kinase Binding", + "headline": "", + "headline_alternatives": [ + "1. Challenge seeks machine learning for drug-kinase binding prediction", + "2. Evaluating models to predict compound-kinase interactions ", + "3. Mapping kinase inhibitors to targets with machine learning", + "4. Prioritizing potent kinase inhibitor interactions via modeling", + "5. Predicting kinase binding to focus experimental drug discovery" + ] + }, + { + "id": 32, + "slug": "malaria", + "name": "Malaria", + "headline": "", + "headline_alternatives": [ + "1. Predict malaria drug resistance from parasite gene expression", + "2. Model malaria drug resistance using parasite transcription data ", + "3. Forecast Artemisinin resistance in malaria with transcriptomes", + "4. Estimate malaria drug resistance from parasite transcripts", + "5. Predict Artemisinin resistance in malaria parasites computationally" + ] + }, + { + "id": 33, + "slug": "preterm-birth-prediction-transcriptomics", + "name": "Preterm Birth Prediction - Transcriptomics", + "headline": "", + "headline_alternatives": [ + "1. Developing Accurate, Inexpensive Molecular Clock to Determine Gestational Age", + "2. Creating Prediction Models for Gestational Age Using Pregnant Women's Blood", + "3. Identifying and Treating Women at Risk of Preterm Birth and Other Conditions ", + "4. Developing New Ways to Establish Gestational Age to Improve Pregnancy Care", + "5. Using Gene Expression to Build Models Predicting Gestational Age from Blood Samples" + ] + }, + { + "id": 34, + "slug": "single-cell-signaling-in-breast-cancer", + "name": "Single-Cell Signaling in Breast Cancer", + "headline": "", + "headline_alternatives": [ + "1. Exploring heterogeneous signaling in single cancer cells", + "2. Studying variation in breast cancer cell response ", + "3. Mapping diverse signaling in breast cancer lines", + "4. Probing single cell heterogeneity in signaling", + "5. Analyzing large breast cancer signaling dataset" + ] + }, + { + "id": 35, + "slug": "ehr-dream-challenge-patient-mortality-prediction", + "name": "EHR DREAM Challenge - Patient Mortality Prediction", + "headline": "", + "headline_alternatives": [ + "1. New tools to reconstruct cell lineages from CRISPR mutations ", + "2. Assessing algorithms for reconstructing cell lineages from molecular data", + "3. DREAM challenge to accurately reconstruct cell lineages ", + "4. Evaluating lineage reconstruction with diverse tools and datasets", + "5. Machine learning for accurate cell lineage reconstruction" + ] + }, + { + "id": 36, + "slug": "allen-institute-cell-lineage-reconstruction", + "name": "Allen Institute Cell Lineage Reconstruction", + "headline": "", + "headline_alternatives": [ + "1. New tools enable reconstructing complex cell lineages at single-cell resolution", + "2. Assessing algorithms for reconstructing cell lineages from CRISPR mutations ", + "3. DREAM challenge tests reconstructing cell lineages across tools and datasets", + "4. Can machine learning accurately reconstruct diverse cell lineage trees?", + "5. Allen Institute and DREAM partner to benchmark cell lineage reconstruction" + ] + }, + { + "id": 37, + "slug": "tumor-deconvolution", + "name": "Tumor Deconvolution", + "headline": "", + "headline_alternatives": [ + "1. Assess computational methods to deconvolve bulk tumor data into immune components ", + "2. Evaluate computational deconvolution of bulk tumor data into individual immune components", + "3. Test ability of computational methods to deconvolve bulk tumors into immune subpopulations", + "4. Assess deconvolution methods for recovering immune infiltration from bulk tumor data ", + "5. Evaluate computational decomposition of bulk tumors to quantify immune infiltration" + ] + }, + { + "id": 38, + "slug": "ctd2-pancancer-drug-activity", + "name": "CTD2 Pancancer Drug Activity", + "headline": "", + "headline_alternatives": [ + "1. Benchmark algorithms predicting drug targets from gene data", + "2. Develop algorithms to identify drug targets from gene expression", + "3. Predict chemotherapeutic targets using transcriptional profiling ", + "4. Elucidate drug mechanisms of action from gene expression changes", + "5. Identify drug targets across cancers using transcriptomic profiles" + ] + }, + { + "id": 39, + "slug": "ctd2-beataml", + "name": "CTD2 BeatAML", + "headline": "", + "headline_alternatives": [ + "1. Seeking New Drug Targets for Precision AML Treatment", + "2. Discovering Biomarkers to Predict AML Therapy Response ", + "3. Developing a Discovery Cohort to Yield AML Treatment Insights", + "4. Identifying Tailored AML Therapies for Refined Patient Groups", + "5. Studying AML Molecular Alterations and Drug Sensitivity" + ] + }, + { + "id": 40, + "slug": "metadata-automation", + "name": "Metadata Automation", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 41, + "slug": "automated-scoring-of-radiographic-joint-damage", + "name": "Automated Scoring of Radiographic Joint Damage", + "headline": "", + "headline_alternatives": [ + "1. Develop automated method to quantify rheumatoid arthritis joint damage", + "2. Create algorithm to automatically score rheumatoid arthritis radiographs ", + "3. Automate scoring of joint space narrowing in rheumatoid arthritis", + "4. Replace manual scoring with automated rheumatoid arthritis image analysis", + "5. Rapidly quantify rheumatoid arthritis joint damage from radiographs" + ] + }, + { + "id": 42, + "slug": "beat-pd", + "name": "BEAT-PD", + "headline": "", + "headline_alternatives": [ + "1. Develop mobile sensors to remotely monitor Parkinson's disease", + "2. Leverage smartphones and wearables to track Parkinson's symptoms ", + "3. Create digital biomarkers from sensor data for Parkinson's", + "4. Use mobile health to monitor Parkinson's disease progression", + "5. Standardize Parkinson's disease monitoring with mobile sensors" + ] + }, + { + "id": 43, + "slug": "ctd2-pancancer-chemosensitivity", + "name": "CTD2 Pancancer Chemosensitivity", + "headline": "", + "headline_alternatives": [ + "1. Predict drug sensitivity from cell line gene expression", + "2. Benchmark algorithms to predict drug response ", + "3. Develop methods to predict drug sensitivity", + "4. Predict drug sensitivity from RNAseq profiles", + "5. Elucidate drug mechanisms using transcriptional profiles" + ] + }, + { + "id": 44, + "slug": "ehr-dream-challenge-covid-19", + "name": "EHR DREAM Challenge-COVID-19", + "headline": "", + "headline_alternatives": [ + "1. Develop tools to predict COVID-19 risk without sharing data ", + "2. Rapidly discover approaches to characterize COVID-19 using analytics", + "3. Understand risk factors for COVID-19 positive tests from EHRs", + "4. Incorporate machine learning into clinical care to improve COVID-19 outcomes", + "5. Utilize analytics on clinical data to develop early warning for COVID-19" + ] + }, + { + "id": 45, + "slug": "anti-pd1-response-prediction", + "name": "Anti-PD1 Response Prediction", + "headline": "", + "headline_alternatives": [ + "1. Predicting lung cancer response to immuno-oncology therapy", + "2. Modeling outcomes of anti-PD-1 therapy in lung cancer ", + "3. Improving predictions of I-O benefit in lung cancer patients", + "4. Leveraging data to predict I-O response in lung cancer", + "5. Gaining insights into improving I-O therapy for lung cancer" + ] + }, + { + "id": 46, + "slug": "brats-2021-challenge", + "name": "BraTS 2021 Challenge", + "headline": "", + "headline_alternatives": [ + "1. Developing ML methods to analyze brain tumor MRI scans", + "2. Assessing ML techniques for segmenting glioblastoma in MRI images ", + "3. Evaluating approaches for classifying diffuse gliomas in mpMRI data", + "4. Benchmarking algorithms that detect brain tumors in MRI scans", + "5. Advancing image analysis methods for glioblastoma segmentation" + ] + }, + { + "id": 47, + "slug": "cancer-data-registry-nlp", + "name": "Cancer Data Registry NLP", + "headline": "", + "headline_alternatives": [ + "1. Unlocking Clinical Trial Data Hidden in Medical Records", + "2. Natural Language Processing to Improve Clinical Trial Matching ", + "3. Developing NLP to Extract Patient Data from Medical Records", + "4. Evaluating Algorithms to Match Patients to Clinical Trials", + "5. Accessing EHR Text Data to Advance Translational Research" + ] + }, + { + "id": 48, + "slug": "barda-community-challenge-pediatric-covid-19-data-challenge", + "name": "BARDA Community Challenge - Pediatric COVID-19 Data Challenge", + "headline": "", + "headline_alternatives": [ + "1. Models to predict severe COVID-19 in children sought", + "2. Data challenge seeks pediatric COVID-19 risk models ", + "3. Competition seeks models predicting COVID-19 severity in kids", + "4. Challenge seeks tools to identify high-risk pediatric COVID-19 ", + "5. Data competition aims to predict severe pediatric COVID-19" + ] + }, + { + "id": 49, + "slug": "brats-continuous-evaluation", + "name": "BraTS Continuous Evaluation", + "headline": "", + "headline_alternatives": [ + "1. Seeking Innovations To Improve Brain Tumor Diagnosis And Treatment", + "2. Developing New Therapies To Combat Deadly Brain Cancers ", + "3. Overcoming Barriers To Treating Heterogeneous, Treatment-Resistant Brain Tumors", + "4. Improving Brain Tumor Survival Rates And Access To Care Globally", + "5. Advancing Research To Increase Brain Tumor Survival Beyond 15 Months" + ] + }, + { + "id": 50, + "slug": "fets-2022", + "name": "FeTS 2022", + "headline": "", + "headline_alternatives": [ + "1. Benchmarking methods for federated learning in brain tumor segmentation ", + "2. Evaluating weight aggregation and generalizability in federated learning", + "3. Federated learning methods for multi-institutional brain tumor data", + "4. Real-world federated learning for brain tumor segmentation ", + "5. Testing federated training and evaluation for clinical brain scans" + ] + }, + { + "id": 51, + "slug": "random-promotor", + "name": "Random Promotor", + "headline": "", + "headline_alternatives": [ + "1. Decoding Gene Regulation to Understand Disease", + "2. Modeling Complex Gene Expression Regulation ", + "3. Learning Cis-Regulatory Logic of Human Genome", + "4. Understanding Origins of Disease Through Gene Regulation", + "5. Overcoming Limitations to Learn Gene Regulation Models" + ] + }, + { + "id": 52, + "slug": "preterm-birth-prediction-microbiome", + "name": "Preterm Birth Prediction - Microbiome", + "headline": "", + "headline_alternatives": [ + "1. Predict preterm births to reduce infant mortality", + "2. Identify women at risk of preterm delivery ", + "3. Prevent preterm births and improve infant health", + "4. Reduce preterm births and long-term complications ", + "5. Forecast preterm births to enable timely treatment" + ] + }, + { + "id": 53, + "slug": "finrisk", + "name": "FINRISK - Heart Failure and Microbiome", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 54, + "slug": "scrna-seq-and-scatac-seq-data-analysis", + "name": "scRNA-seq and scATAC-seq Data Analysis", + "headline": "", + "headline_alternatives": [ + "1. Assess computational methods for scRNA-seq and scATAC-seq analysis", + "2. Evaluate signal correction and peak identification for single cell sequencing ", + "3. Benchmark methods for sparse data analysis in single cell assays", + "4. Improve quantification and cell typing via better scRNA-seq analysis ", + "5. Develop accurate computational methods for sparse single cell data" + ] + }, + { + "id": 55, + "slug": "cough-diagnostic-algorithm-for-tuberculosis", + "name": "COugh Diagnostic Algorithm for Tuberculosis", + "headline": "", + "headline_alternatives": [ + "1. Develop low-cost cough screening tools to improve TB diagnosis", + "2. Create non-invasive digital cough tests for TB detection ", + "3. Improve TB diagnosis with new cough sound biomarkers ", + "4. Design scalable cough-based TB screening to boost detection", + "5. Build accessible digital cough diagnostics to find missing TB cases" + ] + }, + { + "id": 56, + "slug": "nih-long-covid-computational-challenge", + "name": "NIH Long COVID Computational Challenge", + "headline": "", + "headline_alternatives": [ + "1. Understanding Prevalence and Outcomes of Post-COVID Syndrome", + "2. Analyzing Longitudinal Data to Uncover Post-COVID Sequelae ", + "3. Using Advanced Analytics to Study Post-Acute COVID-19 Symptoms", + "4. Investigating Breadth of Post-COVID Conditions and Outcomes", + "5. Applying Innovative Methods to Assess Post-Acute COVID Sequelae" + ] + }, + { + "id": 57, + "slug": "bridge2ai", + "name": "Bridge2AI", + "headline": "What makes a good color palette?", + "headline_alternatives": [ + "1. Creating an appealing, cohesive color palette ", + "2. Designing color palettes for visual harmony", + "3. Choosing colors that work well together ", + "4. Developing color schemes with visual impact", + "5. Selecting colors for aesthetically pleasing palettes" + ] + }, + { + "id": 58, + "slug": "rare-x-open-data-science", + "name": "RARE-X Open Data Science", + "headline": "", + "headline_alternatives": [ + "1. Unlocking rare disease mysteries through open science collaboration", + "2. Researchers compete to analyze rare disease patient data ", + "3. Data challenge taps researchers to solve pediatric neurodevelopmental unknowns", + "4. Collaboration key to unraveling rare pediatric disease mysteries", + "5. Open science data challenge targets rare childhood brain conditions" + ] + }, + { + "id": 59, + "slug": "cagi5-regulation-saturation", + "name": "CAGI5: Regulation saturation", + "headline": "", + "headline_alternatives": [ + "1. Predicting effects of variants in disease-linked enhancers and promoters", + "2. Assessing variants in regulatory regions of disease genes via reporters ", + "3. Massively parallel reporter assays test variants in enhancers and promoters", + "4. Variant effects on expression: saturated mutagenesis of 14 regulatory regions", + "5. Can we predict regulatory variant effects from saturated mutagenesis data?" + ] + }, + { + "id": 60, + "slug": "cagi5-calm1", + "name": "CAGI5: CALM1", + "headline": "", + "headline_alternatives": [ + "1. Predicting effects of calmodulin variants on yeast growth", + "2. Assessing calmodulin variant impacts on yeast complementation", + "3. Evaluating calmodulin mutations using yeast assay ", + "4. Can yeast growth predict calmodulin variant function?", + "5. High-throughput yeast assay to test calmodulin variants" + ] + }, + { + "id": 61, + "slug": "cagi5-pcm1", + "name": "CAGI5: PCM1", + "headline": "", + "headline_alternatives": [ + "1. Assessing PCM1 variants' impact on zebrafish ventricle", + "2. Do PCM1 mutations affect zebrafish brain ventricles? ", + "3. Testing if PCM1 variants change zebrafish ventricle size", + "4. Schizophrenia PCM1 variants' effects on zebrafish brain", + "5. Zebrafish model tests PCM1 variants' ventricular effects" + ] + }, + { + "id": 62, + "slug": "cagi5-frataxin", + "name": "CAGI5: Frataxin", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 63, + "slug": "cagi5-tpmt", + "name": "CAGI5: TPMT and p10", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 64, + "slug": "cagi5-annotate-all-missense", + "name": "CAGI5: Annotate all nonsynonymous variants", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 65, + "slug": "cagi5-gaa", + "name": "CAGI5: GAA", + "headline": "", + "headline_alternatives": [ + "1. Predict enzyme activity of GAA mutants in Pompe disease", + "2. Assess fractional activity of GAA variants compared to wild-type ", + "3. Model impact of GAA mutations on enzyme function in Pompe", + "4. Estimate relative activity levels for GAA variants found in humans", + "5. Predict effects of GAA missense mutations on enzymatic activity" + ] + }, + { + "id": 66, + "slug": "cagi5-chek2", + "name": "CAGI5: CHEK2", + "headline": "", + "headline_alternatives": [ + "1. Estimate CHEK2 gene variant probabilities in Latino breast cancer cases", + "2. Assess CHEK2 variants in Latina breast cancer cases versus controls ", + "3. Analyze CHEK2 gene variants in Latina breast cancer cohort", + "4. Determine CHEK2 variant probabilities in Latinas with breast cancer", + "5. Calculate likelihood of CHEK2 variants in Latina breast cancer cases" + ] + }, + { + "id": 67, + "slug": "cagi5-enigma", + "name": "CAGI5: ENIGMA", + "headline": "", + "headline_alternatives": [ + "1. Predicting cancer risk from BRCA1/2 gene variants", + "2. Assessing breast cancer risk from BRCA mutations ", + "3. Evaluating BRCA1/2 variants for breast cancer risk", + "4. Identifying high-risk BRCA mutations for breast cancer", + "5. Predicting breast cancer risk from BRCA1/2 mutations" + ] + }, + { + "id": 68, + "slug": "cagi5-mapsy", + "name": "CAGI5: MaPSy", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 69, + "slug": "cagi5-vex-seq", + "name": "CAGI5: Vex-seq", + "headline": "", + "headline_alternatives": [ + "1. Predict splicing changes from variants in globin gene", + "2. Assess variant effects on splicing of globin construct ", + "3. Quantify splicing changes from globin variants in cells", + "4. Estimate globin splicing alterations from DNA variants", + "5. Model globin splicing differences caused by mutations" + ] + }, + { + "id": 70, + "slug": "cagi5-sickkids5", + "name": "CAGI5: SickKids clinical genomes", + "headline": "", + "headline_alternatives": [ + "1. Predict genetic disorders from 30 child genomes and phenotypes. ", + "2. Match 30 child genomes to clinical descriptions to identify disorders.", + "3. Identify disease classes and variants in 30 child genomes and phenotypes. ", + "4. Predict disorders and high-risk variants from 30 child genomes.", + "5. Link 30 child genomes to phenotypes to diagnose genetic diseases." + ] + }, + { + "id": 71, + "slug": "cagi5-intellectual-disability", + "name": "CAGI5: ID Panel", + "headline": "", + "headline_alternatives": [ + "1. Predict phenotypes and variants from gene panel sequences", + "2. Identify variants causing intellectual disability from sequences ", + "3. Predict intellectual disability phenotypes from gene panel data", + "4. Determine phenotypes and causal variants from panel sequences", + "5. Analyze gene sequences to predict neurodevelopmental phenotypes" + ] + }, + { + "id": 72, + "slug": "cagi5-clotting-disease", + "name": "CAGI5: Clotting disease exomes", + "headline": "", + "headline_alternatives": [ + "1. Predicting venous thromboembolism risk in African Americans", + "2. Distinguishing VTE from atrial fibrillation in African Americans ", + "3. Identifying genetic VTE risk factors in African Americans", + "4. Developing tools to anticipate VTE in African Americans", + "5. Using exome data to understand VTE in African Americans" + ] + }, + { + "id": 73, + "slug": "cagi6-sickkids", + "name": "CAGI6: SickKids clinical genomes and transcriptomes", + "headline": "The SickKids Genome Clinic is providing clinical phenotypic information in t...", + "headline_alternatives": [ + "1. Identify genes causing rare diseases using transcriptomics", + "2. Solve undiagnosed genetic disorders with transcriptomics ", + "3. Use transcriptomics to diagnose sick children's diseases", + "4. Transcriptome analysis to identify genetic mechanisms in kids ", + "5. Rare disease diagnosis through transcriptome sequencing" + ] + }, + { + "id": 74, + "slug": "cagi6-cam", + "name": "CAGI6: CaM", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 75, + "slug": "cami-ii", + "name": "CAMI II", + "headline": "", + "headline_alternatives": [ + "1. Assembling and Classifying Microbial Genomes in Complex Samples", + "2. Detecting Pathogens and Profiling Microbial Communities from Metagenomes ", + "3. Binning and Profiling Microbial Taxa Across Diverse Environmental Datasets", + "4. Challenges in Metagenomic Assembly, Binning and Clinical Pathogen Detection", + "5. Assembling, Binning, Profiling Microbial Genomes from Multiple Environments" + ] + }, + { + "id": 76, + "slug": "camda18-metasub-forensics", + "name": "CAMDA18-MetaSUB Forensics", + "headline": "", + "headline_alternatives": [ + "1. Building a metagenomic map of mass-transit systems globally", + "2. Creating a longitudinal map of microbes in mass-transit systems ", + "3. Analyzing microbes in mass-transit systems across multiple cities", + "4. Multi-city analysis of microbes on global mass-transit systems", + "5. First ever global analysis of mass-transit metagenomics across cities" + ] + }, + { + "id": 77, + "slug": "camda18-cmap-drug-safety", + "name": "CAMDA18-CMap Drug Safety", + "headline": "", + "headline_alternatives": [ + "1. Predicting drug toxicity using cell-based gene expression data", + "2. Mitigating drug risk via cell-based genomic profiling ", + "3. Evaluating cell screens to predict drug-induced liver injury", + "4. Understanding toxicity from cell-based genomic responses", + "5. Exploiting cell data to predict human liver drug reactions" + ] + }, + { + "id": 78, + "slug": "camda18-cancer-data-integration", + "name": "CAMDA18-Cancer Data Integration", + "headline": "", + "headline_alternatives": [ + "1. Unify data integration approaches for breast cancer and neuroblastoma ", + "2. Improve data integration for breast cancer and childhood cancer", + "3. Integrate data to advance breast and pediatric cancer care ", + "4. Data integration to enhance breast and neuroblastoma diagnosis", + "5. Harness data to beat breast cancer and neuroblastoma" + ] + }, + { + "id": 79, + "slug": "cafa-4", + "name": "CAFA 4", + "headline": "", + "headline_alternatives": [ + "1. Assessing algorithms for predicting protein function", + "2. Evaluating automated methods to predict protein ontology terms ", + "3. Critical test of protein function prediction algorithms", + "4. Benchmarking protein sequence annotation methods", + "5. Comparing computational protein function predictions" + ] + }, + { + "id": 80, + "slug": "casp13", + "name": "CASP13", + "headline": "", + "headline_alternatives": [ + "1. CASP assesses protein structure prediction methods", + "2. CASP compares computational models to experimental structures ", + "3. CASP advances protein structure modeling capabilities", + "4. CASP evaluates progress in modeling protein structures", + "5. CASP measures accuracy of protein structure predictions" + ] + }, + { + "id": 81, + "slug": "casp14", + "name": "CASP14", + "headline": "", + "headline_alternatives": [ + "1. Assessing progress in protein structure prediction", + "2. Advancing methods for modeling protein structures ", + "3. Community experiment evaluates protein modeling", + "4. Blind assessment tests protein structure prediction", + "5. Modeling protein structures from sequence in CASP14" + ] + }, + { + "id": 82, + "slug": "cfsan-pathogen-detection", + "name": "CFSAN Pathogen Detection", + "headline": "", + "headline_alternatives": [ + "1. Rapidly Identify Food Sources of Outbreaks", + "2. Stop Foodborne Illness Outbreaks Faster ", + "3. Link Food Sources to Outbreaks via Genomics", + "4. Improve Food Safety with Next-Gen Sequencing", + "5. Prevent Foodborne Illness Deaths and Hospitalizations" + ] + }, + { + "id": 83, + "slug": "cdrh-biothreat", + "name": "CDRH Biothreat", + "headline": "", + "headline_alternatives": [ + "1. Identifying infectious diseases from clinical samples using sequencing technology. ", + "2. Diagnosing infections without prior knowledge via next-gen sequencing.", + "3. Revealing disease-causing microbes in patients through genomic fingerprinting. ", + "4. Improving infectious disease diagnostics with high-throughput sequencing.", + "5. Using sequencing to identify pathogens from clinical samples." + ] + }, + { + "id": 84, + "slug": "multi-omics-enabled-sample-mislabeling-correction", + "name": "Multi-omics Enabled Sample Mislabeling Correction", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 85, + "slug": "biocompute-object-app-a-thon", + "name": "BioCompute Object App-a-thon", + "headline": "", + "headline_alternatives": [ + "1. Seeking Standards for Reproducible Bioinformatics Analysis", + "2. Developing Framework for Reproducible HTS Computations ", + "3. Partnering for Community Standards in Bioinformatics", + "4. Creating Reproducible Pipelines for Genomic Analysis", + "5. Establishing Schemas for Reproducible Scientific Workflows" + ] + }, + { + "id": 86, + "slug": "brain-cancer-predictive-modeling-and-biomarker-discovery", + "name": "Brain Cancer Predictive Modeling and Biomarker Discovery", + "headline": "", + "headline_alternatives": [ + "1. Seeking novel biomarkers to advance precision medicine for brain tumors", + "2. Identifying new clinical biomarkers to improve glioma prognosis and treatment ", + "3. Advancing precision medicine for brain tumors through multi-omics biomarkers", + "4. Discovering novel biomarkers to advance precision medicine for gliomas", + "5. Developing new prognostic and predictive markers for glioma treatment" + ] + }, + { + "id": 87, + "slug": "gaining-new-insights-by-detecting-adverse-event-anomalies", + "name": "Gaining New Insights by Detecting Adverse Event Anomalies", + "headline": "", + "headline_alternatives": [ + "1. Seeking Algorithms to Detect Adverse Events in FDA Data", + "2. Developing Methods to Find Anomalies in Public FDA Data ", + "3. Algorithms Wanted for Automatic Adverse Event Detection", + "4. Help Analyze FDA Data to Find Adverse Event Anomalies", + "5. Calling All Developers: Detect Anomalies in FDA Public Data" + ] + }, + { + "id": 88, + "slug": "calling-variants-in-difficult-to-map-regions", + "name": "Calling Variants in Difficult-to-Map Regions", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 89, + "slug": "vha-innovation-ecosystem-and-covid-19-risk-factor-modeling", + "name": "VHA Innovation Ecosystem and COVID-19 Risk Factor Modeling", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 90, + "slug": "covid-19-precision-immunology-app-a-thon", + "name": "COVID-19 Precision Immunology App-a-thon", + "headline": "", + "headline_alternatives": [ + "1. Seeking insights on COVID-19 pathophysiology to enable effective strategies.", + "2. Understanding COVID-19 mechanisms to improve diagnosis, prognosis, treatment. ", + "3. Investigating COVID-19 physiology for better strategies against transmission.", + "4. Exploring COVID-19 pathophysiology to combat widespread infection, save lives.", + "5. Studying COVID-19 disease mechanisms to enable rapid data-sharing, effective strategies." + ] + }, + { + "id": 91, + "slug": "smarter-food-safety-low-cost-tech-enabled-traceability", + "name": "Smarter Food Safety Low Cost Tech-Enabled Traceability", + "headline": "", + "headline_alternatives": [ + "1. Seeking Affordable Tech Solutions for Food Traceability", + "2. Democratizing Benefits of Digitizing Food Data ", + "3. Exploring Low-Cost Options for Food Traceability Systems", + "4. Advancing Widespread Traceability in Food Industry ", + "5. Integrating Data Streams to Improve Food Traceability" + ] + }, + { + "id": 92, + "slug": "tumor-mutational-burden-tmb-challenge-phase-1", + "name": "Tumor Mutational Burden (TMB) Challenge Phase 1", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 93, + "slug": "kits21", + "name": "Kidney and Kidney Tumor Segmentation", + "headline": "", + "headline_alternatives": [ + "1. Contest Seeks Best Kidney Tumor Segmentation System ", + "2. Teams Compete to Develop Top Kidney Cancer Segmenter", + "3. Challenge Tests Algorithms for Kidney Tumor Detection", + "4. Can You Build the Best Kidney Tumor Identifier?", + "5. KiTS21: Automating Kidney Tumor Segmentation" + ] + }, + { + "id": 94, + "slug": "realnoisemri", + "name": "Real Noise MRI", + "headline": "", + "headline_alternatives": [ + "1. Developing fast MRI techniques without fully sampled data ", + "2. Creating fast MRI methods using under sampled k-space data", + "3. Improving fast MRI acquisition without full k-space sampling", + "4. Advancing fast MRI sans complete k-space information ", + "5. Speeding up MRI minus fully sampled k-space data" + ] + }, + { + "id": 95, + "slug": "deep-generative-model-challenge-for-da-in-surgery", + "name": "Deep Generative Model Challenge for DA in Surgery", + "headline": "", + "headline_alternatives": [ + "1. Challenge aims to adapt algorithms from simulation to mitral valve surgery", + "2. Challenge addresses data issues for automatic analysis of mitral repair", + "3. Challenge formulates domain adaptation from simulator to real surgery ", + "4. Challenge provides data to adapt algorithms from simulator to surgery", + "5. Challenge reduces gap between simulation and real mitral valve surgery" + ] + }, + { + "id": 96, + "slug": "aimdatathon", + "name": "AIM Datathon 2020", + "headline": "Join the AI in Medicine ( AIM ) Datathon 2020", + "headline_alternatives": [] + }, + { + "id": 97, + "slug": "opc-recurrence", + "name": "Oropharynx Cancer (OPC) Radiomics Challenge :: Local Recurrence Prediction", + "headline": "Determine from CT data whether a tumor will be controlled by definitive radi...", + "headline_alternatives": [] + }, + { + "id": 98, + "slug": "oropharynx-radiomics-hpv", + "name": "Oropharynx Cancer (OPC) Radiomics Challenge :: Human Papilloma Virus (HPV) Status Prediction", + "headline": "Predict from CT data the HPV phenotype of oropharynx tumors; compare to grou...", + "headline_alternatives": [] + }, + { + "id": 99, + "slug": "data-science-bowl-2017", + "name": "Data Science Bowl 2017", + "headline": "Can you improve lung cancer detection?", + "headline_alternatives": [] + }, + { + "id": 100, + "slug": "predict-impact-of-air-quality-on-death-rates", + "name": "Predict impact of air quality on mortality rates", + "headline": "Predict CVD and cancer caused mortality rates in England using air quality d...", + "headline_alternatives": [ + "1. Predicting England's CVD and Cancer Deaths from Air Pollution Data ", + "2. Air Quality Data to Forecast CVD and Cancer Mortality Rates", + "3. Copernicus Data Used to Predict England's CVD and Cancer Death Toll", + "4. Estimating England's CVD and Cancer Mortality Using Atmospheric Data", + "5. Air Monitoring Service Data to Estimate CVD and Cancer Deaths in England" + ] + }, + { + "id": 101, + "slug": "intel-mobileodt-cervical-cancer-screening", + "name": "Intel & MobileODT Cervical Cancer Screening", + "headline": "Which cancer treatment will be most effective?", + "headline_alternatives": [] + }, + { + "id": 102, + "slug": "msk-redefining-cancer-treatment", + "name": "Personalized Medicine-Redefining Cancer Treatment", + "headline": "Predict the effect of Genetic Variants to enable Personalized Medicine", + "headline_alternatives": [ + "1. Predicting Genetic Variant Effects for Personalized Medicine", + "2. Genetic Variants' Effects Predicted to Enable Precision Medicine ", + "3. Forecasting Genetic Variants' Impacts to Allow Tailored Medicine", + "4. Projecting Genetic Variants' Influences for Customized Medicine", + "5. Estimating Genetic Variants' Consequences to Facilitate Individualized Medicine" + ] + }, + { + "id": 103, + "slug": "mubravo", + "name": "Predicting Cancer Diagnosis", + "headline": "Bravo's machine learning competition!", + "headline_alternatives": [ + "1. Bravo Hosts Machine Learning Competition for Innovators", + "2. Bravo Challenges Innovators in Machine Learning Contest ", + "3. Bravo's Contest Challenges Machine Learning Innovations", + "4. Bravo Invites Innovative Minds to Machine Learning Contest", + "5. Bravo Seeks Innovators for Machine Learning Competition" + ] + }, + { + "id": 104, + "slug": "histopathologic-cancer-detection", + "name": "Histopathologic Cancer Detection", + "headline": "Identify metastatic tissue in histopathologic scans of lymph node sections", + "headline_alternatives": [ + "1. Seeking AI to Detect Cancer Spread in Tissue Scans ", + "2. Can AI Spot Metastasis in Lymph Node Histology?", + "3. Automated Detection of Metastasis in Tissue Images", + "4. Identifying Cancer Spread from Histopathology Scans", + "5. Metastatic Tissue Detection in Lymph Node Sections" + ] + }, + { + "id": 105, + "slug": "tjml1920-decision-trees", + "name": "TJML 2019-20 Breast Cancer Detection Competition", + "headline": "Use a decision tree to identify malignant breast cancer tumors", + "headline_alternatives": [] + }, + { + "id": 106, + "slug": "prostate-cancer-grade-assessment", + "name": "Prostate cANcer graDe Assessment (PANDA) Challenge", + "headline": "Prostate cancer diagnosis using the Gleason grading system", + "headline_alternatives": [ + "1. AI for Prostate Cancer Diagnosis via Gleason Grading", + "2. Gleason-Based AI to Diagnose Prostate Cancer ", + "3. AI System to Grade Prostate Cancer Using Gleason Score", + "4. AI to Diagnose Prostate Cancer Through Gleason Grading ", + "5. Using AI and Gleason System to Diagnose Prostate Cancer" + ] + }, + { + "id": 107, + "slug": "breast-cancer", + "name": "Breast Cancer", + "headline": "Use cell nuclei categories to predict breast cancer tumor.", + "headline_alternatives": [] + }, + { + "id": 108, + "slug": "breast-cancer-detection", + "name": "Breast Cancer Detection", + "headline": "breast cancer detection", + "headline_alternatives": [ + "1. Developing AI to detect breast cancer early", + "2. Creating automated breast cancer screening tools ", + "3. Building algorithms to identify breast tumors", + "4. Advancing technology for breast cancer diagnosis", + "5. Improving breast cancer detection through innovation" + ] + }, + { + "id": 109, + "slug": "hrpred", + "name": "Prediction of High Risk Patients", + "headline": "Classification of high and low risk cancer patients", + "headline_alternatives": [ + "1. Identifying High and Low Cancer Risk Patients", + "2. Classifying Cancer Patients by Risk Level ", + "3. Categorizing Cancer Risk in Patients", + "4. Assessing Cancer Patient Risk Levels", + "5. Determining High vs Low Risk Cancer Patients" + ] + }, + { + "id": 110, + "slug": "ml4moleng-cancer", + "name": "MIT ML4MolEng-Predicting Cancer Progression", + "headline": "MIT 3.100, 10.402, 20.301 In class ML competition (Spring 2021)", + "headline_alternatives": [ + "1. MIT students compete to build best ML model", + "2. MIT classes hold machine learning competition ", + "3. MIT engineering students vie in ML contest", + "4. MIT classes challenge students in ML building", + "5. ML model building contest held for MIT classes" + ] + }, + { + "id": 111, + "slug": "uw-madison-gi-tract-image-segmentation", + "name": "UW-Madison GI Tract Image Segmentation", + "headline": "Track healthy organs in medical scans to improve cancer treatment", + "headline_alternatives": [ + "1. Scans Track Healthy Organs to Boost Cancer Care", + "2. Tracking Organs in Scans Advances Cancer Treatment ", + "3. Organ Tracking in Scans Improves Cancer Therapies", + "4. Healthy Organ Scans to Enhance Cancer Treatment", + "5. Scanning Organs Helps Improve Cancer Treatments" + ] + }, + { + "id": 112, + "slug": "rsna-miccai-brain-tumor-radiogenomic-classification", + "name": "RSNA-MICCAI Brain Tumor Radiogenomic Classification", + "headline": "Predict the status of a genetic biomarker important for brain cancer treatment", + "headline_alternatives": [ + "1. BraTS challenge evaluates brain tumor segmentation methods", + "2. BraTS evaluates glioblastoma segmentation and MGMT classification ", + "3. BraTS challenge tests brain tumor segmentation and methylation prediction", + "4. BraTS challenge focuses on glioblastoma segmentation and classification", + "5. BraTS celebrates 10 years evaluating brain tumor analysis methods" + ] + }, + { + "id": 113, + "slug": "breastcancer", + "name": "Breast Cancer - Beginners ML", + "headline": "Beginners hands-on experience with ML basics", + "headline_alternatives": [ + "1. Gaining Hands-On ML Basics For Newcomers", + "2. Beginners Get Hands-On With Machine Learning Fundamentals ", + "3. Hands-On Intro To ML Essentials For Novices", + "4. Starter's Hands-On Primer For ML Core Principles", + "5. Newbies Tackle Hands-On Machine Learning Groundwork" + ] + }, + { + "id": 114, + "slug": "ml-olympiad-health-and-education", + "name": "ML Olympiad -Let's Fight lung cancer", + "headline": "Use your ML expertise to help us step another step toward defeating cancer [...", + "headline_alternatives": [ + "1. Join the fight, help defeat cancer through ML", + "2. ML experts unite to advance cancer research ", + "3. Calling all ML experts - let's beat cancer together", + "4. ML community rallies to find cancer breakthroughs", + "5. ML challenge takes on cancer - starts February 14th" + ] + }, + { + "id": 115, + "slug": "cs98-22-dl-task1", + "name": "CS98X-22-DL-Task1", + "headline": "This competition is related to Task 1 in coursework-breast cancer classification", + "headline_alternatives": [ + "1. Classifying Breast Cancer Tumors via Machine Learning", + "2. Applying AI to Diagnose Breast Cancer from Images ", + "3. Using Deep Learning for Breast Cancer Detection", + "4. Automated Breast Cancer Classification with Neural Nets", + "5. Machine Learning Model for Breast Cancer Diagnosis" + ] + }, + { + "id": 116, + "slug": "parasitedetection-iiitb2019", + "name": "Parasite detection", + "headline": "detect if cell image has parasite or is uninfected", + "headline_alternatives": [] + }, + { + "id": 117, + "slug": "hpa-single-cell-image-classification", + "name": "Human Protein Atlas -Single Cell Classification", + "headline": "Find individual human cell differences in microscope images", + "headline_alternatives": [] + }, + { + "id": 118, + "slug": "stem-cell-predcition", + "name": "Stem Cell Predcition", + "headline": "Classify stem and non-stem cells using RNA-seq data", + "headline_alternatives": [] + }, + { + "id": 119, + "slug": "sartorius-cell-instance-segmentation", + "name": "Sartorius - Cell Instance Segmentation", + "headline": "Detect single neuronal cells in microscopy images", + "headline_alternatives": [ + "1. Segment neuronal cells in microscopy images to aid neuroresearch", + "2. Delineate distinct neuronal cells to quantify effects of disease ", + "3. Detect and segment neuronal cells to enable neurobiology research", + "4. Identify neuronal cells in images to advance neurological disorder research", + "5. Segment microscopy images of neuronal cells to further neurobiology" + ] + }, + { + "id": 120, + "slug": "pvelad", + "name": "Photovoltaic cell anomaly detection", + "headline": "Hosted by Hebei University of Technology (AIHebut research group) and Beihan...", + "headline_alternatives": [ + "1. Hebei and Beihang Universities Host AI Challenge", + "2. Hebut and NAVE Research Groups Hold Joint AI Contest ", + "3. AIHebut and NAVE Teams Compete in AI Challenge", + "4. Joint AI Challenge Hosted by Hebei and Beihang Universities", + "5. Hebei and Beihang University Groups Host AI Competition" + ] + }, + { + "id": 121, + "slug": "blood-mnist", + "name": "Blood-MNIST", + "headline": "Classifying blood cell types using Weights and Biases", + "headline_alternatives": [ + "1. Challenge: Categorize Blood Cells with Weights and Biases", + "2. Competition: Classify Blood Cells using Weights and Biases ", + "3. Contest: Identify Blood Cell Types via Weights and Biases", + "4. Challenge: Label Blood Cell Categories using Weights and Biases", + "5. Competition: Categorize Blood Cells through Weights and Biases" + ] + }, + { + "id": 122, + "slug": "insilicomolhack", + "name": "MolHack", + "headline": "Apply deep learning to speedup drug validation", + "headline_alternatives": [] + }, + { + "id": 123, + "slug": "codata2019challenge", + "name": "Cell Response Classification", + "headline": "From recorded timeseries of many cells in a well, predict which drug treatme...", + "headline_alternatives": [] + }, + { + "id": 124, + "slug": "drug-solubility-challenge", + "name": "Drug solubility challenge", + "headline": "Solubility is vital to achieve desired concentration of drug for anticipated...", + "headline_alternatives": [ + "1. Improving Drug Solubility to Achieve Optimal Concentrations ", + "2. Maximizing Drug Solubility for Desired Pharmacological Response", + "3. Solubility Key to Reaching Target Drug Concentrations ", + "4. Enhancing Solubility to Attain Effective Drug Concentrations", + "5. Solubility Crucial for Ideal Drug Concentrations and Effects" + ] + }, + { + "id": 125, + "slug": "kinase-inhibition-challenge", + "name": "Kinase inhibition challenge", + "headline": "Protein kinases have become a major class of drug targets, accumulating a hu...", + "headline_alternatives": [ + "1. Developing protein kinase inhibitors as promising new drug targets", + "2. Targeting protein kinases: a major new frontier for drug discovery", + "3. Exploring the vast drug potential of the protein kinase family ", + "4. Protein kinases: a treasure trove of data for drug development", + "5. Mining protein kinase data to advance targeted drug discovery" + ] + }, + { + "id": 126, + "slug": "ai-drug-discovery", + "name": "AI Drug Discovery Workshop and Coding Challenge", + "headline": "Developing Fundamental AI Programming Skills for Drug Discovery", + "headline_alternatives": [ + "1. Learn AI Skills to Advance Drug Discovery Programs", + "2. Acquire AI Expertise for Novel Drug Development ", + "3. Master AI Programming for Pharma Research Innovation", + "4. Gain AI Proficiency to Expedite Drug Discovery ", + "5. Develop AI Talent for Faster Medication Breakthroughs" + ] + }, + { + "id": 127, + "slug": "protein-compound-affinity", + "name": "Structure-free protein-ligand affinity prediction - Task 1 Fitting", + "headline": "Developing new AI models for drug discovery, main portal (Task-1 fitting)", + "headline_alternatives": [ + "1. New AI models aim to advance drug discovery efforts ", + "2. Developing AI to fit models for improved drug discovery", + "3. AI modeling to enhance drug discovery through portal fitting", + "4. Portal fitting with AI models to further drug discovery ", + "5. AI modeling via portal for advancing drug discovery research" + ] + }, + { + "id": 128, + "slug": "cisc873-dm-f21-a5", + "name": "CISC873-DM-F21-A5", + "headline": "Anti-Cancer Drug Activity Prediction", + "headline_alternatives": [] + }, + { + "id": 129, + "slug": "pro-lig-aff-task2-mse", + "name": "Structure-free protein-ligand affinity prediction - Task 2 Fitting", + "headline": "Developing new AI models for drug discovery (Task-2 fitting)", + "headline_alternatives": [ + "1. Creating AI to fit models for drug discovery", + "2. Developing AI to fit models and enable drug discovery ", + "3. Building AI models to fit and advance drug development", + "4. New AI to fit models and boost pharmaceutical discoveries", + "5. AI models developed to fit and progress drug discovery" + ] + }, + { + "id": 130, + "slug": "pro-lig-aff-task1-pearsonr", + "name": "Structure-free protein-ligand affinity prediction - Task 1 Ranking", + "headline": "Developing new AI models for drug discovery (Task-1 ranking)", + "headline_alternatives": [ + "1. Creating AI to Rank Drug Candidates for Discovery", + "2. Building AI Models to Prioritize Drug Compounds ", + "3. Developing AI to Rank Drug Leads for Research", + "4. Designing AI to Order Drug Prospects by Potential", + "5. Making AI to Classify Drug Possibilities for Trials" + ] + }, + { + "id": 131, + "slug": "pro-lig-aff-task2-pearsonr", + "name": "Structure-free protein-ligand affinity prediction - Task 2 Ranking", + "headline": "Developing new AI models for drug discovery (Task-2 ranking)", + "headline_alternatives": [ + "1. Creating AI to Rank Drug Candidates for Discovery", + "2. Using AI to Prioritize Drug Compounds for Research ", + "3. AI Models to Improve Drug Candidate Identification", + "4. AI Systems to Enhance Drug Discovery Processes", + "5. Developing AI to Streamline Drug Development Research" + ] + }, + { + "id": 132, + "slug": "pro-lig-aff-task3-spearmanr", + "name": "Structure-free protein-ligand affinity prediction - Task 3 Ranking", + "headline": "Developing new AI models for drug discovery (Task-3 ranking)", + "headline_alternatives": [ + "1. New AI models sought for drug discovery ranking", + "2. Develop AI to rank drug candidates for discovery ", + "3. Can AI improve drug discovery through candidate ranking?", + "4. Building AI models to rank drug discovery options", + "5. AI drug discovery challenge: rank candidates by potential" + ] + }, + { + "id": 133, + "slug": "hhp", + "name": "Heritage Health Prize", + "headline": "Identify patients who will be admitted to a hospital within the next year us...", + "headline_alternatives": [ + "1. Predicting Future Hospital Admissions from Claims Data", + "2. Forecasting Hospitalizations Using Insurance Claims History ", + "3. Claims Data to Identify Future Hospital Admissions", + "4. Hospital Admissions Prediction from Historical Claims", + "5. Using Past Claims to Foresee Hospitalizations" + ] + }, + { + "id": 134, + "slug": "pf2012", + "name": "Practice Fusion Analyze This! 2012 - Prediction Challenge", + "headline": "Start digging into electronic health records and submit your ideas for the m...", + "headline_alternatives": [] + }, + { + "id": 135, + "slug": "pf2012-at", + "name": "Practice Fusion Analyze This! 2012 - Open Challenge", + "headline": "Start digging into electronic health records and submit your creative, insig...", + "headline_alternatives": [] + }, + { + "id": 136, + "slug": "seizure-detection", + "name": "UPenn and Mayo Clinic's Seizure Detection Challenge", + "headline": "Detect seizures in intracranial EEG recordings", + "headline_alternatives": [ + "1. New contest to detect seizures from brain recordings", + "2. Competition to identify seizures in intracranial EEG ", + "3. Can you detect seizures in intracranial EEG data?", + "4. Help identify seizures from intracranial EEG signals", + "5. Seizure detection challenge using intracranial EEG" + ] + }, + { + "id": 137, + "slug": "seizure-prediction", + "name": "American Epilepsy Society Seizure Prediction Challenge", + "headline": "Predict seizures in intracranial EEG recordings", + "headline_alternatives": [] + }, + { + "id": 138, + "slug": "deephealth-1", + "name": "Deep Health - alcohol", + "headline": "Find Correlations and patterns with Medical data", + "headline_alternatives": [ + "1. Discover Medical Insights Through Data Correlations ", + "2. Uncovering Patterns in Medical Data to Advance Healthcare", + "3. Analyzing Medical Data to Reveal Important Health Connections", + "4. Medical Data Analysis Seeks to Find Key Correlations and Patterns", + "5. Can Medical Data Analysis Yield New Health Insights and Links?" + ] + }, + { + "id": 139, + "slug": "deep-health-3", + "name": "Deep Health - Diabetes 2", + "headline": "This competition is for those attending the Deep Health Hackathon. Predic...", + "headline_alternatives": [ + "1. Predicting diabetes at health hackathon", + "2. Forecasting diabetes for hackathon attendees ", + "3. Deep Health Hackathon: Predict diabetes", + "4. Diabetes prediction challenge at hackathon", + "5. Hackathon attendees compete to predict diabetes" + ] + }, + { + "id": 140, + "slug": "d012554-2021", + "name": "D012554 - 2021", + "headline": "Classify the health of a fetus using CTG data", + "headline_alternatives": [ + "1. Classifying Fetal Health from Cardiotocography Data", + "2. Predicting Fetus Condition with CTG Readings ", + "3. Assessing Fetal Wellbeing via Monitoring Signals", + "4. Determining Fetal Status Through Uterine Data ", + "5. Categorizing Fetal Health Using CTG Measurements" + ] + }, + { + "id": 141, + "slug": "idao-2022-bootcamp-insomnia", + "name": "IDAO 2022. ML Bootcamp - Insomnia", + "headline": "Predict sleep disorder on given human health data", + "headline_alternatives": [ + "1. Forecasting Sleep Issues Using Patient Information", + "2. Anticipating Sleep Disturbances From Health Records ", + "3. Predicting Sleep Disorders From Human Health Data", + "4. Estimating Sleep Problems With Medical Histories", + "5. Projecting Sleep Anomalies Based On Health Profiles" + ] + }, + { + "id": 142, + "slug": "tweet-mental-health-classification", + "name": "Tweet Mental Health Classification", + "headline": "Build Models to classify tweets to determine mental health", + "headline_alternatives": [ + "1. Classify Tweets to Detect Mental Health Signals", + "2. Build Models to Categorize Tweets by Mental State ", + "3. Use Twitter Data to Assess Mental Health Status", + "4. Develop Algorithms to Analyze Tweets for Mental Health", + "5. Create Systems to Classify Tweets for Mental Wellbeing" + ] + }, + { + "id": 143, + "slug": "ml-olympiad-good-health-and-well-being", + "name": "ML Olympiad - GOOD HEALTH AND WELL BEING", + "headline": "Use your ML expertise to classify if a patient has heart disease or not", + "headline_alternatives": [ + "1. ML to Diagnose Heart Disease from Patient Data ", + "2. Applying ML to Classify Heart Disease Risk", + "3. ML Model Predicts Heart Disease in Patients", + "4. ML Classification of Heart Disease from Health Data", + "5. ML Algorithm Detects Heart Disease from Inputs" + ] + }, + { + "id": 144, + "slug": "rsna-breast-cancer-detection", + "name": "RSNA Screening Mammography Breast Cancer Detection", + "headline": "Find breast cancers in screening mammograms", + "headline_alternatives": [] + }, + { + "id": 145, + "slug": "biocreative-vii-text-mining-drug-and-chemical-protein-interactions-drugprot", + "name": "BioCreative VII: Text mining drug and chemical-protein interactions (DrugProt)", + "headline": "", + "headline_alternatives": [ + "1. Develop systems to extract drug-gene relations from text", + "2. Automate extraction of drug-protein interactions from literature ", + "3. Detect relationships between drugs and genes/proteins in text", + "4. Promote systems that identify drug-gene/protein relations", + "5. Evaluate systems that extract drug-protein relations in text" + ] + }, + { + "id": 146, + "slug": "extended-literature-ai-for-drug-induced-liver-injury", + "name": "Extended Literature AI for Drug Induced Liver Injury", + "headline": "", + "headline_alternatives": [ + "1. Develop ML tools to analyze drug texts for liver injury data", + "2. Extract drug toxicity knowledge from free text publications ", + "3. Apply NLP to improve analysis of drug-induced liver injury data", + "4. Create automated ways to process drug texts for liver safety", + "5. Use AI to better understand drug-induced liver injury from texts" + ] + }, + { + "id": 147, + "slug": "anti-microbial-resistance-forensics", + "name": "Anti-Microbial Resistance Forensics", + "headline": "", + "headline_alternatives": [ + "1. Classifying Bacteriophages to Understand Microbial Evolution", + "2. Analyzing Phages to Combat Antimicrobial Resistance ", + "3. Harnessing Phages: Curing Infections Without Antibiotics", + "4. Phage Genomics: Tracing AMR Gene Transfer and Evolution", + "5. Improved Algorithms to Describe Diverse Phage Capabilities" + ] + }, + { + "id": 148, + "slug": "disease-maps-to-modelling-covid-19", + "name": "Disease Maps to Modelling COVID-19", + "headline": "Use the COVID-19 disease map to suggest drugs candidate for repurposing, tha...", + "headline_alternatives": [ + "1. Modeling COVID-19 infection to find drug repurposing candidates", + "2. COVID-19 challenge seeks drug repurposing through disease modeling ", + "3. COVID-19 challenge: model infection, find drug repurposing options", + "4. Modeling COVID-19 mechanisms to enable drug repurposing", + "5. COVID-19 challenge: model disease, repurpose drugs, validate with data" + ] + }, + { + "id": 149, + "slug": "crowdsourced-evaluation-of-inchi-based-tautomer-identification", + "name": "Crowdsourced Evaluation of InChI-based Tautomer Identification", + "headline": "Calling on scientists from industry, government, and academia dealing with c...", + "headline_alternatives": [] + }, + { + "id": 150, + "slug": "nctr-indel-calling-from-oncopanel-sequencing-challenge-phase-2", + "name": "NCTR Indel Calling from Oncopanel Sequencing Challenge Phase 2", + "headline": "In Phase 2, participants who completed in Phase 1 of the challenge have the ...", + "headline_alternatives": [ + "1. Develop standards for oncopanel sequencing quality control", + "2. Create reference sample to benchmark oncopanel performance ", + "3. Assess analytical performance of oncopanels for precision oncology", + "4. Establish quality metrics for clinical oncopanel sequencing ", + "5. Improve regulation of oncopanel sequencing through FDA project" + ] + }, + { + "id": 151, + "slug": "nctr-indel-calling-from-oncopanel-sequencing-data-challenge-phase-1", + "name": "NCTR Indel Calling from Oncopanel Sequencing Data Challenge Phase 1", + "headline": "Genetic variation involving indels (insertions and deletions) in the cancer ...", + "headline_alternatives": [ + "1. Develop standards for oncopanel sequencing quality control", + "2. Benchmark oncopanels with reference sample from FDA-led project ", + "3. Assess analytical performance of oncopanels using genomic reference", + "4. Establish quality metrics for clinical oncopanel sequencing ", + "5. Create protocols for fit-for-purpose next-gen sequencing data use" + ] + }, + { + "id": 152, + "slug": "vha-innovation-ecosystem-and-precisionfda-covid-19-risk-factor-modeling-challenge-phase-2", + "name": "VHA Innovation Ecosystem and precisionFDA COVID-19 Risk Factor Modeling Challenge Phase 2", + "headline": "The focus of Phase 2 was to validate the top performing models on two additi...", + "headline_alternatives": [] + }, + { + "id": 153, + "slug": "tumor-mutational-burden-tmb-challenge-phase-2", + "name": "Tumor Mutational Burden (TMB) Challenge Phase 2", + "headline": "The goal of the Friends of Cancer Research and precisionFDA Tumor Mutational...", + "headline_alternatives": [] + }, + { + "id": 154, + "slug": "predicting-gene-expression-using-millions-of-random-promoter-sequences", + "name": "Predicting Gene Expression Using Millions of Random Promoter Sequences", + "headline": "", + "headline_alternatives": [ + "1. Decoding gene expression regulation to understand disease", + "2. Modeling complex cis-regulatory logic of the human genome ", + "3. Learning models of human regulatory DNA function", + "4. Understanding cis-regulatory logic of disease origins", + "5. Decoding transcriptional regulation in human genome" + ] + }, + { + "id": 155, + "slug": "brats-2023", + "name": "BraTS 2023", + "headline": "", + "headline_alternatives": [ + "1. Benchmarking brain tumor segmentation with expanded dataset", + "2. Segmenting adult and pediatric brain tumors across populations ", + "3. Delineating gliomas and meningiomas in diverse patient groups", + "4. Segmenting brain tumors despite missing clinical data", + "5. BraTS challenge tests brain tumor segmentation methods" + ] + }, + { + "id": 156, + "slug": "cagi7", + "name": "CAGI7", + "headline": "The seventh round of CAGI", + "headline_alternatives": [ + "1. Seventh round of CAGI experiments planned for 2024", + "2. CAGI to hold seventh edition of prediction challenges in 2024 ", + "3. CAGI announces plans for seventh round of experiments in 2024", + "4. Seventh CAGI challenge focused on predictive modeling set for 2024", + "5. CAGI organizing seventh round of predictive modeling experiments for 2024" + ] + }, + { + "id": 157, + "slug": "casp15", + "name": "CASP15", + "headline": "Establish the state-of-art in modeling proteins and protein complexes", + "headline_alternatives": [ + "1. CASP15 Adapts to Advancements in Deep Learning Protein Modeling", + "2. CASP15 Strengthens Focus on Novel Protein Structure Applications ", + "3. CASP15 Evolves Categories to Maximize Deep Learning Insights", + "4. CASP15 Shifts to Emerging Areas Like RNA and Protein Complexes", + "5. CASP15 Partners with CAPRI, CAMEO to Apply New Protein Models" + ] + }, + { + "id": 158, + "slug": "synthrad2023", + "name": "SynthRAD2023", + "headline": "Synthesizing computed tomography for radiotherapy", + "headline_alternatives": [ + "1. Platform Compares sCT Generation Methods", + "2. First Public Benchmark for sCT Algorithms ", + "3. New Platform Evaluates sCT Generation Methods", + "4. Challenge Compares sCT Generation Algorithms", + "5. Public Platform to Evaluate sCT Methods" + ] + }, + { + "id": 159, + "slug": "synthetic-data-for-instrument-segmentation-in-surgery-syn-iss", + "name": "Synthetic Data for Instrument Segmentation in Surgery (Syn-ISS)", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 160, + "slug": "pitvis", + "name": "PitVis", + "headline": "Surgical workflow and instrument recognition in endonasal surgery", + "headline_alternatives": [ + "1. Computer guidance aims to improve pituitary tumor removal ", + "2. Technology assists surgeons removing pituitary growths", + "3. Systems guide surgeons extracting pituitary gland tumors ", + "4. Assisted intervention targets enhanced pituitary surgery", + "5. Computer assistance for precise pituitary tumor extraction" + ] + }, + { + "id": 161, + "slug": "mvseg2023", + "name": "MVSEG2023", + "headline": "Automatically segment mitral valve leaflets from single frame 3D trans-esoph...", + "headline_alternatives": [ + "1. Segment mitral valve from 3D echocardiography for treatment planning ", + "2. Personalize mitral valve repair with automatic leaflet segmentation", + "3. Improve outcomes in mitral repair with patient-specific 3D modeling", + "4. Tailor mitral valve surgery using automated 3D leaflet segmentation ", + "5. Segment mitral valve leaflets from 3D ultrasound for personalized care" + ] + }, + { + "id": 162, + "slug": "crossmoda23", + "name": "crossMoDA23", + "headline": "This challenge proposes is the third edition of the first medical imaging be...", + "headline_alternatives": [ + "1. Challenge tests unsupervised domain adaptation for MRI tumor segmentation ", + "2. Multi-class dataset benchmarks domain adaptation in medical imaging", + "3. First large dataset evaluates domain adaptation techniques for MRI", + "4. Challenge evaluates robustness of ML approaches across medical domains", + "5. Dataset tests domain adaptation for multi-class MRI segmentation task" + ] + }, + { + "id": 163, + "slug": "icr-identify-age-related-conditions", + "name": "ICR - Identifying Age-Related Conditions", + "headline": "Use Machine Learning to detect conditions with measurements of anonymous cha...", + "headline_alternatives": [ + "1. Predict medical conditions from health measurements", + "2. Classify patients by presence of three conditions ", + "3. Model predicts medical conditions from characteristics", + "4. Determine conditions from encoded health characteristics", + "5. Shorten diagnosis using predictive model on measurements" + ] + }, + { + "id": 164, + "slug": "cafa-5-protein-function-prediction", + "name": "CAFA 5: Protein Function Prediction", + "headline": "Predict the biological function of a protein", + "headline_alternatives": [] + }, + { + "id": 165, + "slug": "rsna-2023-abdominal-trauma-detection", + "name": "RSNA 2023 Abdominal Trauma Detection", + "headline": "Detect and classify traumatic abdominal injuries", + "headline_alternatives": [ + "1. AI to Assist Rapid Diagnosis of Abdominal Trauma from CT Scans", + "2. Machine Learning for Detecting and Grading Abdominal Injuries in CT", + "3. Automated Detection and Severity Grading of Abdominal Trauma by AI ", + "4. Rapid AI Diagnosis of Abdominal Injuries from CT Scans ", + "5. AI to Improve Outcomes in Abdominal Trauma Patients" + ] + }, + { + "id": 166, + "slug": "hubmap-hacking-the-human-vasculature", + "name": "HuBMAP: Hacking the Human Vasculature", + "headline": "Segment instances of microvascular structures from healthy human kidney tiss...", + "headline_alternatives": [ + "1. Model segments microvasculature in kidney histology images", + "2. Automate segmentation of capillaries, arterioles, venules ", + "3. Improve understanding of microvascular structures in tissues", + "4. Segment microvascular structures like capillaries in images", + "5. Create model to identify blood vessels in kidney slides" + ] + }, + { + "id": 167, + "slug": "amp-parkinsons-disease-progression-prediction", + "name": "AMP(R)-Parkinson's Disease Progression Prediction", + "headline": "Use protein and peptide data measurements from Parkinson's Disease patients ...", + "headline_alternatives": [ + "1. Predicting Parkinson's progression with MDS-UPDRS scores", + "2. Model to forecast Parkinson's severity via MDS-UPDRS", + "3. Develop model for Parkinson's progression using MDS-UPDRS", + "4. Predict MDS-UPDRS scores to track Parkinson's disease ", + "5. Use MDS-UPDRS to model Parkinson's disease progression" + ] + }, + { + "id": 168, + "slug": "open-problems-multimodal", + "name": "Open Problems -Multimodal Single-Cell Integration", + "headline": "Predict how DNA, RNA & protein measurements co-vary in single cells", + "headline_alternatives": [ + "1. Predict how DNA, RNA, protein relate in blood cell development", + "2. Model how stem cells mature into blood cells using omics data ", + "3. Map genetic data across cell states with time-series data", + "4. Relate DNA, RNA, protein in hematopoietic stem cell differentiation", + "5. Predict modalities from unseen timepoints in blood cell development" + ] + }, + { + "id": 169, + "slug": "multi-atlas-labeling-beyond-the-cranial-vault", + "name": "Multi-Atlas Labeling Beyond the Cranial Vault", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 170, + "slug": "hubmap-organ-segmentation", + "name": "HuBMAP + HPA: Hacking the Human Body", + "headline": "Segment multi-organ functional tissue units", + "headline_alternatives": [ + "1. Segment functional tissue units in human organs", + "2. Identify and segment tissue units across organs ", + "3. Build models to accurately segment functional tissue units", + "4. Accelerate understanding cell organization in tissues", + "5. Freely provide human tissue atlas to improve health " + ] + }, + { + "id": 171, + "slug": "hubmap-kidney-segmentation", + "name": "HuBMAP: Hacking the Kidney", + "headline": "Identify glomeruli in human kidney tissue images", + "headline_alternatives": [ + "1. Detect functional tissue units in human kidney maps ", + "2. Identify glomeruli in kidney images for HuBMAP", + "3. Map human kidney at single cell level to find tissue units", + "4. Develop tools to detect functional units in kidney images", + "5. Identify cell relationships in kidney to understand health" + ] + }, + { + "id": 172, + "slug": "ventilator-pressure-prediction", + "name": "Google Brain: Ventilator Pressure Prediction", + "headline": "Simulate a ventilator connected to a sedated patient's lung", + "headline_alternatives": [ + "1. Simulating ventilator-lung interactions to improve treatments", + "2. Developing affordable ventilator control methods to help patients", + "3. Simulating ventilator-lung dynamics for better patient care ", + "4. Overcoming cost barriers in ventilator control for wider access", + "5. Modeling ventilator-lung systems to adapt treatments to patients" + ] + }, + { + "id": 173, + "slug": "stanford-covid-vaccine", + "name": "OpenVaccine - COVID-19 mRNA Vaccine Degradation Prediction", + "headline": "Urgent need to bring the COVID-19 vaccine to mass production", + "headline_alternatives": [ + "1. Predict RNA degradation rates to aid vaccine design", + "2. Modeling RNA degradation for COVID vaccine mRNA ", + "3. Forecasting RNA decay to optimize mRNA vaccines", + "4. Estimating position-specific RNA degradation rates ", + "5. Data science for modeling RNA degradation rates" + ] + }, + { + "id": 174, + "slug": "openvaccine", + "name": "OpenVaccine", + "headline": "To develop mRNA vaccines stable enough to be deployed to everyone in the wor...", + "headline_alternatives": [ + "1. Crowdsource mRNA vaccine design for enhanced stability", + "2. Improve refrigerator stability of mRNA vaccines", + "3. Develop mRNA vaccine 2-10x more stable than COVID shots ", + "4. Machine learning to optimize mRNA vaccine formulation ", + "5. Transform mRNA vaccine viability via community game design" + ] + }, + { + "id": 175, + "slug": "opentb", + "name": "OpenTB", + "headline": "What if we could use RNA to detect a gene sequence found to be present only ...", + "headline_alternatives": [ + "1. Designing RNA sensors to detect TB gene signature", + "2. Players create RNA sensors to calculate TB gene levels ", + "3. Developing RNA-based sensors to detect active TB", + "4. Creating RNA sensors to detect 3 genes for TB test", + "5. Using RNA designs to build TB diagnostic devices" + ] + }, + { + "id": 176, + "slug": "opencrispr", + "name": "OpenCRISPR", + "headline": "A project to discover design patterns for guide RNAs to make gene editing mo...", + "headline_alternatives": [ + "1. CRISPR gene editing targeted to control diseases", + "2. Developing safe, small molecule switches for CRISPR", + "3. Controlling CRISPR with RNA hairpins and small molecules ", + "4. Tackling diseases with safe, controllable CRISPR editing", + "5. Unlocking CRISPR's potential with molecular on/off switches" + ] + }, + { + "id": 177, + "slug": "openknot", + "name": "OpenKnot", + "headline": "Many important biological processes depend on RNAs that form pseudoknots, an...", + "headline_alternatives": [ + "1. Understanding RNA pseudoknot folding pathways", + "2. Elucidating pseudoknot roles in gene regulation ", + "3. Modeling pseudoknot structures in viral replication", + "4. Analyzing pseudoknot enzymatic activity ", + "5. Exploring pseudoknot biological functions" + ] + }, + { + "id": 178, + "slug": "openaso", + "name": "OpenASO", + "headline": "A research initiative aimed at developing innovative design principles for R...", + "headline_alternatives": [ + "1. Decoding DNA into functional RNA via splicing", + "2. Understanding RNA splicing for protein production ", + "3. Investigating intron removal in mRNA maturation", + "4. Analyzing corrupted RNA splicing in human disease", + "5. Mapping DNA transcription and mRNA maturation" + ] + }, + { + "id": 179, + "slug": "openribosome", + "name": "OpenRibosome", + "headline": "We aim to 1) gain fundamental insights into the ribosome's RNA sequence-fold...", + "headline_alternatives": [] + }, + { + "id": 180, + "slug": "lish-moa", + "name": "Mechanisms of Action (MoA) Prediction", + "headline": "Can you improve the algorithm that classifies drugs based on their biologica...", + "headline_alternatives": [] + }, + { + "id": 181, + "slug": "recursion-cellular-image-classification", + "name": "Recursion Cellular Image Classification", + "headline": "CellSignal-Disentangling biological signal from experimental noise in cellul...", + "headline_alternatives": [ + "1. Classify cell images by genetic perturbation", + "2. Reduce noise in cellular image classification ", + "3. Model cell images by biology not noise", + "4. Improve industry modeling of cellular images", + "5. Decrease treatment costs through cellular image AI" + ] + }, + { + "id": 182, + "slug": "tlvmc-parkinsons-freezing-gait-prediction", + "name": "Parkinson's Freezing of Gait Prediction", + "headline": "Event detection from wearable sensor data", + "headline_alternatives": [ + "1. Detect freezing of gait in Parkinson's patients", + "2. Machine learning to detect freezing episodes in Parkinson's ", + "3. Wearable sensor data to understand freezing of gait", + "4. Improve detection of debilitating freezing in Parkinson's", + "5. Predict and prevent freezing of gait in Parkinson's" + ] + }, + { + "id": 183, + "slug": "chaimeleon", + "name": "CHAIMELEON Open Challenges", + "headline": "", + "headline_alternatives": [ + "1. AI competition seeks cancer diagnosis and treatment solutions", + "2. Developing innovative AI for cancer management and outcomes ", + "3. Advancing cancer research with AI prediction models", + "4. Training AI to answer clinical questions for 5 cancers", + "5. Showcasing AI solutions to improve cancer diagnosis and care" + ] + }, + { + "id": 184, + "slug": "topcow23", + "name": "Topology-Aware Anatomical Segmentation of the Circle of Willis for CTA and MRA", + "headline": "", + "headline_alternatives": [ + "1. Segmenting Cerebral Arteries from 3D Angiography Images", + "2. Extracting Circle of Willis Structure from Angiography Data ", + "3. Annotating Vessels in Brain Angiography Scans", + "4. CoW Vessel Segmentation from CTA and MRA Images", + "5. Separating Artery Components in Cerebral Angiograms" + ] + }, + { + "id": 185, + "slug": "circle-of-willis-intracranial-artery-classification-and-quantification-challenge-2023", + "name": "Circle of Willis Intracranial Artery Classification and Quantification Challenge 2023", + "headline": "", + "headline_alternatives": [ + "1. Challenge compares circle of Willis classification methods", + "2. Circle of Willis configuration classification challenge", + "3. Comparing methods for classifying circle of Willis anatomy ", + "4. Challenge to quantify circle of Willis artery diameters", + "5. Circle of Willis anatomy quantification challenge" + ] + }, + { + "id": 186, + "slug": "making-sense-of-electronic-health-record-ehr-race-and-ethnicity-data", + "name": "Making Sense of Electronic Health Record (EHR) Race and Ethnicity Data", + "headline": "The US Food and Drug Administration (FDA) calls on stakeholders, including t...", + "headline_alternatives": [] + }, + { + "id": 187, + "slug": "the-veterans-cardiac-health-and-ai-model-predictions-v-champs", + "name": "The Veterans Cardiac Health and AI Model Predictions (V-CHAMPS)", + "headline": "The Veterans Health Administration Innovation Ecosystem, the Digital Health ...", + "headline_alternatives": [ + "1. Develop AI models predicting heart health in synthetic Veteran records ", + "2. Create ML models for cardiovascular outcomes using fake Veteran data", + "3. Build AI to forecast heart disease risk with simulated Veteran data ", + "4. Synthesize Veteran records to train AI predicting heart issues", + "5. Use artificial data to create ML models for Veteran heart health" + ] + }, + { + "id": 188, + "slug": "predicting-high-risk-breast-cancer-phase-1", + "name": "Predicting High Risk Breast Cancer - Phase 1", + "headline": "Predicting High Risk Breast Cancer-a Nightingale OS & AHLI data challenge", + "headline_alternatives": [ + "1. Mammogram mystery: Why more cancers, not fewer deaths?", + "2. Solving the disturbing disconnect in breast cancer diagnoses ", + "3. Reducing unnecessary breast cancer surgeries and chemo", + "4. Identifying truly dangerous breast cancers to spare women", + "5. Algorithms to predict harmful cancers from biopsy images" + ] + }, + { + "id": 189, + "slug": "predicting-high-risk-breast-cancer-phase-2", + "name": "Predicting High Risk Breast Cancer - Phase 2", + "headline": "Predicting High Risk Breast Cancer-a Nightingale OS & AHLI data challenge", + "headline_alternatives": [ + "1. Seeking to improve breast cancer diagnosis with AI", + "2. Reducing unnecessary breast cancer surgeries with algorithms ", + "3. Using AI to distinguish harmless from harmful breast tumors", + "4. Algorithms to identify truly dangerous breast cancers ", + "5. Can AI spot deadly breast cancers more accurately?" + ] + }, + { + "id": 190, + "slug": "dream-2-in-silico-network-inference", + "name": "DREAM 2 - In Silico Network Inference", + "headline": "Predicting the connectivity and properties of in-silico networks.", + "headline_alternatives": [ + "1. Predict connectivity of simulated biological networks", + "2. Uncover properties of in-silico network dynamics ", + "3. Reveal connections in computational biology models", + "4. Decipher interactions in simulated network systems", + "5. Analyze artificial networks to infer biology" + ] + }, + { + "id": 191, + "slug": "dream-3-in-silico-network-challenge", + "name": "DREAM 3 - In Silico Network Challenge", + "headline": "The goal of the in silico challenges is the reverse engineering of gene netw...", + "headline_alternatives": [ + "1. Reverse engineer gene networks from data", + "2. Predict network topology from gene datasets ", + "3. Reconstruct networks from steady state and time series data", + "4. Infer directed unsigned networks from gene expression", + "5. Uncover gene regulatory networks from simulations" + ] + }, + { + "id": 192, + "slug": "dream-4-in-silico-network-challenge", + "name": "DREAM 4 - In Silico Network Challenge", + "headline": "The goal of the in silico network challenge is to reverse engineer gene regu...", + "headline_alternatives": [ + "1. Reverse engineer gene networks from simulated data", + "2. Infer gene regulation networks from simulated datasets ", + "3. Uncover network structure from simulated gene data", + "4. Reconstruct gene networks using steady-state and time-series data", + "5. Predict network responses to new perturbations from simulated data" + ] + }, + { + "id": 193, + "slug": "dream-5-network-inference-challenge", + "name": "DREAM 5 - Network Inference Challenge", + "headline": "The goal of this Network Inference Challenge is to reverse engineer gene reg...", + "headline_alternatives": [ + "1. Reverse engineer gene networks from arrays", + "2. Infer regulatory networks from gene datasets ", + "3. Challenge to infer networks from microarrays", + "4. Reconstruct networks from microbe gene data", + "5. Infer structure of gene networks from arrays" + ] + }, + { + "id": 194, + "slug": "nlp-sandbox-date-annotation", + "name": "NLP Sandbox Date Annotation", + "headline": "Identify dates in clinical notes.", + "headline_alternatives": [ + "1. Challenge seeks date annotator for clinical notes", + "2. Develop a date annotator for clinical notes ", + "3. Annotate dates in clinical notes with NLP ", + "4. Extract dates from clinical notes using NLP", + "5. Apply NLP to identify dates in clinical notes" + ] + }, + { + "id": 195, + "slug": "nlp-sandbox-person-name-annotation", + "name": "NLP Sandbox Person Name Annotation", + "headline": "Identify person names in clinical notes.", + "headline_alternatives": [ + "1. Challenge Seeks Annotator to Find Names in Clinical Notes", + "2. Develop Annotator to Identify Person Names from Clinical Text ", + "3. Build System to Extract Person Names from Clinical Documents", + "4. Create Annotator to Detect Person Names in Clinical Notes", + "5. Challenge to Build Person Name Annotator for Clinical Text" + ] + }, + { + "id": 196, + "slug": "nlp-sandbox-location-annotation", + "name": "NLP Sandbox Location Annotation", + "headline": "Identify location information in clinical notes.", + "headline_alternatives": [ + "1. Predict locations in clinical notes", + "2. Annotate locations in clinical notes ", + "3. Find locations mentioned in clinical notes", + "4. Identify location annotations in notes", + "5. Locate annotations in clinical notes" + ] + }, + { + "id": 197, + "slug": "nlp-sandbox-contact-annotation", + "name": "NLP Sandbox Contact Annotation", + "headline": "Identify contact information in clinical notes.", + "headline_alternatives": [ + "1. Develop contact annotator for clinical notes", + "2. Annotate contacts in clinical notes with NLP ", + "3. Predict contact annotations in clinical notes", + "4. Identify contacts in clinical notes using NLP", + "5. Apply NLP to extract contacts from notes" + ] + }, + { + "id": 198, + "slug": "nlp-sandbox-id-annotation", + "name": "NLP Sandbox ID Annotation", + "headline": "Identify identifiers in clinical notes.", + "headline_alternatives": [ + "1. Predict patient IDs in clinical notes", + "2. Annotate clinical notes with patient IDs ", + "3. Identify patient IDs in clinical records", + "4. Extract patient identifiers from notes", + "5. Find patient IDs in doctor's notes" + ] + }, + { + "id": 199, + "slug": "dream-2-bcl6-transcriptomic-target-prediction", + "name": "DREAM 2 - BCL6 Transcriptomic Target Prediction", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 200, + "slug": "dream-2-protein-protein-interaction-network-inference", + "name": "DREAM 2 - Protein-Protein Interaction Network Inference", + "headline": "Predict a PPI network of 47 proteins", + "headline_alternatives": [ + "1. Discover new yeast protein interactions using high-throughput methods", + "2. Identify novel yeast protein-protein interactions with repeated experiments ", + "3. Find unknown yeast gene interactions using stringent yeast two-hybrid assays", + "4. Determine yeast protein pairs that interact in multiple tests but not known before ", + "5. Categorize untested yeast gene pairs as interacting or not via experiments" + ] + }, + { + "id": 201, + "slug": "dream-2-genome-scale-network-inference", + "name": "DREAM 2 - Genome-Scale Network Inference", + "headline": "", + "headline_alternatives": [ + "1. Reconstruct genome network from microarray data", + "2. Infer transcriptional network from microarray data ", + "3. Build genome-scale network using microarray data", + "4. Reconstruct transcriptional network using microarrays", + "5. Infer TF-target interactions from microarray data" + ] + }, + { + "id": 202, + "slug": "dream-2-synthetic-five-gene-network-inference", + "name": "DREAM 2 - Synthetic Five-Gene Network Inference", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 203, + "slug": "dream-3-signaling-cascade-identification", + "name": "DREAM 3 - Signaling Cascade Identification", + "headline": "", + "headline_alternatives": [ + "1. Inferring signaling cascade dynamics from flow cytometry data", + "2. Extracting topology of signaling interactions from sparse data ", + "3. Exploring signaling cascade inference from incomplete measurements", + "4. Analyzing signaling cascade dynamics with limited flow data", + "5. Reconstructing signaling pathways from partial flow cytometry" + ] + }, + { + "id": 204, + "slug": "dream-3-gene-expression-prediction", + "name": "DREAM 3 - Gene Expression Prediction", + "headline": "", + "headline_alternatives": [ + "1. Predict gene expression rank in yeast strain", + "2. Order yeast gene expression changes ", + "3. Rank yeast gene repression after perturbation", + "4. Predict relative yeast gene induction ", + "5. Order yeast gene expression with partial data" + ] + }, + { + "id": 205, + "slug": "dream-4-predictive-signaling-network-modelling", + "name": "DREAM 4 - Predictive Signaling Network Modelling", + "headline": "Cell-type specific high-throughput experimental data", + "headline_alternatives": [ + "1. Create cell-specific signaling model for HepG2 using pathways and data", + "2. Build interpretable HepG2 signaling network consistent with data ", + "3. Develop HepG2 signaling model from pathways and high-throughput data", + "4. Infer HepG2-specific signaling network from generic pathways and data", + "5. Construct biological HepG2 signal transduction model matching data" + ] + }, + { + "id": 206, + "slug": "dream-3-signaling-response-prediction", + "name": "DREAM 3 - Signaling Response Prediction", + "headline": "Predict missing protein concentrations from a large corpus of measurements", + "headline_alternatives": [ + "1. Analyze intracellular, extracellular response in normal, cancer cells", + "2. Compare protein, cytokine dynamics between normal, tumor liver cells ", + "3. Measure signaling responses to stimuli in normal, cancerous hepatocytes", + "4. Characterize phosphoprotein, cytokine changes in hepatocytes after stimulation", + "5. Assess signaling pathway perturbations in human liver cells by inhibitors" + ] + }, + { + "id": 207, + "slug": "dream-4-peptide-recognition-domain-prd-specificity-prediction", + "name": "DREAM 4 - Peptide Recognition Domain (PRD) Specificity Prediction", + "headline": "", + "headline_alternatives": [ + "1. Predict binding specificity of protein domains", + "2. Model peptide recognition of protein domains ", + "3. Infer interaction profiles of peptide binding domains", + "4. Compute specificity matrices for domain-peptide binding", + "5. Estimate position weight matrices for protein interactions" + ] + }, + { + "id": 208, + "slug": "dream-5-transcription-factor-dna-motif-recognition-challenge", + "name": "DREAM 5 - Transcription-Factor, DNA-Motif Recognition Challenge", + "headline": "", + "headline_alternatives": [ + "1. Predict binding intensities for transcription factors from motifs", + "2. Model transcription factor binding specificities from genomic sequences ", + "3. Infer transcription factor binding strengths from DNA motifs", + "4. Estimate transcription factor affinities using sequence motifs", + "5. Predict transcription factor binding signals from DNA sequences" + ] + }, + { + "id": 209, + "slug": "dream-5-epitope-antibody-recognition-ear-challenge", + "name": "DREAM 5 - Epitope-Antibody Recognition (EAR) Challenge", + "headline": "Predict the binding specificity of peptide-antibody interactions.", + "headline_alternatives": [] + }, + { + "id": 210, + "slug": "dream-gene-expression-prediction-challenge", + "name": "DREAM Gene Expression Prediction Challenge", + "headline": "Predict gene expression levels from promoter sequences in eukaryotes", + "headline_alternatives": [ + "1. Predict promoter activity from sequence and condition", + "2. Quantify transcription regulation in yeast gene promoters ", + "3. Model transcriptional output of yeast ribosomal promoters", + "4. Decode regulatory code in yeast ribosomal protein genes", + "5. Infer ribosomal protein promoter strengths from sequences" + ] + }, + { + "id": 211, + "slug": "dream-5-systems-genetics-challenge", + "name": "DREAM 5 - Systems Genetics Challenge", + "headline": "Predict disease phenotypes and infer Gene Networks from Systems Genetics data", + "headline_alternatives": [ + "1. Inferring causal gene networks from randomized genetic perturbations ", + "2. Elucidating predictive models of biological networks using systems genetics", + "3. Gaining system-level understanding of networks through randomized experiments", + "4. Using randomized genetics to reconstruct causal biological networks", + "5. Systems genetics data reveals causal relationships in gene networks" + ] + }, + { + "id": 212, + "slug": "dream-6-estimation-of-model-parameters-challenge", + "name": "DREAM 6 - Estimation of Model Parameters Challenge", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 213, + "slug": "dream-6-flowcap2-molecular-classification-of-acute-myeloid-leukemia-challenge", + "name": "DREAM 6 - FlowCAP2 Molecular Classification of Acute Myeloid Leukemia Challenge", + "headline": "The goal of this challenge is to diagnose Acute Myeloid Leukaemia from patie...", + "headline_alternatives": [ + "1. Automating Identification of Cell Populations in Flow Cytometry Data", + "2. Developing Reliable Tools to Interpret High-Dimensional Flow Cytometry Data ", + "3. Advancing Analysis of Complex Flow Cytometry Datasets ", + "4. Modernizing Flow Cytometry Analysis with Machine Learning", + "5. Tackling Manual Analysis Bottleneck in Flow Cytometry" + ] + }, + { + "id": 214, + "slug": "dream-6-alternative-splicing-challenge", + "name": "DREAM 6 - Alternative Splicing Challenge", + "headline": "", + "headline_alternatives": [ + "1. Assess accuracy of mRNA-seq alternative splicing reconstruction", + "2. Compare mRNA-seq methods on primate and rhino transcripts ", + "3. mRNA-seq challenge evaluates transcriptome assembly methods", + "4. mRNA-seq methods tested on primate, rhino, and stem cells", + "5. Novel biology discovery goal of mRNA-seq splicing challenge" + ] + }, + { + "id": 215, + "slug": "causalbench-challenge", + "name": "CausalBench Challenge", + "headline": "A machine learning contest for gene network inference from single-cell pertu...", + "headline_alternatives": [ + "1. Mapping gene interactions to generate drug hypotheses", + "2. Advancing networks from single-cell data for causal insights ", + "3. Deriving gene-gene networks to improve causal disease insights", + "4. Machine learning to advance gene network inference from cells", + "5. Generating gene interaction maps to target disease mechanisms" + ] + }, + { + "id": 216, + "slug": "iclr-computational-geometry-and-topology-challenge-2022", + "name": "ICLR Computational Geometry & Topology Challenge 2022", + "headline": "", + "headline_alternatives": [] + }, + { + "id": 217, + "slug": "iclr-computational-geometry-and-topology-challenge-2021", + "name": "ICLR Computational Geometry & Topology Challenge 2021", + "headline": "", + "headline_alternatives": [ + "1. Advancing computational geometry and topology with Python", + "2. Pushing differential geometry and topology forward with Python", + "3. Improving computational methods in geometry and topology ", + "4. Creating new geometric and topological data analysis with Python", + "5. Developing numerical experiments in differential geometry and topology" + ] + }, + { + "id": 218, + "slug": "genedisco-challenge", + "name": "GeneDisco Challenge", + "headline": "", + "headline_alternatives": [ + "1. Exploring experimental design with active learning for genetics", + "2. Evaluating active learning algorithms for genetic perturbation", + "3. Machine learning challenge for exploring genetic experiment design", + "4. Community challenge to optimize active learning in gene experiments ", + "5. Assessing batch active learning in vast genetic perturbation space" + ] + }, + { + "id": 219, + "slug": "hidden-treasures-warm-up", + "name": "Hidden Treasures: Warm Up", + "headline": "", + "headline_alternatives": [ + "1. Assess genome sequencing software accuracy with unknown variants", + "2. Benchmark genome sequencing pipelines with in silico variants ", + "3. Test exome sequencing pipelines with injected variants", + "4. Prepare for upcoming genome sequencing challenge with practice", + "5. Evaluate genome sequencing accuracy before harder fall challenge" + ] + }, + { + "id": 220, + "slug": "data-management-and-graph-extraction-for-large-models-in-the-biomedical-space", + "name": "Data management and graph extraction for large models in the biomedical space", + "headline": "Collaborative hackathon on the topic of data management and graph extraction...", + "headline_alternatives": [ + "1. CMU and DNAnexus partner for biomedical data hackathon", + "2. CMU hackathon tackles biomedical data management ", + "3. CMU hosts hackathon on biomedical data extraction", + "4. Collaborative hackathon focuses on biomedical data", + "5. CMU and DNAnexus hack genomic data challenges" + ] + }, + { + "id": 221, + "slug": "cagi2-asthma-twins", + "name": "CAGI2: Asthma discordant monozygotic twins", + "headline": "With the provided whole genome and RNA sequencing data, identify which two i...", + "headline_alternatives": [ + "1. Identify genetic differences between asthmatic and healthy twins", + "2. Find genomic variants linked to asthma in identical twins ", + "3. Detect genomic and transcriptomic differences between asthmatic twins", + "4. Analyze genomic and RNA-seq data to understand asthma in twins", + "5. Use twin genomes and transcriptomes to elucidate asthma pathogenesis" + ] + }, + { + "id": 222, + "slug": "cagi4-bipolar", + "name": "CAGI4: Bipolar disorder", + "headline": "With the provided exome data, identify which individuals have BD and which i...", + "headline_alternatives": [ + "1. Predicting bipolar disorder from exome data", + "2. Identifying bipolar disorder using exome sequences ", + "3. Detecting bipolar disorder with exome learning", + "4. Classifying bipolar disorder from exomes", + "5. Diagnosing bipolar disorder through exome analysis" + ] + }, + { + "id": 223, + "slug": "cagi3-brca", + "name": "CAGI3: BRCA1 & BRCA2", + "headline": "For each variant, provide the probability that Myriad Genetics has classifie...", + "headline_alternatives": [ + "1. Assess hereditary cancer risk via BRCA gene analysis", + "2. Detect BRCA mutations to identify hereditary cancer risk ", + "3. Proprietary test analyzes BRCA genes for cancer risk", + "4. Analyze BRCA genes to assess hereditary cancer risk", + "5. Test for BRCA mutations, link to hereditary cancer risk" + ] + }, + { + "id": 224, + "slug": "cagi2-breast-cancer-pkg", + "name": "CAGI2: Breast cancer pharmacogenomics", + "headline": "Cancer tissues are specifically responsive to different drugs. For this expe...", + "headline_alternatives": [ + "1. Exploring CHEK2 as a candidate gene for cancer susceptibility", + "2. Investigating the role of CHEK2 in DNA repair and cell cycle regulation ", + "3. Assessing CHEK2 interactions with BRCA1 and TP53 in genome maintenance", + "4. Evaluating CHEK2 in cell cycle control and genome integrity mechanisms", + "5. Identifying CHEK2 involvement in cancer through cell cycle regulation" + ] + }, + { + "id": 225, + "slug": "cagi4-2eqtl", + "name": "CAGI4: eQTL causal SNPs", + "headline": "Participants are asked to submit predictions of the regulatory sequences tha...", + "headline_alternatives": [ + "1. Identify regulatory variants causing gene expression differences", + "2. Find expression-modulating variants for human eQTLs ", + "3. Discover causal alleles for gene expression variation", + "4. Determine sequences and variants underlying eQTLs", + "5. Use MPRA to pinpoint regulatory causes of eQTLs" + ] + }, + { + "id": 226, + "slug": "cagi1-cbs", + "name": "CAGI1: CBS", + "headline": "Participants were asked to submit predictions for the effect of the variants...", + "headline_alternatives": [ + "1. Seeking to understand CBS enzyme function in cysteine production ", + "2. Investigating cofactor dependence of human CBS enzyme activity", + "3. Studying CBS deficiency causing homocystinuria genetic disorder", + "4. Characterizing molecular basis of CBS-dependent homocystinuria ", + "5. Elucidating metabolic defects in CBS-deficient homocystinuria patients" + ] + }, + { + "id": 227, + "slug": "cagi2-cbs", + "name": "CAGI2: CBS", + "headline": "Participants were asked to submit predictions for the effect of the variants...", + "headline_alternatives": [ + "1. Developing treatment for homocystinuria caused by CBS deficiency", + "2. Understanding CBS cofactor dependence for cysteine synthesis ", + "3. Studying CBS mutations causing homocystinuria ", + "4. Characterizing CBS enzyme function in sulfur metabolism", + "5. Analyzing vitamin B6 and heme binding to CBS enzyme" + ] + }, + { + "id": 228, + "slug": "cagi1-chek2", + "name": "CAGI1: CHEK2", + "headline": "Variants in the ATM & CHEK2 genes are associated with breast cancer.", + "headline_alternatives": [] + }, + { + "id": 229, + "slug": "cagi3-fch", + "name": "CAGI3: FCH", + "headline": "The challenge involved exome sequencing data for 5 subjects in an FCH family...", + "headline_alternatives": [ + "1. Seeking to understand genetic basis of common hyperlipidemia disorder", + "2. Uncovering genetics behind prevalent cholesterol and triglyceride disorder ", + "3. Investigating genetics of combined hyperlipidemia, a coronary disease risk", + "4. Studying genetics of complex and variable cholesterol/triglyceride disorder", + "5. Exploring genetics of common hyperlipidemia linked to heart disease" + ] + }, + { + "id": 230, + "slug": "cagi3-ha", + "name": "CAGI3: HA", + "headline": "The dataset for this challenge comprises of exome sequencing data for 4 subj...", + "headline_alternatives": [ + "1. Raising HDL levels to reduce heart disease risk", + "2. Increasing HDL and APOA1 to combat low HDL levels ", + "3. Boosting HDL cholesterol in hypoalphalipoproteinemia patients", + "4. Targeting low HDL as a coronary artery disease risk", + "5. Correcting HDL deficiency to improve cardiovascular health" + ] + }, + { + "id": 231, + "slug": "cagi2-croshn-s", + "name": "CAGI2: Crohn's disease", + "headline": "With the provided exome data, identify which individuals have Crohn's diseas...", + "headline_alternatives": [ + "1. Seeking genes linked to Crohn's, an inflammatory bowel disease", + "2. Identifying genetic factors in Crohn's disease, a chronic GI disorder ", + "3. Understanding the genetics behind Crohn's disease, an inflammatory GI condition", + "4. Studying chronic inflammation in Crohn's disease, a complex genetic disorder", + "5. Exploring genetic links to relapsing inflammation in Crohn's disease" + ] + }, + { + "id": 232, + "slug": "cagi3-crohn-s", + "name": "CAGI3: Crohn's disease", + "headline": "With the provided exome data, identify which individuals have Crohn's diseas...", + "headline_alternatives": [ + "1. Understanding the genetics behind Crohn's disease", + "2. Exploring chronic inflammation in Crohn's disease ", + "3. Investigating the complexity of Crohn's disease", + "4. Analyzing gastrointestinal involvement in Crohn's disease", + "5. Characterizing the relapsing nature of Crohn's disease" + ] + }, + { + "id": 233, + "slug": "cagi4-chron-s-exome", + "name": "CAGI4: Crohn's exomes", + "headline": "With the provided exome data, identify which individuals have Crohn's diseas...", + "headline_alternatives": [ + "1. Seeking to understand genetic basis of Crohn's bowel disease", + "2. Uncovering genomic factors in chronic gastrointestinal inflammation ", + "3. Investigating genetics behind Crohn's inflammatory bowel disorder", + "4. Exploring complex genetics of relapsing bowel inflammation in Crohn's ", + "5. Studying genes involved in chronic inflammatory Crohn's disease" + ] + }, + { + "id": 234, + "slug": "cagi4-hopkins", + "name": "CAGI4: Hopkins clinical panel", + "headline": "Participants were tasked with identifying the disease class for each of 106 ...", + "headline_alternatives": [ + "1. Exonic sequences of 83 genes linked to 14 diseases analyzed", + "2. 83 gene exons associated with 14 disease classes examined ", + "3. Examine exonic sequences of 83 genes related to 14 diseases", + "4. Analyze exonic sequences for 83 genes linked to 14 disease classes", + "5. Study exonic sequences of 83 genes associated with 14 disorders" + ] + }, + { + "id": 235, + "slug": "cagi2-mouse-exomes", + "name": "CAGI2: Mouse exomes", + "headline": "The challenge involved identifying the causative variants leading to one of ...", + "headline_alternatives": [ + "1. Predict causative variants from exome sequencing data. ", + "2. Identify variants causing unpublished phenotypes from exome data.", + "3. Compare computational predictions to unpublished causative variants. ", + "4. Predict unpublished causative variants using exome sequencing.", + "5. Determine if variants explain unpublished phenotypes from exomes." + ] + }, + { + "id": 236, + "slug": "cagi3-mrn-mre11", + "name": "CAGI3: MRE11", + "headline": "Genomes are subject to constant threat by damaging agents that generate DNA ...", + "headline_alternatives": [] + }, + { + "id": 237, + "slug": "cagi4-naglu", + "name": "CAGI4: NAGLU", + "headline": "Participants are asked to submit predictions on the effect of the variants o...", + "headline_alternatives": [ + "1. Predicting enzymatic activity of NAGLU mutants", + "2. Assessing fractional activity of NAGLU variants ", + "3. Evaluating mutants of lysosomal enzyme NAGLU", + "4. Estimating activity of NAGLU mutants in Sanfilippo B", + "5. Modeling effects of mutations on NAGLU function" + ] + }, + { + "id": 238, + "slug": "cagi4-npm-alk", + "name": "CAGI4: NPM: ALK", + "headline": "Participants are asked to submit predictions of both the kinase activity and...", + "headline_alternatives": [ + "1. Predicting kinase activity of NPM-ALK fusion mutants", + "2. Assessing NPM-ALK fusion protein mutations in cells ", + "3. Evaluating NPM-ALK mutant effects on kinase and binding", + "4. Quantifying NPM-ALK mutant kinase and binding changes", + "5. Modeling impacts of NPM-ALK mutations in vitro" + ] + }, + { + "id": 239, + "slug": "cagi3-mrn-nbs1", + "name": "CAGI3: NBS1", + "headline": "Genomes are subject to constant threat by damaging agents that generate DNA ...", + "headline_alternatives": [ + "1. Predicting Pathogenicity of Rare MRE11 and NBS1 Variants", + "2. Assessing Pathogenic Potential of Rare MRE11/NBS1 Mutations ", + "3. Rating Pathogenic Likelihood of Uncommon MRE11 and NBS1 Alleles", + "4. Estimating Disease Risk from Rare MRE11 and NBS1 Variants", + "5. Scoring Probability of Pathogenicity for Rare MRE11/NBS1 Variants" + ] + }, + { + "id": 240, + "slug": "cagi3-p16", + "name": "CAGI3: p16", + "headline": "CDKN2A is the most common, high penetrance, susceptibility gene identified t...", "headline_alternatives": [ - "1. Challenge Seeks AI Solutions to Standardize NIDDK Data", - "2. Data Challenge Aims to Ready NIDDK Data for AI Discovery ", - "3. Making NIDDK Data AI-Ready Via Data Standardization Challenge", - "4. Challenge Invites AI to Standardize NIDDK Data for Reuse", - "5. Data Challenge Seeks AI Tools to Improve NIDDK Data Reuse" + "1. Assessing p16 protein variants' effects on cell growth", + "2. Testing if p16 variants can still halt cell proliferation ", + "3. Do mutations change p16's ability to stop cell division?", + "4. Evaluating if p16 variants retain anti-proliferative activity", + "5. Can p16 protein variants still inhibit cell proliferation?" ] }, + { + "id": 241, + "slug": "cagi2-p53", + "name": "CAGI2: p53 reactivation", + "headline": "Predictors are asked to submit predictions on the effect of the cancer rescu...", + "headline_alternatives": [] + }, + { + "id": 242, + "slug": "cagi1-pgp", + "name": "CAGI1: PGP", + "headline": "PGP challenge requires matching of full genome sequences to extensive phenot...", + "headline_alternatives": [ + "1. Participants share sequence and profile data publicly", + "2. Project makes participant data open for analysis ", + "3. Participants publicly release full genetic data profiles", + "4. Project opens participant sequence and phenotype data", + "5. Participants publicly share complete sequence and trait data" + ] + }, + { + "id": 243, + "slug": "cagi2-pgp", + "name": "CAGI2: PGP", + "headline": "PGP challenge requires matching of full genome sequences to extensive phenot...", + "headline_alternatives": [ + "1. Sequencing project shares data for prediction challenges", + "2. Participants openly provide sequences for prediction contests ", + "3. Prediction contests utilize shared sequence and profile data", + "4. Contests use pre-release data from collaborative sequencing project", + "5. Challenges based on pre-release collaboration sequence data" + ] + }, + { + "id": 244, + "slug": "cagi3-pgp", + "name": "CAGI3: PGP", + "headline": "PGP challenge requires matching of full genome sequences to extensive phenot...", + "headline_alternatives": [ + "1. Participants Share Genomic Data for Analysis Challenges", + "2. Genomic Data Released for Computational Prediction Tests ", + "3. Public Genomic Data Used in Prediction Competition ", + "4. Participants Make Genomes Public for Assessment Events", + "5. Genomic Sequence and Profile Data Shared Openly" + ] + }, + { + "id": 245, + "slug": "cagi4-pgp", + "name": "CAGI4: PGP", + "headline": "PGP challenge requires matching of full genome sequences to extensive phenot...", + "headline_alternatives": [] + }, + { + "id": 246, + "slug": "cagi4-pyruvate-kinase", + "name": "CAGI4: Pyruvate kinase", + "headline": "Participants are asked to submit predictions on the effect of the mutations ...", + "headline_alternatives": [ + "1. Predicting mutation impacts on pyruvate kinase activity and regulation", + "2. Assessing pyruvate kinase variant effects on allosteric regulation ", + "3. Modeling mutations in glycolytic enzyme pyruvate kinase ", + "4. Evaluating mutations in pyruvate kinase allosteric sites", + "5. Analyzing variants of pyruvate kinase for enzymatic defects" + ] + }, + { + "id": 247, + "slug": "cagi2-rad50", + "name": "CAGI2: RAD50", + "headline": "Predict the probability of the variant occurring in a case individual.", + "headline_alternatives": [ + "1. Assessing RAD50 variants for breast cancer risk", + "2. Evaluating RAD50 variants in breast cancer cases and controls ", + "3. Identifying RAD50 variants associated with breast cancer", + "4. Determining if RAD50 is a breast cancer gene ", + "5. Testing RAD50 as a breast cancer susceptibility gene" + ] + }, + { + "id": 248, + "slug": "cagi2-risksnps", + "name": "CAGI2: riskSNPs", + "headline": "The goal of these challenges is to investigate the community\u2019s ability to id...", + "headline_alternatives": [ + "1. Exploring molecular mechanisms linking SNPs to disease risk", + "2. Investigating potential mechanisms underlying SNP-disease associations ", + "3. Assigning putative mechanisms to SNP-disease risk loci", + "4. Cataloging plausible molecular mechanisms for SNP-disease links", + "5. Elucidating molecular underpinnings of SNP associations with disease" + ] + }, + { + "id": 249, + "slug": "cagi3-risksnps", + "name": "CAGI3: riskSNPs", + "headline": "The goal of these challenges is to investigate the community\u2019s ability to id...", + "headline_alternatives": [ + "1. Exploring molecular mechanisms linking SNPs to disease risk", + "2. Investigating possible mechanisms for SNP-disease associations ", + "3. Assigning potential mechanisms to SNP-disease risk loci", + "4. Can SNP-disease mechanisms be confidently determined?", + "5. Cataloging plausible mechanisms underlying SNP-disease links" + ] + }, + { + "id": 250, + "slug": "cagi2-nav1-5", + "name": "CAGI2: SCN5A", + "headline": "Predictors are asked to submit predictions on the effect of the mutants on t...", + "headline_alternatives": [] + }, + { + "id": 251, + "slug": "cagi2-mr-1", + "name": "CAGI2: Shewanella oneidensis strain MR-1", + "headline": "Shewanella oneidensis strain MR-1 (formerly known as S. putrefaciens) is a m...", + "headline_alternatives": [] + }, + { + "id": 252, + "slug": "cagi3-mr-1", + "name": "CAGI3: Shewanella oneidensis strain MR-1", + "headline": "Shewanella oneidensis strain MR-1 (formerly known as S. putrefaciens) is a m...", + "headline_alternatives": [] + }, + { + "id": 253, + "slug": "cagi4-sickkids", + "name": "CAGI4: SickKids", + "headline": "The challenge presented here is to use computational methods to match each g...", + "headline_alternatives": [ + "1. Predict phenotypes from genome sequences of children", + "2. Match genome sequences to clinical descriptions in kids ", + "3. Identify variants predicting genetic disorders from genomes", + "4. Link genome sequences and phenotypes in pediatric cases", + "5. Infer traits and disease risk from children's genomes" + ] + }, + { + "id": 254, + "slug": "cagi4-sumo-ligase", + "name": "CAGI4: SUMO ligase", + "headline": "Participants are asked to submit predictions of the effect of the variants o...", + "headline_alternatives": [] + }, + { + "id": 255, + "slug": "cagi3-splicing", + "name": "CAGI3: TP53 splicing", + "headline": "With the provided data determine which disease-causing mutations in the TP53...", + "headline_alternatives": [] + }, + { + "id": 256, + "slug": "cagi4-warfarin", + "name": "CAGI4: Warfarin exomes", + "headline": "With the provided exome data and clinical covariates, predict the therapeuti...", + "headline_alternatives": [ + "1. Improve warfarin dosing to reduce adverse events", + "2. Develop better warfarin dosing for fewer complications ", + "3. Optimize warfarin doses using new methods", + "4. Find better warfarin dosing strategies to limit risks", + "5. New approaches to warfarin dosing sought to cut harms" + ] + }, + { + "id": 257, + "slug": "cagi6-calmodulin", + "name": "CAGI6: Calmodulin", + "headline": "participants were asked to submit predictions for the competitive growth sco...", + "headline_alternatives": [] + }, + { + "id": 258, + "slug": "cagi2-splicing", + "name": "CAGI2: splicing", + "headline": "Predictors are asked to compare exons from wild type and disease-associated ...", + "headline_alternatives": [ + "1. Developing methods to improve accuracy of pre-mRNA splicing", + "2. Understanding mechanisms regulating spliceosome assembly on pre-mRNAs ", + "3. Elucidating roles of splicing factors in splice site recognition and intron removal", + "4. Characterizing spliceosome dynamics during catalytic steps of intron excision", + "5. Determining how splicing errors lead to disease-causing protein isoforms" + ] + }, + { + "id": 259, + "slug": "cagi6-arsa", + "name": "CAGI6: ARSA", + "headline": "Predicting the effect of naturally occurring missense mutations on enzymatic...", + "headline_alternatives": [ + "1. Predict enzyme activity for Metachromatic Leukodystrophy mutations", + "2. Forecast ARSA mutant function in lysosomal storage disease ", + "3. Estimate impact of ARSA variants in sulfatide metabolism", + "4. Model effects of missense mutations on ARSA activity", + "5. Quantify mutant protein function in genetic leukodystrophy" + ] + }, + { + "id": 260, + "slug": "predict-hits-for-the-wdr-domain-of-lrrk2", + "name": "CACHE1: PREDICT HITS FOR THE WDR DOMAIN OF LRRK2", + "headline": "Finding ligands targeting the central cavity of the WD-40 repeat (WDR) domai...", + "headline_alternatives": [ + "1. Seeking Compounds Binding to LRRK2's WD40 Parkinson's Protein", + "2. Discovering LRRK2 WD40 Inhibitors to Treat Parkinson's Disease", + "3. Targeting the WD40 Domain of LRRK2 for Parkinson's Therapy", + "4. Can Your Compounds Bind the LRRK2 WD40 Domain? ", + "5. Help Find Inhibitors of LRRK2's WD40 Domain for Parkinson's" + ] + }, + { + "id": 261, + "slug": "finding-ligands-targeting-the-conserved-rna-binding-site-of-sars-cov-2-nsp13", + "name": "CACHE2: FINDING LIGANDS TARGETING THE CONSERVED RNA BINDING SITE OF SARS-CoV-2 NSP13", + "headline": "Finding ligands targeting the conserved RNA binding site of SARS-CoV-2 NSP13.", + "headline_alternatives": [ + "1. New compounds to be tested using enzymatic and binding assays", + "2. Procured compounds evaluated via enzyme and binding experiments ", + "3. Compound library screened through enzymatic and target binding tests", + "4. Purchased molecules assayed enzymatically and for target interaction", + "5. Acquired chemicals tested in enzyme and binding activity assays" + ] + }, + { + "id": 262, + "slug": "finding-ligands-targeting-the-macrodomain-of-sars-cov-2-nsp3", + "name": "CACHE3: Finding ligands targeting the macrodomain of SARS-CoV-2 Nsp3", + "headline": "Severe acute respiratory syndrome coronavirus 2", + "headline_alternatives": [] + }, + { + "id": 263, + "slug": "finding-ligands-targeting-the-tkb-domain-of-cblb", + "name": "CACHE4: Finding ligands targeting the TKB domain of CBLB", + "headline": "Several cancers (PMID-33306199), potential immunotherapy (PMID-24875217), in...", + "headline_alternatives": [ + "1. Seeking Novel Compounds to Bind CBLB's TKB Domain", + "2. Discover New Chemicals that Bind CBLB's Closed Conformation ", + "3. Predict Binders Under 30 Micromolar for CBLB's TKB Domain", + "4. Wanted: Sub 30 Micromolar Binders to CBLB TKB Domain", + "5. Can You Find New Sub 30 Micromolar Binders for CBLB?" + ] + }, + { + "id": 264, + "slug": "jan2024-rare-disease-ai-hackathon", + "name": "Jan2024: Rare Disease AI Hackathon", + "headline": "Researchers and medical experts are invited to collaborate on our patient ca...", + "headline_alternatives": [ + "1. Uniting AI and medicine to aid rare disease diagnosis", + "2. Building open source AI models to unlock rare disease insights ", + "3. Creating open access to rare disease expertise with AI", + "4. Using AI to find connections between rare diseases", + "5. Launching open source AI models to improve rare disease care" + ] + }, + { + "id": 265, + "slug": "cometh-benchmark", + "name": "COMETH Benchmark", + "headline": "Quantify tumor heterogeneity-how many cell types are present and in which pr...", + "headline_alternatives": [ + "1. Quantifying Cancer Heterogeneity Using Statistical Methods", + "2. Estimating Cell Types in Cancer Samples with Omics Data ", + "3. Exploring Statistical Methods to Quantify Tumor Heterogeneity", + "4. Statistical Approaches to Deconvolute Cancer Samples ", + "5. Assessing Intra-tumor Heterogeneity Through Statistical Analysis" + ] + }, + { + "id": 266, + "slug": "the-miccai-2014-machine-learning-challenge", + "name": "The MICCAI 2014 Machine Learning Challenge", + "headline": "Predicting Binary and Continuous Phenotypes from Structural Brain MRI Data", + "headline_alternatives": [ + "1. Benchmark study to compare machine learning tools for brain MRI analysis", + "2. Standardized datasets to validate new machine learning tools for brain MRI ", + "3. MICCAI challenge to benchmark machine learning for brain MRI prediction", + "4. Assess state-of-the-art machine learning for brain MRI analysis", + "5. MICCAI competition compares machine learning tools for brain MRI" + ] + }, + { + "id": 267, + "slug": "cagi6-annotate-all-missense", + "name": "CAGI6: Annotate All Missense", + "headline": "Predictors are asked to predict the functional effect predict each coding SNV.", + "headline_alternatives": [ + "1. Predict functional impact of 81 million human protein variants", + "2. Assess effects of missense and nonsense variants across genome ", + "3. Ongoing assessment of variant function using new annotations", + "4. Compare predictions to new data on protein-altering variant effects", + "5. Can we predict the functional impact of every genomic variant?" + ] + }, + { + "id": 268, + "slug": "cagi6-hmbs", + "name": "CAGI6: HMBS", + "headline": "Participants are asked to submit predictions of the fitness score for each o...", + "headline_alternatives": [] + }, + { + "id": 269, + "slug": "cagi6-intellectual-disability-panel", + "name": "CAGI6: Intellectual Disability Panel", + "headline": "In this challenge predictors are asked to analyze the sequence data for the ...", + "headline_alternatives": [] + }, + { + "id": 270, + "slug": "cagi6-mapk1", + "name": "CAGI6: MAPK1", + "headline": "For each variant, participants are asked to predict the \u0394\u0394GH20 value for the...", + "headline_alternatives": [ + "1. Predict MAPK1 variant stability and catalytic efficiency", + "2. Assess impact of MAPK1 mutations on stability and function ", + "3. Quantify effects of MAPK1 variants on stability and kinetics", + "4. Calculate unfolding energies and catalytic efficiency of MAPK1 mutants", + "5. Model MAPK1 variant thermodynamic stability and enzymatic activity" + ] + }, + { + "id": 271, + "slug": "cagi6-mapk3", + "name": "CAGI6: MAPK3", + "headline": "For each variant, participants are asked to predict the \u0394\u0394GH20 value for the...", + "headline_alternatives": [ + "1. Predict stability and activity changes for MAPK3 variants", + "2. Quantify stability and function of MAPK3 mutants", + "3. Model effects of mutations on MAPK3 stability and catalysis ", + "4. Calculate unfolding energies and kinetics for MAPK3 mutants", + "5. Analyze thermodynamic and catalytic impacts of MAPK3 variants" + ] + }, + { + "id": 272, + "slug": "cagi6-mthfr", + "name": "CAGI6: MTHFR", + "headline": "Participants are asked to submit predictions of the fitness score for each m...", + "headline_alternatives": [] + }, + { + "id": 273, + "slug": "cagi6-polygenic-risk-scores", + "name": "CAGI6: Polygenic Risk Scores", + "headline": "Participants will be expected to provide a fully trained prediction model th...", + "headline_alternatives": [] + }, + { + "id": 274, + "slug": "cagi6-rare-genomes-project", + "name": "CAGI6: Rare Genomes Project", + "headline": "The prediction challenge involves approximately 30 families.The prediction s...", + "headline_alternatives": [ + "1. Identify causative variants in rare disease genomes to advance diagnosis", + "2. Analyze rare disease genomes to find variants causing participants' symptoms ", + "3. Use genome sequencing to diagnose rare diseases and discover new genes", + "4. Find variants causing rare diseases by analyzing RGP participant genomes", + "5. Diagnose rare diseases by identifying causative variants in RGP genomes" + ] + }, + { + "id": 275, + "slug": "cagi6-sherloc-clinical-classification", + "name": "CAGI6: Sherloc clinical classification", + "headline": "Over 122,000 coding (missense, silent, frameshift, stop gained, in-frame cod...", + "headline_alternatives": [ + "1. Predict pathogenicity of 122,000 uncharacterized variants for submission to ClinVar", + "2. Assess clinical utility of pathogenicity predictions for 122,000 variants to submit to ClinVar ", + "3. Interpret 122,000 variants and predict pathogenicity before submission to ClinVar", + "4. ClinVar submission: predict pathogenicity of 122,000 uncharacterized genetic variants ", + "5. Pathogenicity predictions of 122,000 variants for clinical utility assessment and ClinVar submission" + ] + }, + { + "id": 276, + "slug": "cagi6-splicing-vus", + "name": "CAGI6: Splicing VUS", + "headline": "Predict whether the experimentally validated variants of unknown significanc...", + "headline_alternatives": [ + "1. Predict splicing disruption from variants of unknown significance", + "2. Identify variants causing aberrant splicing from diagnostic sequencing ", + "3. Diagnose disorders by predicting splicing disruption from VUS variants", + "4. Predict which VUS variants disrupt splicing based on whole-blood RT-PCR", + "5. Identify missed diagnoses: predict splicing disruption from VUS variants" + ] + }, + { + "id": 277, + "slug": "cagi6-stk11", + "name": "CAGI6: STK11", + "headline": "Participants are asked to submit predictions on the impact of the variants l...", + "headline_alternatives": [] + }, { "id": 278, "slug": "qbi-hackathon", @@ -20,9 +3230,22 @@ "headline_alternatives": [ "1. Hackathon connects developers and scientists to advance biomedical research ", "2. 48-hour hackathon applies AI to biomedical data ", - "3. Developers and scientists collaborate on biomedical challenges", - "4. Hackathon pushes science ahead with latest algorithms ", - "5. Event connects Bay Area developers and scientists" + "3. Hackathon pushes science ahead through latest algorithms", + "4. Developers and scientists collaborate on biomedical problems", + "5. Hackathon establishes connection between developers and scientists" + ] + }, + { + "id": 279, + "slug": "niddk-central-repository-data-centric-challenge", + "name": "NIDDK Central Repository Data-Centric Challenge", + "headline": "Enhancing NIDDK datasets for future Artificial Intelligence (AI) applications.", + "headline_alternatives": [ + "1. Challenge Seeks to Standardize Data for AI Discovery", + "2. Data Challenge Aims to Ready Datasets for AI Research ", + "3. Challenge Targets Data Standardization to Enable AI Insights", + "4. Data Challenge Focuses on Preparing Data for AI Analysis", + "5. Challenge Works to Improve Data for AI Research Discovery" ] } ] \ No newline at end of file diff --git a/apps/openchallenges/notebook/src/challenge_headline/challenge_headline_llm.py b/apps/openchallenges/notebook/src/challenge_headline/challenge_headline_llm.py index 9806b571ef..d7bf542379 100644 --- a/apps/openchallenges/notebook/src/challenge_headline/challenge_headline_llm.py +++ b/apps/openchallenges/notebook/src/challenge_headline/challenge_headline_llm.py @@ -22,26 +22,19 @@ with openchallenges_client.ApiClient(configuration) as api_client: api_instance = challenge_api.ChallengeApi(api_client) - query = openchallenges_client.ChallengeSearchQuery(page_number=1, page_size=1000) - pprint(query) + # IMPORTANT: The auto-generated API client does not support object for query params + query = openchallenges_client.ChallengeSearchQuery(page_number=0, page_size=1000) try: # Get the first page of the list of challenges page = api_instance.list_challenges(query) - pprint(page.size) challenges.extend(page.challenges) except openchallenges_client.ApiException as e: print("Exception when calling ChallengeApi->list_challenges: %s\n" % e) -# Sort challenge by ID -# challenges.sort(key=lambda challenge: challenge.id, reverse=False) -# pprint(challenges[:2]) +# Sort challenge by ID in ascending order +challenges.sort(key=lambda challenge: challenge.id, reverse=False) -print(len(challenges)) - -import sys - -sys.exit() # GENERATE THE HEADLINES WITH AWS BEDROCK @@ -125,7 +118,7 @@ def process_challenge(challenge): return obj -challenge_headlines = list(map(process_challenge, challenges[:2])) +challenge_headlines = list(map(process_challenge, challenges)) # SAVE OUTPUT TO FILE From dfc10de62a5d2b6e20716bfe528db3c355ee43a5 Mon Sep 17 00:00:00 2001 From: Thomas Schaffter Date: Wed, 1 Nov 2023 20:51:46 +0000 Subject: [PATCH 8/8] Remove Docker image for oc-notebook --- apps/openchallenges/notebook/project.json | 34 +++++++++++------------ 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/apps/openchallenges/notebook/project.json b/apps/openchallenges/notebook/project.json index 52070878f3..8414a2aaa7 100644 --- a/apps/openchallenges/notebook/project.json +++ b/apps/openchallenges/notebook/project.json @@ -25,24 +25,24 @@ "cwd": "apps/openchallenges/notebook" }, "dependsOn": [] - }, - "build-image": { - "executor": "@nx-tools/nx-container:build", - "options": { - "context": "apps/openchallenges/notebook", - "metadata": { - "images": ["ghcr.io/sage-bionetworks/openchallenges-notebook"], - "tags": ["type=edge,branch=main", "type=raw,value=local", "type=sha"] - }, - "push": false - } - }, - "serve-detach": { - "executor": "nx:run-commands", - "options": { - "command": "docker/openchallenges/serve-detach.sh openchallenges-notebook" - } } + // "build-image": { + // "executor": "@nx-tools/nx-container:build", + // "options": { + // "context": "apps/openchallenges/notebook", + // "metadata": { + // "images": ["ghcr.io/sage-bionetworks/openchallenges-notebook"], + // "tags": ["type=edge,branch=main", "type=raw,value=local", "type=sha"] + // }, + // "push": false + // } + // }, + // "serve-detach": { + // "executor": "nx:run-commands", + // "options": { + // "command": "docker/openchallenges/serve-detach.sh openchallenges-notebook" + // } + // } }, "tags": ["type:app", "scope:client"], "implicitDependencies": []