Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dependency parsing grammar for gateway allowlist #1459

Merged
merged 17 commits into from
Sep 10, 2024
1 change: 1 addition & 0 deletions .github/workflows/kubernetes-deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ jobs:
--set gateway.application.ray.nodeImage=ray:test \
--set gateway.application.ray.proxyImage=proxy:test \
--set gateway.application.ray.cpu=1 \
--set gateway.application.debug=1 \
--set gateway.application.limits.keepClusterOnComplete=false \
--set gateway.application.authMockproviderRegistry=test \
--set gateway.application.proxy.enabled=false \
Expand Down
4 changes: 2 additions & 2 deletions charts/qiskit-serverless/charts/gateway/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ ingress:

resources:
limits:
cpu: "1500m"
memory: "1Gi"
psschwei marked this conversation as resolved.
Show resolved Hide resolved
cpu: "3000m"
memory: "2Gi"
requests:
cpu: "500m"
memory: "700Mi"
Expand Down
176 changes: 176 additions & 0 deletions gateway/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@
import inspect
import json
import logging
import os
import re
import time
import uuid
import sys
import platform
from typing import Any, Optional, Tuple, Union, Callable, Dict, List

from cryptography.fernet import Fernet
from ray.dashboard.modules.job.common import JobStatus
from django.conf import settings
from parsley import makeGrammar

from .models import Job

Expand Down Expand Up @@ -228,3 +232,175 @@ def remove_duplicates_from_list(original_list: List[Any]) -> List[Any]:
a list without duplicates maintaining the order
"""
return list(OrderedDict.fromkeys(original_list))


# Utilities for parsing python dependency information
# source: https://peps.python.org/pep-0508/#complete-grammar
#
# RAW_DEPENDENCY_GRAMMAR is grammar source text, not Python code: it is the
# default input that create_dependency_grammar() passes to makeGrammar().
# - The `lookup(...)` action used by the `env_var` rule is not defined in the
#   text itself; it has to be supplied as a binding when the grammar is built.
# - parse_dependency() starts parsing at the `specification` rule, which
#   yields a 4-tuple: (name, extras, version constraints or URL, marker).
# - The second half of the text (from `URI_reference` on) defines the URI
#   rules that `urlspec` refers to.
# NOTE(review): the text is a regular (non-raw) string, so Python processes the
# `\t` and `\\'` escapes before the grammar is built -- keep that in mind
# when editing quoted characters.
RAW_DEPENDENCY_GRAMMAR = """
wsp = ' ' | '\t'
version_cmp = wsp* <'<=' | '<' | '!=' | '==' | '>=' | '>' | '~=' | '==='>
version = wsp* <( letterOrDigit | '-' | '_' | '.' | '*' | '+' | '!' )+>
version_one = version_cmp:op version:v wsp* -> (op, v)
version_many = version_one:v1 (wsp* ',' version_one)*:v2 -> [v1] + v2
versionspec = ('(' version_many:v ')' ->v) | version_many
urlspec = '@' wsp* <URI_reference>
marker_op = version_cmp | (wsp* 'in') | (wsp* 'not' wsp+ 'in')
python_str_c = (wsp | letter | digit | '(' | ')' | '.' | '{' | '}' |
'-' | '_' | '*' | '#' | ':' | ';' | ',' | '/' | '?' |
'[' | ']' | '!' | '~' | '`' | '@' | '$' | '%' | '^' |
'&' | '=' | '+' | '|' | '<' | '>' )
dquote = '"'
squote = '\\''
python_str = (squote <(python_str_c | dquote)*>:s squote |
dquote <(python_str_c | squote)*>:s dquote) -> s
env_var = ('python_version' | 'python_full_version' |
'os_name' | 'sys_platform' | 'platform_release' |
'platform_system' | 'platform_version' |
'platform_machine' | 'platform_python_implementation' |
'implementation_name' | 'implementation_version' |
'extra' # ONLY when defined by a containing layer
):varname -> lookup(varname)
marker_var = wsp* (env_var | python_str)
marker_expr = marker_var:l marker_op:o marker_var:r -> (o, l, r)
| wsp* '(' marker:m wsp* ')' -> m
marker_and = marker_expr:l wsp* 'and' marker_expr:r -> ('and', l, r)
| marker_expr:m -> m
marker_or = marker_and:l wsp* 'or' marker_and:r -> ('or', l, r)
| marker_and:m -> m
marker = marker_or
quoted_marker = ';' wsp* marker
identifier_end = letterOrDigit | (('-' | '_' | '.' )* letterOrDigit)
identifier = < letterOrDigit identifier_end* >
name = identifier
extras_list = identifier:i (wsp* ',' wsp* identifier)*:ids -> [i] + ids
extras = '[' wsp* extras_list?:e wsp* ']' -> e
name_req = (name:n wsp* extras?:e wsp* versionspec?:v wsp* quoted_marker?:m
-> (n, e or [], v or [], m))
url_req = (name:n wsp* extras?:e wsp* urlspec:v (wsp+ | end) quoted_marker?:m
-> (n, e or [], v, m))
specification = wsp* ( url_req | name_req ):s wsp* -> s
# The result is a tuple - name, list-of-extras,
# list-of-version-constraints-or-a-url, marker-ast or None


URI_reference = <URI | relative_ref>
URI = scheme ':' hier_part ('?' query )? ( '#' fragment)?
hier_part = ('//' authority path_abempty) | path_absolute | path_rootless | path_empty
absolute_URI = scheme ':' hier_part ( '?' query )?
relative_ref = relative_part ( '?' query )? ( '#' fragment )?
relative_part = '//' authority path_abempty | path_absolute | path_noscheme | path_empty
scheme = letter ( letter | digit | '+' | '-' | '.')*
authority = ( userinfo '@' )? host ( ':' port )?
userinfo = ( unreserved | pct_encoded | sub_delims | ':')*
host = IP_literal | IPv4address | reg_name
port = digit*
IP_literal = '[' ( IPv6address | IPvFuture) ']'
IPvFuture = 'v' hexdig+ '.' ( unreserved | sub_delims | ':')+
IPv6address = (
( h16 ':'){6} ls32
| '::' ( h16 ':'){5} ls32
| ( h16 )? '::' ( h16 ':'){4} ls32
| ( ( h16 ':')? h16 )? '::' ( h16 ':'){3} ls32
| ( ( h16 ':'){0,2} h16 )? '::' ( h16 ':'){2} ls32
| ( ( h16 ':'){0,3} h16 )? '::' h16 ':' ls32
| ( ( h16 ':'){0,4} h16 )? '::' ls32
| ( ( h16 ':'){0,5} h16 )? '::' h16
| ( ( h16 ':'){0,6} h16 )? '::' )
h16 = hexdig{1,4}
ls32 = ( h16 ':' h16) | IPv4address
IPv4address = dec_octet '.' dec_octet '.' dec_octet '.' dec_octet
nz = ~'0' digit
dec_octet = (
digit # 0-9
| nz digit # 10-99
| '1' digit{2} # 100-199
| '2' ('0' | '1' | '2' | '3' | '4') digit # 200-249
| '25' ('0' | '1' | '2' | '3' | '4' | '5') )# %250-255
reg_name = ( unreserved | pct_encoded | sub_delims)*
path = (
path_abempty # begins with '/' or is empty
| path_absolute # begins with '/' but not '//'
| path_noscheme # begins with a non-colon segment
| path_rootless # begins with a segment
| path_empty ) # zero characters
path_abempty = ( '/' segment)*
path_absolute = '/' ( segment_nz ( '/' segment)* )?
path_noscheme = segment_nz_nc ( '/' segment)*
path_rootless = segment_nz ( '/' segment)*
path_empty = pchar{0}
segment = pchar*
segment_nz = pchar+
segment_nz_nc = ( unreserved | pct_encoded | sub_delims | '@')+
# non-zero-length segment without any colon ':'
pchar = unreserved | pct_encoded | sub_delims | ':' | '@'
query = ( pchar | '/' | '?')*
fragment = ( pchar | '/' | '?')*
pct_encoded = '%' hexdig
unreserved = letter | digit | '-' | '.' | '_' | '~'
reserved = gen_delims | sub_delims
gen_delims = ':' | '/' | '?' | '#' | '(' | ')?' | '@'
sub_delims = '!' | '$' | '&' | '\\'' | '(' | ')' | '*' | '+' | ',' | ';' | '='
hexdig = digit | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D' | 'e' | 'E' | 'f' | 'F'
"""


def create_dependency_grammar(grammar=RAW_DEPENDENCY_GRAMMAR):
    """Build a dependency grammar bound to the running interpreter.

    Args:
        grammar: grammar source text handed to ``makeGrammar``; defaults to
            ``RAW_DEPENDENCY_GRAMMAR``.

    Returns:
        The grammar object produced by ``makeGrammar``. Its ``lookup``
        binding maps an environment marker name (``os_name``,
        ``python_version``, ...) to the value on this interpreter and raises
        ``KeyError`` for names that are not in the table.
    """
    if not hasattr(sys, "implementation"):
        impl_name = ""
        impl_version = "0"
    else:
        info = sys.implementation.version
        impl_version = f"{info.major}.{info.minor}.{info.micro}"
        level = info.releaselevel
        if level != "final":
            # Non-final builds get a suffix such as "b1": first letter of the
            # release level followed by the serial number.
            impl_version += level[0] + str(info.serial)
        impl_name = sys.implementation.name

    environment = dict(
        implementation_name=impl_name,
        implementation_version=impl_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version=".".join(platform.python_version_tuple()[:2]),
        sys_platform=sys.platform,
    )

    return makeGrammar(grammar, {"lookup": environment.__getitem__})


def parse_dependency(dep, grammar):
    """Parse one dependency string and return its name and version part.

    Args:
        dep: the dependency specifier text to parse.
        grammar: callable that wraps ``dep`` in a parser object exposing a
            ``specification()`` rule (as built by create_dependency_grammar).

    Returns:
        A ``(name, version)`` tuple taken from positions 0 and 2 of the
        parsed specification.
    """
    specification = grammar(dep).specification()
    return specification[0], specification[2]


def create_dependency_allowlist():
    """
    Load the allowed dependencies and versions from the configured JSON file.

    The file path is read from ``settings.GATEWAY_ALLOWLIST_CONFIG``.

    Sample format:
    allowlist = { "wheel": ["0.44.0", "0.43.2"] }
    where the values for each key are allowed versions of dependency.

    Raises:
        ValueError: if the file cannot be opened/read or its content cannot
            be decoded; the original error is chained as the cause.
    """
    try:
        with open(
            settings.GATEWAY_ALLOWLIST_CONFIG, mode="r", encoding="utf-8"
        ) as config_file:
            return json.load(config_file)
    except IOError as err:
        logger.error("Unable to open allowlist config file: %s", err)
        raise ValueError("Unable to open allowlist config file") from err
    except ValueError as err:
        logger.error("Unable to decode dependency allowlist: %s", err)
        raise ValueError("Unable to decode dependency allowlist") from err
36 changes: 9 additions & 27 deletions gateway/api/v1/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@
import json
import logging
from rest_framework.serializers import ValidationError
from django.conf import settings
from api import serializers
from api.models import Provider
from api.utils import (
create_dependency_allowlist,
create_dependency_grammar,
parse_dependency,
)

logger = logging.getLogger("gateway.serializers")

Expand Down Expand Up @@ -50,38 +54,16 @@ def validate(self, attrs): # pylint: disable=too-many-branches
)

# validate dependencies
# allowlist stored in json config file (eventually via configmap)
# sample:
# allowlist = { "wheel": ["0.44.0", "0.43.2"] }
# where the values for each key are allowed versions of dependency
dependency_grammar = create_dependency_grammar()
deps = json.loads(attrs.get("dependencies", None))
try:
with open(
settings.GATEWAY_ALLOWLIST_CONFIG, encoding="utf-8", mode="r"
) as f:
allowlist = json.load(f)
except IOError as e:
logger.error("Unable to open allowlist config file: %s", e)
raise ValueError("Unable to open allowlist config file") from e
except ValueError as e:
logger.error("Unable to decode dependency allowlist: %s", e)
raise ValueError("Unable to decode dependency allowlist") from e

# If no allowlist specified, all dependencies allowed
if len(allowlist.keys()) > 0:
allowlist = create_dependency_allowlist()
if len(allowlist.keys()) > 0: # If no allowlist, all dependencies allowed
for d in deps:
dep, ver = d.split("==")

dep, _ = parse_dependency(d, dependency_grammar)
# Determine if a dependency is allowed
if dep not in allowlist:
raise ValidationError(f"Dependency {dep} is not allowed")

# Determine if a specific version of a dependency is allowed
if allowlist[dep] and ver not in allowlist[dep]:
raise ValidationError(
f"Version {ver} of dependency {dep} is not allowed"
)

title = attrs.get("title")
provider = attrs.get("provider", None)
if provider and "/" in title:
Expand Down
1 change: 1 addition & 0 deletions gateway/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ sqlparse>=0.5.0, <1
qiskit-ibm-runtime>=0.29.0
tzdata>=2024.1
django-cors-headers>=4.4.0, <5
parsley>=1.3, <2
30 changes: 0 additions & 30 deletions gateway/tests/api/test_v1_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,33 +323,3 @@ def test_upload_program_serializer_blocked_dependency(self):

serializer = UploadProgramSerializer(data=data)
self.assertFalse(serializer.is_valid())

Tansito marked this conversation as resolved.
Show resolved Hide resolved
def test_upload_program_serializer_dependency_bad_version(self):
    """Tests dependency allowlist.

    Submits an upload whose dependency list is ``["wheel==0.4.1"]`` and
    expects the serializer to report the data as invalid.
    """
    path_to_resource_artifact = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..",
        "resources",
        "artifact.tar",
    )
    # Read the artifact inside a context manager so the file handle is
    # closed as soon as its bytes have been copied into the upload object.
    with open(path_to_resource_artifact, "rb") as artifact:
        upload_file = SimpleUploadedFile(
            "artifact.tar", artifact.read(), content_type="multipart/form-data"
        )

    user = models.User.objects.get(username="test_user")

    data = {
        "title": "Hello world",
        "entrypoint": "pattern.py",
        "arguments": "{}",
        "dependencies": '["wheel==0.4.1"]',
        "artifact": upload_file,
    }

    serializer = UploadProgramSerializer(data=data)
    self.assertFalse(serializer.is_valid())