Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

services/kill.py: Allow admins to kill any run #55

Merged
merged 11 commits into from
Nov 26, 2024
2 changes: 2 additions & 0 deletions .env.dev
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ GH_CLIENT_SECRET=
GH_AUTHORIZATION_BASE_URL='https://github.com/login/oauth/authorize'
GH_TOKEN_URL='https://github.com/login/oauth/access_token'
GH_FETCH_MEMBERSHIP_URL='https://api.github.com/user/memberships/orgs/ceph'
GH_ORG_TEAM_URL='https://api.github.com/orgs/ceph/teams'
ADMIN_TEAM='Ceph'

#Session Related Stuff
## SESSION_SECRET_KEY is used to encrypt session data
Expand Down
8 changes: 8 additions & 0 deletions src/teuthology_api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ def read_root(request: Request):
allow_methods=["*"],
allow_headers=["*"],
)
else:
app.add_middleware(
CORSMiddleware,
allow_origins=[PULPITO_URL, PADDLES_URL],
allow_credentials=True,
allow_methods=["GET", "POST", "OPTIONS"],
allow_headers=["Cookie"],
)

app.add_middleware(SessionMiddleware, secret_key=SESSION_SECRET_KEY)
app.include_router(suite.router)
Expand Down
23 changes: 18 additions & 5 deletions src/teuthology_api/routes/kill.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging

from fastapi import APIRouter, Depends, Request
from fastapi import APIRouter, Depends, Request, HTTPException
from requests.exceptions import HTTPError

from teuthology_api.services.kill import run
from teuthology_api.services.helpers import get_token
Expand All @@ -15,11 +16,11 @@


@router.post("/", status_code=200)
def create_run(
async def create_run(
request: Request,
args: KillArgs,
logs: bool = False,
access_token: str = Depends(get_token),
token: str = Depends(get_token),
):
"""
POST route for killing a run or a job.
Expand All @@ -28,5 +29,17 @@ def create_run(
or else it will SyntaxError: non-dafault
argument follows default argument error.
"""
args = args.model_dump(by_alias=True, exclude_unset=True)
return run(args, logs, access_token, request)
try:
args = args.model_dump(by_alias=True, exclude_unset=True)
return await run(args, logs, token, request)
except HTTPException as http_exp:
log.error(http_exp)
raise
except HTTPError as http_err:
log.error(http_err)
raise HTTPException(
status_code=http_err.response.status_code, detail=str(http_err)
) from http_err
except Exception as err:
log.error(err)
raise HTTPException(status_code=500, detail=str(err)) from err
6 changes: 6 additions & 0 deletions src/teuthology_api/routes/login.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from dotenv import load_dotenv
import httpx

from teuthology_api.services.helpers import isAdmin

load_dotenv()

GH_CLIENT_ID = os.getenv("GH_CLIENT_ID")
Expand Down Expand Up @@ -80,14 +82,18 @@ async def handle_callback(code: str, request: Request):
data = {
"id": response_org_dic.get("user", {}).get("id"),
"username": response_org_dic.get("user", {}).get("login"),
"avatar_url": response_org_dic.get("user", {}).get("avatar_url"),
"state": response_org_dic.get("state"),
"role": response_org_dic.get("role"),
"access_token": token,
}
request.session["user"] = data
isUserAdmin = await isAdmin(data["username"], data["access_token"])
data["isUserAdmin"] = isUserAdmin
cookie_data = {
"username": data["username"],
"avatar_url": response_org_dic.get("user", {}).get("avatar_url"),
"isUserAdmin": isUserAdmin,
}
cookie = "; ".join(
[f"{str(key)}={str(value)}" for key, value in cookie_data.items()]
Expand Down
33 changes: 31 additions & 2 deletions src/teuthology_api/services/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import uuid
import httpx
from pathlib import Path

from fastapi import HTTPException, Request
Expand All @@ -15,6 +16,10 @@

PADDLES_URL = os.getenv("PADDLES_URL")
ARCHIVE_DIR = os.getenv("ARCHIVE_DIR")
TEUTHOLOGY_PATH = os.getenv("TEUTHOLOGY_PATH")

ADMIN_TEAM = os.getenv("ADMIN_TEAM")
GH_ORG_TEAM_URL = os.getenv("GH_ORG_TEAM_URL")

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -61,11 +66,11 @@ def get_run_details(run_name: str):
except HTTPError as http_err:
log.error(http_err)
raise HTTPException(
status_code=http_err.response.status_code, detail=repr(http_err)
status_code=http_err.response.status_code, detail=str(http_err)
) from http_err
except Exception as err:
log.error(err)
raise HTTPException(status_code=500, detail=repr(err)) from err
raise HTTPException(status_code=500, detail=str(err)) from err


def get_username(request: Request):
Expand Down Expand Up @@ -96,3 +101,27 @@ def get_token(request: Request):
detail="You need to be logged in",
headers={"WWW-Authenticate": "Bearer"},
)


async def isAdmin(username, token):
if not (GH_ORG_TEAM_URL and ADMIN_TEAM):
log.error("GH_ORG_TEAM_URL or ADMIN_TEAM is not set in .env")
return False
if not (token and username):
raise HTTPException(
status_code=401,
detail="You are probably not logged in (username or token missing)",
headers={"WWW-Authenticate": "Bearer"},
)
TEAM_MEMBER_URL = f"{GH_ORG_TEAM_URL}/{ADMIN_TEAM}/memberships/{username}"
async with httpx.AsyncClient() as client:
headers = {
"Authorization": "token " + token,
"Accept": "application/json",
}
response_org = await client.get(url=TEAM_MEMBER_URL, headers=headers)
if response_org:
response_org_dict = dict(response_org.json())
if response_org_dict.get("state", "") == "active":
return True
return False
54 changes: 33 additions & 21 deletions src/teuthology_api/services/kill.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,60 +4,72 @@

from fastapi import HTTPException, Request

from teuthology_api.services.helpers import get_username, get_run_details
from teuthology_api.services.helpers import get_username, get_run_details, isAdmin


TEUTHOLOGY_PATH = os.getenv("TEUTHOLOGY_PATH")
ADMIN_TEAM = os.getenv("ADMIN_TEAM")

log = logging.getLogger(__name__)


def run(args, send_logs: bool, access_token: str, request: Request):
async def run(args, send_logs: bool, token: dict, request: Request):
"""
Kill running teuthology jobs.
"""
if not access_token:
if not token:
log.error("access_token empty, user probably is not logged in.")
raise HTTPException(
status_code=401,
detail="You need to be logged in",
headers={"WWW-Authenticate": "Bearer"},
)
username = get_username(request)
run_name = args.get("--run")
run_name = args.get("--run", "")
if run_name:
run_details = get_run_details(run_name)
run_owner = run_details.get("user")
jobs_details = run_details.get("jobs", [])
VallariAg marked this conversation as resolved.
Show resolved Hide resolved
if jobs_details:
run_owner = jobs_details[0].get("owner", "")
else:
log.error("teuthology-kill is missing --run")
raise HTTPException(status_code=400, detail="--run is a required argument")
# TODO if user has admin priviledge, then they can kill any run/job.
if run_owner.lower() != username.lower():
log.error(
"%s doesn't have permission to kill a job scheduled by: %s",
username,
run_owner,
)
raise HTTPException(
status_code=401, detail="You don't have permission to kill this run/job"
)

if (run_owner.lower() != username.lower()) and (
run_owner.lower() != f"scheduled_{username.lower()}@teuthology"
):
isUserAdmin = await isAdmin(username, token.get("access_token"))
if not isUserAdmin:
log.error(
"%s doesn't have permission to kill a job scheduled by: %s",
username,
run_owner,
)
raise HTTPException(
status_code=401, detail="You don't have permission to kill this run/job"
)
log.info("Killing with admin privileges")
try:
kill_cmd = [f"{TEUTHOLOGY_PATH}/virtualenv/bin/teuthology-kill"]
for flag, flag_value in args.items():
if isinstance(flag_value, bool):
flag_value = int(flag_value)
kill_cmd += [flag, str(flag_value)]
if isinstance(flag_value, bool): # check for --preserve-queues
if flag_value == True:
kill_cmd += [flag]
else:
kill_cmd += [flag, str(flag_value)]
log.info(kill_cmd)
proc = subprocess.Popen(
kill_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
stdout, stderr = proc.communicate()
returncode = proc.wait(timeout=120)
log.info(stdout)
output_logs = stdout.decode()
log.info(output_logs)
if returncode != 0:
raise Exception(stdout)
raise Exception(output_logs)
if send_logs:
return {"kill": "success", "logs": stdout}
return {"kill": "success"}
except Exception as exc:
log.error("teuthology-kill command failed with the error: %s", repr(exc))
raise HTTPException(status_code=500, detail=repr(exc)) from exc
raise HTTPException(status_code=500, detail=str(exc)) from exc
2 changes: 1 addition & 1 deletion src/teuthology_api/services/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def run(args, send_logs: bool, access_token: str):
return {"run": run_details}
except Exception as exc:
log.error("teuthology.suite.main failed with the error: %s", repr(exc))
raise HTTPException(status_code=500, detail=repr(exc)) from exc
raise HTTPException(status_code=500, detail=str(exc)) from exc


def make_run_name(run_dic):
Expand Down
55 changes: 50 additions & 5 deletions tests/test_kill.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


async def override_get_token():
return {"access_token": "token_123", "token_type": "bearer"}
return {"access_token": "token_123", "token_type": "bearer", "isUserAdmin": False}


app.dependency_overrides[get_token] = override_get_token
Expand All @@ -30,16 +30,22 @@ async def override_get_token():
}


@patch("teuthology_api.services.kill.isAdmin")
@patch("subprocess.Popen")
@patch("teuthology_api.services.kill.get_run_details")
@patch("teuthology_api.services.kill.get_username")
def test_kill_run_success(m_get_username, m_get_run_details, m_popen):
def test_kill_run_success(m_get_username, m_get_run_details, m_popen, m_isAdmin):
m_get_username.return_value = "user1"
m_get_run_details.return_value = {"id": "7451978", "user": "user1"}
m_isAdmin.return_value = False
m_get_run_details.return_value = {
"id": "7451978",
"user": "user1",
"jobs": [{"owner": "user1"}],
}
mock_process = m_popen.return_value
mock_process.communicate.return_value = ("logs", "")
mock_process.communicate.return_value = (b"logs", "")
mock_process.wait.return_value = 0
response = client.post("/kill", data=json.dumps(mock_kill_args))
response = client.post("kill/", data=json.dumps(mock_kill_args))
assert response.status_code == 200
assert response.json() == {"kill": "success"}

Expand All @@ -48,3 +54,42 @@ def test_kill_run_fail():
response = client.post("/kill", data=json.dumps(mock_kill_args))
assert response.status_code == 401
assert response.json() == {"detail": "You need to be logged in"}


@patch("teuthology_api.services.kill.isAdmin")
@patch("subprocess.Popen")
@patch("teuthology_api.services.kill.get_run_details")
@patch("teuthology_api.services.kill.get_username")
def test_admin_kill_run_success(m_get_username, m_get_run_details, m_popen, m_isAdmin):
m_get_username.return_value = "user1"
m_isAdmin.return_value = True
m_get_run_details.return_value = {
"id": "7451978",
"user": "user1",
"jobs": [{"owner": "someone_else"}],
}
mock_process = m_popen.return_value
mock_process.communicate.return_value = (b"logs", "")
mock_process.wait.return_value = 0
response = client.post("kill/", data=json.dumps(mock_kill_args))
assert response.status_code == 200
assert response.json() == {"kill": "success"}


@patch("teuthology_api.services.kill.isAdmin")
@patch("subprocess.Popen")
@patch("teuthology_api.services.kill.get_run_details")
@patch("teuthology_api.services.kill.get_username")
def test_non_admin_kill_run_fail(m_get_username, m_get_run_details, m_popen, m_isAdmin):
m_get_username.return_value = "user1"
m_isAdmin.return_value = False
m_get_run_details.return_value = {
"id": "7451978",
"user": "user1",
"jobs": [{"owner": "someone_else"}],
}
mock_process = m_popen.return_value
mock_process.communicate.return_value = (b"logs", "")
mock_process.wait.return_value = 0
response = client.post("kill/", data=json.dumps(mock_kill_args))
assert response.status_code == 401 # run doesn't belong to user + user is not admin
Loading