Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(db): mssql support #814

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docker/Dockerfile.api
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@ ENV PYTHONFAULTHANDLER=1 \
PYTHONUNBUFFERED=1

RUN useradd --user-group --system --create-home --no-log-init keep

# This is for MSSQL DB connections
RUN apt-get update && apt-get install -y tdsodbc unixodbc-dev \
&& apt install unixodbc -y \
&& apt-get clean -y

RUN echo "[FreeTDS]\n\
Description = FreeTDS unixODBC Driver\n\
Driver = /usr/lib/arm-linux-gnueabi/odbc/libtdsodbc.so\n\
Setup = /usr/lib/arm-linux-gnueabi/odbc/libtdsS.so" >> /etc/odbcinst.ini

WORKDIR /app

FROM base as builder
Expand All @@ -26,6 +37,7 @@ FROM base as final
ENV PATH="/venv/bin:${PATH}"
ENV VIRTUAL_ENV="/venv"
COPY --from=builder /venv /venv

# as per Openshift guidelines, https://docs.openshift.com/container-platform/4.11/openshift_images/create-images.html#use-uid_create-images
RUN chgrp -R 0 /app && chmod -R g=u /app
RUN chown -R keep:keep /app
Expand Down
85 changes: 68 additions & 17 deletions keep/api/core/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
from dotenv import find_dotenv, load_dotenv
from google.cloud.sql.connector import Connector
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from sqlalchemy import and_, desc, func, null, select, update
from sqlalchemy.exc import IntegrityError
from sqlalchemy import and_, desc, func, null, select, text, update
from sqlalchemy.exc import IntegrityError, OperationalError, ProgrammingError
from sqlalchemy.orm import joinedload, selectinload, subqueryload
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy.orm.exc import StaleDataError
Expand Down Expand Up @@ -138,6 +138,25 @@ def create_db_and_tables():
# migrate the workflowtoexecution table
with Session(engine) as session:
try:
if engine.dialect.name == "mssql":
connection.execute(
text("ALTER TABLE alert ADD alert_hash VARCHAR(255);")
)
else:
connection.execute(
text("ALTER TABLE alert ADD COLUMN alert_hash VARCHAR(255);")
)
except ProgrammingError as e:
if "column names in each table must be unique" in str(e).lower():
return
raise
except OperationalError as e:
# that's ok
if "duplicate column" in str(e).lower():
return
logger.exception("Failed to add column alert_hash to alert table")
raise

logger.info("Migrating WorkflowToAlertExecution table")
# get the foreign key constraint name
results = session.exec(
Expand Down Expand Up @@ -874,10 +893,11 @@ def get_alerts_with_filters(
if filters:
for f in filters:
filter_key, filter_value = f.get("key"), f.get("value")
filter_path = f"$.{filter_key}"
if isinstance(filter_value, bool) and filter_value is True:
# If the filter value is True, we want to filter by the existence of the enrichment
# e.g.: all the alerts that have ticket_id
if session.bind.dialect.name in ["mysql", "postgresql"]:
if session.bind.dialect.name in ["mysql", "postgresql", "mssql"]:
query = query.filter(
func.json_extract(
AlertEnrichment.enrichments, f"$.{filter_key}"
Expand All @@ -896,16 +916,19 @@ def get_alerts_with_filters(
query = query.filter(
func.json_unquote(
func.json_extract(
AlertEnrichment.enrichments, f"$.{filter_key}"
AlertEnrichment.enrichments, filter_path
)
)
== filter_value
)
elif session.bind.dialect.name == "mssql":
query = query.filter(
func.JSON_VALUE(AlertEnrichment.enrichments, filter_path)
== str(filter_value)
)
elif session.bind.dialect.name == "sqlite":
query = query.filter(
func.json_extract(
AlertEnrichment.enrichments, f"$.{filter_key}"
)
func.json_extract(AlertEnrichment.enrichments, filter_path)
== filter_value
)
else:
Expand Down Expand Up @@ -1373,26 +1396,54 @@ def get_rule_distribution(tenant_id, minute=False):
elif session.bind.dialect.name == "sqlite":
time_format = "%Y-%m-%d %H:%M" if minute else "%Y-%m-%d %H"
timestamp_format = func.strftime(time_format, AlertToGroup.timestamp)
elif session.bind.dialect.name == "mssql":
# For MSSQL, using CONVERT to format date
if minute:
timestamp_format = func.format(
AlertToGroup.timestamp, "yyyy-MM-dd HH:mm"
)
else:
timestamp_format = (
func.format(AlertToGroup.timestamp, "yyyy-MM-dd HH") + ":00"
)
else:
raise ValueError("Unsupported database dialect")
# Construct the query
query = (
session.query(
# Create a subquery
subquery = (
select(
Rule.id.label("rule_id"),
Rule.name.label("rule_name"),
Group.id.label("group_id"),
Group.group_fingerprint.label("group_fingerprint"),
timestamp_format.label("time"),
func.count(AlertToGroup.alert_id).label("hits"),
timestamp_format.label("formatted_timestamp"),
AlertToGroup.alert_id,
)
.join(Group, Rule.id == Group.rule_id)
.join(AlertToGroup, Group.id == AlertToGroup.group_id)
.filter(AlertToGroup.timestamp >= seven_days_ago)
.filter(Rule.tenant_id == tenant_id) # Filter by tenant_id
.filter(
AlertToGroup.timestamp >= seven_days_ago, Rule.tenant_id == tenant_id
)
.subquery()
)

query = (
session.query(
subquery.c.rule_id,
subquery.c.rule_name,
subquery.c.group_id,
subquery.c.group_fingerprint,
subquery.c.formatted_timestamp.label("time"),
func.count(subquery.c.alert_id).label("hits"),
)
.group_by(
"rule_id", "rule_name", "group_id", "group_fingerprint", "time"
) # Adjusted here
.order_by("time")
subquery.c.rule_id,
subquery.c.rule_name,
subquery.c.group_id,
subquery.c.group_fingerprint,
subquery.c.formatted_timestamp,
)
.order_by(subquery.c.formatted_timestamp)
)

results = query.all()
Expand All @@ -1402,7 +1453,7 @@ def get_rule_distribution(tenant_id, minute=False):
for result in results:
rule_id = result.rule_id
group_fingerprint = result.group_fingerprint
timestamp = result.time
timestamp = result.formatted_timestamp
hits = result.hits

if rule_id not in rule_distribution:
Expand Down
48 changes: 30 additions & 18 deletions keep/api/models/db/alert.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import hashlib
from datetime import datetime
from typing import List
from uuid import UUID, uuid4
from uuid import uuid4

from sqlmodel import JSON, Column, Field, Relationship, SQLModel

Expand All @@ -10,16 +10,18 @@

# many to many map between alerts and groups
class AlertToGroup(SQLModel, table=True):
tenant_id: str = Field(foreign_key="tenant.id")
tenant_id: str = Field(foreign_key="tenant.id", max_length=36)
timestamp: datetime = Field(default_factory=datetime.utcnow)
alert_id: UUID = Field(foreign_key="alert.id", primary_key=True)
group_id: UUID = Field(foreign_key="group.id", primary_key=True)
alert_id: str = Field(foreign_key="alert.id", primary_key=True, max_length=36)
group_id: str = Field(foreign_key="group.id", primary_key=True, max_length=36)


class Group(SQLModel, table=True):
id: UUID = Field(default_factory=uuid4, primary_key=True)
tenant_id: str = Field(foreign_key="tenant.id")
rule_id: UUID = Field(foreign_key="rule.id")
id: str = Field(
default_factory=lambda: str(uuid4()), primary_key=True, max_length=36
)
tenant_id: str = Field(foreign_key="tenant.id", max_length=36)
rule_id: str = Field(foreign_key="rule.id", max_length=36)
creation_time: datetime = Field(default_factory=datetime.utcnow)
# the instance of the grouping criteria
# e.g. grouping_criteria = ["event.labels.queue", "event.labels.cluster"] => group_fingerprint = "queue1,cluster1"
Expand All @@ -38,8 +40,10 @@ def calculate_fingerprint(self):


class Alert(SQLModel, table=True):
id: UUID = Field(default_factory=uuid4, primary_key=True)
tenant_id: str = Field(foreign_key="tenant.id")
id: str = Field(
default_factory=lambda: str(uuid4()), primary_key=True, max_length=36
)
tenant_id: str = Field(foreign_key="tenant.id", max_length=36)
tenant: Tenant = Relationship()
# index=True added because we query top 1000 alerts order by timestamp. On a large dataset, this will be slow without an index.
# with 1M alerts, we see queries goes from >30s to 0s with the index
Expand All @@ -48,7 +52,9 @@ class Alert(SQLModel, table=True):
provider_type: str
provider_id: str | None
event: dict = Field(sa_column=Column(JSON))
fingerprint: str = Field(index=True) # Add the fingerprint field with an index
fingerprint: str = Field(
index=True, max_length=256
) # Add the fingerprint field with an index
groups: List["Group"] = Relationship(
back_populates="alerts", link_model=AlertToGroup
)
Expand All @@ -70,10 +76,12 @@ class Config:


class AlertEnrichment(SQLModel, table=True):
id: UUID = Field(default_factory=uuid4, primary_key=True)
tenant_id: str = Field(foreign_key="tenant.id")
id: str = Field(
default_factory=lambda: str(uuid4()), primary_key=True, max_length=36
)
tenant_id: str = Field(foreign_key="tenant.id", max_length=36)
timestamp: datetime = Field(default_factory=datetime.utcnow)
alert_fingerprint: str = Field(unique=True)
alert_fingerprint: str = Field(unique=True, max_length=256)
enrichments: dict = Field(sa_column=Column(JSON))

alerts: list[Alert] = Relationship(
Expand All @@ -90,20 +98,24 @@ class Config:


class AlertDeduplicationFilter(SQLModel, table=True):
id: UUID = Field(default_factory=uuid4, primary_key=True)
tenant_id: str = Field(foreign_key="tenant.id")
id: str = Field(
default_factory=lambda: str(uuid4()), primary_key=True, max_length=36
)
tenant_id: str = Field(foreign_key="tenant.id", max_length=36)
# the list of fields to pop from the alert before hashing
fields: list = Field(sa_column=Column(JSON), default=[])
# a CEL expression to match the alert
matcher_cel: str
matcher_cel: str = Field(max_length=2000)

class Config:
arbitrary_types_allowed = True


class AlertRaw(SQLModel, table=True):
id: UUID = Field(default_factory=uuid4, primary_key=True)
tenant_id: str = Field(foreign_key="tenant.id")
id: str = Field(
default_factory=lambda: str(uuid4()), primary_key=True, max_length=36
)
tenant_id: str = Field(foreign_key="tenant.id", max_length=36)
raw_alert: dict = Field(sa_column=Column(JSON))

class Config:
Expand Down
2 changes: 1 addition & 1 deletion keep/api/models/db/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

class MappingRule(SQLModel, table=True):
id: Optional[int] = Field(primary_key=True, default=None)
tenant_id: str = Field(foreign_key="tenant.id")
tenant_id: str = Field(foreign_key="tenant.id", max_length=36)
priority: int = Field(default=0, nullable=False)
name: str = Field(max_length=255, nullable=False)
description: Optional[str] = Field(max_length=2048)
Expand Down
11 changes: 6 additions & 5 deletions keep/api/models/db/preset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@

class Preset(SQLModel, table=True):
# Unique ID for each preset
id: UUID = Field(default_factory=uuid4, primary_key=True)

tenant_id: str = Field(foreign_key="tenant.id", index=True)

id: str = Field(
default_factory=lambda: str(uuid4()), primary_key=True, max_length=36
)
tenant_id: str = Field(foreign_key="tenant.id", index=True, max_length=36)
name: str = Field(unique=True, max_length=256)

# keeping index=True for better search
created_by: Optional[str] = Field(index=True, nullable=False)
is_private: Optional[bool] = Field(default=False)
name: str = Field(unique=True)
options: list = Field(sa_column=Column(JSON)) # [{"label": "", "value": ""}]


Expand Down
4 changes: 2 additions & 2 deletions keep/api/models/db/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@


class Provider(SQLModel, table=True):
id: str = Field(default=None, primary_key=True)
tenant_id: str = Field(foreign_key="tenant.id")
id: str = Field(default=None, primary_key=True, max_length=256)
tenant_id: str = Field(foreign_key="tenant.id", max_length=36)
name: str
description: Optional[str]
type: str
Expand Down
8 changes: 5 additions & 3 deletions keep/api/models/db/rule.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import datetime
from uuid import UUID, uuid4
from uuid import uuid4

from sqlmodel import JSON, Column, Field, SQLModel

Expand All @@ -15,8 +15,10 @@
# 3. action - currently support create alert, down the road should support workflows
# 4. timeframe - should be per definition group
class Rule(SQLModel, table=True):
id: UUID = Field(default_factory=uuid4, primary_key=True)
tenant_id: str = Field(foreign_key="tenant.id")
id: str = Field(
default_factory=lambda: str(uuid4()), primary_key=True, max_length=36
)
tenant_id: str = Field(foreign_key="tenant.id", max_length=36)
name: str
definition: dict = Field(sa_column=Column(JSON)) # sql / params
definition_cel: str # cel
Expand Down
18 changes: 4 additions & 14 deletions keep/api/models/db/tenant.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
from typing import List, Optional
from uuid import UUID, uuid4
from datetime import datetime
from typing import Optional

from sqlmodel import Field, Relationship, SQLModel


class Tenant(SQLModel, table=True):
# uuid
id: str = Field(primary_key=True)
id: str = Field(max_length=36, primary_key=True)
name: str
installations: List["TenantInstallation"] = Relationship(back_populates="tenant")


class TenantApiKey(SQLModel, table=True):
tenant_id: str = Field(foreign_key="tenant.id")
tenant_id: str = Field(foreign_key="tenant.id", max_length=36)
reference_id: str = Field(description="For instance, the GitHub installation ID")
key_hash: str = Field(primary_key=True)
key_hash: str = Field(primary_key=True, max_length=64)
tenant: Tenant = Relationship()
is_system: bool = False
is_deleted: bool = False
Expand All @@ -28,11 +26,3 @@ class TenantApiKey(SQLModel, table=True):
class Config:
orm_mode = True
unique_together = ["tenant_id", "reference_id"]


class TenantInstallation(SQLModel, table=True):
id: UUID = Field(default=uuid4, primary_key=True)
tenant_id: str = Field(foreign_key="tenant.id")
bot_id: str
installed: bool = False
tenant: Optional[Tenant] = Relationship(back_populates="installations")
4 changes: 2 additions & 2 deletions keep/api/models/db/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ class User(SQLModel, table=True):
# Unique ID for each user
id: int = Field(primary_key=True)

tenant_id: str = Field(default=SINGLE_TENANT_UUID)
tenant_id: str = Field(default=SINGLE_TENANT_UUID, max_length=36)

# Username for the user (should be unique)
username: str = Field(index=True, unique=True)
username: str = Field(index=True, unique=True, max_length=256)

# Hashed password (never store plain-text passwords)
password_hash: str
Expand Down
Loading
Loading