From 4d079adb88f3ddc670f3e84848b718d095ef09f8 Mon Sep 17 00:00:00 2001 From: juju4 Date: Sat, 19 Nov 2022 17:45:12 +0000 Subject: [PATCH 1/4] add azure monitor/log analytics as source --- cartography/cli.py | 16 +++ cartography/config.py | 4 + .../cleanup/azuremonitor_import_cleanup.json | 10 ++ cartography/intel/azuremonitor/__init__.py | 59 ++++++++ cartography/intel/azuremonitor/endpoints.py | 61 ++++++++ cartography/intel/azuremonitor/util.py | 136 ++++++++++++++++++ cartography/sync.py | 2 + docs/root/modules/azuremonitor/config.md | 16 +++ docs/root/modules/azuremonitor/schema.md | 29 ++++ .../azuremonitor/azuremonitor_endpoints.py | 15 ++ 10 files changed, 348 insertions(+) create mode 100644 cartography/data/jobs/cleanup/azuremonitor_import_cleanup.json create mode 100644 cartography/intel/azuremonitor/__init__.py create mode 100644 cartography/intel/azuremonitor/endpoints.py create mode 100644 cartography/intel/azuremonitor/util.py create mode 100644 docs/root/modules/azuremonitor/config.md create mode 100644 docs/root/modules/azuremonitor/schema.md create mode 100644 tests/data/azuremonitor/azuremonitor_endpoints.py diff --git a/cartography/cli.py b/cartography/cli.py index 78ebc6886..a47aac8d8 100644 --- a/cartography/cli.py +++ b/cartography/cli.py @@ -184,6 +184,22 @@ def _build_parser(self): 'The name of environment variable containing Azure Client Secret for Service Principal Authentication.' ), ) + parser.add_argument( + '--azuremonitor-workspace-name', + type=str, + default=None, + help=( + 'Name of used Azure Log Analytics Workspace name for labelling. Authentication from environment.' + ), + ) + parser.add_argument( + '--azuremonitor-workspace-id', + type=str, + default=None, + help=( + 'Azure Log Analytics Workspace id (guid). Authentication from environment.' + ), + ) parser.add_argument( '--aws-requested-syncs', type=str, diff --git a/cartography/config.py b/cartography/config.py index a323c8a4d..941ebb40d 100644 --- a/cartography/config.py +++ b/cartography/config.py @@ -98,6 +98,8 @@ def __init__( azure_tenant_id=None, azure_client_id=None, azure_client_secret=None, + azuremonitor_workspace_name=None, + azuremonitor_workspace_id=None, aws_requested_syncs=None, analysis_job_directory=None, crxcavator_api_base_uri=None, @@ -138,6 +140,8 @@ def __init__( self.azure_tenant_id = azure_tenant_id self.azure_client_id = azure_client_id self.azure_client_secret = azure_client_secret + self.azuremonitor_workspace_name = azuremonitor_workspace_name + self.azuremonitor_workspace_id = azuremonitor_workspace_id self.aws_requested_syncs = aws_requested_syncs self.analysis_job_directory = analysis_job_directory self.crxcavator_api_base_uri = crxcavator_api_base_uri diff --git a/cartography/data/jobs/cleanup/azuremonitor_import_cleanup.json b/cartography/data/jobs/cleanup/azuremonitor_import_cleanup.json new file mode 100644 index 000000000..3a84f30b6 --- /dev/null +++ b/cartography/data/jobs/cleanup/azuremonitor_import_cleanup.json @@ -0,0 +1,10 @@ +{ + "statements": [ + { + "query": "WITH datetime()-duration('P3D') AS threshold MATCH (h:AzureMonitorHost) WHERE h.lastupdated < threshold WITH h LIMIT $LIMIT_SIZE DETACH DELETE (h)", + "iterative": true, + "iterationsize": 100 + } + ], + "name": "cleanup azuremonitor" +} diff --git a/cartography/intel/azuremonitor/__init__.py b/cartography/intel/azuremonitor/__init__.py new file mode 100644 index 000000000..71df35e27 --- /dev/null +++ b/cartography/intel/azuremonitor/__init__.py @@ -0,0 +1,59 @@ +""" +cartography/intel/azuremonitor +""" +import logging + +import neo4j + +from cartography.config import Config +from cartography.intel.azuremonitor.endpoints import sync_hosts +from cartography.stats import get_stats_client +from cartography.util import merge_module_sync_metadata +from cartography.util import run_cleanup_job +from cartography.util import timeit + +logger = logging.getLogger(__name__) +stat_handler = get_stats_client(__name__) + + +@timeit +def start_azuremonitor_ingestion( + neo4j_session: neo4j.Session, + config: Config, +) -> None: + """ + Perform ingestion of Azure Monitor / Sentinel data. + :param neo4j_session: Neo4J session for database interface + :param config: A cartography.config object + :return: None + """ + common_job_parameters = { + "UPDATE_TAG": config.update_tag, + } + # the real authentication is directly read from environment with usual Azure variables. + if not (config.azuremonitor_workspace_name and config.azuremonitor_workspace_id): + logger.error("azuremonitor config not found") + return + + sync_hosts( + neo4j_session, + config.update_tag, + (config.azuremonitor_workspace_name, config.azuremonitor_workspace_id), + ) + run_cleanup_job( + "azuremonitor_import_cleanup.json", + neo4j_session, + common_job_parameters, + ) + + group_id = "public" + if config.azuremonitor_workspace_name: + group_id = config.azuremonitor_workspace_name + merge_module_sync_metadata( + neo4j_session, + group_type="azuremonitor", + group_id=group_id, + synced_type="azuremonitor", + update_tag=config.update_tag, + stat_handler=stat_handler, + ) diff --git a/cartography/intel/azuremonitor/endpoints.py b/cartography/intel/azuremonitor/endpoints.py new file mode 100644 index 000000000..9920845bc --- /dev/null +++ b/cartography/intel/azuremonitor/endpoints.py @@ -0,0 +1,61 @@ +""" +cartography/intel/azuremonitor/endpoints +""" +# pylint: disable=missing-function-docstring,too-many-arguments +import logging +from typing import Dict +from typing import List +from typing import Tuple + +import neo4j + +from .util import azuremonitor_hosts +from cartography.util import timeit + +logger = logging.getLogger(__name__) + + +@timeit +def sync_hosts( + neo4j_session: neo4j.Session, + update_tag: int, + authorization: Tuple[str, str], +) -> None: + azuremonitor_hosts_list = azuremonitor_hosts(authorization) + for host_data in azuremonitor_hosts_list: + load_host_data(neo4j_session, host_data, update_tag) + + +def load_host_data( + neo4j_session: neo4j.Session, + data: List[Dict], + update_tag: int, +) -> None: + """ + Transform and load scan information + """ + ingestion_cypher_query = """ + UNWIND $Hosts AS host + MERGE (h:AzureMonitorHost{hostname: host.Computer}) + ON CREATE SET h.hostname = host.Computer, + h.short_hostname = toLower(host.Computer), + h.resource_id = host.resource_id, + h.resource_group = host.resource_group, + h.subscription_id = host.subscription_id, + h.tenant_id = host.TenantId, + h.sentinel_sourcesystem = host.SourceSystem, + h.sentinel_host_ip = host.HostIP, + h.workspace = host.workspace, + h.tool_first_seen = host.firstseen, + h.platform = host.systemtype, + h.workspace_name = host.workspace_name + SET h.tool_last_seen = host.lastseen, + h.modified_timestamp = host.modified_timestamp, + h.lastupdated = $update_tag + """ + logger.debug("Loading %s azuremonitor hosts.", len(data)) + neo4j_session.run( + ingestion_cypher_query, + Hosts=data, + update_tag=update_tag, + ) diff --git a/cartography/intel/azuremonitor/util.py b/cartography/intel/azuremonitor/util.py new file mode 100644 index 000000000..b00ec2fb7 --- /dev/null +++ b/cartography/intel/azuremonitor/util.py @@ -0,0 +1,136 @@ +""" +cartography/intel/azuremonitor/util +""" +import datetime +import json +import logging +import os +from typing import List +from typing import Tuple + +import pandas +from azure.core.exceptions import HttpResponseError +from azure.identity import ClientSecretCredential +from azure.monitor.query import LogsQueryClient +from azure.monitor.query import LogsQueryStatus + +logger = logging.getLogger(__name__) + + +def monitor_query( + client: LogsQueryClient, + workspace_id: str, + query: str, + start_time: datetime.date, + end_time: datetime.date, +) -> pandas.DataFrame: + """ + Azure Monitor Query + + https://pypi.org/project/azure-monitor-query/ + https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/monitor/azure-monitor-query + https://learn.microsoft.com/en-us/python/api/overview/azure/monitor-query-readme?view=azure-python + """ + try: + response = client.query_workspace( + workspace_id=workspace_id, query=query, timespan=(start_time, end_time), + ) + if response.status == LogsQueryStatus.PARTIAL: + error = response.partial_error + data = response.partial_data + print(error.message) + elif response.status == LogsQueryStatus.SUCCESS: + data = response.tables + for table in data: + df_results = pandas.DataFrame(data=table.rows, columns=table.columns) + return df_results + except HttpResponseError as err: + logger.exception("something fatal happened: %s", err) + + return pandas.DataFrame() + + +def azuremonitor_hosts( + authorization: Tuple[str, str], +) -> List: + """ + Get Azure Monitor/Sentinel (Logging) coverage inventory + + Timeout should be adapted to context, mostly size of indexes and searched timeperiod. + Alternative: Msticpy + https://msticpy.readthedocs.io/en/latest/data_acquisition/DataProv-MSSentinel.html#ms-sentinel-authentication-options + """ + workspace_name = authorization[0] + workspace_id = authorization[1] + if "AZURE_CLIENT_ID" in os.environ: + logger.info( + "azuremonitor inputs: tenant %s, clientid %s, name %s", + os.environ["AZURE_TENANT_ID"], + os.environ["AZURE_CLIENT_ID"], + workspace_name, + ) + else: + logger.info( + "azuremonitor inputs: tenant %s, name %s - managed identity?", + os.environ["AZURE_TENANT_ID"], + workspace_name, + ) + end_time = datetime.datetime.now() + start_time = end_time - datetime.timedelta(hours=1) + + azure_token = ClientSecretCredential( + os.environ["AZURE_TENANT_ID"], + os.environ["AZURE_CLIENT_ID"], + os.environ["AZURE_CLIENT_SECRET"], + ) + logger.warning("Using App registration identity: %s", azure_token) + client = LogsQueryClient(azure_token) + + syslog_query = f"""search in (Syslog) "*" +| where TimeGenerated >= datetime({start_time}) +| where TimeGenerated <= datetime({end_time}) +| extend resource_group = extract("/resourcegroups/([0-9a-zA-Z.-]+)/providers/", 1, _ResourceId) +| summarize min(TimeGenerated), max(TimeGenerated) by Computer,_SubscriptionId,resource_group,_ResourceId,TenantId,SourceSystem,HostIP +| extend firstseen = min_TimeGenerated, lastseen = max_TimeGenerated +| project-away min_TimeGenerated, max_TimeGenerated""" + df_syslog = monitor_query(client, workspace_id, syslog_query, start_time, end_time) + df_syslog["systemtype"] = "linux" + + win_query = f"""search in (Event, SecurityEvent) "*" +| where TimeGenerated >= datetime({start_time}) +| where TimeGenerated <= datetime({end_time}) +| extend resource_group = extract("/resourcegroups/([0-9a-zA-Z.-]+)/providers/", 1, _ResourceId) +| summarize min(TimeGenerated), max(TimeGenerated) by Computer,_SubscriptionId,resource_group,_ResourceId,TenantId,SourceSystem +| extend firstseen = min_TimeGenerated, lastseen = max_TimeGenerated +| project-away min_TimeGenerated, max_TimeGenerated""" + df_win = monitor_query(client, workspace_id, win_query, start_time, end_time) + df_win["systemtype"] = "windows" + + df_sentinel = pandas.concat([df_syslog, df_win]) + if workspace_name: + df_sentinel["workspace_name"] = workspace_name + + logger.info("AzureMonitor count final: %s", df_sentinel.shape[0]) + + df_sentinel["lastseen"] = pandas.to_datetime( + df_sentinel["lastseen"], unit="s", + ).dt.strftime("%Y-%m-%dT%H:%M:%S") + df_sentinel["firstseen"] = pandas.to_datetime( + df_sentinel["firstseen"], unit="s", + ).dt.strftime("%Y-%m-%dT%H:%M:%S") + + df_sentinel.rename( + columns={ + "_SubscriptionId": "subscription_id", + "_ResourceId": "resource_id", + }, + errors="ignore", + inplace=True, + ) + if df_sentinel.shape[0]: + flatten_data = json.loads(df_sentinel.to_json(orient="records")) + logger.debug("Example: %s", flatten_data[0]) + return flatten_data + + logger.warning("No data returned") + return [] diff --git a/cartography/sync.py b/cartography/sync.py index 4ac02593c..a5f3fb53c 100644 --- a/cartography/sync.py +++ b/cartography/sync.py @@ -14,6 +14,7 @@ import cartography.intel.analysis import cartography.intel.aws import cartography.intel.azure +import cartography.intel.azuremonitor import cartography.intel.create_indexes import cartography.intel.crowdstrike import cartography.intel.crxcavator.crxcavator @@ -175,6 +176,7 @@ def build_default_sync() -> Sync: ('create-indexes', cartography.intel.create_indexes.run), ('aws', cartography.intel.aws.start_aws_ingestion), ('azure', cartography.intel.azure.start_azure_ingestion), + ('azuremonitor', cartography.intel.azuremonitor.start_azuremonitor_ingestion), ('crowdstrike', cartography.intel.crowdstrike.start_crowdstrike_ingestion), ('gcp', cartography.intel.gcp.start_gcp_ingestion), ('gsuite', cartography.intel.gsuite.start_gsuite_ingestion), diff --git a/docs/root/modules/azuremonitor/config.md b/docs/root/modules/azuremonitor/config.md new file mode 100644 index 000000000..5e913f067 --- /dev/null +++ b/docs/root/modules/azuremonitor/config.md @@ -0,0 +1,16 @@ +## AzureMonitor Configuration + +.. _azuremonitor_config: + +Follow these steps to analyze Microsoft Azure Monitor (aka Azure Log Analytics or Microsoft Sentinel) with Cartography: + +1. Set up an Azure identity for Cartography to use, and ensure that this identity has the built-in Azure [Microsoft Sentinel Reader role](https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles#microsoft-sentinel-reader) attached: + * Authenticate: `$ az login` + * Create a Service Principal: `$ az ad sp create-for-rbac --name cartography --role "Microsoft Sentinel Reader"` + * Note the values of the `tenant`, `appId`, and `password` fields +1. Populate environment variables with the values generated in the previous step (e.g., `AZURE_TENANT_ID`, `AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET`) +1. Call the `cartography` CLI with: + ```bash + --azuremonitor-workspace-name ALA_NAME \ + --azuremonitor-workspace-id ALA_GUID + ``` diff --git a/docs/root/modules/azuremonitor/schema.md b/docs/root/modules/azuremonitor/schema.md new file mode 100644 index 000000000..962bdf24b --- /dev/null +++ b/docs/root/modules/azuremonitor/schema.md @@ -0,0 +1,29 @@ +## AzureMonitor Schema + +.. _azuremonitor_schema: + +### AzureMonitor + +Representation of a system sending logs to Azure Monitor aka Azure Log Analytics. + +| Field | Description | +|-------|-------------| +|tool_first_seen| Timestamp of when first available logs for host is available since first sync| +|tool_last_seen| Timestamp of when last available logs for host is available per last sync| +|lastupdated| Timestamp of the last time the node was updated| +|**hostname**| The Azure Virtual Machine Computer name| +|short_hostname| The Azure Virtual Machine short hostname, lowercase| +|platform| The platform of the resource (linux or windows)| +|resource_group| The Resource Group where Virtual Machine is created| +|tenant_id| The Tenant Id where Virtual Machine is created| +|sentinel_sourcesystem| The SourceSystem as available in Log Analytics| +|sentinel_host_ip| The HostIP as available in Log Analytics| +|workspace_name| The Log Analytics workspace name where matching logs where identified| + +#### Relationships + +- Azure VirtualMachine contains one AzureMonitor host + + ``` + (VirtualMachine)-[PRESENT_IN]->(AzureMonitor) + ``` diff --git a/tests/data/azuremonitor/azuremonitor_endpoints.py b/tests/data/azuremonitor/azuremonitor_endpoints.py new file mode 100644 index 000000000..a12637122 --- /dev/null +++ b/tests/data/azuremonitor/azuremonitor_endpoints.py @@ -0,0 +1,15 @@ +GET_HOSTS = [ + { + 'Computer': 'myhostname-01', + 'subscription_id': 'c56b2c59-4e9b-4b89-85e2-13f8146eb0717', + 'resource_group': 'rg-myhostname', + 'resource_id': 'subscriptions/SUB/resourceGroups/RG/providers/Microsoft.Compute/virtualMachines/HOST', + 'TenantId': 'c56b2c59-4e9b-4b89-85e2-13f8146eb0717', + 'SourceSystem': 'Linux', + 'HostIP': '10.1.2.3', + 'firstseen': '2022-10-11T12:34:56', + 'lastseen': '2022-10-11T18:12:34', + 'systemtype': 'linux', + 'workspace_name': 'AzureLogAnalytics-workspace-name', + }, +] From d469a98f8c54e339b566d4b912135c1eafb6f4e2 Mon Sep 17 00:00:00 2001 From: juju4 Date: Sat, 3 Dec 2022 13:38:35 +0000 Subject: [PATCH 2/4] fix flake8 +black --- cartography/intel/azuremonitor/util.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/cartography/intel/azuremonitor/util.py b/cartography/intel/azuremonitor/util.py index b00ec2fb7..544828532 100644 --- a/cartography/intel/azuremonitor/util.py +++ b/cartography/intel/azuremonitor/util.py @@ -33,7 +33,9 @@ def monitor_query( """ try: response = client.query_workspace( - workspace_id=workspace_id, query=query, timespan=(start_time, end_time), + workspace_id=workspace_id, + query=query, + timespan=(start_time, end_time), ) if response.status == LogsQueryStatus.PARTIAL: error = response.partial_error @@ -86,23 +88,29 @@ def azuremonitor_hosts( logger.warning("Using App registration identity: %s", azure_token) client = LogsQueryClient(azure_token) - syslog_query = f"""search in (Syslog) "*" + syslog_query = ( + f"""search in (Syslog) "*" | where TimeGenerated >= datetime({start_time}) | where TimeGenerated <= datetime({end_time}) | extend resource_group = extract("/resourcegroups/([0-9a-zA-Z.-]+)/providers/", 1, _ResourceId) -| summarize min(TimeGenerated), max(TimeGenerated) by Computer,_SubscriptionId,resource_group,_ResourceId,TenantId,SourceSystem,HostIP +| summarize min(TimeGenerated), max(TimeGenerated) by Computer,_SubscriptionId,""" + """resource_group,_ResourceId,TenantId,SourceSystem,HostIP | extend firstseen = min_TimeGenerated, lastseen = max_TimeGenerated | project-away min_TimeGenerated, max_TimeGenerated""" + ) df_syslog = monitor_query(client, workspace_id, syslog_query, start_time, end_time) df_syslog["systemtype"] = "linux" - win_query = f"""search in (Event, SecurityEvent) "*" + win_query = ( + f"""search in (Event, SecurityEvent) "*" | where TimeGenerated >= datetime({start_time}) | where TimeGenerated <= datetime({end_time}) | extend resource_group = extract("/resourcegroups/([0-9a-zA-Z.-]+)/providers/", 1, _ResourceId) -| summarize min(TimeGenerated), max(TimeGenerated) by Computer,_SubscriptionId,resource_group,_ResourceId,TenantId,SourceSystem +| summarize min(TimeGenerated), max(TimeGenerated) by Computer,_SubscriptionId,""" + """resource_group,_ResourceId,TenantId,SourceSystem | extend firstseen = min_TimeGenerated, lastseen = max_TimeGenerated | project-away min_TimeGenerated, max_TimeGenerated""" + ) df_win = monitor_query(client, workspace_id, win_query, start_time, end_time) df_win["systemtype"] = "windows" @@ -113,10 +121,12 @@ def azuremonitor_hosts( logger.info("AzureMonitor count final: %s", df_sentinel.shape[0]) df_sentinel["lastseen"] = pandas.to_datetime( - df_sentinel["lastseen"], unit="s", + df_sentinel["lastseen"], + unit="s", ).dt.strftime("%Y-%m-%dT%H:%M:%S") df_sentinel["firstseen"] = pandas.to_datetime( - df_sentinel["firstseen"], unit="s", + df_sentinel["firstseen"], + unit="s", ).dt.strftime("%Y-%m-%dT%H:%M:%S") df_sentinel.rename( From 7c5af1f55314de09a02478ebd0b98e3f3ba01e27 Mon Sep 17 00:00:00 2001 From: juju4 Date: Sat, 7 Jan 2023 15:00:11 +0000 Subject: [PATCH 3/4] add missing azure-monitor-query pypi dependency --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index f6ce7bb7b..0a76a7414 100644 --- a/setup.py +++ b/setup.py @@ -59,6 +59,7 @@ "pdpyras>=4.3.0", "crowdstrike-falconpy>=0.5.1", "python-dateutil", + "azure-monitor-query>=1.0.3", ], extras_require={ ':python_version<"3.7"': [ From f08cf06034d9b1b36910a2ef3127ecb330512bab Mon Sep 17 00:00:00 2001 From: juju4 Date: Sat, 7 Jan 2023 19:13:30 +0000 Subject: [PATCH 4/4] add verb to functions --- cartography/intel/azuremonitor/endpoints.py | 4 ++-- cartography/intel/azuremonitor/util.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cartography/intel/azuremonitor/endpoints.py b/cartography/intel/azuremonitor/endpoints.py index 9920845bc..091de111c 100644 --- a/cartography/intel/azuremonitor/endpoints.py +++ b/cartography/intel/azuremonitor/endpoints.py @@ -9,7 +9,7 @@ import neo4j -from .util import azuremonitor_hosts +from .util import get_azuremonitor_hosts from cartography.util import timeit logger = logging.getLogger(__name__) @@ -21,7 +21,7 @@ def sync_hosts( update_tag: int, authorization: Tuple[str, str], ) -> None: - azuremonitor_hosts_list = azuremonitor_hosts(authorization) + azuremonitor_hosts_list = get_azuremonitor_hosts(authorization) for host_data in azuremonitor_hosts_list: load_host_data(neo4j_session, host_data, update_tag) diff --git a/cartography/intel/azuremonitor/util.py b/cartography/intel/azuremonitor/util.py index 544828532..36dadb4b6 100644 --- a/cartography/intel/azuremonitor/util.py +++ b/cartography/intel/azuremonitor/util.py @@ -52,7 +52,7 @@ def monitor_query( return pandas.DataFrame() -def azuremonitor_hosts( +def get_azuremonitor_hosts( authorization: Tuple[str, str], ) -> List: """