Add Azure Monitor source #1075

Open · wants to merge 8 commits into base: master
16 changes: 16 additions & 0 deletions cartography/cli.py
@@ -210,6 +210,22 @@ def _build_parser(self):
                'The name of environment variable containing Azure Client Secret for Service Principal Authentication.'
            ),
        )
        parser.add_argument(
            '--azuremonitor-workspace-name',
            type=str,
            default=None,
            help=(
                'Name of the Azure Log Analytics Workspace, used for labelling. Authentication is read from the environment.'
            ),
        )
        parser.add_argument(
            '--azuremonitor-workspace-id',
            type=str,
            default=None,
            help=(
                'Azure Log Analytics Workspace id (GUID). Authentication is read from the environment.'
            ),
        )
        parser.add_argument(
            '--aws-requested-syncs',
            type=str,
4 changes: 4 additions & 0 deletions cartography/config.py
@@ -105,6 +105,8 @@ def __init__(
        azure_tenant_id=None,
        azure_client_id=None,
        azure_client_secret=None,
        azuremonitor_workspace_name=None,
        azuremonitor_workspace_id=None,
        aws_requested_syncs=None,
        analysis_job_directory=None,
        crxcavator_api_base_uri=None,
@@ -148,6 +150,8 @@ def __init__(
        self.azure_tenant_id = azure_tenant_id
        self.azure_client_id = azure_client_id
        self.azure_client_secret = azure_client_secret
        self.azuremonitor_workspace_name = azuremonitor_workspace_name
        self.azuremonitor_workspace_id = azuremonitor_workspace_id
        self.aws_requested_syncs = aws_requested_syncs
        self.analysis_job_directory = analysis_job_directory
        self.crxcavator_api_base_uri = crxcavator_api_base_uri
10 changes: 10 additions & 0 deletions cartography/data/jobs/cleanup/azuremonitor_import_cleanup.json
@@ -0,0 +1,10 @@
{
    "statements": [
        {
            "query": "WITH (datetime() - duration('P3D')).epochSeconds AS threshold MATCH (h:AzureMonitorHost) WHERE h.lastupdated < threshold WITH h LIMIT $LIMIT_SIZE DETACH DELETE (h)",
            "iterative": true,
            "iterationsize": 100
        }
    ],
    "name": "cleanup azuremonitor"
}
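
For reference, a minimal sketch of the batching that `"iterative": true` / `"iterationsize": 100` implies. This is not cartography's actual GraphJob runner; it assumes `lastupdated` holds epoch seconds (the usual cartography update tag), and the `run_iterative_cleanup` helper is hypothetical:

```python
import neo4j

# Hypothetical standalone driver for the cleanup statement above; in cartography,
# run_cleanup_job handles this batching internally.
CLEANUP_QUERY = (
    "WITH (datetime() - duration('P3D')).epochSeconds AS threshold "
    "MATCH (h:AzureMonitorHost) WHERE h.lastupdated < threshold "
    "WITH h LIMIT $LIMIT_SIZE DETACH DELETE (h)"
)


def run_iterative_cleanup(session: neo4j.Session, batch_size: int = 100) -> int:
    """Delete stale AzureMonitorHost nodes in batches until none remain."""
    total_deleted = 0
    while True:
        summary = session.run(CLEANUP_QUERY, LIMIT_SIZE=batch_size).consume()
        deleted = summary.counters.nodes_deleted
        total_deleted += deleted
        if deleted < batch_size:
            return total_deleted
```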
59 changes: 59 additions & 0 deletions cartography/intel/azuremonitor/__init__.py
@@ -0,0 +1,59 @@
"""
cartography/intel/azuremonitor
"""
import logging

import neo4j

from cartography.config import Config
from cartography.intel.azuremonitor.endpoints import sync_hosts
from cartography.stats import get_stats_client
from cartography.util import merge_module_sync_metadata
from cartography.util import run_cleanup_job
from cartography.util import timeit

logger = logging.getLogger(__name__)
stat_handler = get_stats_client(__name__)


@timeit
def start_azuremonitor_ingestion(
    neo4j_session: neo4j.Session,
    config: Config,
) -> None:
    """
    Perform ingestion of Azure Monitor / Sentinel data.
    :param neo4j_session: Neo4J session for database interface
    :param config: A cartography.config object
    :return: None
    """
    common_job_parameters = {
        "UPDATE_TAG": config.update_tag,
    }
    # The actual authentication is read directly from the environment via the standard
    # Azure variables (AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET).
    if not (config.azuremonitor_workspace_name and config.azuremonitor_workspace_id):
        logger.error("Azure Monitor config not found: both workspace name and workspace id are required")
        return

    sync_hosts(
        neo4j_session,
        config.update_tag,
        (config.azuremonitor_workspace_name, config.azuremonitor_workspace_id),
    )
    run_cleanup_job(
        "azuremonitor_import_cleanup.json",
        neo4j_session,
        common_job_parameters,
    )

    group_id = "public"
    if config.azuremonitor_workspace_name:
        group_id = config.azuremonitor_workspace_name
    merge_module_sync_metadata(
        neo4j_session,
        group_type="azuremonitor",
        group_id=group_id,
        synced_type="azuremonitor",
        update_tag=config.update_tag,
        stat_handler=stat_handler,
    )
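
A minimal sketch of running this module on its own (e.g. for local testing), assuming the standard Azure environment variables are already exported and that `Config` accepts `update_tag` as a keyword argument; the Neo4j URI, credentials, and workspace values below are placeholders:

```python
import time

import neo4j

from cartography.config import Config
from cartography.intel.azuremonitor import start_azuremonitor_ingestion

# Placeholder Neo4j connection details.
driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))

config = Config(
    neo4j_uri="bolt://localhost:7687",
    update_tag=int(time.time()),  # assumption: epoch seconds, as in a normal sync
    azuremonitor_workspace_name="example-workspace",
    azuremonitor_workspace_id="00000000-0000-0000-0000-000000000000",
)

with driver.session() as session:
    start_azuremonitor_ingestion(session, config)
```

In a normal deployment the `cartography` CLI builds the Config and session itself; this is only for exercising the module in isolation.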
61 changes: 61 additions & 0 deletions cartography/intel/azuremonitor/endpoints.py
@@ -0,0 +1,61 @@
"""
cartography/intel/azuremonitor/endpoints
"""
# pylint: disable=missing-function-docstring,too-many-arguments
import logging
from typing import Dict
from typing import List
from typing import Tuple

import neo4j

from .util import get_azuremonitor_hosts
from cartography.util import timeit

logger = logging.getLogger(__name__)


@timeit
def sync_hosts(
    neo4j_session: neo4j.Session,
    update_tag: int,
    authorization: Tuple[str, str],
) -> None:
    azuremonitor_hosts_list = get_azuremonitor_hosts(authorization)
    # get_azuremonitor_hosts returns a list of host records; load them in one batch.
    load_host_data(neo4j_session, azuremonitor_hosts_list, update_tag)


def load_host_data(
    neo4j_session: neo4j.Session,
    data: List[Dict],
    update_tag: int,
) -> None:
    """
    Transform and load Azure Monitor host information.
    """
    ingestion_cypher_query = """
    UNWIND $Hosts AS host
    MERGE (h:AzureMonitorHost{hostname: host.Computer})
    ON CREATE SET h.hostname = host.Computer,
        h.short_hostname = toLower(host.Computer),
        h.resource_id = host.resource_id,
        h.resource_group = host.resource_group,
        h.subscription_id = host.subscription_id,
        h.tenant_id = host.TenantId,
        h.sentinel_sourcesystem = host.SourceSystem,
        h.sentinel_host_ip = host.HostIP,
        h.workspace = host.workspace,
        h.tool_first_seen = host.firstseen,
        h.platform = host.systemtype,
        h.workspace_name = host.workspace_name
    SET h.tool_last_seen = host.lastseen,
        h.modified_timestamp = host.modified_timestamp,
        h.lastupdated = $update_tag
    """
    logger.debug("Loading %s azuremonitor hosts.", len(data))
    neo4j_session.run(
        ingestion_cypher_query,
        Hosts=data,
        update_tag=update_tag,
    )
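
For reviewers, the record shape `load_host_data` expects mirrors the columns produced by `util.get_azuremonitor_hosts`; note that `host.workspace` and `host.modified_timestamp` are referenced in the Cypher but not set there, so they load as null. A hypothetical record, with invented values:

```python
# Illustrative input for load_host_data; keys follow the columns produced by
# get_azuremonitor_hosts, values are made up for the example.
example_hosts = [
    {
        "Computer": "vm-frontend-01",
        "TenantId": "11111111-1111-1111-1111-111111111111",
        "SourceSystem": "Linux",
        "HostIP": "10.0.0.4",
        "subscription_id": "22222222-2222-2222-2222-222222222222",
        "resource_id": "/subscriptions/22222222-2222-2222-2222-222222222222/resourcegroups/rg-frontend/providers/microsoft.compute/virtualmachines/vm-frontend-01",
        "resource_group": "rg-frontend",
        "systemtype": "linux",
        "workspace_name": "example-workspace",
        "firstseen": "2022-10-01T00:00:00",
        "lastseen": "2022-10-01T00:59:59",
    },
]

# load_host_data(neo4j_session, example_hosts, update_tag)
```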
146 changes: 146 additions & 0 deletions cartography/intel/azuremonitor/util.py
@@ -0,0 +1,146 @@
"""
cartography/intel/azuremonitor/util
"""
import datetime
import json
import logging
import os
from typing import List
from typing import Tuple

import pandas
from azure.core.exceptions import HttpResponseError
from azure.identity import ClientSecretCredential
from azure.monitor.query import LogsQueryClient
from azure.monitor.query import LogsQueryStatus

logger = logging.getLogger(__name__)


def monitor_query(
    client: LogsQueryClient,
    workspace_id: str,
    query: str,
    start_time: datetime.date,
    end_time: datetime.date,
) -> pandas.DataFrame:
    """
    Azure Monitor Query

    https://pypi.org/project/azure-monitor-query/
    https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/monitor/azure-monitor-query
    https://learn.microsoft.com/en-us/python/api/overview/azure/monitor-query-readme?view=azure-python
    """
    try:
        response = client.query_workspace(
            workspace_id=workspace_id,
            query=query,
            timespan=(start_time, end_time),
        )
        data = []
        if response.status == LogsQueryStatus.PARTIAL:
            logger.warning("Partial results returned: %s", response.partial_error.message)
            data = response.partial_data
        elif response.status == LogsQueryStatus.SUCCESS:
            data = response.tables
        df_results = pandas.DataFrame()
        for table in data:
            df_results = pandas.DataFrame(data=table.rows, columns=table.columns)
        return df_results
    except HttpResponseError as err:
        logger.exception("something fatal happened: %s", err)

    return pandas.DataFrame()


def get_azuremonitor_hosts(
    authorization: Tuple[str, str],
) -> List:
    """
    Get Azure Monitor/Sentinel (Logging) coverage inventory.

    The query timeout should be adapted to the context, mainly the size of the indexes
    and the searched time period.
    Alternative: Msticpy
    https://msticpy.readthedocs.io/en/latest/data_acquisition/DataProv-MSSentinel.html#ms-sentinel-authentication-options
    """
    workspace_name = authorization[0]
    workspace_id = authorization[1]
    if "AZURE_CLIENT_ID" in os.environ:
        logger.info(
            "azuremonitor inputs: tenant %s, clientid %s, name %s",
            os.environ["AZURE_TENANT_ID"],
            os.environ["AZURE_CLIENT_ID"],
            workspace_name,
        )
    else:
        logger.info(
            "azuremonitor inputs: tenant %s, name %s - managed identity?",
            os.environ["AZURE_TENANT_ID"],
            workspace_name,
        )
    end_time = datetime.datetime.now()
    start_time = end_time - datetime.timedelta(hours=1)

    azure_token = ClientSecretCredential(
        os.environ["AZURE_TENANT_ID"],
        os.environ["AZURE_CLIENT_ID"],
        os.environ["AZURE_CLIENT_SECRET"],
    )
    logger.warning("Using App registration identity: %s", azure_token)
    client = LogsQueryClient(azure_token)

    syslog_query = (
        f"""search in (Syslog) "*"
        | where TimeGenerated >= datetime({start_time})
        | where TimeGenerated <= datetime({end_time})
        | extend resource_group = extract("/resourcegroups/([0-9a-zA-Z.-]+)/providers/", 1, _ResourceId)
        | summarize min(TimeGenerated), max(TimeGenerated) by Computer,_SubscriptionId,"""
        """resource_group,_ResourceId,TenantId,SourceSystem,HostIP
        | extend firstseen = min_TimeGenerated, lastseen = max_TimeGenerated
        | project-away min_TimeGenerated, max_TimeGenerated"""
    )
    df_syslog = monitor_query(client, workspace_id, syslog_query, start_time, end_time)
    df_syslog["systemtype"] = "linux"

    win_query = (
        f"""search in (Event, SecurityEvent) "*"
        | where TimeGenerated >= datetime({start_time})
        | where TimeGenerated <= datetime({end_time})
        | extend resource_group = extract("/resourcegroups/([0-9a-zA-Z.-]+)/providers/", 1, _ResourceId)
        | summarize min(TimeGenerated), max(TimeGenerated) by Computer,_SubscriptionId,"""
        """resource_group,_ResourceId,TenantId,SourceSystem
        | extend firstseen = min_TimeGenerated, lastseen = max_TimeGenerated
        | project-away min_TimeGenerated, max_TimeGenerated"""
    )
    df_win = monitor_query(client, workspace_id, win_query, start_time, end_time)
    df_win["systemtype"] = "windows"

    df_sentinel = pandas.concat([df_syslog, df_win])
    if workspace_name:
        df_sentinel["workspace_name"] = workspace_name

    logger.info("AzureMonitor count final: %s", df_sentinel.shape[0])

    df_sentinel["lastseen"] = pandas.to_datetime(
        df_sentinel["lastseen"],
        unit="s",
    ).dt.strftime("%Y-%m-%dT%H:%M:%S")
    df_sentinel["firstseen"] = pandas.to_datetime(
        df_sentinel["firstseen"],
        unit="s",
    ).dt.strftime("%Y-%m-%dT%H:%M:%S")

    df_sentinel.rename(
        columns={
            "_SubscriptionId": "subscription_id",
            "_ResourceId": "resource_id",
        },
        errors="ignore",
        inplace=True,
    )
    if df_sentinel.shape[0]:
        flatten_data = json.loads(df_sentinel.to_json(orient="records"))
        logger.debug("Example: %s", flatten_data[0])
        return flatten_data

    logger.warning("No data returned")
    return []
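
A minimal sketch for exercising `get_azuremonitor_hosts` directly, assuming a service principal that holds the Microsoft Sentinel Reader role; every value below is a placeholder:

```python
import os

from cartography.intel.azuremonitor.util import get_azuremonitor_hosts

# Placeholder credentials; in a real run these are already present in the environment.
os.environ.setdefault("AZURE_TENANT_ID", "<tenant-guid>")
os.environ.setdefault("AZURE_CLIENT_ID", "<app-registration-client-id>")
os.environ.setdefault("AZURE_CLIENT_SECRET", "<client-secret>")

hosts = get_azuremonitor_hosts(("example-workspace", "<workspace-guid>"))
for host in hosts[:3]:
    print(host["Computer"], host.get("systemtype"), host.get("lastseen"))
```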
2 changes: 2 additions & 0 deletions cartography/sync.py
@@ -14,6 +14,7 @@
import cartography.intel.analysis
import cartography.intel.aws
import cartography.intel.azure
import cartography.intel.azuremonitor
import cartography.intel.create_indexes
import cartography.intel.crowdstrike
import cartography.intel.crxcavator.crxcavator
@@ -37,6 +38,7 @@
    'create-indexes': cartography.intel.create_indexes.run,
    'aws': cartography.intel.aws.start_aws_ingestion,
    'azure': cartography.intel.azure.start_azure_ingestion,
    'azuremonitor': cartography.intel.azuremonitor.start_azuremonitor_ingestion,
    'crowdstrike': cartography.intel.crowdstrike.start_crowdstrike_ingestion,
    'gcp': cartography.intel.gcp.start_gcp_ingestion,
    'gsuite': cartography.intel.gsuite.start_gsuite_ingestion,
16 changes: 16 additions & 0 deletions docs/root/modules/azuremonitor/config.md
@@ -0,0 +1,16 @@
## AzureMonitor Configuration

.. _azuremonitor_config:

Follow these steps to analyze Microsoft Azure Monitor (aka Azure Log Analytics or Microsoft Sentinel) with Cartography:

1. Set up an Azure identity for Cartography to use, and ensure that this identity has the built-in Azure [Microsoft Sentinel Reader role](https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles#microsoft-sentinel-reader) attached:
* Authenticate: `$ az login`
* Create a Service Principal: `$ az ad sp create-for-rbac --name cartography --role "Microsoft Sentinel Reader"`
* Note the values of the `tenant`, `appId`, and `password` fields
1. Populate environment variables with the values generated in the previous step: `AZURE_TENANT_ID` (tenant), `AZURE_CLIENT_ID` (appId), and `AZURE_CLIENT_SECRET` (password)
1. Call the `cartography` CLI with:
```bash
--azuremonitor-workspace-name ALA_NAME \
--azuremonitor-workspace-id ALA_GUID
```
29 changes: 29 additions & 0 deletions docs/root/modules/azuremonitor/schema.md
@@ -0,0 +1,29 @@
## AzureMonitor Schema

.. _azuremonitor_schema:

### AzureMonitorHost

Representation of a system sending logs to Azure Monitor (aka Azure Log Analytics).

| Field | Description |
|-------|-------------|
|tool_first_seen| Timestamp of the earliest logs available for the host, recorded at the first sync|
|tool_last_seen| Timestamp of the most recent logs available for the host, recorded at the last sync|
|lastupdated| Timestamp of the last time the node was updated|
|**hostname**| The Azure Virtual Machine Computer name|
|short_hostname| The Azure Virtual Machine short hostname, lowercase|
|platform| The platform of the resource (linux or windows)|
|resource_group| The Resource Group where the Virtual Machine is created|
|tenant_id| The Tenant Id where the Virtual Machine is created|
|sentinel_sourcesystem| The SourceSystem as available in Log Analytics|
|sentinel_host_ip| The HostIP as available in Log Analytics|
|workspace_name| The Log Analytics workspace name where matching logs were identified|

#### Relationships

- An Azure VirtualMachine contains one AzureMonitorHost

```
(VirtualMachine)-[PRESENT_IN]->(AzureMonitorHost)
```
1 change: 1 addition & 0 deletions setup.py
@@ -59,6 +59,7 @@
"pdpyras>=4.3.0",
"crowdstrike-falconpy>=0.5.1",
"python-dateutil",
"azure-monitor-query>=1.0.3",
],
extras_require={
':python_version<"3.7"': [