Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Azure ResourceGraph source #1076

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
49 changes: 49 additions & 0 deletions cartography/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,38 @@ def _build_parser(self):
'The crowdstrike URL, if using self-hosted. Defaults to the public crowdstrike API URL otherwise.'
),
)
# Azure Resource Graph options: tenant id, the names of the environment
# variables holding the client id / client secret, and a managed-identity
# switch.
parser.add_argument(
    '--azureresourcegraph-tenant-id',
    type=str,
    default=None,
    help=(
        'The azureresourcegraph tenant id for authentication.'
    ),
)
parser.add_argument(
    '--azureresourcegraph-client-id-env-var',
    type=str,
    default=None,
    help=(
        'The name of environment variable containing the azureresourcegraph client id for authentication.'
    ),
)
parser.add_argument(
    '--azureresourcegraph-client-secret-env-var',
    type=str,
    default=None,
    help=(
        'The name of environment variable containing the azureresourcegraph secret key for authentication.'
    ),
)
parser.add_argument(
    '--azureresourcegraph-use-managedidentity',
    # `type=bool` would call bool() on the raw text, and bool("false") is
    # True: any non-empty value would switch managed identity on. Parse the
    # text explicitly so "false"/"0"/"no" really mean False. Unrecognised
    # values are treated as False rather than raising.
    type=lambda value: str(value).strip().lower() in ('1', 'true', 'yes', 'y', 'on'),
    default=False,
    help=(
        'Use managed identity for azureresourcegraph authentication.'
    ),
)
parser.add_argument(
'--gsuite-auth-method',
type=str,
Expand Down Expand Up @@ -569,6 +601,23 @@ def main(self, argv: str) -> int:
else:
config.crowdstrike_client_secret = None

# Azure Resource Graph config
# The CLI only receives the *names* of the environment variables; the actual
# client id / client secret are read from the environment here. A variable
# that is named but not set yields None (os.environ.get), the same value used
# when no variable name was supplied at all.
if config.azureresourcegraph_client_id_env_var:
    logger.debug(
        f"Reading client id for azureresourcegraph from environment variable "
        f"{config.azureresourcegraph_client_id_env_var}",
    )
    config.azureresourcegraph_client_id = os.environ.get(config.azureresourcegraph_client_id_env_var)
else:
    config.azureresourcegraph_client_id = None

if config.azureresourcegraph_client_secret_env_var:
    # Only the variable name is logged, never the secret value itself.
    logger.debug(
        f"Reading client secret for azureresourcegraph from environment variable "
        f"{config.azureresourcegraph_client_secret_env_var}",
    )
    config.azureresourcegraph_client_secret = os.environ.get(config.azureresourcegraph_client_secret_env_var)
else:
    config.azureresourcegraph_client_secret = None

# GSuite config
if config.gsuite_tokens_env_var:
logger.debug(f"Reading config string for GSuite from environment variable {config.gsuite_tokens_env_var}")
Expand Down
8 changes: 8 additions & 0 deletions cartography/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ def __init__(
crowdstrike_client_id=None,
crowdstrike_client_secret=None,
crowdstrike_api_url=None,
azureresourcegraph_tenant_id=None,
azureresourcegraph_client_id=None,
azureresourcegraph_client_secret=None,
azureresourcegraph_use_managedidentity=None,
gsuite_auth_method=None,
gsuite_config=None,
):
Expand Down Expand Up @@ -174,5 +178,9 @@ def __init__(
self.crowdstrike_client_id = crowdstrike_client_id
self.crowdstrike_client_secret = crowdstrike_client_secret
self.crowdstrike_api_url = crowdstrike_api_url
self.azureresourcegraph_tenant_id = azureresourcegraph_tenant_id
self.azureresourcegraph_client_id = azureresourcegraph_client_id
self.azureresourcegraph_client_secret = azureresourcegraph_client_secret
self.azureresourcegraph_use_managedidentity = azureresourcegraph_use_managedidentity
self.gsuite_auth_method = gsuite_auth_method
self.gsuite_config = gsuite_config
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"statements": [
{
"query": "WITH datetime()-duration('P7D') AS threshold MATCH (h:AzureResourceGraphHost) WHERE h.lastupdated < threshold WITH h LIMIT $LIMIT_SIZE DETACH DELETE (h)",
"iterative": true,
"iterationsize": 100
}
],
"name": "cleanup azureresourcegraph"
}
74 changes: 74 additions & 0 deletions cartography/intel/azureresourcegraph/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""
cartography/intel/azureresourcegraph
"""
import logging

import neo4j

from cartography.config import Config
from cartography.intel.azureresourcegraph.endpoints import sync_hosts
from cartography.intel.azureresourcegraph.util import get_authorization
from cartography.stats import get_stats_client
from cartography.util import merge_module_sync_metadata
from cartography.util import run_cleanup_job
from cartography.util import timeit

logger = logging.getLogger(__name__)
stat_handler = get_stats_client(__name__)


@timeit
def start_azureresourcegraph_ingestion(
    neo4j_session: neo4j.Session,
    config: Config,
) -> None:
    """
    Perform ingestion of Azure Resource Graph data.

    The sync runs only when either both a client id and a client secret are
    configured, or managed identity has been requested. Otherwise an error is
    logged and nothing is ingested. On a run it obtains an authorization via
    ``get_authorization``, syncs hosts, runs the
    ``azureresourcegraph_import_cleanup.json`` cleanup job and records module
    sync metadata.

    :param neo4j_session: Neo4J session for database interface
    :param config: A cartography.config object
    :return: None
    """
    common_job_parameters = {
        "UPDATE_TAG": config.update_tag,
    }

    # The managed-identity flag can be a bool (argparse default/conversion),
    # a string, or None (Config default). Calling .lower() on it directly
    # raises AttributeError for bool/None, so normalise through str() first:
    # True -> "true", "True" -> "true", False/None/anything else -> not "true".
    managed_identity_flag = config.azureresourcegraph_use_managedidentity
    use_managed_identity = str(managed_identity_flag).strip().lower() == "true"
    has_client_credentials = bool(
        config.azureresourcegraph_client_id and
        config.azureresourcegraph_client_secret,
    )
    if not (has_client_credentials or use_managed_identity):
        logger.error(
            "azureresourcegraph config not found and not requested managed identity.",
        )
        return

    # NOTE(review): the meaning of the literal ``True`` argument is defined by
    # get_authorization in the util module, which is not visible here - confirm.
    # The flag is forwarded unmodified so get_authorization sees the same
    # value it always has.
    authorization = get_authorization(
        config.azureresourcegraph_client_id,
        config.azureresourcegraph_client_secret,
        config.azureresourcegraph_tenant_id,
        True,
        managed_identity_flag,
    )
    sync_hosts(
        neo4j_session,
        config.update_tag,
        authorization,
    )
    run_cleanup_job(
        "azureresourcegraph_import_cleanup.json",
        neo4j_session,
        common_job_parameters,
    )

    # Sync metadata is grouped by tenant id when one is configured, falling
    # back to the fixed group "public".
    group_id = config.azureresourcegraph_tenant_id or "public"
    merge_module_sync_metadata(
        neo4j_session,
        group_type="azureresourcegraph",
        group_id=group_id,
        synced_type="azureresourcegraph",
        update_tag=config.update_tag,
        stat_handler=stat_handler,
    )
90 changes: 90 additions & 0 deletions cartography/intel/azureresourcegraph/endpoints.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""
cartography/intel/azureresourcegraph/endpoints
"""
# pylint: disable=missing-function-docstring,too-many-arguments
import logging
from typing import Dict
from typing import List

import neo4j

from .util import get_azureresourcegraph_hosts
from cartography.util import timeit

logger = logging.getLogger(__name__)


@timeit
def sync_hosts(
    neo4j_session: neo4j.Session,
    update_tag: int,
    authorization: str,
) -> None:
    """
    Fetch Azure Resource Graph hosts and load them into the graph.

    Every element returned by ``get_azureresourcegraph_hosts`` is handed to
    ``load_host_data`` in turn.

    :param neo4j_session: Neo4J session for database interface
    :param update_tag: Update tag stamped on the loaded nodes/relationships
    :param authorization: Authorization passed to the host fetcher
    :return: None
    """
    # NOTE(review): load_host_data is annotated as taking a list of dicts, so
    # each element is presumably a batch of hosts rather than a single host -
    # confirm against get_azureresourcegraph_hosts.
    for host_batch in get_azureresourcegraph_hosts(authorization):
        load_host_data(neo4j_session, host_batch, update_tag)


def load_host_data(
    neo4j_session: neo4j.Session,
    data: List[Dict],
    update_tag: int,
) -> None:
    """
    Load Azure Resource Graph host records into the graph.

    Each record is merged into an ``AzureResourceGraphHost`` node keyed on
    ``resourceid`` and then, independently of each other, linked to:

    * the ``AzureSubscription`` whose ``id`` equals the host's subscription id
      (``(:AzureSubscription)-[:CONTAINS]->(host)``), and
    * the ``AzureVirtualMachine`` whose ``id`` equals the lower-cased resource
      id (``(:AzureVirtualMachine)-[:PRESENT_IN]->(host)``).

    The two links are created by separate statements on purpose. In a single
    chained query a plain ``MATCH`` on the subscription removes every host row
    without a matching subscription, so those hosts would never reach the
    virtual-machine ``MATCH`` that follows.

    Record keys read by the queries: resourceid, instance_id, subscriptionId,
    subscriptionName, resourceGroup, name, short_hostname, type, osname,
    ostype, tags_environment, tags_costcenter, tags_engproduct,
    tags_engcontact, tags_businesscontact, vmStatus, image_publisher,
    image_offer, image_sku, image_galleryid, publicIpName,
    publicIPAllocationMethod, ipAddress, nsgId, modified_timestamp.

    FIXME: duplicate entries / nodes not merging have been reported for this
    loader; the root cause is not established here.
    NOTE(review): the MERGE key is the raw ``resourceid`` while ``resource_id``
    is lower-cased - check whether differently-cased ids explain it.

    :param neo4j_session: Neo4J session for database interface
    :param data: Host records to load
    :param update_tag: Update tag written to ``lastupdated`` on nodes and
        relationships
    :return: None
    """
    logger.debug("Loading %s azureresourcegraph hosts.", len(data))
    if not data:
        # Nothing to unwind; skip the database round trips.
        return

    # 1) Upsert the host nodes. The MERGE pattern already sets ``id`` on
    #    creation, so only ``firstseen`` needs an ON CREATE clause.
    upsert_hosts_query = """
    UNWIND $Hosts AS host
        MERGE (h:AzureResourceGraphHost{id: host.resourceid})
        ON CREATE SET h.firstseen = timestamp()
        SET h.instance_id = host.instance_id,
            h.resource_id = toLower(host.resourceid),
            h.subscription_id = host.subscriptionId,
            h.subscription_name = host.subscriptionName,
            h.resource_group = host.resourceGroup,
            h.hostname = host.name,
            h.short_hostname = host.short_hostname,
            h.type = host.type,
            h.osname = host.osname,
            h.ostype = host.ostype,
            h.tags_environment = host.tags_environment,
            h.tags_costcenter = host.tags_costcenter,
            h.tags_engproduct = host.tags_engproduct,
            h.tags_engcontact = host.tags_engcontact,
            h.tags_businesscontact = host.tags_businesscontact,
            h.vm_status = host.vmStatus,
            h.image_publisher = host.image_publisher,
            h.image_offer = host.image_offer,
            h.image_sku = host.image_sku,
            h.image_galleryid = host.image_galleryid,
            h.public_ip_name = host.publicIpName,
            h.public_ip_allocation_method = host.publicIPAllocationMethod,
            h.public_ip = host.ipAddress,
            h.nsg_id = host.nsgId,
            h.modified_timestamp = host.modified_timestamp,
            h.lastupdated = $update_tag
    """

    # 2) Attach hosts to their subscription, where one exists in the graph.
    link_subscription_query = """
    UNWIND $Hosts AS host
        MATCH (h:AzureResourceGraphHost{id: host.resourceid})
        MATCH (s:AzureSubscription{id: h.subscription_id})
        MERGE (s)-[r:CONTAINS]->(h)
        ON CREATE SET r.firstseen = timestamp()
        SET r.lastupdated = $update_tag
    """

    # 3) Attach hosts to the matching virtual machine, where one exists,
    #    regardless of whether step 2 found a subscription.
    link_virtual_machine_query = """
    UNWIND $Hosts AS host
        MATCH (h:AzureResourceGraphHost{id: host.resourceid})
        MATCH (vm:AzureVirtualMachine{id: h.resource_id})
        MERGE (vm)-[r:PRESENT_IN]->(h)
        ON CREATE SET r.firstseen = timestamp()
        SET r.lastupdated = $update_tag
    """

    # Order matters: the link statements look up nodes created by the upsert.
    for query in (
        upsert_hosts_query,
        link_subscription_query,
        link_virtual_machine_query,
    ):
        neo4j_session.run(
            query,
            Hosts=data,
            update_tag=update_tag,
        )
Loading