-
Notifications
You must be signed in to change notification settings - Fork 340
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add sumologic source #1078
base: master
Are you sure you want to change the base?
Add sumologic source #1078
Changes from all commits
546b9de
0b9f462
5772ef2
7ec98d0
e056d6b
78be15d
c7c33f8
b4d73e8
b197682
b4fd6a8
a0b0fc9
4afe156
949dea3
d0ce424
4bb8980
8815192
dddad13
d733f6b
4760471
8d3eef3
ce8c106
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
{ | ||
"statements": [ | ||
{ | ||
"query": "WITH datetime()-duration('P7D') AS threshold MATCH (h:SumologicHost) WHERE h.lastupdated < threshold WITH h LIMIT $LIMIT_SIZE DETACH DELETE (h)", | ||
"iterative": true, | ||
"iterationsize": 100 | ||
} | ||
], | ||
"name": "cleanup sumologic" | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
""" | ||
cartography/intel/sumologic | ||
""" | ||
import logging | ||
|
||
import neo4j | ||
|
||
from cartography.config import Config | ||
from cartography.intel.sumologic.endpoints import sync_hosts | ||
from cartography.stats import get_stats_client | ||
from cartography.util import merge_module_sync_metadata | ||
from cartography.util import run_cleanup_job | ||
from cartography.util import timeit | ||
|
||
logger = logging.getLogger(__name__) | ||
stat_handler = get_stats_client(__name__) | ||
|
||
|
||
@timeit | ||
def start_sumologic_ingestion( | ||
neo4j_session: neo4j.Session, | ||
config: Config, | ||
) -> None: | ||
""" | ||
Perform ingestion of Sumologic data. | ||
:param neo4j_session: Neo4J session for database interface | ||
:param config: A cartography.config object | ||
:return: None | ||
""" | ||
common_job_parameters = { | ||
"UPDATE_TAG": config.update_tag, | ||
} | ||
if not config.sumologic_access_id or not config.sumologic_access_key: | ||
logger.error("sumologic config not found") | ||
return | ||
|
||
authorization = ( | ||
config.sumologic_access_id, | ||
config.sumologic_access_key, | ||
config.sumologic_api_url, | ||
) | ||
sync_hosts( | ||
neo4j_session, | ||
config.update_tag, | ||
authorization, | ||
) | ||
run_cleanup_job( | ||
"sumologic_import_cleanup.json", | ||
neo4j_session, | ||
common_job_parameters, | ||
) | ||
|
||
group_id = "public" | ||
if config.sumologic_api_url: | ||
group_id = config.sumologic_api_url | ||
merge_module_sync_metadata( | ||
neo4j_session, | ||
group_type="sumologic", | ||
group_id=group_id, | ||
synced_type="sumologic", | ||
update_tag=config.update_tag, | ||
stat_handler=stat_handler, | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
""" | ||
cartography/intel/sumologic/endpoints | ||
""" | ||
# pylint: disable=missing-function-docstring,too-many-arguments | ||
import logging | ||
from typing import Dict | ||
from typing import List | ||
from typing import Tuple | ||
|
||
import neo4j | ||
|
||
from .util import get_sumologic_hosts | ||
from cartography.util import timeit | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
@timeit | ||
def sync_hosts( | ||
neo4j_session: neo4j.Session, | ||
update_tag: int, | ||
authorization: Tuple[str, str, str], | ||
) -> None: | ||
sumologic_hosts_list = get_sumologic_hosts(authorization) | ||
for host_data in sumologic_hosts_list: | ||
load_host_data(neo4j_session, host_data, update_tag) | ||
|
||
|
||
def load_host_data( | ||
neo4j_session: neo4j.Session, | ||
data: List[Dict], | ||
update_tag: int, | ||
) -> None: | ||
""" | ||
Transform and load scan information | ||
""" | ||
ingestion_cypher_query = """ | ||
UNWIND $Hosts AS host | ||
MERGE (h:SumologicHost{hostname: host.hostname}) | ||
ON CREATE SET h.hostname = host.hostname, | ||
h.sumologic_instance = host.instance, | ||
h.tool_first_seen = host.firstseen | ||
SET h.short_hostname = host.short_hostname, | ||
h.tool_last_seen = host.lastseen, | ||
h.sumologic_bu = host.bu, | ||
h.sumologic_dc = host.dc, | ||
h.modified_timestamp = host.modified_timestamp, | ||
h.lastupdated = $update_tag | ||
""" | ||
logger.info("Loading %s sumologic hosts.", len(data)) | ||
neo4j_session.run( | ||
ingestion_cypher_query, | ||
Hosts=data, | ||
update_tag=update_tag, | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
""" | ||
cartography/intel/sumologic/util | ||
""" | ||
# pylint: disable=invalid-name,broad-except | ||
import datetime | ||
import json | ||
import logging | ||
from typing import List | ||
from typing import Tuple | ||
|
||
from msticpy.data.data_providers import QueryProvider | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def get_sumologic_hosts( | ||
authorization: Tuple[str, str, str], | ||
timeout_max: int = 600, | ||
) -> List: | ||
""" | ||
Get Sumologic (Logging) coverage inventory | ||
|
||
Timeout should be adapted to context, mostly size of indexes and searched timeperiod. | ||
|
||
https://help.sumologic.com/docs/api/search-job/ | ||
https://msticpy.readthedocs.io/en/latest/data_acquisition/DataProv-Sumologic.html | ||
""" | ||
|
||
( | ||
sumologic_access_id, | ||
sumologic_access_key, | ||
sumologic_server_url, | ||
) = authorization | ||
end_time = datetime.datetime.now() | ||
start_time = end_time - datetime.timedelta(hours=1) | ||
|
||
qry_prov = QueryProvider("Sumologic") | ||
qry_prov.connect( | ||
connection_str=sumologic_server_url, | ||
accessid=sumologic_access_id, | ||
accesskey=sumologic_access_key, | ||
) | ||
|
||
# _sourceCategory, _sourceHost and _collector will get multiple entries for same host... | ||
# Exact query depends on your context and how structured in your platform and | ||
# the existence of a hostname (through Field Extraction Rule for example) | ||
# or use Sumologic Cloud SIEMT Enterprise normalized schema if apply | ||
# | formatDate(_messageTime,"yyyy/dd/MM HH:mm:ss") as date | ||
# | formatDate(_messageTime,"yyyy/MM/dd'T'HH:mm:ss'Z'") as date - NOK | ||
# | formatDate(_messageTime,"yyyy-MM-dd HH:mm:ss") as date - NOK | ||
vm_assets_query = r"""(_sourceCategory=*/*/SERVER or _sourceCategory=*/*/NXLOG_*) | ||
| formatDate(_messageTime,"yyyy-MM-dd'T'HH:mm:ss'Z'") as date | ||
| tolowercase(replace(hostname, /\..*$/, "")) as short_hostname | ||
| tolowercase(hostname) as hostname | ||
| first(date) as lastseen, last(date) as firstseen by bu,dc,hostname,short_hostname | ||
| count bu,dc,hostname,short_hostname,firstseen,lastseen | ||
| fields bu,dc,hostname,short_hostname,firstseen,lastseen""" | ||
df_vm_assets = qry_prov.exec_query( | ||
vm_assets_query, | ||
start_time=start_time, | ||
end_time=end_time, | ||
timeout=timeout_max, | ||
verbosity=2, | ||
) | ||
|
||
df_vm_assets.columns = df_vm_assets.columns.str.lstrip("map.") | ||
df_vm_assets["instance"] = sumologic_server_url.replace( | ||
".sumologic.com/api", | ||
"", | ||
).replace("https://api.", "") | ||
|
||
logger.info("SumologicHosts count final: %s", df_vm_assets.shape[0]) | ||
|
||
if df_vm_assets.shape[0]: | ||
flatten_data = json.loads(df_vm_assets.to_json(orient="records")) | ||
logger.debug("Example: %s", flatten_data[0]) | ||
return flatten_data | ||
|
||
logger.warning("No data returned") | ||
return [] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
## Sumologic Configuration | ||
|
||
.. _sumologic_config: | ||
|
||
Follow these steps to collect Sumologic hosts data with Cartography: | ||
|
||
1. Set up an Access key as per https://help.sumologic.com/docs/manage/security/access-keys/ | ||
1. Populate environment variable with the value generated in the previous step (for example, `SUMO_ACCESSKEY`) | ||
1. Call the `cartography` CLI with: | ||
```bash | ||
--sumologic-access-id xxx \ | ||
--sumologic-access-key-env-var SUMO_ACCESSKEY \ | ||
--sumologic-api-url https://api.us2.sumologic.com/api | ||
``` | ||
API url per https://help.sumologic.com/docs/api/getting-started/ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
## Sumologic Schema | ||
|
||
.. _sumologic_schema: | ||
|
||
### Sumologic core platform | ||
|
||
Representation of a system sending logs to Sumologic core platform. Be aware that this may vary depending on your environment. | ||
Default code expects _sourceCategory to be formatted as BU/DC/SOURCE_TYPE aka business unit, datacenter or location, and source type. Source type as SERVER or NXLOG_* to match targeted systems (linux, windows...). Some fields may need to be set through [Field Extraction Rules](https://help.sumologic.com/docs/manage/field-extractions/) | ||
|
||
| Field | Description | | ||
|-------|-------------| | ||
|tool_first_seen| Timestamp of when first available logs for host is available since first sync| | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you meant to add the schema for the hosts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand the comment. |
||
|tool_last_seen| Timestamp of when last available logs for host is available per last sync| | ||
|lastupdated| Timestamp of the last time the node was updated| | ||
|**hostname**| The Hostname Computer name| | ||
|short_hostname| The short hostname, lowercase| | ||
|bu| The business unit| | ||
|dc| The datacenter| |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
GET_HOSTS = [ | ||
{ | ||
"firstseen": "2020-12-01T10:20:30Z", | ||
"lastseen": "2022-12-15T15:15:15Z", | ||
"hostname": "sumohostname.example.com", | ||
"bu": "myBU", | ||
"short_hostname": "sumohostname", | ||
"dc": "REGION1", | ||
"instance": "us2", | ||
}, | ||
] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you tell us how this works?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the same thing as before but cleaning only entries lastupdated older than 7 days as per #1015