Skip to content

Commit

Permalink
Merge pull request #256 from sudiptob2/feat/255/cloud-datastore-crawler
Browse files Browse the repository at this point in the history
Feat/255/cloud datastore crawler
  • Loading branch information
0xDeva authored Jul 31, 2023
2 parents cc03a1e + 1c33cc5 commit 606bd06
Show file tree
Hide file tree
Showing 11 changed files with 176 additions and 38 deletions.
3 changes: 3 additions & 0 deletions example_config
Original file line number Diff line number Diff line change
Expand Up @@ -89,5 +89,8 @@
},
"firestore_collections": {
"fetch": true
},
"datastore_kinds": {
"fetch": true
}
}
64 changes: 27 additions & 37 deletions src/gcp_scanner/client/client_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@
from gcp_scanner.client.cloud_functions_client import CloudFunctionsClient
from gcp_scanner.client.cloud_resource_manager_client import CloudResourceManagerClient
from gcp_scanner.client.compute_client import ComputeClient
from gcp_scanner.client.firestore_client import FirestoreClient
from gcp_scanner.client.datastore_client import DatastoreClient
from gcp_scanner.client.dns_client import DNSClient
from gcp_scanner.client.filestore_client import FilestoreClient
from gcp_scanner.client.firestore_client import FirestoreClient
from gcp_scanner.client.iam_client import IAMClient
from gcp_scanner.client.kms_client import CloudKMSClient
from gcp_scanner.client.pubsub_client import PubSubClient
Expand All @@ -37,46 +38,35 @@
class ClientFactory:
    """Factory class for creating clients.

    The `clients` table is the single source of truth for the supported
    service names; `get_client` dispatches through it.
    """

    # Dispatch table: lowercase service name -> client class to instantiate.
    clients = {
        "appengine": AppEngineClient,
        "bigquery": BQClient,
        "bigtableadmin": BigTableClient,
        "cloudfunctions": CloudFunctionsClient,
        "cloudkms": CloudKMSClient,
        "cloudresourcemanager": CloudResourceManagerClient,
        "compute": ComputeClient,
        "datastore": DatastoreClient,
        "dns": DNSClient,
        "firestore": FirestoreClient,
        "file": FilestoreClient,
        "iam": IAMClient,
        "pubsub": PubSubClient,
        "servicemanagement": ServiceManagementClient,
        "serviceusage": ServiceUsageClient,
        "sourcerepo": SourceRepoClient,
        "spanner": SpannerClient,
        "sqladmin": SQLClient,
        "storage": StorageClient,
    }

    @classmethod
    def get_client(cls, name):
        """Returns the appropriate client.

        Args:
          name: Service name; matched case-insensitively against the keys
            of `clients`.

        Returns:
          A new instance of the matching client class, or None (after
          logging an error) when the name is not supported.
        """
        # A non-string name (e.g. None) can never match a key; route it to
        # the same "not supported" path instead of failing on `.lower()`.
        client_cls = None
        if isinstance(name, str):
            client_cls = cls.clients.get(name.lower())
        if client_cls:
            return client_cls()

        logging.error("Client not supported.")
        return None
38 changes: 38 additions & 0 deletions src/gcp_scanner/client/datastore_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from googleapiclient import discovery
from httplib2 import Credentials

from .interface_client import IClient


class DatastoreClient(IClient):
    """Client that builds the discovery service for Cloud Datastore."""

    def get_service(self, credentials: Credentials) -> discovery.Resource:
        """Build the discovery resource for the Datastore v1 API.

        Args:
          credentials: Credentials object handed to `discovery.build`.

        Returns:
          The discovery.Resource produced by `discovery.build`.
        """
        build_options = {
            "credentials": credentials,
            "cache_discovery": False,
        }
        return discovery.build("datastore", "v1", **build_options)
2 changes: 2 additions & 0 deletions src/gcp_scanner/crawler/crawler_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from gcp_scanner.crawler.compute_snapshots_crawler import ComputeSnapshotsCrawler
from gcp_scanner.crawler.compute_static_ips_crawler import ComputeStaticIPsCrawler
from gcp_scanner.crawler.compute_subnets_crawler import ComputeSubnetsCrawler
from gcp_scanner.crawler.datastore_crawler import DatastoreCrawler
from gcp_scanner.crawler.dns_managed_zones_crawler import DNSManagedZonesCrawler
from gcp_scanner.crawler.dns_policies_crawler import DNSPoliciesCrawler
from gcp_scanner.crawler.endpoints_crawler import EndpointsCrawler
Expand All @@ -52,6 +53,7 @@
"compute_images": ComputeImagesCrawler,
"compute_instances": ComputeInstancesCrawler,
"compute_snapshots": ComputeSnapshotsCrawler,
"datastore_kinds": DatastoreCrawler,
"dns_policies": DNSPoliciesCrawler,
"endpoints": EndpointsCrawler,
"filestore_instances": FilestoreInstancesCrawler,
Expand Down
56 changes: 56 additions & 0 deletions src/gcp_scanner/crawler/datastore_crawler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import sys
from typing import List, Dict, Any, Union

from googleapiclient import discovery

from gcp_scanner.crawler.interface_crawler import ICrawler


class DatastoreCrawler(ICrawler):
    """Handle crawling of cloud datastore data."""

    def crawl(self, project_id: str, service: discovery.Resource,
              config: Union[Dict[str, Union[bool, str]], None] = None
              ) -> Dict[str, List[Dict[str, Any]]]:
        """Retrieve a list of datastore kinds available in the project.

        Runs a `__kind__` metadata query and follows the result cursor so
        that kinds beyond the first result batch are included as well.

        Args:
          project_id: A name of a project to query info about.
          service: A resource object for interacting with the Datastore API.
          config: Configuration options for the crawler (Optional). Not read
            by this crawler.

        Returns:
          A dictionary whose "kinds" key holds the list of kind names. The
          dictionary is empty when no request could be built or when the
          very first query fails; if a later page fails, the kinds gathered
          so far are returned.
        """

        logging.info("Retrieving Datastore entities.")
        datastore_kinds = {}
        # `__kind__` is the metadata kind whose entities name the regular kinds.
        query = {"kind": [{"name": "__kind__"}]}
        try:
            while True:
                request = service.projects().runQuery(
                    projectId=project_id, body={"query": query})
                if request is None:
                    break
                batch = request.execute().get("batch", {})
                page_kinds = [
                    entity["entity"]["key"]["path"][0]["name"]
                    for entity in batch.get("entityResults", [])
                ]
                datastore_kinds.setdefault("kinds", []).extend(page_kinds)

                # Only NOT_FINISHED means the same query has further batches;
                # a missing cursor would make us re-read the same page forever.
                next_cursor = batch.get("endCursor")
                if batch.get("moreResults") != "NOT_FINISHED" or not next_cursor:
                    break
                query["startCursor"] = next_cursor
        except Exception:
            # Best-effort crawl: log the failure and return what we have.
            logging.info("Failed to retrieve datastore entities for project %s", project_id)
            logging.info(sys.exc_info())
        return datastore_kinds
1 change: 1 addition & 0 deletions src/gcp_scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
'compute_images': 'compute',
'compute_instances': 'compute',
'compute_snapshots': 'compute',
'datastore_kinds': 'datastore',
'dns_policies': 'dns',
'endpoints': 'servicemanagement',
'firestore_collections': 'firestore',
Expand Down
2 changes: 1 addition & 1 deletion src/gcp_scanner/test_acceptance.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

from . import scanner

# Count constants for the acceptance test (presumably the expected numbers of
# each kind of output artifact; confirm against the assertions that use them).
RESOURCE_COUNT = 31
RESULTS_JSON_COUNT = 1
PROJECT_INFO_COUNT = 5
IAM_POLICY_COUNT = 12
Expand Down
28 changes: 28 additions & 0 deletions src/gcp_scanner/test_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from .client.cloud_functions_client import CloudFunctionsClient
from .client.cloud_resource_manager_client import CloudResourceManagerClient
from .client.compute_client import ComputeClient
from .client.datastore_client import DatastoreClient
from .client.dns_client import DNSClient
from .client.filestore_client import FilestoreClient
from .client.firestore_client import FirestoreClient
Expand Down Expand Up @@ -68,6 +69,7 @@
from .crawler.compute_static_ips_crawler import ComputeStaticIPsCrawler
from .crawler.compute_subnets_crawler import ComputeSubnetsCrawler
from .crawler.crawler_factory import CrawlerFactory
from .crawler.datastore_crawler import DatastoreCrawler
from .crawler.dns_managed_zones_crawler import DNSManagedZonesCrawler
from .crawler.dns_policies_crawler import DNSPoliciesCrawler
from .crawler.endpoints_crawler import EndpointsCrawler
Expand Down Expand Up @@ -803,6 +805,22 @@ def test_firestore_collections(self):
ClientFactory.get_client("firestore").get_service(self.credentials),
),
"firestore_collections",
False,
)
)

def test_datastore_kinds(self):
    """Crawl Datastore kinds for the test project and check them with verify()."""
    crawler = CrawlerFactory.create_crawler("datastore_kinds")
    service = ClientFactory.get_client("datastore").get_service(self.credentials)
    crawled = crawler.crawl(PROJECT_NAME, service)
    self.assertTrue(verify(crawled, "datastore_kinds", False))

Expand Down Expand Up @@ -900,6 +918,11 @@ def test_get_client_firestore(self):
client = ClientFactory.get_client("firestore")
self.assertIsInstance(client, FirestoreClient)

def test_get_client_datastore(self):
    """The 'datastore' name must yield a DatastoreClient instance."""
    self.assertIsInstance(
        ClientFactory.get_client("datastore"),
        DatastoreClient,
    )

def test_get_client_invalid(self):
"""Test get_client method with invalid name."""
with self.assertLogs(level=logging.ERROR) as log:
Expand Down Expand Up @@ -1051,6 +1074,11 @@ def test_create_crawler_firestore_collections(self):
crawler = CrawlerFactory.create_crawler("firestore_collections")
self.assertIsInstance(crawler, FirestoreCollectionsCrawler)

def test_create_crawler_datastore_kinds(self):
    """The 'datastore_kinds' name must yield a DatastoreCrawler instance."""
    self.assertIsInstance(
        CrawlerFactory.create_crawler("datastore_kinds"),
        DatastoreCrawler,
    )

def test_create_crawler_invalid(self):
"""Test create_crawler method with invalid name."""
with self.assertLogs(level=logging.ERROR) as log:
Expand Down
3 changes: 3 additions & 0 deletions test/bootstrap/datastore.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# TODO: implement the Datastore bootstrap script.
16 changes: 16 additions & 0 deletions test/datastore_kinds
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"kinds": [
"Person",
"Pet",
"Toy",
"__Stat_Kind_IsRootEntity__",
"__Stat_Kind__",
"__Stat_PropertyName_Kind__",
"__Stat_PropertyType_Kind__",
"__Stat_PropertyType_PropertyName_Kind__",
"__Stat_PropertyType__",
"__Stat_Total__",
"gsoc-2023",
"test-collection-2"
]
}
1 change: 1 addition & 0 deletions test/firestore_collections
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"projects/test-gcp-scanner-2/databases/(default)": [
"Person",
"gsoc-2023",
"test-collection-2"
]
Expand Down

0 comments on commit 606bd06

Please sign in to comment.