Skip to content

Commit

Permalink
github users integration testing
Browse files Browse the repository at this point in the history
Signed-off-by: Daniel Brauer <[email protected]>
  • Loading branch information
danbrauer committed Oct 31, 2024
1 parent 64e72e2 commit 213c2fc
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 53 deletions.
54 changes: 31 additions & 23 deletions cartography/intel/github/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ def get_users(token: str, api_url: str, organization: str) -> Tuple[List[Dict],
:param token: The Github API token as string.
:param api_url: The Github v4 API endpoint as string.
:param organization: The name of the target Github organization as string.
:return: A 2-tuple containing 1. a list of dicts representing users - see tests.data.github.users.GITHUB_USER_DATA
for shape, and 2. data on the owning GitHub organization - see tests.data.github.users.GITHUB_ORG_DATA for shape.
:return: A 2-tuple containing
1. a list of dicts representing users and
2. data on the owning GitHub organization
see tests.data.github.users.GITHUB_USER_DATA for shape of both
"""
users, org = fetch_all(
token,
Expand All @@ -92,30 +94,37 @@ def get_users(token: str, api_url: str, organization: str) -> Tuple[List[Dict],
)
return users.edges, org

@timeit
def get_enterprise_owners(token: str, api_url: str, organization: str) -> Tuple[List[Dict], List[Dict], Dict]:
def _get_enterprise_owners_raw(token: str, api_url: str, organization: str) -> Tuple[List[Dict], Dict]:
"""
Function broken out for testing purposes. See 'get_enterprise_owners' for docs.
"""
Retrieve a list of enterprise owners from the given GitHub organization as described in
https://docs.github.com/en/graphql/reference/objects#organizationenterpriseowneredge.
:param token: The Github API token as string.
:param api_url: The Github v4 API endpoint as string.
:param organization: The name of the target Github organization as string.
:return: A 2-tuple containing
1. a list of dicts representing enterprise owners who are also users in the organization - see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape
2. a list of dicts representing enterprise owners who are NOT users in the organization - see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape
3. data on the owning GitHub organization - see tests.data.github.users.GITHUB_ORG_DATA for shape.
"""
owners, org = fetch_all(
token,
api_url,
organization,
GITHUB_ENTERPRISE_OWNER_USERS_PAGINATED_GRAPHQL,
'enterpriseOwners',
)
return owners.edges, org

@timeit
def get_enterprise_owners(token: str, api_url: str, organization: str) -> Tuple[List[Dict], List[Dict], Dict]:
"""
Retrieve a list of enterprise owners from the given GitHub organization as described in
https://docs.github.com/en/graphql/reference/objects#organizationenterpriseowneredge.
:param token: The Github API token as string.
:param api_url: The Github v4 API endpoint as string.
:param organization: The name of the target Github organization as string.
:return: A 3-tuple containing
1. a list of dicts representing enterprise owners who are also users in the organization
2. a list of dicts representing enterprise owners who are not users in the organization
3. data on the owning GitHub organization
see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape
"""
owners, org = _get_enterprise_owners_raw(token, api_url, organization)
unaffiliated_owners = []
affiliated_owners = []
for owner in owners.edges:
for owner in owners:
if owner['organizationRole'] == 'UNAFFILIATED':
unaffiliated_owners.append(owner)
else:
Expand All @@ -130,12 +139,11 @@ def _mark_users_as_enterprise_owners(
owner_org_data: Dict,
) -> list[Dict]:
"""
For every organization user, mark if they are also an enterprise owner.
:param user_data: A list of dicts representing users - see tests.data.github.users.GITHUB_USER_DATA for shape.
:param user_org_data: A dict representing the organization for the user_data - see tests.data.github.users.GITHUB_ORG_DATA for shape.
:param affiliated_owner_data: A list of dicts representing affiliated enterprise owners - see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape.
:param owner_org_data: A dict representing the organization for the enterprise_owner_data - see tests.data.github.users.GITHUB_ORG_DATA for shape.
:return: A new list of user_data dicts, updated with a new property, isEnterpriseOwner
:param affiliated_owner_data: A list of dicts representing affiliated enterprise owners (owners who are also users in the org) - see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape.
:param owner_org_data: A dict representing the organization for the owner data - see tests.data.github.users.GITHUB_ORG_DATA for shape.
:return: A new list of user_data dicts updated with a new property, isEnterpriseOwner
"""

# Guarding against accidental mixing of data from different orgs. Since user data and owner data are queried
Expand Down Expand Up @@ -199,11 +207,11 @@ def load_unaffiliated_owners(
) -> None:
"""
The owner_data here represents users who are enterprise owners but are not in the target org.
Note the subtle differences between what is loaded here and what in load_organization_users:
1. The user-org relationship is set to UNAFFILIATED instead of MEMBER_OF.
Note the subtle differences between what is loaded here and in load_organization_users:
1. The user-org relationship is set to UNAFFILIATED
2. 'role' is not set: these users have no role in the organization (i.e. they are neither 'MEMBER' nor 'ADMIN').
3. 'has_2fa_enabled' is not set: it is unavailable from the GraphQL query for these owners
4. 'is_enterprise_owner' is always set to TRUE
3. 'has_2fa_enabled' is not set (it is unavailable from the GraphQL query for these owners)
4. 'is_enterprise_owner' is set to TRUE
If the user does already exist in the graph (perhaps they are members of other orgs) then this merge will
update the user's node but leave 'role' and 'has_2fa_enabled' untouched.
Expand Down
56 changes: 35 additions & 21 deletions tests/data/github/users.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
GITHUB_USER_DATA = [
GITHUB_ORG_DATA = {
'url': 'https://example.com/my_org',
'login': 'my_org',
}


GITHUB_USER_DATA = ([
{
'hasTwoFactorEnabled': None,
'node': {
Expand All @@ -12,6 +18,17 @@
'role': 'MEMBER',
}, {
'hasTwoFactorEnabled': None,
'node': {
'url': 'https://example.com/lmsimpson',
'login': 'lmsimpson',
'name': 'Lisa Simpson',
'isSiteAdmin': False,
'email': 'lmsimpson@example.com',
'company': 'Simpson Residence',
},
'role': 'MEMBER',
}, {
'hasTwoFactorEnabled': True,
'node': {
'url': 'https://example.com/mbsimpson',
'login': 'mbsimpson',
Expand All @@ -21,16 +38,16 @@
'company': 'Simpson Residence',
},
'role': 'ADMIN',
},
]
}],
GITHUB_ORG_DATA
)

# Note the subtle differences between owner data and user data:
# Subtle differences between owner data and user data:
# 1. owner data does not include a `hasTwoFactorEnabled` field (it is unavailable in the GraphQL query for these owners)
# 2. an `organizationRole` field instead of a `role` field. For user data, membership in the queried org
# is assumed. The owner data, membership is not assumed, so there is an 'UNAFFILIATED' value for owners who are
# not also users in an organization. In this list, the 'OWNER' organizationRole matches the 'ADMIN' role in the
# user data. Similarly, the 'DIRECT_MEMBER' organizationRole matches the 'MEMBER' role.
GITHUB_ENTERPRISE_OWNER_DATA = [ # TODO put in real fake values for testing
# 2. an `organizationRole` field instead of a `role` field. In owner data, membership within an org is not assumed, so
# there is an 'UNAFFILIATED' value for owners of an org who are not also members of it. (Otherwise the 'OWNER'
# organizationRole matches the 'ADMIN' role in the user data, and the 'DIRECT_MEMBER' organizationRole matches the 'MEMBER' role.)
GITHUB_ENTERPRISE_OWNER_DATA = ([
{
'node': {
'url': 'https://example.com/kbroflovski',
Expand All @@ -43,14 +60,14 @@
'organizationRole': 'UNAFFILIATED',
}, {
'node': {
'url': 'https://example.com/bjsimpson',
'login': 'bjsimpson',
'name': 'Bartholomew Simpson',
'url': 'https://example.com/mbsimpson',
'login': 'mbsimpson',
'name': 'Marge Simpson',
'isSiteAdmin': False,
'email': 'bjsimpson@example.com',
'email': 'mbsimpson@example.com',
'company': 'Simpson Residence',
},
'organizationRole': 'DIRECT_MEMBER',
'organizationRole': 'OWNER',
}, {
'node': {
'url': 'https://example.com/lmsimpson',
Expand All @@ -60,11 +77,8 @@
'email': 'lmsimpson@example.com',
'company': 'Simpson Residence',
},
'organizationRole': 'OWNER',
},
]
'organizationRole': 'DIRECT_MEMBER',
}],
GITHUB_ORG_DATA
)

GITHUB_ORG_DATA = {
'url': 'https://example.com/my_org',
'login': 'my_org',
}
87 changes: 78 additions & 9 deletions tests/integration/cartography/intel/github/test_users.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,31 @@
from unittest.mock import patch
import cartography.intel.github.users
import tests.data.github.users
from tests.data.github.users import GITHUB_USER_DATA
from tests.data.github.users import GITHUB_ORG_DATA
from tests.data.github.users import GITHUB_ENTERPRISE_OWNER_DATA

TEST_UPDATE_TAG = 123456789
TEST_JOB_PARAMS = {'UPDATE_TAG': TEST_UPDATE_TAG}
TEST_GITHUB_URL = GITHUB_ORG_DATA['url']
TEST_GITHUB_ORG = GITHUB_ORG_DATA['login']
FAKE_API_KEY = 'asdf'


def test_load_github_organization_users(neo4j_session):
cartography.intel.github.users.load_organization_users(
@patch.object(cartography.intel.github.users, 'get_users', return_value=GITHUB_USER_DATA)
@patch.object(cartography.intel.github.users, '_get_enterprise_owners_raw', return_value=GITHUB_ENTERPRISE_OWNER_DATA)
def test_sync(mock_owners, mock_users, neo4j_session):
# Arrange
# No need to 'arrange' data here. The patched functions return all the data needed.

# Act
cartography.intel.github.users.sync(
neo4j_session,
tests.data.github.users.GITHUB_USER_DATA,
tests.data.github.users.GITHUB_ORG_DATA,
TEST_UPDATE_TAG,
)
TEST_JOB_PARAMS,
FAKE_API_KEY,
TEST_GITHUB_URL,
TEST_GITHUB_ORG)

# Assert

# Ensure users got loaded
nodes = neo4j_session.run(
Expand All @@ -20,7 +35,9 @@ def test_load_github_organization_users(neo4j_session):
)
expected_nodes = {
("https://example.com/hjsimpson", 'MEMBER'),
("https://example.com/lmsimpson", 'MEMBER'),
("https://example.com/mbsimpson", 'ADMIN'),
("https://example.com/kbroflovski", None),
}
actual_nodes = {
(
Expand All @@ -33,23 +50,75 @@ def test_load_github_organization_users(neo4j_session):
# Ensure users are connected to the expected organization
nodes = neo4j_session.run(
"""
MATCH(user:GitHubUser)-[:MEMBER_OF]->(org:GitHubOrganization)
RETURN user.id, org.id
MATCH(user:GitHubUser)-[r]->(org:GitHubOrganization)
RETURN user.id, type(r), org.id
""",
)
actual_nodes = {
(
n['user.id'],
n['type(r)'],
n['org.id'],
) for n in nodes
}
expected_nodes = {
(
'https://example.com/hjsimpson',
'MEMBER_OF',
'https://example.com/my_org',
), (
'https://example.com/lmsimpson',
'MEMBER_OF',
'https://example.com/my_org',
), (
'https://example.com/mbsimpson',
'MEMBER_OF',
'https://example.com/my_org',
), (
'https://example.com/kbroflovski',
'UNAFFILIATED',
'https://example.com/my_org',
),
}
assert actual_nodes == expected_nodes

# Ensure enterprise owners are identified
nodes = neo4j_session.run(
"""
MATCH (g:GitHubUser) RETURN g.id, g.is_enterprise_owner
""",
)
expected_nodes = {
("https://example.com/hjsimpson", False),
("https://example.com/lmsimpson", True),
("https://example.com/mbsimpson", True),
("https://example.com/kbroflovski", True),
}
actual_nodes = {
(
n['g.id'],
n['g.is_enterprise_owner'],
) for n in nodes
}
assert actual_nodes == expected_nodes

# Ensure hasTwoFactorEnabled has not been improperly overwritten for enterprise owners
# (the enterprise-owner query does not return 2FA status, so loading owners must leave has_2fa_enabled as-is)
nodes = neo4j_session.run(
"""
MATCH (g:GitHubUser) RETURN g.id, g.has_2fa_enabled
""",
)
expected_nodes = {
("https://example.com/hjsimpson", None),
("https://example.com/lmsimpson", None),
("https://example.com/mbsimpson", True),
("https://example.com/kbroflovski", None),
}
actual_nodes = {
(
n['g.id'],
n['g.has_2fa_enabled'],
) for n in nodes
}
assert actual_nodes == expected_nodes

0 comments on commit 213c2fc

Please sign in to comment.