Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FLOC-3983] Acceptance tests prep for node shutdown #2532

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions admin/acceptance.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,6 @@ def get_trial_environment(cluster):
'FLOCKER_ACCEPTANCE_VOLUME_BACKEND': cluster.dataset_backend.name,
'FLOCKER_ACCEPTANCE_API_CERTIFICATES_PATH':
cluster.certificates_path.path,
'FLOCKER_ACCEPTANCE_HOSTNAME_TO_PUBLIC_ADDRESS': json.dumps({
node.private_address: node.address
for node in cluster.agent_nodes
if node.private_address is not None
}),
'FLOCKER_ACCEPTANCE_DEFAULT_VOLUME_SIZE': bytes(
cluster.default_volume_size
),
Expand Down
1 change: 0 additions & 1 deletion flocker/acceptance/endtoend/test_installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,6 @@ def _cleanup_flocker(self):
control_node=self.control_node_ip.encode('ascii'),
certificates_path=local_certs_path,
num_agent_nodes=2,
hostname_to_public_address={},
username='user1',
)
d.addCallback(
Expand Down
60 changes: 45 additions & 15 deletions flocker/acceptance/testtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

from docker.tls import TLSConfig

from ipaddress import ip_address

from twisted.web.http import OK, CREATED
from twisted.python.filepath import FilePath
from twisted.python.constants import Names, NamedConstant
Expand Down Expand Up @@ -45,6 +47,7 @@
from ..apiclient import FlockerClient, DatasetState
from ..node.script import get_backend, get_api
from ..node import dockerpy_client
from ..node.agents.blockdevice import ICloudAPI

from .node_scripts import SCRIPTS as NODE_SCRIPTS

Expand Down Expand Up @@ -826,7 +829,7 @@ def get_file(self, node, path):

def connected_cluster(
reactor, control_node, certificates_path, num_agent_nodes,
hostname_to_public_address, username='user',
username='user',
):
cluster_cert = certificates_path.child(b"cluster.crt")
user_cert = certificates_path.child(
Expand Down Expand Up @@ -869,24 +872,56 @@ def failed_query(failure):
failed_query)
return d
agents_connected = loop_until(reactor, nodes_available)
agents_connected.addCallback(lambda _: _add_nodes(cluster))
return agents_connected


def _add_nodes(cluster):
"""
Configure the ``Node`` objects for a newly created ``Cluster`` whose
nodes are known to be alive.

:param Cluster cluster: Cluster that still needs nodes set.
:return: ``cluster`` updated with appropriate ``nodes`` set.
"""
# By default we just trust address returned by Flocker
def default_get_public_ip(address):
return address

# Extract node hostnames from API that lists nodes. Currently we
# happen know these in advance, but in FLOC-1631 node identification
# will switch to UUIDs instead.
agents_connected.addCallback(lambda _: cluster.current_nodes())
try:
backend = get_backend_api(None, cluster.cluster_uuid)
except SkipTest:
# Can't load backend, will have to trust Flocker's reported IPs.
get_public_ip = default_get_public_ip
else:
if ICloudAPI.providedBy(backend):
node_ips = list(set(ip_address(i) for i in ips)
for ips in backend.list_live_nodes().values())

def get_public_ip(address):
for ips in node_ips:
if ip_address(address) in ips:
return [unicode(ip) for ip in ips
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prefer list(unicode(ip) for ...) rather than [unicode(ip) ...] so that we don't pollute the ip variable into the outer namespace.

if not any([ip.is_private, ip.is_link_local,
ip.is_loopback])][0]
raise ValueError(
"Couldn't find address in cloud API reported IPs")
else:
get_public_ip = default_get_public_ip

def node_from_dict(node):
reported_hostname = node["host"]
public_address = hostname_to_public_address.get(
reported_hostname, reported_hostname)
public_address = get_public_ip(reported_hostname)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❔ This looks kinda gross. This assumes that reported_hostname is a unicode IP address, and will blow up if it is not. Seems that our json schema says our API returns a json schema string(read unicode) that is the IPv4 address as node["host"] so we are actually in a good place. Please just rename reported_hostname to reported_ip_address ... or just remove that poorly named variable?

return Node(
uuid=node[u"uuid"],
public_address=public_address.encode("ascii"),
reported_hostname=reported_hostname.encode("ascii"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❔ ew at this. But it looks like it's nearly unused and pending removal, so probably not worth fixing up.

)
agents_connected.addCallback(lambda nodes: cluster.set(
"nodes", map(node_from_dict, nodes)))
return agents_connected

d = cluster.current_nodes()
d.addCallback(
lambda nodes: cluster.set("nodes", map(node_from_dict, nodes)))
return d


def _get_test_cluster(reactor):
Expand Down Expand Up @@ -914,16 +949,11 @@ def _get_test_cluster(reactor):
certificates_path = FilePath(
environ["FLOCKER_ACCEPTANCE_API_CERTIFICATES_PATH"])

hostname_to_public_address_env_var = environ.get(
"FLOCKER_ACCEPTANCE_HOSTNAME_TO_PUBLIC_ADDRESS", "{}")
hostname_to_public_address = json.loads(hostname_to_public_address_env_var)

return connected_cluster(
reactor,
control_node,
certificates_path,
num_agent_nodes,
hostname_to_public_address
)


Expand Down
9 changes: 6 additions & 3 deletions flocker/node/agents/blockdevice.py
Original file line number Diff line number Diff line change
Expand Up @@ -1121,8 +1121,10 @@ def list_live_nodes():
This is used to figure out which nodes are dead, so that other
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❔ Potentially awkward interface note. This is kind of like a Get details on the nodes, and filter results so that I only see the live nodes. For instance, if this was directly an interface on GCE I would probably be a bit upset.

I think more natural would be 'list_nodes' with an optional filter for only live nodes.

No great ideas here though, just something to think about.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do think we need to rewrite this before it's public. I'll add a comment to that effect.

nodes can do the detach.

:returns: A collection of ``unicode`` compute instance IDs, compatible
with those returned by ``IBlockDeviceAPI.compute_instance_id``.
:returns: A mapping of ``unicode`` compute instance IDs
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thought about this more... I think we might want to introduce a new type here rather than returning a raw mapping. Given that we might eventually make this interface public... I think we should be a bit careful and precise about changing and designing it.

Something like returning a list of CloudComputeInstance's:

class CloudComputeInstance(PClass):
   id = field(type=unicode)
   ip_addresses = pvector_field(type=unicode)

Rational:

I don't know what the following does without looking at the source:

"ssh root@{}".format(api.list_live_nodes()['i-3ab8f212'][0])

I can guess what the following does without going to the source:

"ssh root@{}"format(next(x for x in api.list_live_nodes() if x.id = 'i-3ab8f212').ip_addresses[0])

(compatible with those returned by
``IBlockDeviceAPI.compute_instance_id``) to IPs of those
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I misunderstood this until I read the code further down, thinking it was one IP per compute instance. perhaps as lists of unicode IPs instead of also as unicode?

nodes, also as unicode..
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: extra . also

``unicode``

"""

def start_node(node_id):
Expand Down Expand Up @@ -1647,7 +1649,8 @@ def is_existing_block_device(dataset_id, path):
pass

if ICloudAPI.providedBy(self._underlying_blockdevice_api):
live_instances = self._underlying_blockdevice_api.list_live_nodes()
live_instances = list(
self._underlying_blockdevice_api.list_live_nodes())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❔ I personally prefer dict(a=1, b=2).keys() over list(dict(a=1, b=2)) everything else being equal. Seems it's only 1 more character, but seems to more explicitly express intent.

TBH, I always think that turning a dictionary into a list should have the behavior of dict().iteritems(), and the mental burden of remembering that I'm wrong is tiresome 😛. Optional as this is obviously personal preference.

else:
# Can't know accurately who is alive and who is dead:
live_instances = None
Expand Down
7 changes: 5 additions & 2 deletions flocker/node/agents/cinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,8 +689,11 @@ def get_device_path(self, blockdevice_id):

# ICloudAPI:
def list_live_nodes(self):
return list(server.id for server in self.nova_server_manager.list()
if server.status == u'ACTIVE')
return {server.id:
list(map(
unicode, _extract_nova_server_addresses(server.addresses)))
for server in self.nova_server_manager.list()
if server.status == u'ACTIVE'}

def start_node(self, node_id):
server = self.nova_server_manager.get(node_id)
Expand Down
7 changes: 5 additions & 2 deletions flocker/node/agents/ebs.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,7 @@ def _wait_for_volume_state_change(operation,
start_time = time.time()
poll_until(
lambda: _reached_end_state(
operation, volume, update, time.time() - start_time, timeout
operation, volume, update, time.time() - start_time, timeout
),
itertools.repeat(1)
)
Expand Down Expand Up @@ -1363,7 +1363,10 @@ def get_device_path(self, blockdevice_id):
def list_live_nodes(self):
instances = self.connection.instances.filter(
Filters=[{'Name': 'instance-state-name', 'Values': ['running']}])
return list(unicode(instance.id) for instance in instances)
return {unicode(instance.id):
[unicode(instance.public_ip_address),
unicode(instance.private_ip_address)]
for instance in instances}

@boto3_log
def start_node(self, node_id):
Expand Down
25 changes: 22 additions & 3 deletions flocker/node/agents/test/test_blockdevice.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
_backing_file_name,
)
from ....common.algebraic import tagged_union_strategy

from ....common import get_all_ips

from ... import run_state_change, in_parallel, ILocalState, IStateChange, NoOp
from ...testtools import (
Expand Down Expand Up @@ -754,7 +754,12 @@ def __init__(self, block_api, live_nodes=()):
self.live_nodes = live_nodes

def list_live_nodes(self):
return [self.compute_instance_id()] + list(self.live_nodes)
result = {self.compute_instance_id():
set(unicode(i) for i in get_all_ips()
if i != b"127.0.0.1")}
result.update({node: [u"10.1.1.{}".format(i)]
for i, node in enumerate(self.live_nodes)})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❔ would you mind documenting the type of self.live_nodes as iterable of unicode compute_instance_ids (or equivalent) on line 751? Slowed my review down not having it there. Optional as it's not your change.

return result

def start_node(self, node_id):
return
Expand Down Expand Up @@ -5501,11 +5506,25 @@ def test_current_machine_is_live(self):
self.assertIn(self.api.compute_instance_id(), live))
return d

def test_current_machine_has_appropriate_ip(self):
"""
The machine's known IP is set for the current node.
"""
local_addresses = set(unicode(i) for i in get_all_ips()
if i != b"127.0.0.1")
d = self.async_cloud_api.list_live_nodes()
d.addCallback(
lambda live:
self.assertTrue(
set(live[self.api.compute_instance_id()]).intersection(
local_addresses)))
return d

def test_list_live_nodes(self):
"""
``list_live_nodes`` returns an iterable of unicode values.
"""
live_nodes = self.api.list_live_nodes()
live_nodes = list(self.api.list_live_nodes())
self.assertThat(live_nodes, AllMatch(IsInstance(unicode)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Considering how brittle getting the wrong type may be (would fail acceptance tests, blowing up when trying to create an ip_address, which would be sad since it could be caught here), strongly prefer to verify the types of everything returned.

self.assertThat(live_nodes.itieritems(), AllMatch(
    MatchesListwise(
        (IsInstance(unicode), AllMatch(IsInstance(unicode)))
    )
))


return Tests
Expand Down