From 00a11b403b44fb1033c3baa101d295812de16008 Mon Sep 17 00:00:00 2001
From: Brooke McKim
Date: Tue, 16 Jul 2024 20:19:11 -0400
Subject: [PATCH] fix: EBS page went away. Use the api.

---
Review notes (text in this section is not part of the commit message):
  * ec2.py: ebs_throughput now reads MaximumThroughputInMBps. It previously
    re-read BaselineThroughputInMBps, so the "max" throughput always equalled
    the baseline value.
  * Line breaks and indentation were reconstructed from a whitespace-collapsed
    copy of this patch; verify context lines against the tree before applying.

 ec2.py    | 12 +++++++
 rds.py    | 99 ++++++++-----------------------------------------------
 scrape.py | 93 +--------------------------------------------------
 3 files changed, 26 insertions(+), 178 deletions(-)

diff --git a/ec2.py b/ec2.py
index 39ee904..4d18524 100644
--- a/ec2.py
+++ b/ec2.py
@@ -384,6 +384,18 @@ def parse_instance(instance_type, product_attributes, api_description):
                 "ips_per_eni": netinfo["Ipv4AddressesPerInterface"],
             }
 
+    if api_description:
+        if "EbsInfo" in api_description:
+            if "EbsOptimizedInfo" in api_description["EbsInfo"]:
+                ebs_optimized_info = api_description["EbsInfo"]["EbsOptimizedInfo"]
+                i.ebs_optimized = True
+                i.ebs_baseline_throughput = ebs_optimized_info['BaselineThroughputInMBps']
+                i.ebs_baseline_iops = ebs_optimized_info['BaselineIops']
+                i.ebs_baseline_bandwidth = ebs_optimized_info['BaselineBandwidthInMbps']
+                i.ebs_throughput = ebs_optimized_info['MaximumThroughputInMBps']
+                i.ebs_iops = ebs_optimized_info['MaximumIops']
+                i.ebs_max_bandwidth = ebs_optimized_info['MaximumBandwidthInMbps']
+
     try:
         ecu = product_attributes.get("ecu")
         if ecu == "Variable":
diff --git a/rds.py b/rds.py
index ad82a7c..a49fad0 100644
--- a/rds.py
+++ b/rds.py
@@ -83,94 +83,21 @@ def totext(elt):
 
 
 def add_ebs_info(instances):
-    """
-    Six tables on this page:
-
-    5 of them: EBS optimized by default and baseline:
-      Baseline performance metrics for instances with asterisk (unsupported for now, see comment below)
-        Instance type | Maximum bandwidth (Mib/s) | Maximum throughput (MiB/s, 128 KiB I/O) | Maximum IOPS (16 KiB I/O)
-        Instance type | Baseline bandwidth (Mib/s) | Baseline throughput (MiB/s, 128 KiB I/O) | Baseline IOPS (16 KiB I/O)
-
-    Table 6: Not EBS optimized by default
-        Instance type | Maximum bandwidth (Mib/s) | Maximum throughput (MiB/s, 128 KiB I/O) | Maximum IOPS (16 KiB I/O)
-
-    TODO: Support the asterisk on type names in the first table, which means:
-        "These instance types can support maximum performance for 30 minutes at least once every 24 hours. For example,
-        c5.large instances can deliver 281 MB/s for 30 minutes at least once every 24 hours. If you have a workload
-        that requires sustained maximum performance for longer than 30 minutes, select an instance type based on the
-        following baseline performance."
-
-    """
-
-    def parse_ebs_combined_table(by_type, table):
-        for row in table.xpath("tr"):
-            if row.xpath("th"):
-                continue
-            cols = row.xpath("td")
-            instance_type = sanitize_instance_type(totext(cols[0]))[:-1]
-
-            if len(cols) == 4:
-                ebs_baseline_bandwidth = locale.atof(totext(cols[1]))
-                ebs_baseline_throughput = locale.atof(totext(cols[2]))
-                ebs_baseline_iops = locale.atof(totext(cols[3]))
-                ebs_max_bandwidth = locale.atof(totext(cols[1]))
-                ebs_throughput = locale.atof(totext(cols[2]))
-                ebs_iops = locale.atof(totext(cols[3]))
-            elif len(cols) == 7:
-                ebs_baseline_bandwidth = locale.atof(totext(cols[1]))
-                ebs_max_bandwidth = locale.atof(totext(cols[2]))
-                ebs_baseline_throughput = locale.atof(totext(cols[3]))
-                ebs_throughput = locale.atof(totext(cols[4]))
-                ebs_baseline_iops = locale.atof(totext(cols[5]))
-                ebs_iops = locale.atof(totext(cols[6]))
-
-            instance_type = "db." + instance_type
-            if instance_type in by_type:
-                by_type[instance_type]["ebs_optimized"] = True
-                by_type[instance_type]["ebs_optimized_by_default"] = True
-                by_type[instance_type][
-                    "ebs_baseline_throughput"
-                ] = ebs_baseline_throughput
-                by_type[instance_type]["ebs_baseline_iops"] = ebs_baseline_iops
-                by_type[instance_type][
-                    "ebs_baseline_bandwidth"
-                ] = ebs_baseline_bandwidth
-                by_type[instance_type]["ebs_throughput"] = ebs_throughput
-                by_type[instance_type]["ebs_iops"] = ebs_iops
-                by_type[instance_type]["ebs_max_bandwidth"] = ebs_max_bandwidth
-
-    def parse_ebs_nondefault_table(by_type, table):
-        for row in table.xpath("tr"):
-            if row.xpath("th"):
-                continue
-            cols = row.xpath("td")
-            instance_type = sanitize_instance_type(totext(cols[0]))[:-1]
-            ebs_max_bandwidth = locale.atof(totext(cols[1]))
-            ebs_throughput = locale.atof(totext(cols[2]))
-            ebs_iops = locale.atof(totext(cols[3]))
-
-            instance_type = "db." + instance_type
-            if instance_type not in by_type:
-                print(f"ERROR: Ignoring EBS info for unknown instance {instance_type}")
-            else:
-                if ebs_max_bandwidth:
-                    by_type[instance_type]["ebs_optimized"] = True
-                    by_type[instance_type]["ebs_optimized_by_default"] = False
-                    by_type[instance_type]["ebs_throughput"] = ebs_throughput
-                    by_type[instance_type]["ebs_iops"] = ebs_iops
-                    by_type[instance_type]["ebs_max_bandwidth"] = ebs_max_bandwidth
 
     by_type = {k: v for k, v in instances.items()}
-    # Canonical URL for this info is https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-optimized.html
-    # ebs_url = "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-optimized.partial.html"
-    ebs_url = "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-optimized.html"
-    tree = etree.parse(urllib2.urlopen(ebs_url), etree.HTMLParser())
-    tables = tree.xpath('//div[@class="table-contents"]//table')
-    for t in [0, 1, 2, 3, 4]:
-        parse_ebs_combined_table(by_type, tables[t])
-
-    parse_ebs_nondefault_table(by_type, tables[5])
+    ec2_instances = ec2.get_instances()
+
+    for i in ec2_instances:
+        instance_type = "db." + i.instance_type
+
+        if instance_type in by_type:
+            by_type[instance_type]["ebs_optimized"] = i.ebs_optimized
+            by_type[instance_type]["ebs_baseline_throughput"] = i.ebs_baseline_throughput
+            by_type[instance_type]["ebs_baseline_iops"] = i.ebs_baseline_iops
+            by_type[instance_type]["ebs_baseline_bandwidth"] = i.ebs_baseline_bandwidth
+            by_type[instance_type]["ebs_throughput"] = i.ebs_throughput
+            by_type[instance_type]["ebs_iops"] = i.ebs_iops
+            by_type[instance_type]["ebs_max_bandwidth"] = i.ebs_max_bandwidth
 
 def scrape(output_file, input_file=None):
     # if an argument is given, use that as the path for the json file
diff --git a/scrape.py b/scrape.py
index 334c825..5a8c988 100755
--- a/scrape.py
+++ b/scrape.py
@@ -282,7 +282,7 @@ def add_eni_info(instances):
 
         if instance_type not in by_type:
             print(
-                "WARNING: Ignoring ENI data for unknown instance type: {}".format(
+                "WARNING: Ignoring data for unknown instance type: {}".format(
                     instance_type
                 )
             )
@@ -297,95 +297,6 @@ def add_eni_info(instances):
                 "ips_per_eni": ip_per_eni,
             }
 
-
-def add_ebs_info(instances):
-    """
-    Six tables on this page:
-
-    5 of them: EBS optimized by default and baseline:
-      Baseline performance metrics for instances with asterisk (unsupported for now, see comment below)
-        Instance type | Maximum bandwidth (Mib/s) | Maximum throughput (MiB/s, 128 KiB I/O) | Maximum IOPS (16 KiB I/O)
-        Instance type | Baseline bandwidth (Mib/s) | Baseline throughput (MiB/s, 128 KiB I/O) | Baseline IOPS (16 KiB I/O)
-
-    Table 6: Not EBS optimized by default
-        Instance type | Maximum bandwidth (Mib/s) | Maximum throughput (MiB/s, 128 KiB I/O) | Maximum IOPS (16 KiB I/O)
-
-    TODO: Support the asterisk on type names in the first table, which means:
-        "These instance types can support maximum performance for 30 minutes at least once every 24 hours. For example,
-        c5.large instances can deliver 281 MB/s for 30 minutes at least once every 24 hours. If you have a workload
-        that requires sustained maximum performance for longer than 30 minutes, select an instance type based on the
-        following baseline performance."
-
-    """
-
-    def parse_ebs_combined_table(by_type, table):
-        for row in table.xpath("tr"):
-            if row.xpath("th"):
-                continue
-            cols = row.xpath("td")
-            # remove last character which is a superscript with other info
-            instance_type = sanitize_instance_type(totext(cols[0]))[:-1]
-
-            if len(cols) == 4:
-                ebs_baseline_bandwidth = locale.atof(totext(cols[1]))
-                ebs_baseline_throughput = locale.atof(totext(cols[2]))
-                ebs_baseline_iops = locale.atof(totext(cols[3]))
-                ebs_max_bandwidth = locale.atof(totext(cols[1]))
-                ebs_throughput = locale.atof(totext(cols[2]))
-                ebs_iops = locale.atof(totext(cols[3]))
-            elif len(cols) == 7:
-                ebs_baseline_bandwidth = locale.atof(totext(cols[1]))
-                ebs_max_bandwidth = locale.atof(totext(cols[2]))
-                ebs_baseline_throughput = locale.atof(totext(cols[3]))
-                ebs_throughput = locale.atof(totext(cols[4]))
-                ebs_baseline_iops = locale.atof(totext(cols[5]))
-                ebs_iops = locale.atof(totext(cols[6]))
-
-            if instance_type not in by_type:
-                print(f"ERROR: Ignoring EBS info for unknown instance {instance_type}")
-            else:
-                by_type[instance_type].ebs_optimized = True
-                by_type[instance_type].ebs_optimized_by_default = True
-                by_type[instance_type].ebs_baseline_throughput = ebs_baseline_throughput
-                by_type[instance_type].ebs_baseline_iops = ebs_baseline_iops
-                by_type[instance_type].ebs_baseline_bandwidth = ebs_baseline_bandwidth
-                by_type[instance_type].ebs_throughput = ebs_throughput
-                by_type[instance_type].ebs_iops = ebs_iops
-                by_type[instance_type].ebs_max_bandwidth = ebs_max_bandwidth
-
-    def parse_ebs_nondefault_table(by_type, table):
-        for row in table.xpath("tr"):
-            if row.xpath("th"):
-                continue
-            cols = row.xpath("td")
-            # remove last character which is a superscript with other info
-            instance_type = sanitize_instance_type(totext(cols[0]))[:-1]
-            ebs_max_bandwidth = locale.atof(totext(cols[1]))
-            ebs_throughput = locale.atof(totext(cols[2]))
-            ebs_iops = locale.atof(totext(cols[3]))
-
-            if instance_type not in by_type:
-                print(f"ERROR: Ignoring EBS info for unknown instance {instance_type}")
-            else:
-                if ebs_max_bandwidth:
-                    by_type[instance_type].ebs_optimized = True
-                    by_type[instance_type].ebs_optimized_by_default = False
-                    by_type[instance_type].ebs_throughput = ebs_throughput
-                    by_type[instance_type].ebs_iops = ebs_iops
-                    by_type[instance_type].ebs_max_bandwidth = ebs_max_bandwidth
-
-    by_type = {i.instance_type: i for i in instances}
-    # Canonical URL for this info is https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-optimized.html
-    # ebs_url = "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-optimized.partial.html"
-    ebs_url = "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-optimized.html"
-    tree = etree.parse(urllib2.urlopen(ebs_url), etree.HTMLParser())
-    tables = tree.xpath('//div[@class="table-contents"]//table')
-    for t in [0, 1, 2, 3, 4]:
-        parse_ebs_combined_table(by_type, tables[t])
-
-    parse_ebs_nondefault_table(by_type, tables[5])
-
-
 def add_linux_ami_info(instances):
     """Add information about which virtualization options are supported.
 
@@ -1316,8 +1227,6 @@ def scrape(data_file):
     add_pricing_info(all_instances)
     print("Parsing ENI info...")
     add_eni_info(all_instances)
-    print("Parsing EBS info...")
-    add_ebs_info(all_instances)
     print("Parsing Linux AMI info...")
     add_linux_ami_info(all_instances)
     print("Parsing VPC-only info...")