forked from xapi-project/sm
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Returns the number of allocated clusters in a QCOW2 file
This script returns the number of allocated clusters in a QCOW2 file. Signed-off-by: Guillaume <[email protected]>
- Loading branch information
Showing
1 changed file
with
177 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import struct | ||
import sys | ||
from typing import BinaryIO, Dict, List | ||
|
||
QCOW2_MAGIC = 0x514649FB  # b"QFI\xfb": Magic number for QCOW2 files
# Number of bytes read from the start of the file. Only the first 48 bytes are
# parsed: the last field we need (l1_table_offset) is at offset 40-47.
QCOW2_HEADER_SIZE = 104


def read_qcow2_header(file: BinaryIO) -> Dict[str, int]:
    """Return the QCOW2 header fields needed to walk the L1/L2 tables.

    Args:
        file: The qcow2 file object, opened in binary mode.

    Returns:
        dict: version, virtual_disk_size, cluster_bits, l1_size and
            l1_table_offset.

    Raises:
        ValueError: if the file is too short to contain the parsed header
            fields, or if the qcow2 magic is not recognized.
    """
    # The header is as follows (all fields are big-endian):
    #
    # magic: u32,                   // Magic string "QFI\xfb"
    # version: u32,                 // Version (2 or 3)
    # backing_file_offset: u64,     // Offset to the backing file name
    # backing_file_size: u32,       // Size of the backing file name
    # cluster_bits: u32,            // Bits used for addressing within a cluster
    # size: u64,                    // Virtual disk size
    # crypt_method: u32,            // 0 = no encryption, 1 = AES encryption
    # l1_size: u32,                 // Number of entries in the L1 table
    # l1_table_offset: u64,         // Offset to the active L1 table
    # refcount_table_offset: u64,   // Offset to the refcount table
    # refcount_table_clusters: u32, // Number of clusters for the refcount table
    # nb_snapshots: u32,            // Number of snapshots in the image
    # snapshots_offset: u64,        // Offset to the snapshot table
    #
    # Only the fields up to and including l1_table_offset are decoded.
    header_format = ">IIQIIQIIQ"
    parsed_size = struct.calcsize(header_format)

    file.seek(0)
    header = file.read(QCOW2_HEADER_SIZE)

    # A short read would otherwise surface as an undocumented struct.error.
    if len(header) < parsed_size:
        raise ValueError("File is too short to contain a QCOW2 header")

    magic, version, _, _, cluster_bits, size, _, l1_size, l1_table_offset = (
        struct.unpack_from(header_format, header)
    )

    if magic != QCOW2_MAGIC:
        raise ValueError("Not a valid QCOW2 file")

    return {
        "version": version,
        "virtual_disk_size": size,
        "cluster_bits": cluster_bits,
        "l1_size": l1_size,
        "l1_table_offset": l1_table_offset,
    }
|
||
|
||
def get_l1_entries(file: BinaryIO, header: Dict[str, int]) -> List[int]:
    """Read the L1 table and decode it into a list of integers.

    Args:
        file: The qcow2 file object.
        header: The header returned by read_qcow2_header; its
            "l1_table_offset" and "l1_size" keys are used.

    Returns:
        list: The L1 entries, in table order, as unsigned 64-bit values.
    """
    entry_size = 8  # Each L1 entry is a big-endian u64
    file.seek(header["l1_table_offset"])
    raw_table = file.read(header["l1_size"] * entry_size)
    return [value for (value,) in struct.iter_unpack(">Q", raw_table)]
|
||
|
||
def get_l2_offsets(file: BinaryIO, header: Dict[str, int]) -> List[int]:
    """Return the offsets of the allocated L2 tables found in the L1 table.

    Args:
        file: The qcow2 file object.
        header: The header returned by read_qcow2_header.

    Returns:
        list: Offsets into the image file of the L2 tables that have been
            allocated, in L1 order. L1 entries whose offset field is zero are
            skipped.
    """
    # L1 table entry:
    # Bit 00 - 08 : Reserved (set to 0)
    #     09 - 55 : Bits 9-55 of the offset into the image file at which the L2
    #               table starts.
    #     56 - 62 : Reserved (set to 0)
    #     63      : 0 for an L2 table that is unused or requires COW,
    #               1 if its refcount is exactly one. This information is only
    #               accurate in the active L1 table.
    #
    # Keep only bits 9-55 so that neither the reserved bits nor the refcount
    # flag can leak into the offset.
    l2_offset_mask = 0x00FFFFFF_FFFFFE00

    offsets = (entry & l2_offset_mask for entry in get_l1_entries(file, header))
    # Filter on the decoded offset rather than the raw entry: an entry that
    # only carries flag/reserved bits has no L2 table, and treating it as
    # "offset 0" would make callers parse the image header as an L2 table.
    return [offset for offset in offsets if offset != 0]
|
||
|
||
def get_l2_entries(file: BinaryIO, l2_offset: int, l2_size: int = 65536) -> List[int]:
    """Return the list of L2 entries of the L2 table at a given offset.

    Args:
        file: The qcow2 file object.
        l2_offset: Offset into the image file at which the L2 table starts.
        l2_size: Size of the L2 table in bytes. An L2 table occupies one
            cluster, so callers should pass the cluster size of the image.
            Defaults to 65536 (64 KiB clusters).

    Returns:
        list: List of L2 entries, decoded as unsigned 64-bit values.
    """
    # Each entry is 8 bytes (big-endian u64). Extended (16-byte) L2 entries
    # are not handled here.
    file.seek(l2_offset)
    l2_table = file.read(l2_size)

    return [entry for (entry,) in struct.iter_unpack(">Q", l2_table)]


def get_l2_bitmap_allocations(
    file: BinaryIO, l2_offset: int, l2_size: int = 65536
) -> List[int]:
    """Return a bitmap of allocated entries for a given L2 table.

    Args:
        file: The qcow2 file object.
        l2_offset: The offset of an L2 table.
        l2_size: Size of the L2 table in bytes (the cluster size of the
            image). Defaults to 65536.

    Returns:
        list: The bitmap of allocated L2 entries. If the entry is
            allocated 1 is set, otherwise 0 is set.
    """
    # Each L2 entry is 8 bytes and looks like:
    # Bit 00 - 61: Cluster descriptor
    #     62:      0 for standard clusters
    #              1 for compressed clusters
    #     63:      0 for a cluster that is unused or requires COW,
    #              1 if its refcount is exactly one.
    # Currently we are only interested in whether an entry is allocated, so
    # any non-zero entry is reported as 1 and a zero entry as 0.

    return [
        0 if entry == 0 else 1 for entry in get_l2_entries(file, l2_offset, l2_size)
    ]


def get_number_of_allocated_clusters(file: BinaryIO, verbose: bool = False) -> int:
    """Return the number of allocated clusters.

    Args:
        file: The qcow2 file object.
        verbose: If True, print the parsed header and the L2 table offsets.

    Returns:
        int: The number of allocated clusters for a given file.

    Raises:
        ValueError: propagated from read_qcow2_header if the file is not a
            QCOW2 image.
    """
    header = read_qcow2_header(file)
    l2_offsets = get_l2_offsets(file, header)

    # An L2 table is exactly one cluster, so its size follows the image's
    # cluster size instead of being assumed to be 64 KiB.
    l2_size = 1 << header["cluster_bits"]

    if verbose:
        print(f"{header}")
        l2_offsets_hex = [f"0x{offset:0x}" for offset in l2_offsets]
        print("L2 offsets: ", " ".join(l2_offsets_hex))

    # For every L2 table, count the allocated clusters. A '1' in the bitmap
    # means the entry is allocated and '0' means the opposite, so summing a
    # bitmap gives the number of allocated clusters for that L2 table.
    return sum(
        sum(get_l2_bitmap_allocations(file, l2_offset, l2_size))
        for l2_offset in l2_offsets
    )
|
||
|
||
# Usage: ./get_allocated_cluster.py <file.qcow2>
if __name__ == "__main__":
    # Fail with a usage message instead of an IndexError traceback when the
    # image path is missing.
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <file.qcow2>", file=sys.stderr)
        sys.exit(1)

    qcow2_file_path = sys.argv[1]
    with open(qcow2_file_path, "rb") as qcow2_file:
        allocated_clusters = get_number_of_allocated_clusters(qcow2_file, True)
    print(f"clusters allocated: {allocated_clusters}")