Skip to content

Commit

Permalink
Returns the number of allocated clusters in a Qcow
Browse files Browse the repository at this point in the history
This script returns the number of allocated clusters in a Qcow file.

Signed-off-by: Guillaume <[email protected]>
  • Loading branch information
gthvn1 committed Nov 1, 2024
1 parent bc8ee2e commit 303c6b0
Showing 1 changed file with 177 additions and 0 deletions.
177 changes: 177 additions & 0 deletions scripts/get_allocated_cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#!/usr/bin/env python3

import struct
import sys
from typing import BinaryIO, Dict, List

# Magic number expected in the first four bytes of a QCOW2 file: b"QFI\xfb".
QCOW2_MAGIC = 0x514649FB
# Number of bytes read from the start of the file by read_qcow2_header. Only
# the first 48 bytes are decoded there: the last field that is needed
# (l1_table_offset) sits at offsets 40-47.
QCOW2_HEADER_SIZE = 104


def read_qcow2_header(file: BinaryIO) -> Dict[str, int]:
    """Return a dict containing selected fields of the QCOW2 header.

    Args:
        file: The qcow2 file object, opened in binary mode. Its position is
            moved by this call.
    Returns:
        dict: version, virtual_disk_size, cluster_bits, l1_size and
            l1_table_offset.
    Raises:
        ValueError: if the header is truncated or the qcow2 magic is not
            recognized.
    """
    # The header starts as follows (every field is big-endian):
    #
    # magic: u32,                    // Magic string "QFI\xfb"
    # version: u32,                  // Version (2 or 3)
    # backing_file_offset: u64,      // Offset to the backing file name
    # backing_file_size: u32,        // Size of the backing file name
    # cluster_bits: u32,             // Bits used for addressing within a cluster
    # size: u64,                     // Virtual disk size
    # crypt_method: u32,             // 0 = no encryption, 1 = AES encryption
    # l1_size: u32,                  // Number of entries in the L1 table
    # l1_table_offset: u64,          // Offset to the active L1 table
    # refcount_table_offset: u64,    // Offset to the refcount table
    # refcount_table_clusters: u32,  // Number of clusters for the refcount table
    # nb_snapshots: u32,             // Number of snapshots in the image
    # snapshots_offset: u64,         // Offset to the snapshot table
    #
    # Only the fields up to and including l1_table_offset are decoded.
    decoded_format = ">IIQIIQIIQ"
    decoded_size = struct.calcsize(decoded_format)

    file.seek(0)
    header = file.read(QCOW2_HEADER_SIZE)

    # A file too short to hold the decoded fields cannot be a qcow2 image;
    # report it with the documented exception instead of leaking struct.error.
    if len(header) < decoded_size:
        raise ValueError("Not a valid QCOW2 file: header is truncated")

    magic, version, _, _, cluster_bits, size, _, l1_size, l1_table_offset = (
        struct.unpack_from(decoded_format, header)
    )

    if magic != QCOW2_MAGIC:
        raise ValueError("Not a valid QCOW2 file")

    return {
        "version": version,
        "virtual_disk_size": size,
        "cluster_bits": cluster_bits,
        "l1_size": l1_size,
        "l1_table_offset": l1_table_offset,
    }


def get_l1_entries(file: BinaryIO, header: Dict[str, int]) -> List[int]:
    """Read the L1 table and return its entries as integers.

    Args:
        file: The qcow2 file object.
        header: The header returned by read_qcow2_header; the keys
            "l1_table_offset" and "l1_size" are used.
    Returns:
        list: The L1 entries in file order. Fewer than l1_size entries are
            returned when the file ends before the table does.
    """
    table_start = header["l1_table_offset"]
    table_bytes = header["l1_size"] * 8  # one big-endian u64 per L1 entry

    file.seek(table_start)
    raw_table = file.read(table_bytes)

    # iter_unpack yields 1-tuples; unwrap each one to a plain int.
    return [l1_entry for (l1_entry,) in struct.iter_unpack(">Q", raw_table)]


def get_l2_offsets(file: BinaryIO, header: Dict[str, int]) -> List[int]:
    """Return the offsets of the allocated L2 tables, taken from the L1 table.

    Args:
        file: The qcow2 file object.
        header: The header returned by read_qcow2_header.
    Returns:
        list: File offsets of the L2 tables that have been allocated, in L1
            table order. L1 entries whose offset field is 0 are skipped.
    """
    # L1 table entry:
    # Bit 00 - 08 : Reserved (set to 0)
    #     09 - 55 : Bits 9-55 of the offset into the image file at which the L2
    #               table starts.
    #     56 - 62 : Reserved (set to 0)
    #          63 : 0 for an L2 table that is unused or requires COW,
    #               1 if its refcount is exactly one. This information is only
    #               accurate in the active L1 table.
    l2_offset_mask = 0x00FFFFFF_FFFFFE00  # keeps bits 9-55 only

    # Mask first, then filter: an entry carrying only flag/reserved bits has
    # no L2 table behind it and must not be reported as offset 0.
    masked_offsets = (
        entry & l2_offset_mask for entry in get_l1_entries(file, header)
    )
    return [offset for offset in masked_offsets if offset != 0]


def get_l2_entries(
    file: BinaryIO, l2_offset: int, l2_table_size: int = 65536
) -> List[int]:
    """Return the list of L2 entries of the L2 table at a given offset.

    Args:
        file: The qcow2 file object.
        l2_offset: Offset in the file at which the L2 table starts.
        l2_table_size: Size in bytes of the L2 table. An L2 table occupies
            one cluster, so callers should pass ``1 << cluster_bits``.
            Defaults to 65536 (64 KiB clusters).
    Returns:
        list: List of L2 entries. Fewer entries are returned when the file
            ends before the table does.
    """
    # Each L2 entry is an 8-byte big-endian integer.
    # NOTE(review): images using extended (16-byte) L2 entries are not
    # handled here.
    file.seek(l2_offset)
    l2_table = file.read(l2_table_size)

    return [entry for (entry,) in struct.iter_unpack(">Q", l2_table)]


def get_l2_bitmap_allocations(
    file: BinaryIO, l2_offset: int, l2_table_size: int = 65536
) -> List[int]:
    """Return a bitmap of allocated entries for a given L2 table.

    Args:
        file: The qcow2 file object.
        l2_offset: The offset of an L2 table.
        l2_table_size: Size in bytes of the L2 table (one cluster). Defaults
            to 65536 (64 KiB clusters).
    Returns:
        list: One item per L2 entry: 1 if the entry is non-zero (treated as
            allocated), 0 otherwise.
    """
    # An L2 entry is 8 bytes and looks like:
    # Bit 00 - 61: Cluster descriptor
    #          62: 0 for standard clusters
    #              1 for compressed clusters
    #          63: 0 for a cluster that is unused or requires COW,
    #              1 if its refcount is exactly one.
    # Currently we are only interested in whether an entry is allocated, so
    # any non-zero entry is reported as 1 and an all-zero entry as 0.
    return [
        0 if entry == 0 else 1
        for entry in get_l2_entries(file, l2_offset, l2_table_size)
    ]


def get_number_of_allocated_clusters(file: BinaryIO, verbose: bool = False) -> int:
    """Return the number of allocated clusters.

    Args:
        file: The qcow2 file object.
        verbose: When True, print the decoded header and the L2 table
            offsets before counting.
    Returns:
        int: The number of allocated clusters for a given file.
    Raises:
        ValueError: if the file is not a qcow2 image or its cluster_bits
            value is outside the supported range.
    """
    header = read_qcow2_header(file)

    # The qcow2 format requires cluster_bits >= 9 and QEMU does not open
    # images with clusters larger than 2 MiB (cluster_bits == 21). Rejecting
    # anything else also avoids a nonsensical read size on a corrupt header.
    cluster_bits = header["cluster_bits"]
    if not 9 <= cluster_bits <= 21:
        raise ValueError(f"Unsupported cluster_bits value: {cluster_bits}")

    # An L2 table is exactly one cluster long.
    l2_table_size = 1 << cluster_bits

    l2_offsets = get_l2_offsets(file, header)

    if verbose:
        print(f"{header}")
        l2_offsets_hex = [f"0x{offset:0x}" for offset in l2_offsets]
        print("L2 offsets: ", " ".join(l2_offsets_hex))

    # For every L2 table, a '1' in its bitmap means the cluster is allocated
    # and a '0' means the opposite, so summing all bitmaps gives the total
    # number of allocated clusters.
    return sum(
        sum(get_l2_bitmap_allocations(file, l2_offset, l2_table_size))
        for l2_offset in l2_offsets
    )


# Usage: ./get_allocated_cluster.py <file.qcow2>
if __name__ == "__main__":
    # Without a path argument, print the usage on stderr and exit with a
    # non-zero status instead of crashing with an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <file.qcow2>")

    qcow2_file_path = sys.argv[1]
    with open(qcow2_file_path, "rb") as qcow2_file:
        allocated_clusters = get_number_of_allocated_clusters(qcow2_file, True)
    print(f"clusters allocated: {allocated_clusters}")

0 comments on commit 303c6b0

Please sign in to comment.