Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 75 additions & 54 deletions dissect/hypervisor/disk/asif.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@ class ASIF:
framework, as well as through Disk Utility.

An ASIF file is pretty straightforward. There's a small header which, among some other details, contains two
directory offsets. Each directory contains a list of tables, which in turn contain a list of data entries. Each data
entry points to a chunk of data in the ASIF file. The chunk size is defined in the header and is typically 1 MiB.
The chunk size is always a multiple of the block size, which is also defined in the header (typically 512 bytes).
directory offsets. Each directory contains a list of tables, and each table contains a list of data and bitmap
entries. Each data entry points to a chunk of data in the ASIF file, and each bitmap entry points to a bitmap that
covers a group of data entries. We call a group of data entries together with its bitmap entry a "chunk group".

The chunk size is defined in the header and is typically 1 MiB. The chunk size is always a multiple of the block
size, which is also defined in the header (typically 512 bytes).

Each directory has a version number, and the directory with the highest version number is the active directory. This
allows for atomic updates of the directory/table data.

Expand Down Expand Up @@ -55,33 +59,35 @@ def __init__(self, fh: BinaryIO):
self.size = self.header.sector_count * self.block_size
self.max_size = self.header.max_sector_count * self.block_size

# The following math is taken from the assembly with some creative variable naming
# It's possible that some of this can be simplified or the names improved
self._blocks_per_chunk = self.chunk_size // self.block_size

# This check doesn't really make sense, but keep it in for now
reserved_size = 4 * self.chunk_size
self._num_reserved_table_entries = (
1 if reserved_size < self._blocks_per_chunk else reserved_size // self._blocks_per_chunk
)
# Table entries are grouped into "chunk groups", with each group followed by a bitmap that covers that group
# We need to calculate how large a chunk group is, and how many chunk groups we can fit in a table

self._max_table_entries = self.chunk_size >> 3
self._num_table_entries = self._max_table_entries - (
self._max_table_entries % (self._num_reserved_table_entries + 1)
)
self._num_reserved_directory_entries = (self._num_reserved_table_entries + self._num_table_entries) // (
self._num_reserved_table_entries + 1
)
self._num_usable_entries = self._num_table_entries - self._num_reserved_directory_entries
# This is the size in bytes of data covered by a single table
self._size_per_table = self._num_usable_entries * self.chunk_size
# A bitmap uses 2 bits per block (not chunk), so a single byte in the bitmap covers 4 blocks
# A bitmap is 1 chunk large in bytes, so a single bitmap can cover 4 * chunk_size blocks
# A bitmap covers one chunk group, so the number of blocks per chunk group is equal to the bitmap coverage
num_blocks_per_group = 4 * self.chunk_size

max_size = self.block_size * self.header.max_sector_count
self._num_directory_entries = (self._size_per_table + max_size - 1) // self._size_per_table
# Derive the number of chunks per group from this
self._num_chunks_per_group = max(1, num_blocks_per_group // self._blocks_per_chunk)

self._aligned_table_size = (
(self.block_size + 8 * self._num_table_entries - 1) // self.block_size * self.block_size
)
# A table is 1 chunk large, so we start with the number of entries (uint64) that fit inside a single chunk
num_words_per_chunk = self.chunk_size // 8
# A chunk group has one entry for each chunk, plus one entry for the bitmap
num_entries_per_group = self._num_chunks_per_group + 1

num_groups_per_table, num_remaining = divmod(num_words_per_chunk, num_entries_per_group)
# The number of total entries in a table, including both chunk and bitmap entries
self._num_table_entries = num_words_per_chunk - num_remaining

# Calculate the size in bytes of data covered by a single table
num_chunk_entries_per_table = self._num_table_entries - num_groups_per_table
self._size_per_table = num_chunk_entries_per_table * self.chunk_size

# Calculate the maximum size of the virtual disk, and the number of tables needed to cover that size
max_size = self.block_size * self.header.max_sector_count
self._num_tables = (self._size_per_table + max_size - 1) // self._size_per_table

self.directories = sorted(
(Directory(self, offset) for offset in self.header.directory_offsets),
Expand Down Expand Up @@ -167,17 +173,21 @@ def entries(self) -> list[int]:
"""List of table entries in the directory."""
# Seek over the version
self.asif.fh.seek(self.offset + 8)
return c_asif.uint64[self.asif._num_directory_entries](self.asif.fh)
return c_asif.uint64[self.asif._num_tables](self.asif.fh)

def table(self, index: int) -> Table:
def table(self, index: int) -> Table | None:
"""Get a table from the directory.

Args:
index: Index of the table in the directory.
"""
if index >= self.asif._num_directory_entries:
if index >= self.asif._num_tables:
raise IndexError("Table index out of range")
return Table(self, index)

if (entry := self.entries[index]) == 0:
return None

return Table(self.asif, index, entry * self.asif.chunk_size)


class Table:
Expand All @@ -186,27 +196,36 @@ class Table:
A table contains a list of data and bitmap entries (``uint64[]``). Each data entry is a chunk number and points to a
chunk of data in the ASIF image. Each table covers a fixed amount of data in the virtual disk.

Data entries have 55 bits usable for the chunk number and 9 bits reserved for flags.
Data entries have 55 bits usable for the chunk number and 9 bits for flags, 7 of which are reserved.

.. rubric :: Encoding
.. code-block:: c

0b00000000 01111111 11111111 11111111 11111111 11111111 11111111 11111111 (chunk number)
0b00111111 10000000 00000000 00000000 00000000 00000000 00000000 00000000 (reserved)
0b01000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 (entry dirty)
0b10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 (content dirty)
0b11000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 (flags)

The following flags are known for data entries:

.. rubric :: Flags
.. code-block:: c

0b00 (uninitialized)
0b01 (fully initialized)
0b10 (unmapped)
0b11 (has bitmap)

Args:
directory: The directory this table belongs to.
asif: The ASIF image this table belongs to.
index: Index of the table in the directory.
offset: Offset of the table in the ASIF image.
"""

def __init__(self, directory: Directory, index: int):
self.asif = directory.asif
self.directory = directory
def __init__(self, asif: ASIF, index: int, offset: int):
self.asif = asif
self.index = index
self.offset = offset

self.offset = self.directory.entries[index] * self.asif.chunk_size
self.virtual_offset = index * self.asif._size_per_table

def __repr__(self) -> str:
Expand Down Expand Up @@ -236,25 +255,27 @@ def __init__(self, asif: ASIF, reserved: bool = False):
def _read(self, offset: int, length: int) -> bytes:
result = []
while length:
table = self.directory.table(offset // self.asif._size_per_table)
relative_block_index = (offset // self.asif.block_size) - (table.virtual_offset // self.asif.block_size)
data_idx = (
relative_block_index // self.asif._blocks_per_chunk
+ relative_block_index // self.asif._blocks_per_chunk * self.asif._num_reserved_table_entries
) // self.asif._num_reserved_table_entries

# 0x8000000000000000 = content dirty bit
# 0x4000000000000000 = entry dirty bit
# 0x3F80000000000000 = reserved bits
chunk = table.entries[data_idx] & 0x7FFFFFFFFFFFFF
raw_offset = chunk * self.asif.chunk_size

read_length = min(length, self.asif.chunk_size)
if chunk == 0:
if (table := self.directory.table(offset // self.asif._size_per_table)) is None:
read_length = min(length, self.asif._size_per_table)
result.append(b"\x00" * read_length)
else:
self.asif.fh.seek(raw_offset)
result.append(self.asif.fh.read(read_length))
# Calculate the relative chunk index within the table
relative_block_index = (offset // self.asif.block_size) - (table.virtual_offset // self.asif.block_size)
relative_chunk_index = relative_block_index // self.asif._blocks_per_chunk

# Calculate the chunk group
chunk_group = relative_chunk_index // self.asif._num_chunks_per_group
# Each chunk group has a bitmap entry, so we need to account for that in the entry index
entry_index = relative_chunk_index + chunk_group

chunk = table.entries[entry_index] & 0x7FFFFFFFFFFFFF

read_length = min(length, self.asif.chunk_size)
if chunk == 0:
result.append(b"\x00" * read_length)
else:
self.asif.fh.seek(chunk * self.asif.chunk_size)
result.append(self.asif.fh.read(read_length))

offset += read_length
length -= read_length
Expand Down
5 changes: 5 additions & 0 deletions tests/disk/test_asif.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,8 @@ def test_asif() -> None:
with asif.open() as stream:
for i in range(100):
assert stream.read(1024 * 1024).strip(bytes([i])) == b"", f"Mismatch at offset {i * 1024 * 1024:#x}"

with asif.open(reserved=True) as stream:
assert asif.active_directory.table(1337) is None
stream.seek(1337 * asif._size_per_table)
assert stream.read(asif.chunk_size) == b"\x00" * asif.chunk_size
Loading