Skip to content

Commit 1ca33d8

Browse files
committed
pci: expand sub-page VFIO BAR mmap to page size
On aarch64 with 64K host pages, VFIO passthrough of devices with sub-page BARs (e.g. 16K NVMe BAR0) crashes with EINVAL from KVM_SET_USER_MEMORY_REGION, which requires memory_size to be a multiple of the host page size.

Expand the mmap to page size instead of rejecting it, matching QEMU's approach. The kernel's vfio_pci_probe_mmaps() already verifies that sub-page BARs are page-aligned and reserves the remainder of the page, so this is safe.

To prevent the expanded KVM memory slot from overlapping the relocated MSI-X trap region, fixup_msix_region() now ensures the lower half of the virtual BAR is at least one host page.

Signed-off-by: Saravanan D <saravanand@crusoe.ai>
1 parent 5ca6ff8 commit 1ca33d8

1 file changed

Lines changed: 52 additions & 7 deletions

File tree

pci/src/vfio.rs

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,14 @@ use anyhow::anyhow;
1515
use byteorder::{ByteOrder, LittleEndian};
1616
use hypervisor::HypervisorVmError;
1717
use libc::{_SC_PAGESIZE, sysconf};
18-
use log::{error, info};
18+
use log::{error, info, warn};
1919
use serde::{Deserialize, Serialize};
2020
use thiserror::Error;
2121
use vfio_bindings::bindings::vfio::*;
2222
use vfio_ioctls::{VfioDevice, VfioIrq, VfioOps, VfioRegionInfoCap, VfioRegionSparseMmapArea};
2323
use vm_allocator::page_size::{
24-
align_page_size_down, align_page_size_up, is_4k_aligned, is_4k_multiple, is_page_size_aligned,
24+
align_page_size_down, align_page_size_up, get_page_size, is_4k_aligned, is_4k_multiple,
25+
is_page_size_aligned,
2526
};
2627
use vm_allocator::{AddressAllocator, MemorySlotAllocator, SystemAllocator};
2728
use vm_device::dma_mapping::ExternalDmaMapping;
@@ -585,9 +586,14 @@ impl VfioCommon {
585586

586587
let (pba_offset, pba_size) = msix_cap.pba_range();
587588
let msix_sz = align_page_size_up(table_size + pba_size);
588-
// Expand region to hold RW and trap region which both page size aligned
589-
let size = std::cmp::max(region_size * 2, msix_sz * 2);
590-
// let table starts from the middle of the region
589+
// Double the virtual BAR so the lower half is mmapped for direct
590+
// guest access and the upper half holds the relocated MSI-X
591+
// (trapped by the VMM). Use max(region_size, page_size) so the
592+
// lower half is at least one host page. This lets
593+
// map_mmio_regions() expand a sub-page BAR mmap to page size
594+
// without the KVM memory slot overlapping the relocated MSI-X.
595+
let size = std::cmp::max(std::cmp::max(region_size, get_page_size()) * 2, msix_sz * 2);
596+
// Relocate MSI-X table to the start of the upper half
591597
msix_cap.table_set_offset((size / 2) as u32);
592598
msix_cap.pba_set_offset((size / 2 + pba_offset - table_offset) as u32);
593599

@@ -1677,8 +1683,47 @@ impl VfioPciDevice {
16771683
)?;
16781684

16791685
for area in sparse_areas.iter() {
1686+
let page_size = get_page_size();
1687+
let mmap_len = if area.size < page_size {
1688+
// KVM_SET_USER_MEMORY_REGION requires memory_size to be a
1689+
// multiple of the host page size. On aarch64 with 64K pages
1690+
// a device BAR can be smaller than a page (e.g. 16K NVMe
1691+
// BAR).
1692+
//
1693+
// The kernel only sets VFIO_REGION_INFO_FLAG_MMAP on sub-page
1694+
// BARs after verifying the physical BAR start is page-aligned
1695+
// and reserving the rest of the page. We expand the mmap to
1696+
// page size for direct guest MMIO access, matching QEMU.
1697+
//
1698+
// fixup_msix_region() guarantees MSI-X is relocated to at
1699+
// least page_size offset, so the one-page KVM memory slot
1700+
// cannot overlap the MSI-X trap region.
1701+
//
1702+
// Only expand when the sparse area offset and guest physical
1703+
// address are both page-aligned. A non-aligned offset from
1704+
// MSI-X carving could overlap other trap regions.
1705+
let gpa = region.start.0 + area.offset;
1706+
if !is_page_size_aligned(area.offset)
1707+
|| !is_page_size_aligned(gpa)
1708+
{
1709+
warn!(
1710+
"Skipping mmap of sub-page sparse area \
1711+
(offset 0x{:x}, size 0x{:x}): not page-aligned",
1712+
area.offset, area.size,
1713+
);
1714+
continue;
1715+
}
1716+
info!(
1717+
"Expanding sub-page sparse area mmap from 0x{:x} to \
1718+
page size 0x{:x}",
1719+
area.size, page_size,
1720+
);
1721+
page_size
1722+
} else {
1723+
area.size
1724+
};
16801725
let mapping = match MmapRegion::mmap(
1681-
area.size,
1726+
mmap_len,
16821727
prot,
16831728
fd,
16841729
mmap_offset,
@@ -1689,7 +1734,7 @@ impl VfioPciDevice {
16891734
error!(
16901735
"Could not mmap sparse area (offset = 0x{:x}, size = 0x{:x}): {}",
16911736
mmap_offset,
1692-
area.size,
1737+
mmap_len,
16931738
std::io::Error::last_os_error()
16941739
);
16951740
return Err(VfioPciError::MmapArea);

0 commit comments

Comments
 (0)