Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd

Pull iommufd updates from Jason Gunthorpe:

 - The iova_bitmap logic for efficiently reporting dirty pages back to
   userspace has a few more tricky corner case bugs that have been
   resolved and backed with new tests.

   The revised version has simpler logic.

 - Shared branch with iommu for handle support when doing domain attach.

   Handles allow the domain owner to include additional private data on
   a per-device basis.

 - IO Page Fault Reporting to userspace via iommufd. Page faults can be
   generated on fault capable HWPTs when a translation is not present.

   Routing them to userspace would allow a VMM to be able to virtualize
   them into an emulated vIOMMU. This is the next step to fully enabling
   vSVA support.

* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (26 commits)
  iommufd: Put constants for all the uAPI enums
  iommufd: Fix error pointer checking
  iommufd: Add check on user response code
  iommufd: Remove IOMMUFD_PAGE_RESP_FAILURE
  iommufd: Require drivers to supply the cache_invalidate_user ops
  iommufd/selftest: Add coverage for IOPF test
  iommufd/selftest: Add IOPF support for mock device
  iommufd: Associate fault object with iommufd_hw_pgtable
  iommufd: Fault-capable hwpt attach/detach/replace
  iommufd: Add iommufd fault object
  iommufd: Add fault and response message definitions
  iommu: Extend domain attach group with handle support
  iommu: Add attach handle to struct iopf_group
  iommu: Remove sva handle list
  iommu: Introduce domain attachment handle
  iommufd/iova_bitmap: Remove iterator logic
  iommufd/iova_bitmap: Dynamic pinning on iova_bitmap_set()
  iommufd/iova_bitmap: Consolidate iova_bitmap_set exit conditionals
  iommufd/iova_bitmap: Move initial pinning to iova_bitmap_for_each()
  iommufd/iova_bitmap: Cache mapped length in iova_bitmap_map struct
  ...
This commit is contained in:
Linus Torvalds
2024-07-19 09:42:29 -07:00
19 changed files with 1212 additions and 245 deletions

View File

@@ -124,12 +124,16 @@ struct iopf_fault {
struct iopf_group {
struct iopf_fault last_fault;
struct list_head faults;
size_t fault_count;
/* list node for iommu_fault_param::faults */
struct list_head pending_node;
struct work_struct work;
struct iommu_domain *domain;
struct iommu_attach_handle *attach_handle;
/* The device's fault data parameter. */
struct iommu_fault_param *fault_param;
/* Used by handler provider to hook the group on its own lists. */
struct list_head node;
u32 cookie;
};
/**
@@ -547,6 +551,10 @@ static inline int __iommu_copy_struct_from_user_array(
* @default_domain: If not NULL this will always be set as the default domain.
* This should be an IDENTITY/BLOCKED/PLATFORM domain.
* Do not use in new drivers.
* @user_pasid_table: IOMMU driver supports user-managed PASID table. There is
* no user domain for each PASID and the I/O page faults are
* forwarded through the user domain attached to the device
* RID.
*/
struct iommu_ops {
bool (*capable)(struct device *dev, enum iommu_cap);
@@ -590,6 +598,7 @@ struct iommu_ops {
struct iommu_domain *blocked_domain;
struct iommu_domain *release_domain;
struct iommu_domain *default_domain;
u8 user_pasid_table:1;
};
/**
@@ -989,20 +998,28 @@ struct iommu_fwspec {
/* ATS is supported */
#define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0)
/*
* An iommu attach handle represents a relationship between an iommu domain
* and a PASID or RID of a device. It is allocated and managed by the component
* that manages the domain and is stored in the iommu group during the time the
* domain is attached.
*/
struct iommu_attach_handle {
struct iommu_domain *domain;
};
/**
* struct iommu_sva - handle to a device-mm bond
*/
struct iommu_sva {
struct iommu_attach_handle handle;
struct device *dev;
struct iommu_domain *domain;
struct list_head handle_item;
refcount_t users;
};
struct iommu_mm_data {
u32 pasid;
struct list_head sva_domains;
struct list_head sva_handles;
};
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
@@ -1052,12 +1069,10 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner);
void iommu_device_release_dma_owner(struct device *dev);
int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
struct device *dev, ioasid_t pasid,
struct iommu_attach_handle *handle);
void iommu_detach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
struct iommu_domain *
iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
unsigned int type);
ioasid_t iommu_alloc_global_pasid(struct device *dev);
void iommu_free_global_pasid(ioasid_t pasid);
#else /* CONFIG_IOMMU_API */
@@ -1388,7 +1403,8 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
}
static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
struct device *dev, ioasid_t pasid,
struct iommu_attach_handle *handle)
{
return -ENODEV;
}
@@ -1398,13 +1414,6 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
{
}
static inline struct iommu_domain *
iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
unsigned int type)
{
return NULL;
}
static inline ioasid_t iommu_alloc_global_pasid(struct device *dev)
{
return IOMMU_PASID_INVALID;

View File

@@ -37,19 +37,20 @@
enum {
IOMMUFD_CMD_BASE = 0x80,
IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
IOMMUFD_CMD_IOAS_ALLOC,
IOMMUFD_CMD_IOAS_ALLOW_IOVAS,
IOMMUFD_CMD_IOAS_COPY,
IOMMUFD_CMD_IOAS_IOVA_RANGES,
IOMMUFD_CMD_IOAS_MAP,
IOMMUFD_CMD_IOAS_UNMAP,
IOMMUFD_CMD_OPTION,
IOMMUFD_CMD_VFIO_IOAS,
IOMMUFD_CMD_HWPT_ALLOC,
IOMMUFD_CMD_GET_HW_INFO,
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
IOMMUFD_CMD_HWPT_INVALIDATE,
IOMMUFD_CMD_IOAS_ALLOC = 0x81,
IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
IOMMUFD_CMD_IOAS_COPY = 0x83,
IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
IOMMUFD_CMD_IOAS_MAP = 0x85,
IOMMUFD_CMD_IOAS_UNMAP = 0x86,
IOMMUFD_CMD_OPTION = 0x87,
IOMMUFD_CMD_VFIO_IOAS = 0x88,
IOMMUFD_CMD_HWPT_ALLOC = 0x89,
IOMMUFD_CMD_GET_HW_INFO = 0x8a,
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
};
/**
@@ -356,10 +357,13 @@ struct iommu_vfio_ioas {
* the parent HWPT in a nesting configuration.
* @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
* enforced on device attachment
* @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
* valid.
*/
enum iommufd_hwpt_alloc_flags {
IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
};
/**
@@ -396,8 +400,8 @@ struct iommu_hwpt_vtd_s1 {
* @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
*/
enum iommu_hwpt_data_type {
IOMMU_HWPT_DATA_NONE,
IOMMU_HWPT_DATA_VTD_S1,
IOMMU_HWPT_DATA_NONE = 0,
IOMMU_HWPT_DATA_VTD_S1 = 1,
};
/**
@@ -411,6 +415,9 @@ enum iommu_hwpt_data_type {
* @data_type: One of enum iommu_hwpt_data_type
* @data_len: Length of the type specific data
* @data_uptr: User pointer to the type specific data
* @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of
* IOMMU_HWPT_FAULT_ID_VALID is set.
* @__reserved2: Padding to 64-bit alignment. Must be 0.
*
* Explicitly allocate a hardware page table object. This is the same object
* type that is returned by iommufd_device_attach() and represents the
@@ -441,6 +448,8 @@ struct iommu_hwpt_alloc {
__u32 data_type;
__u32 data_len;
__aligned_u64 data_uptr;
__u32 fault_id;
__u32 __reserved2;
};
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
@@ -482,8 +491,8 @@ struct iommu_hw_info_vtd {
* @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
*/
enum iommu_hw_info_type {
IOMMU_HW_INFO_TYPE_NONE,
IOMMU_HW_INFO_TYPE_INTEL_VTD,
IOMMU_HW_INFO_TYPE_NONE = 0,
IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
};
/**
@@ -620,7 +629,7 @@ struct iommu_hwpt_get_dirty_bitmap {
* @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
*/
enum iommu_hwpt_invalidate_data_type {
IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
};
/**
@@ -692,4 +701,100 @@ struct iommu_hwpt_invalidate {
__u32 __reserved;
};
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
/**
* enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
* @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
* valid.
* @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group.
*/
enum iommu_hwpt_pgfault_flags {
IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0),
IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1),
};
/**
* enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
* @IOMMU_PGFAULT_PERM_READ: request for read permission
* @IOMMU_PGFAULT_PERM_WRITE: request for write permission
* @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the
* Execute Requested bit set in PASID TLP Prefix.
* @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the
* Privileged Mode Requested bit set in PASID TLP
* Prefix.
*/
enum iommu_hwpt_pgfault_perm {
IOMMU_PGFAULT_PERM_READ = (1 << 0),
IOMMU_PGFAULT_PERM_WRITE = (1 << 1),
IOMMU_PGFAULT_PERM_EXEC = (1 << 2),
IOMMU_PGFAULT_PERM_PRIV = (1 << 3),
};
/**
* struct iommu_hwpt_pgfault - iommu page fault data
* @flags: Combination of enum iommu_hwpt_pgfault_flags
* @dev_id: id of the originated device
* @pasid: Process Address Space ID
* @grpid: Page Request Group Index
* @perm: Combination of enum iommu_hwpt_pgfault_perm
* @addr: Fault address
* @length: a hint of how much data the requestor is expecting to fetch. For
* example, if the PRI initiator knows it is going to do a 10MB
* transfer, it could fill in 10MB and the OS could pre-fault in
* 10MB of IOVA. It's default to 0 if there's no such hint.
* @cookie: kernel-managed cookie identifying a group of fault messages. The
* cookie number encoded in the last page fault of the group should
* be echoed back in the response message.
*/
struct iommu_hwpt_pgfault {
__u32 flags;
__u32 dev_id;
__u32 pasid;
__u32 grpid;
__u32 perm;
__u64 addr;
__u32 length;
__u32 cookie;
};
/**
* enum iommufd_page_response_code - Return status of fault handlers
* @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
* populated, retry the access. This is the
* "Success" defined in PCI 10.4.2.1.
* @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
* access. This is the "Invalid Request" in PCI
* 10.4.2.1.
*/
enum iommufd_page_response_code {
IOMMUFD_PAGE_RESP_SUCCESS = 0,
IOMMUFD_PAGE_RESP_INVALID = 1,
};
/**
* struct iommu_hwpt_page_response - IOMMU page fault response
* @cookie: The kernel-managed cookie reported in the fault message.
* @code: One of response code in enum iommufd_page_response_code.
*/
struct iommu_hwpt_page_response {
__u32 cookie;
__u32 code;
};
/**
* struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
* @size: sizeof(struct iommu_fault_alloc)
* @flags: Must be 0
* @out_fault_id: The ID of the new FAULT
* @out_fault_fd: The fd of the new FAULT
*
* Explicitly allocate a fault handling object.
*/
struct iommu_fault_alloc {
__u32 size;
__u32 flags;
__u32 out_fault_id;
__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
#endif