Diffstat (limited to 'include/linux')
-rw-r--r--   include/linux/intel-svm.h       |  13
-rw-r--r--   include/linux/interval_tree.h   |  58
-rw-r--r--   include/linux/iommu.h           | 145
-rw-r--r--   include/linux/iommufd.h         |  98
-rw-r--r--   include/linux/sched/user.h      |   2
-rw-r--r--   include/linux/vfio.h            |  39
6 files changed, 310 insertions, 45 deletions
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 207ef06ba3e1..f9a0d44f6fdb 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -13,17 +13,4 @@
 #define PRQ_RING_MASK   ((0x1000 << PRQ_ORDER) - 0x20)
 #define PRQ_DEPTH       ((0x1000 << PRQ_ORDER) >> 5)
 
-/*
- * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
- * for access to kernel addresses. No IOTLB flushes are automatically done
- * for kernel mappings; it is valid only for access to the kernel's static
- * 1:1 mapping of physical memory - not to vmalloc or even module mappings.
- * A future API addition may permit the use of such ranges, by means of an
- * explicit IOTLB flush call (akin to the DMA API's unmap method).
- *
- * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
- * do such IOTLB flushes automatically.
- */
-#define SVM_FLAG_SUPERVISOR_MODE        BIT(0)
-
 #endif /* __INTEL_SVM_H__ */
diff --git a/include/linux/interval_tree.h b/include/linux/interval_tree.h
index 288c26f50732..2b8026a39906 100644
--- a/include/linux/interval_tree.h
+++ b/include/linux/interval_tree.h
@@ -27,4 +27,62 @@ extern struct interval_tree_node *
 interval_tree_iter_next(struct interval_tree_node *node,
                         unsigned long start, unsigned long last);
 
+/**
+ * struct interval_tree_span_iter - Find used and unused spans.
+ * @start_hole: Start of an interval for a hole when is_hole == 1
+ * @last_hole: Inclusive end of an interval for a hole when is_hole == 1
+ * @start_used: Start of a used interval when is_hole == 0
+ * @last_used: Inclusive end of a used interval when is_hole == 0
+ * @is_hole: 0 == used, 1 == is_hole, -1 == done iteration
+ *
+ * This iterator travels over spans in an interval tree. It does not return
+ * nodes but classifies each span as either a hole, where no nodes intersect,
+ * or a used, which is fully covered by nodes. Each iteration step toggles
+ * between hole and used until the entire range is covered. The returned spans
+ * always fully cover the requested range.
+ *
+ * The iterator is greedy, it always returns the largest hole or used possible,
+ * consolidating all consecutive nodes.
+ *
+ * Use interval_tree_span_iter_done() to detect end of iteration.
+ */
+struct interval_tree_span_iter {
+        /* private: not for use by the caller */
+        struct interval_tree_node *nodes[2];
+        unsigned long first_index;
+        unsigned long last_index;
+
+        /* public: */
+        union {
+                unsigned long start_hole;
+                unsigned long start_used;
+        };
+        union {
+                unsigned long last_hole;
+                unsigned long last_used;
+        };
+        int is_hole;
+};
+
+void interval_tree_span_iter_first(struct interval_tree_span_iter *state,
+                                   struct rb_root_cached *itree,
+                                   unsigned long first_index,
+                                   unsigned long last_index);
+void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter,
+                                     struct rb_root_cached *itree,
+                                     unsigned long new_index);
+void interval_tree_span_iter_next(struct interval_tree_span_iter *state);
+
+static inline bool
+interval_tree_span_iter_done(struct interval_tree_span_iter *state)
+{
+        return state->is_hole == -1;
+}
+
+#define interval_tree_for_each_span(span, itree, first_index, last_index) \
+        for (interval_tree_span_iter_first(span, itree,                   \
+                                           first_index, last_index);      \
+             !interval_tree_span_iter_done(span);                         \
+             interval_tree_span_iter_next(span))
+
 #endif /* _LINUX_INTERVAL_TREE_H */
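A minimal usage sketch, not part of the patch, of the span iterator declared above: walk a fixed window of an interval tree and report every gap that no node covers. The itree root, the limit bound and report_free_spans() itself are hypothetical; only the iterator API comes from this diff.

/* Sketch only: print every range in [0, limit] not covered by any node. */
static void report_free_spans(struct rb_root_cached *itree, unsigned long limit)
{
        struct interval_tree_span_iter span;

        interval_tree_for_each_span(&span, itree, 0, limit) {
                if (!span.is_hole)
                        continue;  /* start_used/last_used describe a covered span */
                pr_info("free span: 0x%lx - 0x%lx\n",
                        span.start_hole, span.last_hole);
        }
}

Because the iterator is greedy and consolidates consecutive nodes, each reported hole is maximal.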
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 6f53ad74fa0d..46e1347bfa22 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -64,6 +64,8 @@ struct iommu_domain_geometry {
 #define __IOMMU_DOMAIN_PT       (1U << 2)  /* Domain is identity mapped   */
 #define __IOMMU_DOMAIN_DMA_FQ   (1U << 3)  /* DMA-API uses flush queue    */
 
+#define __IOMMU_DOMAIN_SVA      (1U << 4)  /* Shared process address space */
+
 /*
  * This are the possible domain-types
  *
@@ -77,6 +79,8 @@ struct iommu_domain_geometry {
  *                                certain optimizations for these domains
  *      IOMMU_DOMAIN_DMA_FQ     - As above, but definitely using batched TLB
  *                                invalidation.
+ *      IOMMU_DOMAIN_SVA        - DMA addresses are shared process addresses
+ *                                represented by mm_struct's.
  */
 #define IOMMU_DOMAIN_BLOCKED    (0U)
 #define IOMMU_DOMAIN_IDENTITY   (__IOMMU_DOMAIN_PT)
@@ -86,15 +90,27 @@ struct iommu_domain_geometry {
 #define IOMMU_DOMAIN_DMA_FQ     (__IOMMU_DOMAIN_PAGING |       \
                                  __IOMMU_DOMAIN_DMA_API |      \
                                  __IOMMU_DOMAIN_DMA_FQ)
+#define IOMMU_DOMAIN_SVA        (__IOMMU_DOMAIN_SVA)
 
 struct iommu_domain {
         unsigned type;
         const struct iommu_domain_ops *ops;
         unsigned long pgsize_bitmap;    /* Bitmap of page sizes in use */
-        iommu_fault_handler_t handler;
-        void *handler_token;
         struct iommu_domain_geometry geometry;
         struct iommu_dma_cookie *iova_cookie;
+        enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
+                                                      void *data);
+        void *fault_data;
+        union {
+                struct {
+                        iommu_fault_handler_t handler;
+                        void *handler_token;
+                };
+                struct {        /* IOMMU_DOMAIN_SVA */
+                        struct mm_struct *mm;
+                        int users;
+                };
+        };
 };
 
 static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
@@ -108,6 +124,11 @@ enum iommu_cap {
         IOMMU_CAP_NOEXEC,               /* IOMMU_NOEXEC flag */
         IOMMU_CAP_PRE_BOOT_PROTECTION,  /* Firmware says it used the IOMMU for
                                            DMA protection and we should too */
+        /*
+         * Per-device flag indicating if enforce_cache_coherency() will work on
+         * this device.
+         */
+        IOMMU_CAP_ENFORCE_CACHE_COHERENCY,
 };
 
 /* These are the possible reserved region types */
@@ -214,15 +235,15 @@ struct iommu_iotlb_gather {
  *               driver init to device driver init (default no)
  * @dev_enable/disable_feat: per device entries to enable/disable
  *                               iommu specific features.
- * @sva_bind: Bind process address space to device
- * @sva_unbind: Unbind process address space from device
- * @sva_get_pasid: Get PASID associated to a SVA handle
  * @page_response: handle page request response
  * @def_domain_type: device default domain type, return value:
  *              - IOMMU_DOMAIN_IDENTITY: must use an identity domain
  *              - IOMMU_DOMAIN_DMA: must use a dma domain
  *              - 0: use the default setting
  * @default_domain_ops: the default ops for domains
+ * @remove_dev_pasid: Remove any translation configurations of a specific
+ *                    pasid, so that any DMA transactions with this pasid
+ *                    will be blocked by the hardware.
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  * @owner: Driver module providing these ops
  */
@@ -247,16 +268,12 @@ struct iommu_ops {
         int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
         int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
 
-        struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
-                                      void *drvdata);
-        void (*sva_unbind)(struct iommu_sva *handle);
-        u32 (*sva_get_pasid)(struct iommu_sva *handle);
-
         int (*page_response)(struct device *dev,
                              struct iommu_fault_event *evt,
                              struct iommu_page_response *msg);
 
         int (*def_domain_type)(struct device *dev);
+        void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid);
 
         const struct iommu_domain_ops *default_domain_ops;
         unsigned long pgsize_bitmap;
@@ -266,7 +283,20 @@ struct iommu_ops {
 /**
  * struct iommu_domain_ops - domain specific operations
  * @attach_dev: attach an iommu domain to a device
+ *  Return:
+ * * 0          - success
+ * * EINVAL     - can indicate that device and domain are incompatible due to
+ *                some previous configuration of the domain, in which case the
+ *                driver shouldn't log an error, since it is legitimate for a
+ *                caller to test reuse of existing domains. Otherwise, it may
+ *                still represent some other fundamental problem
+ * * ENOMEM     - out of memory
+ * * ENOSPC     - non-ENOMEM type of resource allocation failures
+ * * EBUSY      - device is attached to a domain and cannot be changed
+ * * ENODEV     - device specific errors, not able to be attached
+ * * <others>   - treated as ENODEV by the caller. Use is discouraged
  * @detach_dev: detach an iommu domain from a device
+ * @set_dev_pasid: set an iommu domain to a pasid of device
  * @map: map a physically contiguous memory region to an iommu domain
  * @map_pages: map a physically contiguous set of pages of the same size to
  *             an iommu domain.
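The Return: rules documented for attach_dev above define a probing convention: -EINVAL means the device and domain are merely incompatible, so a caller may silently fall back to a fresh domain. Below is a hedged sketch of that pattern, not from this patch, using the existing iommu_attach_group(), iommu_domain_alloc() and iommu_domain_free() core APIs; the group and bus arguments and the ownership handling are simplified.

/*
 * Sketch only: try to reuse an already-allocated domain.  Per the attach_dev
 * return rules above, -EINVAL means "incompatible, but not worth logging",
 * so only in that case allocate and attach a new domain instead.
 */
static struct iommu_domain *reuse_or_alloc_domain(struct iommu_group *group,
                                                  struct iommu_domain *existing,
                                                  struct bus_type *bus)
{
        struct iommu_domain *domain;
        int rc;

        if (existing) {
                rc = iommu_attach_group(existing, group);
                if (!rc)
                        return existing;
                if (rc != -EINVAL)      /* ENODEV, EBUSY, ... are real errors */
                        return ERR_PTR(rc);
        }

        domain = iommu_domain_alloc(bus);
        if (!domain)
                return ERR_PTR(-ENOMEM);

        rc = iommu_attach_group(domain, group);
        if (rc) {
                iommu_domain_free(domain);
                return ERR_PTR(rc);
        }
        return domain;
}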
@@ -287,6 +317,8 @@ struct iommu_ops {
 struct iommu_domain_ops {
         int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
         void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+        int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
+                             ioasid_t pasid);
 
         int (*map)(struct iommu_domain *domain, unsigned long iova,
                    phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
@@ -322,12 +354,14 @@ struct iommu_domain_ops {
  * @list: Used by the iommu-core to keep a list of registered iommus
  * @ops: iommu-ops for talking to this iommu
  * @dev: struct device for sysfs handling
+ * @max_pasids: number of supported PASIDs
  */
 struct iommu_device {
         struct list_head list;
         const struct iommu_ops *ops;
         struct fwnode_handle *fwnode;
         struct device *dev;
+        u32 max_pasids;
 };
 
 /**
@@ -366,6 +400,7 @@ struct iommu_fault_param {
  * @fwspec:     IOMMU fwspec data
  * @iommu_dev:  IOMMU device this device is linked to
  * @priv:       IOMMU Driver private data
+ * @max_pasids: number of PASIDs this device can consume
  *
  * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
  *      struct iommu_group      *iommu_group;
@@ -377,6 +412,7 @@ struct dev_iommu {
         struct iommu_fwspec             *fwspec;
         struct iommu_device             *iommu_dev;
         void                            *priv;
+        u32                             max_pasids;
 };
 
 int iommu_device_register(struct iommu_device *iommu,
@@ -626,6 +662,7 @@ struct iommu_fwspec {
  */
 struct iommu_sva {
         struct device                   *dev;
+        struct iommu_domain             *domain;
 };
 
 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
@@ -667,12 +704,6 @@ void iommu_release_device(struct device *dev);
 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
 
-struct iommu_sva *iommu_sva_bind_device(struct device *dev,
-                                        struct mm_struct *mm,
-                                        void *drvdata);
-void iommu_sva_unbind_device(struct iommu_sva *handle);
-u32 iommu_sva_get_pasid(struct iommu_sva *handle);
-
 int iommu_device_use_default_domain(struct device *dev);
 void iommu_device_unuse_default_domain(struct device *dev);
 
@@ -680,6 +711,18 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner);
 void iommu_group_release_dma_owner(struct iommu_group *group);
 bool iommu_group_dma_owner_claimed(struct iommu_group *group);
 
+int iommu_device_claim_dma_owner(struct device *dev, void *owner);
+void iommu_device_release_dma_owner(struct device *dev);
+
+struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
+                                            struct mm_struct *mm);
+int iommu_attach_device_pasid(struct iommu_domain *domain,
+                              struct device *dev, ioasid_t pasid);
+void iommu_detach_device_pasid(struct iommu_domain *domain,
+                               struct device *dev, ioasid_t pasid);
+struct iommu_domain *
+iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
+                               unsigned int type);
 #else /* CONFIG_IOMMU_API */
 
 struct iommu_ops {};
@@ -999,21 +1042,6 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
 {
         return -ENODEV;
 }
 
-static inline struct iommu_sva *
-iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
-{
-        return NULL;
-}
-
-static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
-{
-}
-
-static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
-{
-        return IOMMU_PASID_INVALID;
-}
-
 static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
 {
         return NULL;
 }
@@ -1042,6 +1070,39 @@ static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group)
 {
         return false;
 }
+
+static inline void iommu_device_release_dma_owner(struct device *dev)
+{
+}
+
+static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
+{
+        return -ENODEV;
+}
+
+static inline struct iommu_domain *
+iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm)
+{
+        return NULL;
+}
+
+static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
+                                            struct device *dev, ioasid_t pasid)
+{
+        return -ENODEV;
+}
+
+static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
+                                             struct device *dev, ioasid_t pasid)
+{
+}
+
+static inline struct iommu_domain *
+iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
+                               unsigned int type)
+{
+        return NULL;
+}
 #endif /* CONFIG_IOMMU_API */
 
 /**
@@ -1124,4 +1185,26 @@ static inline bool tegra_dev_iommu_get_stream_id(struct device *dev, u32 *stream
         return false;
 }
 
+#ifdef CONFIG_IOMMU_SVA
+struct iommu_sva *iommu_sva_bind_device(struct device *dev,
+                                        struct mm_struct *mm);
+void iommu_sva_unbind_device(struct iommu_sva *handle);
+u32 iommu_sva_get_pasid(struct iommu_sva *handle);
+#else
+static inline struct iommu_sva *
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
+{
+        return NULL;
+}
+
+static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
+{
+}
+
+static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
+{
+        return IOMMU_PASID_INVALID;
+}
+#endif /* CONFIG_IOMMU_SVA */
+
 #endif /* __LINUX_IOMMU_H */
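With the sva_bind/sva_unbind/sva_get_pasid ops removed from iommu_ops and iommu_sva_bind_device() now declared behind CONFIG_IOMMU_SVA without the old drvdata argument, a device driver's SVA enable path might look like the hedged sketch below. my_dev_enable_sva() and my_dev_program_pasid() are hypothetical driver functions; error handling is trimmed and teardown (iommu_sva_unbind_device() plus disabling the feature) is only noted in a comment.

/* Sketch only: bind current->mm to the device and program the PASID. */
static int my_dev_enable_sva(struct device *dev, struct iommu_sva **out_handle)
{
        struct iommu_sva *handle;
        int rc;

        rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
        if (rc)
                return rc;

        handle = iommu_sva_bind_device(dev, current->mm);
        if (IS_ERR(handle)) {
                iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
                return PTR_ERR(handle);
        }

        /* hypothetical hardware hook; the PASID tags this process's DMA */
        my_dev_program_pasid(dev, iommu_sva_get_pasid(handle));

        *out_handle = handle;   /* iommu_sva_unbind_device(handle) on teardown */
        return 0;
}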
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
new file mode 100644
index 000000000000..650d45629647
--- /dev/null
+++ b/include/linux/iommufd.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Intel Corporation
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#ifndef __LINUX_IOMMUFD_H
+#define __LINUX_IOMMUFD_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+
+struct device;
+struct iommufd_device;
+struct page;
+struct iommufd_ctx;
+struct iommufd_access;
+struct file;
+
+struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
+                                           struct device *dev, u32 *id);
+void iommufd_device_unbind(struct iommufd_device *idev);
+
+int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id);
+void iommufd_device_detach(struct iommufd_device *idev);
+
+struct iommufd_access_ops {
+        u8 needs_pin_pages : 1;
+        void (*unmap)(void *data, unsigned long iova, unsigned long length);
+};
+
+enum {
+        IOMMUFD_ACCESS_RW_READ = 0,
+        IOMMUFD_ACCESS_RW_WRITE = 1 << 0,
+        /* Set if the caller is in a kthread then rw will use kthread_use_mm() */
+        IOMMUFD_ACCESS_RW_KTHREAD = 1 << 1,
+
+        /* Only for use by selftest */
+        __IOMMUFD_ACCESS_RW_SLOW_PATH = 1 << 2,
+};
+
+struct iommufd_access *
+iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
+                      const struct iommufd_access_ops *ops, void *data);
+void iommufd_access_destroy(struct iommufd_access *access);
+
+void iommufd_ctx_get(struct iommufd_ctx *ictx);
+
+#if IS_ENABLED(CONFIG_IOMMUFD)
+struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
+void iommufd_ctx_put(struct iommufd_ctx *ictx);
+
+int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
+                             unsigned long length, struct page **out_pages,
+                             unsigned int flags);
+void iommufd_access_unpin_pages(struct iommufd_access *access,
+                                unsigned long iova, unsigned long length);
+int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
+                      void *data, size_t len, unsigned int flags);
+int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
+#else /* !CONFIG_IOMMUFD */
+static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
+{
+        return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void iommufd_ctx_put(struct iommufd_ctx *ictx)
+{
+}
+
+static inline int iommufd_access_pin_pages(struct iommufd_access *access,
+                                           unsigned long iova,
+                                           unsigned long length,
+                                           struct page **out_pages,
+                                           unsigned int flags)
+{
+        return -EOPNOTSUPP;
+}
+
+static inline void iommufd_access_unpin_pages(struct iommufd_access *access,
+                                              unsigned long iova,
+                                              unsigned long length)
+{
+}
+
+static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
+                                    void *data, size_t len, unsigned int flags)
+{
+        return -EOPNOTSUPP;
+}
+
+static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx,
+                                              u32 *out_ioas_id)
+{
+        return -EOPNOTSUPP;
+}
+#endif /* CONFIG_IOMMUFD */
+#endif
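A hedged sketch, not from this patch, of how an emulated ("mdev-style") driver could use the access interface declared above: create an iommufd_access against an IOAS and read guest memory through it. my_unmap(), my_access_ops, my_copy_from_guest() and the assumption that iommufd_access_create() returns an ERR_PTR() on failure are illustrative only.

static void my_unmap(void *data, unsigned long iova, unsigned long length)
{
        /* drop anything the driver cached for [iova, iova + length) */
}

static const struct iommufd_access_ops my_access_ops = {
        .unmap = my_unmap,      /* set .needs_pin_pages = 1 if pin_pages is used */
};

/* Sketch only: read guest memory at @iova in the IOAS into @buf. */
static int my_copy_from_guest(struct iommufd_ctx *ictx, u32 ioas_id,
                              unsigned long iova, void *buf, size_t len)
{
        struct iommufd_access *access;
        int rc;

        access = iommufd_access_create(ictx, ioas_id, &my_access_ops, NULL);
        if (IS_ERR(access))     /* assumed to return ERR_PTR() on failure */
                return PTR_ERR(access);

        rc = iommufd_access_rw(access, iova, buf, len, IOMMUFD_ACCESS_RW_READ);

        iommufd_access_destroy(access);
        return rc;
}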
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index f054d0360a75..4cc52698e214 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -25,7 +25,7 @@ struct user_struct {
 
 #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \
     defined(CONFIG_NET) || defined(CONFIG_IO_URING) || \
-    defined(CONFIG_VFIO_PCI_ZDEV_KVM)
+    defined(CONFIG_VFIO_PCI_ZDEV_KVM) || IS_ENABLED(CONFIG_IOMMUFD)
         atomic_long_t locked_vm;
 #endif
 #ifdef CONFIG_WATCH_QUEUE
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index fdd393f70b19..a615542df1e0 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -17,6 +17,9 @@
 #include <linux/iova_bitmap.h>
 
 struct kvm;
+struct iommufd_ctx;
+struct iommufd_device;
+struct iommufd_access;
 
 /*
  * VFIO devices can be placed in a set, this allows all devices to share this
@@ -54,6 +57,12 @@ struct vfio_device {
         struct completion comp;
         struct list_head group_next;
         struct list_head iommu_entry;
+        struct iommufd_access *iommufd_access;
+#if IS_ENABLED(CONFIG_IOMMUFD)
+        struct iommufd_device *iommufd_device;
+        struct iommufd_ctx *iommufd_ictx;
+        bool iommufd_attached;
+#endif
 };
 
 /**
@@ -80,6 +89,10 @@ struct vfio_device_ops {
         char *name;
         int (*init)(struct vfio_device *vdev);
         void (*release)(struct vfio_device *vdev);
+        int (*bind_iommufd)(struct vfio_device *vdev,
+                            struct iommufd_ctx *ictx, u32 *out_device_id);
+        void (*unbind_iommufd)(struct vfio_device *vdev);
+        int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
         int (*open_device)(struct vfio_device *vdev);
         void (*close_device)(struct vfio_device *vdev);
         ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
@@ -96,6 +109,32 @@ struct vfio_device_ops {
                           void __user *arg, size_t argsz);
 };
 
+#if IS_ENABLED(CONFIG_IOMMUFD)
+int vfio_iommufd_physical_bind(struct vfio_device *vdev,
+                               struct iommufd_ctx *ictx, u32 *out_device_id);
+void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
+int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
+int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
+                               struct iommufd_ctx *ictx, u32 *out_device_id);
+void vfio_iommufd_emulated_unbind(struct vfio_device *vdev);
+int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
+#else
+#define vfio_iommufd_physical_bind                                      \
+        ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,  \
+                  u32 *out_device_id)) NULL)
+#define vfio_iommufd_physical_unbind \
+        ((void (*)(struct vfio_device *vdev)) NULL)
+#define vfio_iommufd_physical_attach_ioas \
+        ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
+#define vfio_iommufd_emulated_bind                                      \
+        ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,  \
+                  u32 *out_device_id)) NULL)
+#define vfio_iommufd_emulated_unbind \
+        ((void (*)(struct vfio_device *vdev)) NULL)
+#define vfio_iommufd_emulated_attach_ioas \
+        ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
+#endif
+
 /**
  * @migration_set_state: Optional callback to change the migration state for
  *         devices that support migration. It's mandatory for