Diffstat (limited to 'tools/testing/vma/vma_internal.h')
-rw-r--r--  tools/testing/vma/vma_internal.h  398
1 file changed, 304 insertions(+), 94 deletions(-)
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 1eae23039854..f6e45e62da3a 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -25,7 +25,7 @@
 #include <linux/maple_tree.h>
 #include <linux/mm.h>
 #include <linux/rbtree.h>
-#include <linux/rwsem.h>
+#include <linux/refcount.h>
 
 extern unsigned long stack_guard_gap;
 #ifdef CONFIG_MMU
@@ -56,6 +56,8 @@ extern unsigned long dac_mmap_min_addr;
 #define VM_PFNMAP 0x00000400
 #define VM_LOCKED 0x00002000
 #define VM_IO 0x00004000
+#define VM_SEQ_READ 0x00008000  /* App will access data sequentially */
+#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */
 #define VM_DONTEXPAND 0x00040000
 #define VM_LOCKONFAULT 0x00080000
 #define VM_ACCOUNT 0x00100000
@@ -70,6 +72,20 @@ extern unsigned long dac_mmap_min_addr;
 #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
 
+#ifdef CONFIG_STACK_GROWSUP
+#define VM_STACK VM_GROWSUP
+#define VM_STACK_EARLY VM_GROWSDOWN
+#else
+#define VM_STACK VM_GROWSDOWN
+#define VM_STACK_EARLY 0
+#endif
+
+#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
+#define TASK_SIZE_LOW DEFAULT_MAP_WINDOW
+#define TASK_SIZE_MAX DEFAULT_MAP_WINDOW
+#define STACK_TOP TASK_SIZE_LOW
+#define STACK_TOP_MAX TASK_SIZE_MAX
+
 /* This mask represents all the VMA flag bits used by mlock */
 #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
 
@@ -82,6 +98,10 @@ extern unsigned long dac_mmap_min_addr;
 
 #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
 
+#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
+#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
+
 #define RLIMIT_STACK 3 /* max stack size */
 #define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */
 
@@ -135,9 +155,9 @@ typedef __bitwise unsigned int vm_fault_t;
  */
 #define pr_warn_once pr_err
 
-typedef struct refcount_struct {
-        atomic_t refs;
-} refcount_t;
+#define data_race(expr) expr
+
+#define ASSERT_EXCLUSIVE_WRITER(x)
 
 struct kref {
         refcount_t refcount;
@@ -233,15 +253,46 @@ struct mm_struct {
         unsigned long flags; /* Must use atomic bitops to access */
 };
 
-struct vma_lock {
-        struct rw_semaphore lock;
+struct vm_area_struct;
+
+/*
+ * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
+ * manipulate mutable fields which will cause those fields to be updated in the
+ * resultant VMA.
+ *
+ * Helper functions are not required for manipulating any field.
+ */
+struct vm_area_desc {
+        /* Immutable state. */
+        struct mm_struct *mm;
+        unsigned long start;
+        unsigned long end;
+
+        /* Mutable fields. Populated with initial state. */
+        pgoff_t pgoff;
+        struct file *file;
+        vm_flags_t vm_flags;
+        pgprot_t page_prot;
+
+        /* Write-only fields. */
+        const struct vm_operations_struct *vm_ops;
+        void *private_data;
 };
 
+struct file_operations {
+        int (*mmap)(struct file *, struct vm_area_struct *);
+        int (*mmap_prepare)(struct vm_area_desc *);
+};
+
 struct file {
         struct address_space *f_mapping;
+        const struct file_operations *f_op;
 };
 
+#define VMA_LOCK_OFFSET 0x40000000
+
+typedef struct { unsigned long v; } freeptr_t;
+
 struct vm_area_struct {
 	/* The first cache line has the info for VMA tree walking. */
 
@@ -251,9 +302,7 @@ struct vm_area_struct {
                         unsigned long vm_start;
                         unsigned long vm_end;
                 };
-#ifdef CONFIG_PER_VMA_LOCK
-                struct rcu_head vm_rcu; /* Used for deferred freeing. */
-#endif
+                freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
         };
 
         struct mm_struct *vm_mm; /* The address space we belong to. */
@@ -269,16 +318,13 @@ struct vm_area_struct {
         };
 
 #ifdef CONFIG_PER_VMA_LOCK
-        /* Flag to indicate areas detached from the mm->mm_mt tree */
-        bool detached;
-
         /*
          * Can only be written (using WRITE_ONCE()) while holding both:
          *  - mmap_lock (in write mode)
-         *  - vm_lock->lock (in write mode)
+         *  - vm_refcnt bit at VMA_LOCK_OFFSET is set
          * Can be read reliably while holding one of:
          *  - mmap_lock (in read or write mode)
-         *  - vm_lock->lock (in read or write mode)
+         *  - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1
          * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
          * while holding nothing (except RCU to keep the VMA struct allocated).
          *
@@ -287,20 +333,9 @@ struct vm_area_struct {
          * slowpath.
          */
         unsigned int vm_lock_seq;
-        struct vma_lock *vm_lock;
 #endif
 
         /*
-         * For areas with an address space and backing store,
-         * linkage into the address_space->i_mmap interval tree.
-         *
-         */
-        struct {
-                struct rb_node rb;
-                unsigned long rb_subtree_last;
-        } shared;
-
-        /*
          * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
          * list, after a COW of one of the file pages. A MAP_SHARED vma
          * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
@@ -319,14 +354,6 @@ struct vm_area_struct {
         struct file * vm_file;      /* File we map to (can be NULL). */
         void * vm_private_data;     /* was vm_pte (shared mem) */
 
-#ifdef CONFIG_ANON_VMA_NAME
-        /*
-         * For private and shared anonymous mappings, a pointer to a null
-         * terminated string containing the name given to the vma, or NULL if
-         * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
-         */
-        struct anon_vma_name *anon_name;
-#endif
 #ifdef CONFIG_SWAP
         atomic_long_t swap_readahead_info;
 #endif
@@ -339,6 +366,27 @@ struct vm_area_struct {
 #ifdef CONFIG_NUMA_BALANCING
         struct vma_numab_state *numab_state; /* NUMA Balancing state */
 #endif
+#ifdef CONFIG_PER_VMA_LOCK
+        /* Unstable RCU readers are allowed to read this. */
+        refcount_t vm_refcnt;
+#endif
+        /*
+         * For areas with an address space and backing store,
+         * linkage into the address_space->i_mmap interval tree.
+         *
+         */
+        struct {
+                struct rb_node rb;
+                unsigned long rb_subtree_last;
+        } shared;
+#ifdef CONFIG_ANON_VMA_NAME
+        /*
+         * For private and shared anonymous mappings, a pointer to a null
+         * terminated string containing the name given to the vma, or NULL if
+         * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
+         */
+        struct anon_vma_name *anon_name;
+#endif
         struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
 } __randomize_layout;
 
@@ -429,6 +477,87 @@ struct vm_unmapped_area_info {
         unsigned long start_gap;
 };
 
+struct pagetable_move_control {
+        struct vm_area_struct *old; /* Source VMA. */
+        struct vm_area_struct *new; /* Destination VMA. */
+        unsigned long old_addr; /* Address from which the move begins. */
+        unsigned long old_end; /* Exclusive address at which old range ends. */
+        unsigned long new_addr; /* Address to move page tables to. */
+        unsigned long len_in; /* Bytes to remap specified by user. */
+
+        bool need_rmap_locks; /* Do rmap locks need to be taken? */
+        bool for_stack; /* Is this an early temp stack being moved? */
+};
+
+#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_)   \
+        struct pagetable_move_control name = {                         \
+                .old = old_,                                           \
+                .new = new_,                                           \
+                .old_addr = old_addr_,                                 \
+                .old_end = (old_addr_) + (len_),                       \
+                .new_addr = new_addr_,                                 \
+                .len_in = len_,                                        \
+        }
+
+struct kmem_cache_args {
+        /**
+         * @align: The required alignment for the objects.
+         *
+         * %0 means no specific alignment is requested.
+         */
+        unsigned int align;
+        /**
+         * @useroffset: Usercopy region offset.
+         *
+         * %0 is a valid offset, when @usersize is non-%0
+         */
+        unsigned int useroffset;
+        /**
+         * @usersize: Usercopy region size.
+         *
+         * %0 means no usercopy region is specified.
+         */
+        unsigned int usersize;
+        /**
+         * @freeptr_offset: Custom offset for the free pointer
+         *      in &SLAB_TYPESAFE_BY_RCU caches
+         *
+         * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
+         * outside of the object. This might cause the object to grow in size.
+         * Cache creators that have a reason to avoid this can specify a custom
+         * free pointer offset in their struct where the free pointer will be
+         * placed.
+         *
+         * Note that placing the free pointer inside the object requires the
+         * caller to ensure that no fields are invalidated that are required to
+         * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
+         * details).
+         *
+         * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
+         * is specified, %use_freeptr_offset must be set %true.
+         *
+         * Note that @ctor currently isn't supported with custom free pointers
+         * as a @ctor requires an external free pointer.
+         */
+        unsigned int freeptr_offset;
+        /**
+         * @use_freeptr_offset: Whether a @freeptr_offset is used.
+         */
+        bool use_freeptr_offset;
+        /**
+         * @ctor: A constructor for the objects.
+         *
+         * The constructor is invoked for each object in a newly allocated slab
+         * page. It is the cache user's responsibility to free object in the
+         * same state as after calling the constructor, or deal appropriately
+         * with any differences between a freshly constructed and a reallocated
+         * object.
+         *
+         * %NULL means no constructor.
+         */
+        void (*ctor)(void *);
+};
+
 static inline void vma_iter_invalidate(struct vma_iterator *vmi)
 {
         mas_pause(&vmi->mas);
 }
@@ -464,26 +593,40 @@ static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
         return mas_find(&vmi->mas, ULONG_MAX);
 }
 
-static inline bool vma_lock_alloc(struct vm_area_struct *vma)
+/*
+ * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
+ * assertions should be made either under mmap_write_lock or when the object
+ * has been isolated under mmap_write_lock, ensuring no competing writers.
+ */
+static inline void vma_assert_attached(struct vm_area_struct *vma)
 {
-        vma->vm_lock = calloc(1, sizeof(struct vma_lock));
-
-        if (!vma->vm_lock)
-                return false;
-
-        init_rwsem(&vma->vm_lock->lock);
-        vma->vm_lock_seq = UINT_MAX;
+        WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
-
-        return true;
+}
+
+static inline void vma_assert_detached(struct vm_area_struct *vma)
+{
+        WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
 }
 
 static inline void vma_assert_write_locked(struct vm_area_struct *);
-static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached)
+static inline void vma_mark_attached(struct vm_area_struct *vma)
+{
+        vma_assert_write_locked(vma);
+        vma_assert_detached(vma);
+        refcount_set_release(&vma->vm_refcnt, 1);
+}
+
+static inline void vma_mark_detached(struct vm_area_struct *vma)
 {
-        /* When detaching vma should be write-locked */
-        if (detached)
-                vma_assert_write_locked(vma);
-        vma->detached = detached;
+        vma_assert_write_locked(vma);
+        vma_assert_attached(vma);
+        /* We are the only writer, so no need to use vma_refcount_put(). */
+        if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
+                /*
+                 * Reader must have temporarily raised vm_refcnt but it will
+                 * drop it without using the vma since vma is write-locked.
+                 */
+        }
 }
 
 extern const struct vm_operations_struct vma_dummy_vm_ops;
@@ -496,40 +639,41 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
         vma->vm_mm = mm;
         vma->vm_ops = &vma_dummy_vm_ops;
         INIT_LIST_HEAD(&vma->anon_vma_chain);
-        vma_mark_detached(vma, false);
+        vma->vm_lock_seq = UINT_MAX;
 }
 
-static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
-{
-        struct vm_area_struct *vma = calloc(1, sizeof(struct vm_area_struct));
+struct kmem_cache {
+        const char *name;
+        size_t object_size;
+        struct kmem_cache_args *args;
+};
 
-        if (!vma)
-                return NULL;
+static inline struct kmem_cache *__kmem_cache_create(const char *name,
+                                                     size_t object_size,
+                                                     struct kmem_cache_args *args)
+{
+        struct kmem_cache *ret = malloc(sizeof(struct kmem_cache));
 
-        vma_init(vma, mm);
-        if (!vma_lock_alloc(vma)) {
-                free(vma);
-                return NULL;
-        }
+        ret->name = name;
+        ret->object_size = object_size;
+        ret->args = args;
 
-        return vma;
+        return ret;
 }
 
-static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
-{
-        struct vm_area_struct *new = calloc(1, sizeof(struct vm_area_struct));
+#define kmem_cache_create(__name, __object_size, __args, ...)          \
+        __kmem_cache_create((__name), (__object_size), (__args))
 
-        if (!new)
-                return NULL;
+static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+{
+        (void)gfpflags;
 
-        memcpy(new, orig, sizeof(*new));
-        if (!vma_lock_alloc(new)) {
-                free(new);
-                return NULL;
-        }
-        INIT_LIST_HEAD(&new->anon_vma_chain);
+        return calloc(s->object_size, 1);
+}
 
-        return new;
+static inline void kmem_cache_free(struct kmem_cache *s, void *x)
+{
+        free(x);
 }
 
 /*
@@ -696,22 +840,6 @@ static inline void mpol_put(struct mempolicy *)
 {
 }
 
-static inline void vma_lock_free(struct vm_area_struct *vma)
-{
-        free(vma->vm_lock);
-}
-
-static inline void __vm_area_free(struct vm_area_struct *vma)
-{
-        vma_lock_free(vma);
-        free(vma);
-}
-
-static inline void vm_area_free(struct vm_area_struct *vma)
-{
-        __vm_area_free(vma);
-}
-
 static inline void lru_add_drain(void)
 {
 }
@@ -796,12 +924,12 @@ static inline void vma_start_write(struct vm_area_struct *vma)
 static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                          unsigned long start,
                                          unsigned long end,
-                                         long adjust_next)
+                                         struct vm_area_struct *next)
 {
         (void)vma;
         (void)start;
         (void)end;
-        (void)adjust_next;
+        (void)next;
 }
 
 static inline void vma_iter_free(struct vma_iterator *vmi)
@@ -1029,11 +1157,6 @@ static inline void vm_flags_clear(struct vm_area_struct *vma,
         vma->__vm_flags &= ~flags;
 }
 
-static inline int call_mmap(struct file *, struct vm_area_struct *)
-{
-        return 0;
-}
-
 static inline int shmem_zero_setup(struct vm_area_struct *)
 {
         return 0;
@@ -1251,4 +1374,91 @@ static inline int mapping_map_writable(struct address_space *mapping)
         return 0;
 }
 
+static inline unsigned long move_page_tables(struct pagetable_move_control *pmc)
+{
+        (void)pmc;
+
+        return 0;
+}
+
+static inline void free_pgd_range(struct mmu_gather *tlb,
+                        unsigned long addr, unsigned long end,
+                        unsigned long floor, unsigned long ceiling)
+{
+        (void)tlb;
+        (void)addr;
+        (void)end;
+        (void)floor;
+        (void)ceiling;
+}
+
+static inline int ksm_execve(struct mm_struct *mm)
+{
+        (void)mm;
+
+        return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm)
+{
+        (void)mm;
+}
+
+static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
+{
+        (void)vma;
+        (void)reset_refcnt;
+}
+
+static inline void vma_numab_state_init(struct vm_area_struct *vma)
+{
+        (void)vma;
+}
+
+static inline void vma_numab_state_free(struct vm_area_struct *vma)
+{
+        (void)vma;
+}
+
+static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
+                                     struct vm_area_struct *new_vma)
+{
+        (void)orig_vma;
+        (void)new_vma;
+}
+
+static inline void free_anon_vma_name(struct vm_area_struct *vma)
+{
+        (void)vma;
+}
+
+/* Did the driver provide valid mmap hook configuration? */
+static inline bool file_has_valid_mmap_hooks(struct file *file)
+{
+        bool has_mmap = file->f_op->mmap;
+        bool has_mmap_prepare = file->f_op->mmap_prepare;
+
+        /* Hooks are mutually exclusive. */
+        if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
+                return false;
+        if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare))
+                return false;
+
+        return true;
+}
+
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+{
+        if (WARN_ON_ONCE(file->f_op->mmap_prepare))
+                return -EINVAL;
+
+        return file->f_op->mmap(file, vma);
+}
+
+static inline int __call_mmap_prepare(struct file *file,
+                                      struct vm_area_desc *desc)
+{
+        return file->f_op->mmap_prepare(desc);
+}
+
 #endif  /* __MM_VMA_INTERNAL_H */
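
Usage note (not part of the commit): the mmap() and mmap_prepare() hooks added to the file_operations stub above are mutually exclusive, which is what file_has_valid_mmap_hooks() enforces and why call_mmap() now refuses files that provide mmap_prepare(). A minimal sketch of how test code might exercise the new hook, assuming vma_internal.h is in scope; the test_mmap_prepare()/test_fops names and the harness function are hypothetical, not part of the patch:

#include "vma_internal.h"

/* Hypothetical driver callback: tweak the descriptor before any VMA exists. */
static int test_mmap_prepare(struct vm_area_desc *desc)
{
        desc->vm_flags |= VM_DONTEXPAND;        /* mutable field */
        desc->private_data = NULL;              /* write-only field */
        return 0;
}

static const struct file_operations test_fops = {
        .mmap_prepare = test_mmap_prepare,      /* .mmap left NULL: hooks are exclusive */
};

static int exercise_mmap_prepare(void)
{
        struct file file = { .f_op = &test_fops };
        struct vm_area_desc desc = {
                .start = 0x1000,
                .end = 0x2000,
                .vm_flags = VM_READ,
        };

        if (!file_has_valid_mmap_hooks(&file))
                return -EINVAL;

        /* call_mmap(&file, vma) would WARN and return -EINVAL for this file. */
        return __call_mmap_prepare(&file, &desc);
}

Either hook may be set, but never both; a driver that migrates to mmap_prepare() manipulates the vm_area_desc and never sees a partially constructed vm_area_struct.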
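
Usage note (not part of the commit): the kmem_cache stubs and kmem_cache_args mirror the kernel's cache-creation interface so a SLAB_TYPESAFE_BY_RCU-style cache can keep its free pointer inside the object, over the new vm_freeptr member. A minimal sketch under that assumption; vma_cachep, vma_cache_init() and vma_cache_alloc() are illustrative names, not taken from vma.c:

#include <stddef.h>
#include "vma_internal.h"

static struct kmem_cache *vma_cachep;

/* Place the allocator's free pointer over vm_freeptr so object reuse under
 * SLAB_TYPESAFE_BY_RCU-style recycling does not clobber live VMA fields. */
static struct kmem_cache_args vma_cache_args = {
        .use_freeptr_offset = true,
        .freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
};

static void vma_cache_init(void)
{
        vma_cachep = kmem_cache_create("vm_area_struct",
                                       sizeof(struct vm_area_struct),
                                       &vma_cache_args);
}

static struct vm_area_struct *vma_cache_alloc(struct mm_struct *mm)
{
        /* The stub ignores gfp flags and returns zeroed memory. */
        struct vm_area_struct *vma = kmem_cache_alloc(vma_cachep, 0);

        if (vma)
                vma_init(vma, mm);
        return vma;
}

The stub kmem_cache_alloc() is just a calloc() wrapper, so freed objects are not actually recycled here; the args plumbing exists so the userland tests compile against the same cache-creation calls as the kernel.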