diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/compiler-gcc.h | 2 | ||||
-rw-r--r-- | include/linux/execmem.h | 54 | ||||
-rw-r--r-- | include/linux/f2fs_fs.h | 2 | ||||
-rw-r--r-- | include/linux/fscrypt.h | 10 | ||||
-rw-r--r-- | include/linux/io-mapping.h | 3 | ||||
-rw-r--r-- | include/linux/mm.h | 6 | ||||
-rw-r--r-- | include/linux/mmap_lock.h | 30 | ||||
-rw-r--r-- | include/linux/page-flags.h | 2 | ||||
-rw-r--r-- | include/linux/pgtable.h | 45 | ||||
-rw-r--r-- | include/linux/rmap.h | 22 | ||||
-rw-r--r-- | include/linux/sprintf.h | 2 |
11 files changed, 134 insertions, 44 deletions
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 32048052c64a..5d07c469b571 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -127,6 +127,8 @@ #define __diag_GCC_8(s) #endif +#define __diag_GCC_all(s) __diag(s) + #define __diag_ignore_all(option, comment) \ __diag(__diag_GCC_ignore option) diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 3be35680a54f..7de229134e30 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -60,27 +60,11 @@ enum execmem_range_flags { * will trap * @ptr: pointer to memory to fill * @size: size of the range to fill - * @writable: is the memory poited by @ptr is writable or ROX * * A hook for architecures to fill execmem ranges with invalid instructions. * Architectures that use EXECMEM_ROX_CACHE must implement this. */ -void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); - -/** - * execmem_make_temp_rw - temporarily remap region with read-write - * permissions - * @ptr: address of the region to remap - * @size: size of the region to remap - * - * Remaps a part of the cached large page in the ROX cache in the range - * [@ptr, @ptr + @size) as writable and not executable. The caller must - * have exclusive ownership of this range and ensure nothing will try to - * execute code in this range. - * - * Return: 0 on success or negative error code on failure. - */ -int execmem_make_temp_rw(void *ptr, size_t size); +void execmem_fill_trapping_insns(void *ptr, size_t size); /** * execmem_restore_rox - restore read-only-execute permissions @@ -95,7 +79,6 @@ int execmem_make_temp_rw(void *ptr, size_t size); */ int execmem_restore_rox(void *ptr, size_t size); #else -static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } #endif @@ -166,6 +149,28 @@ struct execmem_info *execmem_arch_setup(void); void *execmem_alloc(enum execmem_type type, size_t size); /** + * execmem_alloc_rw - allocate writable executable memory + * @type: type of the allocation + * @size: how many bytes of memory are required + * + * Allocates memory that will contain executable code, either generated or + * loaded from kernel modules. + * + * Allocates memory that will contain data coupled with executable code, + * like data sections in kernel modules. + * + * Forces writable permissions on the allocated memory and the caller is + * responsible to manage the permissions afterwards. + * + * For architectures that use ROX cache the permissions will be set to R+W. + * For architectures that don't use ROX cache the default permissions for @type + * will be used as they must be writable. + * + * Return: a pointer to the allocated memory or %NULL + */ +void *execmem_alloc_rw(enum execmem_type type, size_t size); + +/** * execmem_free - free executable memory * @ptr: pointer to the memory that should be freed */ @@ -186,19 +191,6 @@ struct vm_struct *execmem_vmap(size_t size); #endif /** - * execmem_update_copy - copy an update to executable memory - * @dst: destination address to update - * @src: source address containing the data - * @size: how many bytes of memory shold be copied - * - * Copy @size bytes from @src to @dst using text poking if the memory at - * @dst is read-only. - * - * Return: a pointer to @dst or NULL on error - */ -void *execmem_update_copy(void *dst, const void *src, size_t size); - -/** * execmem_is_rox - check if execmem is read-only * @type - the execmem type to check * diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 5206d63b3386..2f8b8bfc0e73 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -268,7 +268,7 @@ struct node_footer { /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(page, inode) (addrs_per_page(inode, IS_INODE(page))) +#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(folio))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 8d0e3ad89b94..10dd161690a2 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -332,12 +332,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return (struct page *)page_private(bounce_page); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return folio->mapping == NULL; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { return bounce_folio->private; } @@ -517,12 +518,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return ERR_PTR(-EINVAL); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return false; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 7376c1df9c90..c16353cc6e3c 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -225,7 +225,4 @@ io_mapping_free(struct io_mapping *iomap) kfree(iomap); } -int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn, unsigned long size); - #endif /* _LINUX_IO_MAPPING_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 349f0d9aad22..1ae97a0b8ec7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -414,8 +414,10 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_64BIT -/* VM is sealed, in vm_flags */ -#define VM_SEALED _BITUL(63) +#define VM_SEALED_BIT 42 +#define VM_SEALED BIT(VM_SEALED_BIT) +#else +#define VM_SEALED VM_NONE #endif /* Bits set in the VMA until the stack is in its final location */ diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 1f4f44951abe..11a078de9150 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -12,6 +12,7 @@ extern int rcuwait_wake_up(struct rcuwait *w); #include <linux/tracepoint-defs.h> #include <linux/types.h> #include <linux/cleanup.h> +#include <linux/sched/mm.h> #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), @@ -154,6 +155,10 @@ static inline void vma_refcount_put(struct vm_area_struct *vma) * reused and attached to a different mm before we lock it. * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got * detached. + * + * WARNING! The vma passed to this function cannot be used if the function + * fails to lock it because in certain cases RCU lock is dropped and then + * reacquired. Once RCU lock is dropped the vma can be concurently freed. */ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, struct vm_area_struct *vma) @@ -183,6 +188,31 @@ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, } rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + + /* + * If vma got attached to another mm from under us, that mm is not + * stable and can be freed in the narrow window after vma->vm_refcnt + * is dropped and before rcuwait_wake_up(mm) is called. Grab it before + * releasing vma->vm_refcnt. + */ + if (unlikely(vma->vm_mm != mm)) { + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *other_mm = vma->vm_mm; + + /* + * __mmdrop() is a heavy operation and we don't need RCU + * protection here. Release RCU lock during these operations. + * We reinstate the RCU read lock as the caller expects it to + * be held when this function returns even on error. + */ + rcu_read_unlock(); + mmgrab(other_mm); + vma_refcount_put(vma); + mmdrop(other_mm); + rcu_read_lock(); + return NULL; + } + /* * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. * False unlocked result is impossible because we modify and check diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8e4d6eda8a8d..8d3fa3a91ce4 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -837,8 +837,6 @@ void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) -#define folio_start_writeback_keepwrite(folio) \ - __folio_start_writeback(folio, true) static __always_inline bool folio_test_head(const struct folio *folio) { diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e3b99920be05..4c035637eeb7 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -736,6 +736,29 @@ static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, } #endif +/** + * get_and_clear_ptes - Clear present PTEs that map consecutive pages of + * the same folio, collecting dirty/accessed bits. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of get_and_clear_full_ptes() if it is known that we don't + * need to clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline pte_t get_and_clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + return get_and_clear_full_ptes(mm, addr, ptep, nr, 0); +} + #ifndef clear_full_ptes /** * clear_full_ptes - Clear present PTEs that map consecutive pages of the same @@ -768,6 +791,28 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, } #endif +/** + * clear_ptes - Clear present PTEs that map consecutive pages of the same folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of clear_full_ptes() if it is known that we don't need to + * clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + clear_full_ptes(mm, addr, ptep, nr, 0); +} + /* * If two threads concurrently fault at the same page, the thread that * won the race updates the PTE and its local TLB/Cache. The other thread diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 20803fcb49a7..6cd020eea37a 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -449,6 +449,28 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, default: VM_WARN_ON_ONCE(true); } + + /* + * Anon folios must have an associated live anon_vma as long as they're + * mapped into userspace. + * Note that the atomic_read() mainly does two things: + * + * 1. In KASAN builds with CONFIG_SLUB_RCU_DEBUG, it causes KASAN to + * check that the associated anon_vma has not yet been freed (subject + * to KASAN's usual limitations). This check will pass if the + * anon_vma's refcount has already dropped to 0 but an RCU grace + * period hasn't passed since then. + * 2. If the anon_vma has not yet been freed, it checks that the + * anon_vma still has a nonzero refcount (as opposed to being in the + * middle of an RCU delay for getting freed). + */ + if (folio_test_anon(folio) && !folio_test_ksm(folio)) { + unsigned long mapping = (unsigned long)folio->mapping; + struct anon_vma *anon_vma; + + anon_vma = (void *)(mapping - FOLIO_MAPPING_ANON); + VM_WARN_ON_FOLIO(atomic_read(&anon_vma->refcount) == 0, folio); + } } /* diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h index 876130091384..f06f7b785091 100644 --- a/include/linux/sprintf.h +++ b/include/linux/sprintf.h @@ -23,7 +23,7 @@ __scanf(2, 0) int vsscanf(const char *, const char *, va_list); /* These are for specific cases, do not use without real need */ extern bool no_hash_pointers; -int no_hash_pointers_enable(char *str); +void hash_pointers_finalize(bool slub_debug); /* Used for Rust formatting ('%pA') */ char *rust_fmt_argument(char *buf, char *end, const void *ptr); |