summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/compiler-gcc.h2
-rw-r--r--include/linux/execmem.h54
-rw-r--r--include/linux/f2fs_fs.h2
-rw-r--r--include/linux/fscrypt.h10
-rw-r--r--include/linux/io-mapping.h3
-rw-r--r--include/linux/mm.h6
-rw-r--r--include/linux/mmap_lock.h30
-rw-r--r--include/linux/page-flags.h2
-rw-r--r--include/linux/pgtable.h45
-rw-r--r--include/linux/rmap.h22
-rw-r--r--include/linux/sprintf.h2
11 files changed, 134 insertions, 44 deletions
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 32048052c64a..5d07c469b571 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -127,6 +127,8 @@
#define __diag_GCC_8(s)
#endif
+#define __diag_GCC_all(s) __diag(s)
+
#define __diag_ignore_all(option, comment) \
__diag(__diag_GCC_ignore option)
diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index 3be35680a54f..7de229134e30 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -60,27 +60,11 @@ enum execmem_range_flags {
* will trap
* @ptr: pointer to memory to fill
* @size: size of the range to fill
- * @writable: is the memory poited by @ptr is writable or ROX
*
* A hook for architecures to fill execmem ranges with invalid instructions.
* Architectures that use EXECMEM_ROX_CACHE must implement this.
*/
-void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable);
-
-/**
- * execmem_make_temp_rw - temporarily remap region with read-write
- * permissions
- * @ptr: address of the region to remap
- * @size: size of the region to remap
- *
- * Remaps a part of the cached large page in the ROX cache in the range
- * [@ptr, @ptr + @size) as writable and not executable. The caller must
- * have exclusive ownership of this range and ensure nothing will try to
- * execute code in this range.
- *
- * Return: 0 on success or negative error code on failure.
- */
-int execmem_make_temp_rw(void *ptr, size_t size);
+void execmem_fill_trapping_insns(void *ptr, size_t size);
/**
* execmem_restore_rox - restore read-only-execute permissions
@@ -95,7 +79,6 @@ int execmem_make_temp_rw(void *ptr, size_t size);
*/
int execmem_restore_rox(void *ptr, size_t size);
#else
-static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; }
static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; }
#endif
@@ -166,6 +149,28 @@ struct execmem_info *execmem_arch_setup(void);
void *execmem_alloc(enum execmem_type type, size_t size);
/**
+ * execmem_alloc_rw - allocate writable executable memory
+ * @type: type of the allocation
+ * @size: how many bytes of memory are required
+ *
+ * Allocates memory that will contain executable code, either generated or
+ * loaded from kernel modules.
+ *
+ * Allocates memory that will contain data coupled with executable code,
+ * like data sections in kernel modules.
+ *
+ * Forces writable permissions on the allocated memory and the caller is
+ * responsible for managing the permissions afterwards.
+ *
+ * For architectures that use ROX cache the permissions will be set to R+W.
+ * For architectures that don't use ROX cache the default permissions for @type
+ * will be used as they must be writable.
+ *
+ * Return: a pointer to the allocated memory or %NULL
+ */
+void *execmem_alloc_rw(enum execmem_type type, size_t size);
+
+/**
* execmem_free - free executable memory
* @ptr: pointer to the memory that should be freed
*/
@@ -186,19 +191,6 @@ struct vm_struct *execmem_vmap(size_t size);
#endif
/**
- * execmem_update_copy - copy an update to executable memory
- * @dst: destination address to update
- * @src: source address containing the data
- * @size: how many bytes of memory shold be copied
- *
- * Copy @size bytes from @src to @dst using text poking if the memory at
- * @dst is read-only.
- *
- * Return: a pointer to @dst or NULL on error
- */
-void *execmem_update_copy(void *dst, const void *src, size_t size);
-
-/**
* execmem_is_rox - check if execmem is read-only
* @type - the execmem type to check
*
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 5206d63b3386..2f8b8bfc0e73 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -268,7 +268,7 @@ struct node_footer {
/* Node IDs in an Indirect Block */
#define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32))
-#define ADDRS_PER_PAGE(page, inode) (addrs_per_page(inode, IS_INODE(page)))
+#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(folio)))
#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1)
#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2)
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 8d0e3ad89b94..10dd161690a2 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -332,12 +332,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page)
return (struct page *)page_private(bounce_page);
}
-static inline bool fscrypt_is_bounce_folio(struct folio *folio)
+static inline bool fscrypt_is_bounce_folio(const struct folio *folio)
{
return folio->mapping == NULL;
}
-static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio)
+static inline
+struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio)
{
return bounce_folio->private;
}
@@ -517,12 +518,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page)
return ERR_PTR(-EINVAL);
}
-static inline bool fscrypt_is_bounce_folio(struct folio *folio)
+static inline bool fscrypt_is_bounce_folio(const struct folio *folio)
{
return false;
}
-static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio)
+static inline
+struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio)
{
WARN_ON_ONCE(1);
return ERR_PTR(-EINVAL);
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 7376c1df9c90..c16353cc6e3c 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -225,7 +225,4 @@ io_mapping_free(struct io_mapping *iomap)
kfree(iomap);
}
-int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma,
- unsigned long addr, unsigned long pfn, unsigned long size);
-
#endif /* _LINUX_IO_MAPPING_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 349f0d9aad22..1ae97a0b8ec7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -414,8 +414,10 @@ extern unsigned int kobjsize(const void *objp);
#endif
#ifdef CONFIG_64BIT
-/* VM is sealed, in vm_flags */
-#define VM_SEALED _BITUL(63)
+#define VM_SEALED_BIT 42
+#define VM_SEALED BIT(VM_SEALED_BIT)
+#else
+#define VM_SEALED VM_NONE
#endif
/* Bits set in the VMA until the stack is in its final location */
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 1f4f44951abe..11a078de9150 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -12,6 +12,7 @@ extern int rcuwait_wake_up(struct rcuwait *w);
#include <linux/tracepoint-defs.h>
#include <linux/types.h>
#include <linux/cleanup.h>
+#include <linux/sched/mm.h>
#define MMAP_LOCK_INITIALIZER(name) \
.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
@@ -154,6 +155,10 @@ static inline void vma_refcount_put(struct vm_area_struct *vma)
* reused and attached to a different mm before we lock it.
* Returns the vma on success, NULL on failure to lock and EAGAIN if vma got
* detached.
+ *
+ * WARNING! The vma passed to this function cannot be used if the function
+ * fails to lock it because in certain cases RCU lock is dropped and then
+ * reacquired. Once RCU lock is dropped the vma can be concurrently freed.
*/
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
struct vm_area_struct *vma)
@@ -183,6 +188,31 @@ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
}
rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
+
+ /*
+ * If vma got attached to another mm from under us, that mm is not
+ * stable and can be freed in the narrow window after vma->vm_refcnt
+ * is dropped and before rcuwait_wake_up(mm) is called. Grab it before
+ * releasing vma->vm_refcnt.
+ */
+ if (unlikely(vma->vm_mm != mm)) {
+ /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
+ struct mm_struct *other_mm = vma->vm_mm;
+
+ /*
+ * __mmdrop() is a heavy operation and we don't need RCU
+ * protection here. Release RCU lock during these operations.
+ * We reinstate the RCU read lock as the caller expects it to
+ * be held when this function returns even on error.
+ */
+ rcu_read_unlock();
+ mmgrab(other_mm);
+ vma_refcount_put(vma);
+ mmdrop(other_mm);
+ rcu_read_lock();
+ return NULL;
+ }
+
/*
* Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
* False unlocked result is impossible because we modify and check
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 8e4d6eda8a8d..8d3fa3a91ce4 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -837,8 +837,6 @@ void set_page_writeback(struct page *page);
#define folio_start_writeback(folio) \
__folio_start_writeback(folio, false)
-#define folio_start_writeback_keepwrite(folio) \
- __folio_start_writeback(folio, true)
static __always_inline bool folio_test_head(const struct folio *folio)
{
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index e3b99920be05..4c035637eeb7 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -736,6 +736,29 @@ static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
}
#endif
+/**
+ * get_and_clear_ptes - Clear present PTEs that map consecutive pages of
+ * the same folio, collecting dirty/accessed bits.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to clear.
+ *
+ * Use this instead of get_and_clear_full_ptes() if it is known that we don't
+ * need to clear the full mm, which is mostly the case.
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock. The PTEs map consecutive
+ * pages that belong to the same folio. The PTEs are all in the same PMD.
+ */
+static inline pte_t get_and_clear_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr)
+{
+ return get_and_clear_full_ptes(mm, addr, ptep, nr, 0);
+}
+
#ifndef clear_full_ptes
/**
* clear_full_ptes - Clear present PTEs that map consecutive pages of the same
@@ -768,6 +791,28 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
}
#endif
+/**
+ * clear_ptes - Clear present PTEs that map consecutive pages of the same folio.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to clear.
+ *
+ * Use this instead of clear_full_ptes() if it is known that we don't need to
+ * clear the full mm, which is mostly the case.
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock. The PTEs map consecutive
+ * pages that belong to the same folio. The PTEs are all in the same PMD.
+ */
+static inline void clear_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr)
+{
+ clear_full_ptes(mm, addr, ptep, nr, 0);
+}
+
/*
* If two threads concurrently fault at the same page, the thread that
* won the race updates the PTE and its local TLB/Cache. The other thread
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 20803fcb49a7..6cd020eea37a 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -449,6 +449,28 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio,
default:
VM_WARN_ON_ONCE(true);
}
+
+ /*
+ * Anon folios must have an associated live anon_vma as long as they're
+ * mapped into userspace.
+ * Note that the atomic_read() mainly does two things:
+ *
+ * 1. In KASAN builds with CONFIG_SLUB_RCU_DEBUG, it causes KASAN to
+ * check that the associated anon_vma has not yet been freed (subject
+ * to KASAN's usual limitations). This check will pass if the
+ * anon_vma's refcount has already dropped to 0 but an RCU grace
+ * period hasn't passed since then.
+ * 2. If the anon_vma has not yet been freed, it checks that the
+ * anon_vma still has a nonzero refcount (as opposed to being in the
+ * middle of an RCU delay for getting freed).
+ */
+ if (folio_test_anon(folio) && !folio_test_ksm(folio)) {
+ unsigned long mapping = (unsigned long)folio->mapping;
+ struct anon_vma *anon_vma;
+
+ anon_vma = (void *)(mapping - FOLIO_MAPPING_ANON);
+ VM_WARN_ON_FOLIO(atomic_read(&anon_vma->refcount) == 0, folio);
+ }
}
/*
diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h
index 876130091384..f06f7b785091 100644
--- a/include/linux/sprintf.h
+++ b/include/linux/sprintf.h
@@ -23,7 +23,7 @@ __scanf(2, 0) int vsscanf(const char *, const char *, va_list);
/* These are for specific cases, do not use without real need */
extern bool no_hash_pointers;
-int no_hash_pointers_enable(char *str);
+void hash_pointers_finalize(bool slub_debug);
/* Used for Rust formatting ('%pA') */
char *rust_fmt_argument(char *buf, char *end, const void *ptr);