Diffstat (limited to 'include/linux')
89 files changed, 1296 insertions, 831 deletions
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 8f7931eb7d16..9ef2633e2c08 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -88,7 +88,7 @@ static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct) return container_of(ct, struct alloc_tag, ct); } -#ifdef ARCH_NEEDS_WEAK_PER_CPU +#if defined(CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU) && defined(MODULE) /* * When percpu variables are required to be defined as weak, static percpu * variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION). @@ -102,7 +102,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); .ct = CODE_TAG_INIT, \ .counters = &_shared_alloc_tag }; -#else /* ARCH_NEEDS_WEAK_PER_CPU */ +#else /* CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU && MODULE */ #ifdef MODULE @@ -123,7 +123,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #endif /* MODULE */ -#endif /* ARCH_NEEDS_WEAK_PER_CPU */ +#endif /* CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU && MODULE */ DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, mem_alloc_profiling_key); diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index 5ca2d5699620..7cfe48769239 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -4,12 +4,13 @@ * * Common interface definitions for making balloon pages movable by compaction. * - * Balloon page migration makes use of the general non-lru movable page + * Balloon page migration makes use of the general "movable_ops page migration" * feature. * * page->private is used to reference the responsible balloon device. - * page->mapping is used in context of non-lru page migration to reference - * the address space operations for page isolation/migration/compaction. + * That these pages have movable_ops, and which movable_ops apply, + * is derived from the page type (PageOffline()) combined with the + * PG_movable_ops flag (PageMovableOps()). * * As the page isolation scanning step a compaction thread does is a lockless * procedure (from a page standpoint), it might bring some racy situations while @@ -17,12 +18,10 @@ * and safely perform balloon's page compaction and migration we must, always, * ensure following these simple rules: * - * i. when updating a balloon's page ->mapping element, strictly do it under - * the following lock order, independently of the far superior - * locking scheme (lru_lock, balloon_lock): + * i. Setting the PG_movable_ops flag and page->private with the following + * lock order * +-page_lock(page); * +--spin_lock_irq(&b_dev_info->pages_lock); - * ... page->mapping updates here ... * * ii. isolation or dequeueing procedure must remove the page from balloon * device page list under b_dev_info->pages_lock. @@ -78,6 +77,15 @@ static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) #ifdef CONFIG_BALLOON_COMPACTION extern const struct movable_operations balloon_mops; +/* + * balloon_page_device - get the b_dev_info descriptor for the balloon device + * that enqueues the given page. 
+ */ +static inline struct balloon_dev_info *balloon_page_device(struct page *page) +{ + return (struct balloon_dev_info *)page_private(page); +} +#endif /* CONFIG_BALLOON_COMPACTION */ /* * balloon_page_insert - insert a page into the balloon's page list and make @@ -92,68 +100,34 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon, struct page *page) { __SetPageOffline(page); - __SetPageMovable(page, &balloon_mops); - set_page_private(page, (unsigned long)balloon); + if (IS_ENABLED(CONFIG_BALLOON_COMPACTION)) { + SetPageMovableOps(page); + set_page_private(page, (unsigned long)balloon); + } list_add(&page->lru, &balloon->pages); } -/* - * balloon_page_delete - delete a page from balloon's page list and clear - * the page->private assignement accordingly. - * @page : page to be released from balloon's page list - * - * Caller must ensure the page is locked and the spin_lock protecting balloon - * pages list is held before deleting a page from the balloon device. - */ -static inline void balloon_page_delete(struct page *page) +static inline gfp_t balloon_mapping_gfp_mask(void) { - __ClearPageOffline(page); - __ClearPageMovable(page); - set_page_private(page, 0); - /* - * No touch page.lru field once @page has been isolated - * because VM is using the field. - */ - if (!PageIsolated(page)) - list_del(&page->lru); + if (IS_ENABLED(CONFIG_BALLOON_COMPACTION)) + return GFP_HIGHUSER_MOVABLE; + return GFP_HIGHUSER; } /* - * balloon_page_device - get the b_dev_info descriptor for the balloon device - * that enqueues the given page. + * balloon_page_finalize - prepare a balloon page that was removed from the + * balloon list for release to the page allocator + * @page: page to be released to the page allocator + * + * Caller must ensure that the page is locked. */ -static inline struct balloon_dev_info *balloon_page_device(struct page *page) +static inline void balloon_page_finalize(struct page *page) { - return (struct balloon_dev_info *)page_private(page); + if (IS_ENABLED(CONFIG_BALLOON_COMPACTION)) + set_page_private(page, 0); + /* PageOffline is sticky until the page is freed to the buddy. */ } -static inline gfp_t balloon_mapping_gfp_mask(void) -{ - return GFP_HIGHUSER_MOVABLE; -} - -#else /* !CONFIG_BALLOON_COMPACTION */ - -static inline void balloon_page_insert(struct balloon_dev_info *balloon, - struct page *page) -{ - __SetPageOffline(page); - list_add(&page->lru, &balloon->pages); -} - -static inline void balloon_page_delete(struct page *page) -{ - __ClearPageOffline(page); - list_del(&page->lru); -} - -static inline gfp_t balloon_mapping_gfp_mask(void) -{ - return GFP_HIGHUSER; -} - -#endif /* CONFIG_BALLOON_COMPACTION */ - /* * balloon_page_push - insert a page into a page list. 
* @head : pointer to list diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 6d9a53db54b6..5355f8f806a9 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -189,14 +189,14 @@ static __always_inline u64 field_mask(u64 field) } #define field_max(field) ((typeof(field))field_mask(field)) #define ____MAKE_OP(type,base,to,from) \ -static __always_inline __##type type##_encode_bits(base v, base field) \ +static __always_inline __##type __must_check type##_encode_bits(base v, base field) \ { \ if (__builtin_constant_p(v) && (v & ~field_mask(field))) \ __field_overflow(); \ return to((v & field_mask(field)) * field_multiplier(field)); \ } \ -static __always_inline __##type type##_replace_bits(__##type old, \ - base val, base field) \ +static __always_inline __##type __must_check type##_replace_bits(__##type old, \ + base val, base field) \ { \ return (old & ~to(field)) | type##_encode_bits(val, field); \ } \ @@ -205,7 +205,7 @@ static __always_inline void type##p_replace_bits(__##type *p, \ { \ *p = (*p & ~to(field)) | type##_encode_bits(val, field); \ } \ -static __always_inline base type##_get_bits(__##type v, base field) \ +static __always_inline base __must_check type##_get_bits(__##type v, base field) \ { \ return (from(v) & field)/field_multiplier(field); \ } diff --git a/include/linux/bits.h b/include/linux/bits.h index 7ad056219115..a40cc861b3a7 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -2,10 +2,8 @@ #ifndef __LINUX_BITS_H #define __LINUX_BITS_H -#include <linux/const.h> #include <vdso/bits.h> #include <uapi/linux/bits.h> -#include <asm/bitsperlong.h> #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) @@ -50,10 +48,14 @@ (type_max(t) << (l) & \ type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) +#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l) +#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l) + #define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) #define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) #define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) #define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) +#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l) /* * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The @@ -79,28 +81,9 @@ * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ -#define GENMASK_INPUT_CHECK(h, l) 0 +#define GENMASK(h, l) __GENMASK(h, l) +#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l) #endif /* !defined(__ASSEMBLY__) */ -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) - -#if !defined(__ASSEMBLY__) -/* - * Missing asm support - * - * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __int128' data - * type instead of direct representation of 128 bit constants - * such as long and unsigned long. The fundamental problem is - * that a 128 bit constant will get silently truncated by the - * gcc compiler. 
- */ -#define GENMASK_U128(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) -#endif - #endif /* __LINUX_BITS_H */ diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 082ccd8ad96b..aedf573bdb42 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -77,9 +77,6 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) extern struct static_key_false cgroup_bpf_enabled_key[MAX_CGROUP_BPF_ATTACH_TYPE]; #define cgroup_bpf_enabled(atype) static_branch_unlikely(&cgroup_bpf_enabled_key[atype]) -#define for_each_cgroup_storage_type(stype) \ - for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) - struct bpf_cgroup_storage_map; struct bpf_storage_buffer { @@ -510,8 +507,6 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) -#define for_each_cgroup_storage_type(stype) for (; false; ) - #endif /* CONFIG_CGROUP_BPF */ #endif /* _BPF_CGROUP_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f9cd2164ed23..cc700925b802 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -208,6 +208,20 @@ enum btf_field_type { BPF_RES_SPIN_LOCK = (1 << 12), }; +enum bpf_cgroup_storage_type { + BPF_CGROUP_STORAGE_SHARED, + BPF_CGROUP_STORAGE_PERCPU, + __BPF_CGROUP_STORAGE_MAX +#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX +}; + +#ifdef CONFIG_CGROUP_BPF +# define for_each_cgroup_storage_type(stype) \ + for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) +#else +# define for_each_cgroup_storage_type(stype) for (; false; ) +#endif /* CONFIG_CGROUP_BPF */ + typedef void (*btf_dtor_kfunc_t)(void *); struct btf_field_kptr { @@ -260,6 +274,19 @@ struct bpf_list_node_kern { void *owner; } __attribute__((aligned(8))); +/* 'Ownership' of program-containing map is claimed by the first program + * that is going to use this map or by the first program which FD is + * stored in the map to make sure that all callers and callees have the + * same prog type, JITed flag and xdp_has_frags flag. + */ +struct bpf_map_owner { + enum bpf_prog_type type; + bool jited; + bool xdp_has_frags; + u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE]; + const struct btf_type *attach_func_proto; +}; + struct bpf_map { const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; @@ -292,24 +319,15 @@ struct bpf_map { struct rcu_head rcu; }; atomic64_t writecnt; - /* 'Ownership' of program-containing map is claimed by the first program - * that is going to use this map or by the first program which FD is - * stored in the map to make sure that all callers and callees have the - * same prog type, JITed flag and xdp_has_frags flag. - */ - struct { - const struct btf_type *attach_func_proto; - spinlock_t lock; - enum bpf_prog_type type; - bool jited; - bool xdp_has_frags; - } owner; + spinlock_t owner_lock; + struct bpf_map_owner *owner; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ bool free_after_mult_rcu_gp; bool free_after_rcu_gp; atomic64_t sleepable_refcnt; s64 __percpu *elem_count; + u64 cookie; /* write-once */ }; static inline const char *btf_field_type_name(enum btf_field_type type) @@ -1082,14 +1100,6 @@ struct bpf_prog_offload { u32 jited_len; }; -enum bpf_cgroup_storage_type { - BPF_CGROUP_STORAGE_SHARED, - BPF_CGROUP_STORAGE_PERCPU, - __BPF_CGROUP_STORAGE_MAX -}; - -#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX - /* The longest tracepoint has 12 args. 
* See include/trace/bpf_probe.h */ @@ -2108,6 +2118,16 @@ static inline bool bpf_map_flags_access_ok(u32 access_flags) (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); } +static inline struct bpf_map_owner *bpf_map_owner_alloc(struct bpf_map *map) +{ + return kzalloc(sizeof(*map->owner), GFP_ATOMIC); +} + +static inline void bpf_map_owner_free(struct bpf_map *map) +{ + kfree(map->owner); +} + struct bpf_event_entry { struct perf_event *event; struct file *perf_file; diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 1db17ecbb86c..52a98886a455 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -11,16 +11,9 @@ #include <linux/module.h> #include <asm/cfi.h> +#ifdef CONFIG_CFI_CLANG extern bool cfi_warn; -#ifndef cfi_get_offset -static inline int cfi_get_offset(void) -{ - return 0; -} -#endif - -#ifdef CONFIG_CFI_CLANG enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, unsigned long *target, u32 type); @@ -29,6 +22,44 @@ static inline enum bug_trap_type report_cfi_failure_noaddr(struct pt_regs *regs, { return report_cfi_failure(regs, addr, NULL, 0); } + +#ifndef cfi_get_offset +/* + * Returns the CFI prefix offset. By default, the compiler emits only + * a 4-byte CFI type hash before the function. If an architecture + * uses -fpatchable-function-entry=N,M where M>0 to change the prefix + * offset, they must override this function. + */ +static inline int cfi_get_offset(void) +{ + return 4; +} +#endif + +#ifndef cfi_get_func_hash +static inline u32 cfi_get_func_hash(void *func) +{ + u32 hash; + + if (get_kernel_nofault(hash, func - cfi_get_offset())) + return 0; + + return hash; +} +#endif + +/* CFI type hashes for BPF function types */ +extern u32 cfi_bpf_hash; +extern u32 cfi_bpf_subprog_hash; + +#else /* CONFIG_CFI_CLANG */ + +static inline int cfi_get_offset(void) { return 0; } +static inline u32 cfi_get_func_hash(void *func) { return 0; } + +#define cfi_bpf_hash 0U +#define cfi_bpf_subprog_hash 0U + #endif /* CONFIG_CFI_CLANG */ #ifdef CONFIG_ARCH_USES_CFI_TRAPS diff --git a/include/linux/cfi_types.h b/include/linux/cfi_types.h index 6b8713675765..685f7181780f 100644 --- a/include/linux/cfi_types.h +++ b/include/linux/cfi_types.h @@ -41,5 +41,28 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_CFI_CLANG +#define DEFINE_CFI_TYPE(name, func) \ + /* \ + * Force a reference to the function so the compiler generates \ + * __kcfi_typeid_<func>. \ + */ \ + __ADDRESSABLE(func); \ + /* u32 name __ro_after_init = __kcfi_typeid_<func> */ \ + extern u32 name; \ + asm ( \ + " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \ + " .type " #name ",\%object \n" \ + " .globl " #name " \n" \ + " .p2align 2, 0x0 \n" \ + #name ": \n" \ + " .4byte __kcfi_typeid_" #func " \n" \ + " .size " #name ", 4 \n" \ + " .popsection \n" \ + ); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_CFI_TYPES_H */ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index e61687d5e496..6b93a64115fe 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -375,15 +375,12 @@ struct css_rstat_cpu { * Child cgroups with stat updates on this cpu since the last read * are linked on the parent's ->updated_children through * ->updated_next. updated_children is terminated by its container css. - * - * In addition to being more compact, singly-linked list pointing to - * the css makes it unnecessary for each per-cpu struct to point back - * to the associated css. 
- * - * Protected by per-cpu css->ss->rstat_ss_cpu_lock. */ struct cgroup_subsys_state *updated_children; struct cgroup_subsys_state *updated_next; /* NULL if not on the list */ + + struct llist_node lnode; /* lockless list for update */ + struct cgroup_subsys_state *owner; /* back pointer */ }; /* @@ -821,7 +818,7 @@ struct cgroup_subsys { unsigned int depends_on; spinlock_t rstat_ss_lock; - raw_spinlock_t __percpu *rstat_ss_cpu_lock; + struct llist_head __percpu *lhead; /* lockless update list head */ }; extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; @@ -898,14 +895,12 @@ static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) #endif } +#ifdef CONFIG_CGROUP_NET_CLASSID static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) { -#ifdef CONFIG_CGROUP_NET_CLASSID return READ_ONCE(skcd->classid); -#else - return 0; -#endif } +#endif static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, u16 prioidx) @@ -915,13 +910,13 @@ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, #endif } +#ifdef CONFIG_CGROUP_NET_CLASSID static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, u32 classid) { -#ifdef CONFIG_CGROUP_NET_CLASSID WRITE_ONCE(skcd->classid, classid); -#endif } +#endif #else /* CONFIG_SOCK_CGROUP_DATA */ diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index bee606bebaca..2573585b7f06 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -3,6 +3,8 @@ #define _LINUX_CLEANUP_H #include <linux/compiler.h> +#include <linux/err.h> +#include <linux/args.h> /** * DOC: scope-based cleanup helpers @@ -61,9 +63,20 @@ * Observe the lock is held for the remainder of the "if ()" block not * the remainder of "func()". * - * Now, when a function uses both __free() and guard(), or multiple - * instances of __free(), the LIFO order of variable definition order - * matters. GCC documentation says: + * The ACQUIRE() macro can be used in all places that guard() can be + * used and additionally support conditional locks:: + * + * DEFINE_GUARD_COND(pci_dev, _try, pci_dev_trylock(_T)) + * ... + * ACQUIRE(pci_dev_try, lock)(dev); + * rc = ACQUIRE_ERR(pci_dev_try, &lock); + * if (rc) + * return rc; + * // @lock is held + * + * Now, when a function uses both __free() and guard()/ACQUIRE(), or + * multiple instances of __free(), the LIFO order of variable definition + * order matters. GCC documentation says: * * "When multiple variables in the same scope have cleanup attributes, * at exit from the scope their associated cleanup functions are run in @@ -313,14 +326,46 @@ _label: \ * acquire fails. * * Only for conditional locks. + * + * ACQUIRE(name, var): + * a named instance of the (guard) class, suitable for conditional + * locks when paired with ACQUIRE_ERR(). + * + * ACQUIRE_ERR(name, &var): + * a helper that is effectively a PTR_ERR() conversion of the guard + * pointer. Returns 0 when the lock was acquired and a negative + * error code otherwise. 
*/ #define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond -#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ - static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ - { return (void *)(__force unsigned long)*(_exp); } +#define __GUARD_IS_ERR(_ptr) \ + ({ \ + unsigned long _rc = (__force unsigned long)(_ptr); \ + unlikely((_rc - 1) >= -MAX_ERRNO - 1); \ + }) + +#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ + static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { \ + void *_ptr = (void *)(__force unsigned long)*(_exp); \ + if (IS_ERR(_ptr)) { \ + _ptr = NULL; \ + } \ + return _ptr; \ + } \ + static inline int class_##_name##_lock_err(class_##_name##_t *_T) \ + { \ + long _rc = (__force unsigned long)*(_exp); \ + if (!_rc) { \ + _rc = -EBUSY; \ + } \ + if (!IS_ERR_VALUE(_rc)) { \ + _rc = 0; \ + } \ + return _rc; \ + } #define DEFINE_CLASS_IS_GUARD(_name) \ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ @@ -331,23 +376,37 @@ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond __DEFINE_GUARD_LOCK_PTR(_name, _T) #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ - DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ + DEFINE_CLASS(_name, _type, if (!__GUARD_IS_ERR(_T)) { _unlock; }, ({ _lock; _T; }), _type _T); \ DEFINE_CLASS_IS_GUARD(_name) -#define DEFINE_GUARD_COND(_name, _ext, _condlock) \ +#define DEFINE_GUARD_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ - ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ + ({ void *_t = _T; int _RET = (_lock); if (_T && !(_cond)) _t = ERR_PTR(_RET); _t; }), \ class_##_name##_t _T) \ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ - { return class_##_name##_lock_ptr(_T); } + { return class_##_name##_lock_ptr(_T); } \ + static inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \ + { return class_##_name##_lock_err(_T); } + +/* + * Default binary condition; success on 'true'. + */ +#define DEFINE_GUARD_COND_3(_name, _ext, _lock) \ + DEFINE_GUARD_COND_4(_name, _ext, _lock, _RET) + +#define DEFINE_GUARD_COND(X...) CONCATENATE(DEFINE_GUARD_COND_, COUNT_ARGS(X))(X) #define guard(_name) \ CLASS(_name, __UNIQUE_ID(guard)) #define __guard_ptr(_name) class_##_name##_lock_ptr +#define __guard_err(_name) class_##_name##_lock_err #define __is_cond_ptr(_name) class_##_name##_is_conditional +#define ACQUIRE(_name, _var) CLASS(_name, _var) +#define ACQUIRE_ERR(_name, _var) __guard_err(_name)(_var) + /* * Helper macro for scoped_guard(). 
* @@ -409,7 +468,7 @@ typedef struct { \ \ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ { \ - if (_T->lock) { _unlock; } \ + if (!__GUARD_IS_ERR(_T->lock)) { _unlock; } \ } \ \ __DEFINE_GUARD_LOCK_PTR(_name, &_T->lock) @@ -441,15 +500,22 @@ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) -#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ +#define DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ - if (_T->lock && !(_condlock)) _T->lock = NULL; \ + int _RET = (_lock); \ + if (_T->lock && !(_cond)) _T->lock = ERR_PTR(_RET);\ _t; }), \ typeof_member(class_##_name##_t, lock) l) \ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ - { return class_##_name##_lock_ptr(_T); } + { return class_##_name##_lock_ptr(_T); } \ + static inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \ + { return class_##_name##_lock_err(_T); } + +#define DEFINE_LOCK_GUARD_1_COND_3(_name, _ext, _lock) \ + DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _RET) +#define DEFINE_LOCK_GUARD_1_COND(X...) CONCATENATE(DEFINE_LOCK_GUARD_1_COND_, COUNT_ARGS(X))(X) #endif /* _LINUX_CLEANUP_H */ diff --git a/include/linux/codetag.h b/include/linux/codetag.h index 5f2b9a1f722c..457ed8fd3214 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -54,6 +54,7 @@ struct codetag_iterator { struct codetag_module *cmod; unsigned long mod_id; struct codetag *ct; + unsigned long mod_seq; }; #ifdef MODULE diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 96e8a66da133..68861da4cf7c 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -10,7 +10,7 @@ #ifdef CONFIG_COREDUMP struct core_vma_metadata { unsigned long start, end; - unsigned long flags; + vm_flags_t flags; unsigned long dump_size; unsigned long pgoff; struct file *file; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d381420bbd5f..edfa61d80702 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -90,7 +90,6 @@ enum cpuhp_state { CPUHP_RADIX_DEAD, CPUHP_PAGE_ALLOC, CPUHP_NET_DEV_DEAD, - CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_IOVA_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ede95bbe8b80..ff8f41ab7ce6 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -355,6 +355,18 @@ unsigned int cpumask_next_wrap(int n, const struct cpumask *src) } /** + * cpumask_random - get random cpu in *src. + * @src: cpumask pointer + * + * Return: random set bit, or >= nr_cpu_ids if @src is empty. + */ +static __always_inline +unsigned int cpumask_random(const struct cpumask *src) +{ + return find_random_bit(cpumask_bits(src), nr_cpu_ids); +} + +/** * for_each_cpu - iterate over every cpu in a mask * @cpu: the (optionally unsigned) integer iterator * @mask: the cpumask pointer @@ -547,22 +559,6 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, } /** - * cpumask_nth_andnot - get the Nth cpu set in 1st cpumask, and clear in 2nd. - * @srcp1: the cpumask pointer - * @srcp2: the cpumask pointer - * @cpu: the Nth cpu to find, starting from 0 - * - * Return: >= nr_cpu_ids if such cpu doesn't exist. 
- */ -static __always_inline -unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, - const struct cpumask *srcp2) -{ - return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), - small_cpumask_bits, cpumask_check(cpu)); -} - -/** * cpumask_nth_and_andnot - get the Nth cpu set in 1st and 2nd cpumask, and clear in 3rd. * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer diff --git a/include/linux/damon.h b/include/linux/damon.h index a4011726cb3b..f13664c62ddd 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -448,12 +448,29 @@ struct damos_access_pattern { }; /** + * struct damos_migrate_dests - Migration destination nodes and their weights. + * @node_id_arr: Array of migration destination node ids. + * @weight_arr: Array of migration weights for @node_id_arr. + * @nr_dests: Length of the @node_id_arr and @weight_arr arrays. + * + * @node_id_arr is an array of the ids of migration destination nodes. + * @weight_arr is an array of the weights for those. The weights in + * @weight_arr are for nodes in @node_id_arr of same array index. + */ +struct damos_migrate_dests { + unsigned int *node_id_arr; + unsigned int *weight_arr; + size_t nr_dests; +}; + +/** * struct damos - Represents a Data Access Monitoring-based Operation Scheme. * @pattern: Access pattern of target regions. - * @action: &damo_action to be applied to the target regions. + * @action: &damos_action to be applied to the target regions. * @apply_interval_us: The time between applying the @action. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. + * @migrate_dests: Destination nodes if @action is "migrate_{hot,cold}". * @target_nid: Destination node if @action is "migrate_{hot,cold}". * @filters: Additional set of &struct damos_filter for &action. * @ops_filters: ops layer handling &struct damos_filter objects list. @@ -472,9 +489,12 @@ struct damos_access_pattern { * monitoring context are inactive, DAMON stops monitoring either, and just * repeatedly checks the watermarks. * + * @migrate_dests specifies multiple migration target nodes with different + * weights for migrate_hot or migrate_cold actions. @target_nid is ignored if + * this is set. + * * @target_nid is used to set the migration target node for migrate_hot or - * migrate_cold actions, which means it's only meaningful when @action is either - * "migrate_hot" or "migrate_cold". + * migrate_cold actions, and @migrate_dests is unset. * * Before applying the &action to a memory region, &struct damon_operations * implementation could check pages of the region and skip &action to respect @@ -517,7 +537,10 @@ struct damos { struct damos_quota quota; struct damos_watermarks wmarks; union { - int target_nid; + struct { + int target_nid; + struct damos_migrate_dests migrate_dests; + }; }; struct list_head filters; struct list_head ops_filters; @@ -553,6 +576,7 @@ enum damon_ops_id { * @get_scheme_score: Get the score of a region for a scheme. * @apply_scheme: Apply a DAMON-based operation scheme. * @target_valid: Determine if the target is valid. + * @cleanup_target: Clean up each target before deallocation. * @cleanup: Clean up the context. * * DAMON can be extended for various address spaces and usages. For this, @@ -585,6 +609,7 @@ enum damon_ops_id { * filters (&struct damos_filter) that handled by itself. * @target_valid should check whether the target is still valid for the * monitoring. 
+ * @cleanup_target is called before the target will be deallocated. * @cleanup is called from @kdamond just before its termination. */ struct damon_operations { @@ -600,42 +625,16 @@ struct damon_operations { struct damon_target *t, struct damon_region *r, struct damos *scheme, unsigned long *sz_filter_passed); bool (*target_valid)(struct damon_target *t); + void (*cleanup_target)(struct damon_target *t); void (*cleanup)(struct damon_ctx *context); }; -/** - * struct damon_callback - Monitoring events notification callbacks. - * - * @after_wmarks_check: Called after each schemes' watermarks check. - * @after_aggregation: Called after each aggregation. - * @before_terminate: Called before terminating the monitoring. - * - * The monitoring thread (&damon_ctx.kdamond) calls @before_terminate just - * before finishing the monitoring. - * - * The monitoring thread calls @after_wmarks_check after each DAMON-based - * operation schemes' watermarks check. If users need to make changes to the - * attributes of the monitoring context while it's deactivated due to the - * watermarks, this is the good place to do. - * - * The monitoring thread calls @after_aggregation for each of the aggregation - * intervals. Therefore, users can safely access the monitoring results - * without additional protection. For the reason, users are recommended to use - * these callback for the accesses to the results. - * - * If any callback returns non-zero, monitoring stops. - */ -struct damon_callback { - int (*after_wmarks_check)(struct damon_ctx *context); - int (*after_aggregation)(struct damon_ctx *context); - void (*before_terminate)(struct damon_ctx *context); -}; - /* * struct damon_call_control - Control damon_call(). * * @fn: Function to be called back. * @data: Data that will be passed to @fn. + * @repeat: Repeat invocations. * @return_code: Return code from @fn invocation. * * Control damon_call(), which requests specific kdamond to invoke a given @@ -644,19 +643,22 @@ struct damon_callback { struct damon_call_control { int (*fn)(void *data); void *data; + bool repeat; int return_code; /* private: internal use only */ /* informs if the kdamond finished handling of the request */ struct completion completion; /* informs if the kdamond canceled @fn infocation */ bool canceled; + /* List head for siblings. */ + struct list_head list; }; /** * struct damon_intervals_goal - Monitoring intervals auto-tuning goal. * * @access_bp: Access events observation ratio to achieve in bp. - * @aggrs: Number of aggregations to acheive @access_bp within. + * @aggrs: Number of aggregations to achieve @access_bp within. * @min_sample_us: Minimum resulting sampling interval in microseconds. * @max_sample_us: Maximum resulting sampling interval in microseconds. * @@ -697,7 +699,7 @@ struct damon_intervals_goal { * ``mmap()`` calls from the application, in case of virtual memory monitoring) * and applies the changes for each @ops_update_interval. All time intervals * are in micro-seconds. Please refer to &struct damon_operations and &struct - * damon_callback for more detail. + * damon_call_control for more detail. */ struct damon_attrs { unsigned long sample_interval; @@ -744,7 +746,6 @@ struct damon_attrs { * Accesses to other fields must be protected by themselves. * * @ops: Set of monitoring operations for given use cases. - * @callback: Set of callbacks for monitoring events notifications. * * @adaptive_targets: Head of monitoring targets (&damon_target) list. * @schemes: Head of schemes (&damos) list. 
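A minimal caller sketch for damon_call() with the per-request control structure documented above. The callback my_fn() and the wrapper run_my_fn_once() are hypothetical names used only for illustration, and the comment about internal queueing is an assumption based on the fields added in this diff rather than a quote of the DAMON core:

static int my_fn(void *data)
{
	/* runs in the kdamond context; @data is caller-private */
	return 0;
}

static int run_my_fn_once(struct damon_ctx *ctx)
{
	struct damon_call_control control = {
		.fn = my_fn,
		.data = NULL,
		.repeat = false,	/* one-shot request */
	};
	int err;

	/* presumably linked on ctx->call_controls and handled by kdamond */
	err = damon_call(ctx, &control);
	if (err)
		return err;
	return control.return_code;	/* value returned by my_fn() */
}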
@@ -775,8 +776,9 @@ struct damon_ctx { /* for scheme quotas prioritization */ unsigned long *regions_score_histogram; - struct damon_call_control *call_control; - struct mutex call_control_lock; + /* lists of &struct damon_call_control */ + struct list_head call_controls; + struct mutex call_controls_lock; struct damos_walk_control *walk_control; struct mutex walk_control_lock; @@ -786,7 +788,6 @@ struct damon_ctx { struct mutex kdamond_lock; struct damon_operations ops; - struct damon_callback callback; struct list_head adaptive_targets; struct list_head schemes; @@ -905,7 +906,7 @@ struct damon_target *damon_new_target(void); void damon_add_target(struct damon_ctx *ctx, struct damon_target *t); bool damon_targets_empty(struct damon_ctx *ctx); void damon_free_target(struct damon_target *t); -void damon_destroy_target(struct damon_target *t); +void damon_destroy_target(struct damon_target *t, struct damon_ctx *ctx); unsigned int damon_nr_regions(struct damon_target *t); struct damon_ctx *damon_new_ctx(void); @@ -934,6 +935,7 @@ static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); +bool damon_is_running(struct damon_ctx *ctx); int damon_call(struct damon_ctx *ctx, struct damon_call_control *control); int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control); diff --git a/include/linux/dax.h b/include/linux/dax.h index 78891518291d..9d624f4d9df6 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -26,7 +26,7 @@ struct dax_operations { * number of pages available for DAX at that pfn. */ long (*direct_access)(struct dax_device *, pgoff_t, long, - enum dax_access_mode, void **, pfn_t *); + enum dax_access_mode, void **, unsigned long *); /* zero_page_range: required operation. 
Zero page range */ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); /* @@ -243,7 +243,7 @@ static inline void dax_break_layout_final(struct inode *inode) bool dax_alive(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, - enum dax_access_mode mode, void **kaddr, pfn_t *pfn); + enum dax_access_mode mode, void **kaddr, unsigned long *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, @@ -257,9 +257,10 @@ void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order, - pfn_t *pfnp, int *errp, const struct iomap_ops *ops); + unsigned long *pfnp, int *errp, + const struct iomap_ops *ops); vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, - unsigned int order, pfn_t pfn); + unsigned int order, unsigned long pfn); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); void dax_delete_mapping_range(struct address_space *mapping, loff_t start, loff_t end); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index cb95951547ab..84fdc3a6a19a 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -156,7 +156,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, enum dax_access_mode node, void **kaddr, - pfn_t *pfn); + unsigned long *pfn); typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages); diff --git a/include/linux/find.h b/include/linux/find.h index 5a2c267ea7f9..9d720ad92bc1 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -44,6 +44,8 @@ unsigned long _find_next_bit_le(const unsigned long *addr, unsigned long size, unsigned long offset); #endif +unsigned long find_random_bit(const unsigned long *addr, unsigned long size); + #ifndef find_next_bit /** * find_next_bit - find the next set bit in a memory region @@ -268,33 +270,6 @@ unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long * } /** - * find_nth_andnot_bit - find N'th set bit in 2 memory regions, - * flipping bits in 2nd region - * @addr1: The 1st address to start the search at - * @addr2: The 2nd address to start the search at - * @size: The maximum number of bits to search - * @n: The number of set bit, which position is needed, counting from 0 - * - * Returns the bit number of the N'th set bit. - * If no such, returns @size. - */ -static __always_inline -unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, - unsigned long size, unsigned long n) -{ - if (n >= size) - return size; - - if (small_const_nbits(size)) { - unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0); - - return val ? 
fns(val, n) : size; - } - - return __find_nth_andnot_bit(addr1, addr2, size, n); -} - -/** * find_nth_and_andnot_bit - find N'th set bit in 2 memory regions, * excluding those set in 3rd region * @addr1: The 1st address to start the search at diff --git a/include/linux/firewire.h b/include/linux/firewire.h index b632eec3ab52..cceb70415ed2 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -136,6 +136,7 @@ struct fw_card { __be32 maint_utility_register; struct workqueue_struct *isoc_wq; + struct workqueue_struct *async_wq; }; static inline struct fw_card *fw_card_get(struct fw_card *card) @@ -307,8 +308,7 @@ struct fw_packet { * For successful transmission, the status code is the ack received * from the destination. Otherwise it is one of the juju-specific * rcodes: RCODE_SEND_ERROR, _CANCELLED, _BUSY, _GENERATION, _NO_ACK. - * The callback can be called from tasklet context and thus - * must never block. + * The callback can be called from workqueue and thus must never block. */ fw_packet_callback_t callback; int ack; @@ -381,6 +381,10 @@ void __fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode * * A variation of __fw_send_request() to generate callback for response subaction without time * stamp. + * + * The callback is invoked in the workqueue context in most cases. However, if an error is detected + * before queueing or the destination address refers to the local node, it is invoked in the + * current context instead. */ static inline void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode, int destination_id, int generation, int speed, @@ -410,6 +414,10 @@ static inline void fw_send_request(struct fw_card *card, struct fw_transaction * * @callback_data: data to be passed to the transaction completion callback * * A variation of __fw_send_request() to generate callback for response subaction with time stamp. + * + * The callback is invoked in the workqueue context in most cases. However, if an error is detected + * before queueing or the destination address refers to the local node, it is invoked in the current + * context instead. */ static inline void fw_send_request_with_tstamp(struct fw_card *card, struct fw_transaction *t, int tcode, int destination_id, int generation, int speed, unsigned long long offset, diff --git a/include/linux/fs.h b/include/linux/fs.h index 8b16f90f81bc..d7ab4f96d705 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -526,7 +526,7 @@ struct address_space { /* * On most architectures that alignment is already the case; but * must be enforced here for CRIS, to let the least significant bit - * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. + * of struct folio's "mapping" pointer be used for FOLIO_MAPPING_ANON. */ /* XArray tags, for tagging dirty and writeback pages in the pagecache. */ @@ -1043,6 +1043,7 @@ struct fown_struct { * and so were/are genuinely "ahead". Start next readahead when * the first of these pages is accessed. * @ra_pages: Maximum size of a readahead request, copied from the bdi. + * @order: Preferred folio order used for most recent readahead. * @mmap_miss: How many mmap accesses missed in the page cache. * @prev_pos: The last byte in the most recent read request. 
* @@ -1054,7 +1055,8 @@ struct file_ra_state { unsigned int size; unsigned int async_size; unsigned int ra_pages; - unsigned int mmap_miss; + unsigned short order; + unsigned short mmap_miss; loff_t prev_pos; }; @@ -3756,9 +3758,14 @@ void setattr_copy(struct mnt_idmap *, struct inode *inode, extern int file_update_time(struct file *file); +static inline bool file_is_dax(const struct file *file) +{ + return file && IS_DAX(file->f_mapping->host); +} + static inline bool vma_is_dax(const struct vm_area_struct *vma) { - return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); + return file_is_dax(vma->vm_file); } static inline bool vma_is_fsdax(struct vm_area_struct *vma) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index be160e8d8bcb..5ebf26fcdcfa 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -423,9 +423,14 @@ static inline bool gfp_compaction_allowed(gfp_t gfp_mask) extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_CONTIG_ALLOC + +typedef unsigned int __bitwise acr_flags_t; +#define ACR_FLAGS_NONE ((__force acr_flags_t)0) // ordinary allocation request +#define ACR_FLAGS_CMA ((__force acr_flags_t)BIT(0)) // allocate for CMA + /* The below functions must be run on a range from a single zone. */ extern int alloc_contig_range_noprof(unsigned long start, unsigned long end, - unsigned migratetype, gfp_t gfp_mask); + acr_flags_t alloc_flags, gfp_t gfp_mask); #define alloc_contig_range(...) alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__)) extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask, diff --git a/include/linux/hid.h b/include/linux/hid.h index 568a9d8c749b..2cc4f1e4ea96 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1216,7 +1216,11 @@ static inline void hid_hw_wait(struct hid_device *hdev) /** * hid_report_len - calculate the report length * - * @report: the report we want to know the length + * @report: the report whose length we want to know + * + * The length counts the report ID byte, but only if the ID is nonzero + * and therefore is included in the report. Reports whose ID is zero + * never include an ID byte. */ static inline u32 hid_report_len(struct hid_report *report) { @@ -1239,6 +1243,8 @@ void hid_quirks_exit(__u16 bus); dev_notice(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_warn(hid, fmt, ...) \ dev_warn(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_warn_ratelimited(hid, fmt, ...) \ + dev_warn_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_info(hid, fmt, ...) \ dev_info(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_dbg(hid, fmt, ...) 
\ diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 9a7683d79a4b..36053c3d6d64 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -195,7 +195,7 @@ static inline void *kmap_local_page_try_from_panic(struct page *page) static inline void *kmap_local_folio(struct folio *folio, size_t offset) { - return page_address(&folio->page) + offset; + return folio_address(folio) + offset; } static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index e48d7f27b0b9..6234f316468c 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -292,12 +292,6 @@ static inline void zero_user_segment(struct page *page, zero_user_segments(page, start, end, 0, 0); } -static inline void zero_user(struct page *page, - unsigned start, unsigned size) -{ - zero_user_segments(page, start, start + size, 0, 0); -} - #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE static inline void copy_user_highpage(struct page *to, struct page *from, @@ -688,10 +682,4 @@ static inline void folio_release_kmap(struct folio *folio, void *addr) kunmap_local(addr); folio_put(folio); } - -static inline void unmap_and_put_page(struct page *page, void *addr) -{ - folio_release_kmap(page_folio(page), addr); -} - #endif /* _LINUX_HIGHMEM_H */ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 2f190c90192d..7748489fde1b 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -37,8 +37,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, unsigned long cp_flags); -vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); -vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); +vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, unsigned long pfn, + bool write); +vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, unsigned long pfn, + bool write); vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio, bool write); vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio, @@ -261,7 +263,7 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, } unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, - unsigned long vm_flags, + vm_flags_t vm_flags, unsigned long tva_flags, unsigned long orders); @@ -282,7 +284,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, */ static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, - unsigned long vm_flags, + vm_flags_t vm_flags, unsigned long tva_flags, unsigned long orders) { @@ -317,7 +319,7 @@ struct thpsize { (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) static inline bool vma_thp_disabled(struct vm_area_struct *vma, - unsigned long vm_flags) + vm_flags_t vm_flags) { /* * Explicitly disabled through madvise or prctl, or some @@ -400,8 +402,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ - if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd) \ - || pmd_devmap(*____pmd)) \ + if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)) \ __split_huge_pmd(__vma, __pmd, __address, \ false); \ } while (0) @@ -426,16 +427,14 @@ change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, #define split_huge_pud(__vma, __pud, __address) \ do { \ pud_t *____pud = (__pud); \ - if 
(pud_trans_huge(*____pud) \ - || pud_devmap(*____pud)) \ + if (pud_trans_huge(*____pud)) \ __split_huge_pud(__vma, __pud, __address); \ } while (0) -int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, +int hugepage_madvise(struct vm_area_struct *vma, vm_flags_t *vm_flags, int advice); -int madvise_collapse(struct vm_area_struct *vma, - struct vm_area_struct **prev, - unsigned long start, unsigned long end); +int madvise_collapse(struct vm_area_struct *vma, unsigned long start, + unsigned long end, bool *lock_dropped); void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct vm_area_struct *next); spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); @@ -450,7 +449,7 @@ static inline int is_swap_pmd(pmd_t pmd) static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { - if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) + if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else return NULL; @@ -458,7 +457,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) { - if (pud_trans_huge(*pud) || pud_devmap(*pud)) + if (pud_trans_huge(*pud)) return __pud_trans_huge_lock(pud, vma); else return NULL; @@ -473,9 +472,6 @@ static inline bool folio_test_pmd_mappable(struct folio *folio) return folio_order(folio) >= HPAGE_PMD_ORDER; } -struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd, int flags, struct dev_pagemap **pgmap); - vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); extern struct folio *huge_zero_folio; @@ -486,9 +482,14 @@ static inline bool is_huge_zero_folio(const struct folio *folio) return READ_ONCE(huge_zero_folio) == folio; } +static inline bool is_huge_zero_pfn(unsigned long pfn) +{ + return READ_ONCE(huge_zero_pfn) == (pfn & ~(HPAGE_PMD_NR - 1)); +} + static inline bool is_huge_zero_pmd(pmd_t pmd) { - return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd); + return pmd_present(pmd) && is_huge_zero_pfn(pmd_pfn(pmd)); } struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); @@ -524,7 +525,7 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, } static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, - unsigned long vm_flags, + vm_flags_t vm_flags, unsigned long tva_flags, unsigned long orders) { @@ -593,14 +594,14 @@ static inline bool unmap_huge_pmd_locked(struct vm_area_struct *vma, do { } while (0) static inline int hugepage_madvise(struct vm_area_struct *vma, - unsigned long *vm_flags, int advice) + vm_flags_t *vm_flags, int advice) { return -EINVAL; } static inline int madvise_collapse(struct vm_area_struct *vma, - struct vm_area_struct **prev, - unsigned long start, unsigned long end) + unsigned long start, + unsigned long end, bool *lock_dropped) { return -EINVAL; } @@ -636,6 +637,11 @@ static inline bool is_huge_zero_folio(const struct folio *folio) return false; } +static inline bool is_huge_zero_pfn(unsigned long pfn) +{ + return false; +} + static inline bool is_huge_zero_pmd(pmd_t pmd) { return false; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 42f374e828a2..526d27e88b3b 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -149,7 +149,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, uffd_flags_t flags, struct folio **foliop); #endif /* CONFIG_USERFAULTFD */ 
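A short worked example of the masking in the new is_huge_zero_pfn() above, assuming 4K pages and HPAGE_PMD_NR == 512 (so the mask is ~0x1ff); the pfn values are made up for illustration:

/*
 * Suppose huge_zero_pfn == 0x1200, i.e. the huge zero folio covers
 * pfns 0x1200..0x13ff.
 *
 *   pfn 0x1234 (a tail page):  0x1234 & ~0x1ff == 0x1200  -> true
 *   pfn 0x1400 (just past it): 0x1400 & ~0x1ff == 0x1400  -> false
 *
 * So the helper matches any pfn that falls inside the huge zero folio,
 * not only its head pfn.
 */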
-bool hugetlb_reserve_pages(struct inode *inode, long from, long to, +long hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, @@ -359,12 +359,6 @@ static inline void hugetlb_show_meminfo_node(int nid) { } -static inline int prepare_hugepage_range(struct file *file, - unsigned long addr, unsigned long len) -{ - return -EINVAL; -} - static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma) { } @@ -396,13 +390,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, return 0; } -static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, - unsigned long addr, unsigned long end, - unsigned long floor, unsigned long ceiling) -{ - BUG(); -} - #ifdef CONFIG_USERFAULTFD static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte, struct vm_area_struct *dst_vma, @@ -740,6 +727,11 @@ extern unsigned int default_hstate_idx; #define default_hstate (hstates[default_hstate_idx]) +static inline struct hugepage_subpool *subpool_inode(struct inode *inode) +{ + return HUGETLBFS_SB(inode->i_sb)->spool; +} + static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) { return folio->_hugetlb_subpool; diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h index 9efbc54e35e5..be5417303ecf 100644 --- a/include/linux/hypervisor.h +++ b/include/linux/hypervisor.h @@ -37,6 +37,9 @@ static inline bool hypervisor_isolated_pci_functions(void) if (IS_ENABLED(CONFIG_S390)) return true; + if (IS_ENABLED(CONFIG_LOONGARCH)) + return true; + return jailhouse_paravirt(); } diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h index 4fb2f93d82ed..d643c7c87822 100644 --- a/include/linux/irq-entry-common.h +++ b/include/linux/irq-entry-common.h @@ -7,6 +7,7 @@ #include <linux/context_tracking.h> #include <linux/tick.h> #include <linux/kmsan.h> +#include <linux/unwind_deferred.h> #include <asm/entry-common.h> @@ -256,6 +257,7 @@ static __always_inline void exit_to_user_mode(void) lockdep_hardirqs_on_prepare(); instrumentation_end(); + unwind_reset_info(); user_enter_irqoff(); arch_exit_to_user_mode(); lockdep_hardirqs_on(CALLER_ADDR0); diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index b8d69cfbb58b..ff6120463745 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -12,7 +12,7 @@ extern int start_stop_khugepaged(void); extern void __khugepaged_enter(struct mm_struct *mm); extern void __khugepaged_exit(struct mm_struct *mm); extern void khugepaged_enter_vma(struct vm_area_struct *vma, - unsigned long vm_flags); + vm_flags_t vm_flags); extern void khugepaged_min_free_kbytes_update(void); extern bool current_is_khugepaged(void); extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, @@ -37,7 +37,7 @@ static inline void khugepaged_exit(struct mm_struct *mm) { } static inline void khugepaged_enter_vma(struct vm_area_struct *vma, - unsigned long vm_flags) + vm_flags_t vm_flags) { } static inline int collapse_pte_mapped_thp(struct mm_struct *mm, diff --git a/include/linux/ksm.h b/include/linux/ksm.h index d73095b5cd96..c17b955e7b0b 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -16,9 +16,9 @@ #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, - unsigned long end, int advice, unsigned long *vm_flags); - -void ksm_add_vma(struct vm_area_struct *vma); + unsigned long end, int advice, vm_flags_t *vm_flags); 
+vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file, + vm_flags_t vm_flags); int ksm_enable_merge_any(struct mm_struct *mm); int ksm_disable_merge_any(struct mm_struct *mm); int ksm_disable(struct mm_struct *mm); @@ -97,8 +97,10 @@ bool ksm_process_mergeable(struct mm_struct *mm); #else /* !CONFIG_KSM */ -static inline void ksm_add_vma(struct vm_area_struct *vma) +static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm, + const struct file *file, vm_flags_t vm_flags) { + return vm_flags; } static inline int ksm_disable(struct mm_struct *mm) @@ -131,7 +133,7 @@ static inline void collect_procs_ksm(const struct folio *folio, #ifdef CONFIG_MMU static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, - unsigned long end, int advice, unsigned long *vm_flags) + unsigned long end, int advice, vm_flags_t *vm_flags) { return 0; } diff --git a/include/linux/llist.h b/include/linux/llist.h index 27b17f64bcee..607b2360c938 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -83,7 +83,7 @@ static inline void init_llist_head(struct llist_head *list) */ static inline void init_llist_node(struct llist_node *node) { - node->next = node; + WRITE_ONCE(node->next, node); } /** @@ -97,7 +97,7 @@ static inline void init_llist_node(struct llist_node *node) */ static inline bool llist_on_list(const struct llist_node *node) { - return node->next != node; + return READ_ONCE(node->next) != node; } /** @@ -220,7 +220,7 @@ static inline bool llist_empty(const struct llist_head *head) static inline struct llist_node *llist_next(struct llist_node *node) { - return node->next; + return READ_ONCE(node->next); } /** diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 9ef129038224..bafe143b1f78 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -75,8 +75,8 @@ * searching for gaps or any other code that needs to find the end of the data. */ struct maple_metadata { - unsigned char end; - unsigned char gap; + unsigned char end; /* end of data */ + unsigned char gap; /* offset of largest gap */ }; /* diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 87b6688f124a..785173aa0739 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -251,8 +251,10 @@ struct mem_cgroup { * that this indicator should NOT be used in legacy cgroup mode * where socket memory is accounted/charged separately. 
*/ - unsigned long socket_pressure; - + u64 socket_pressure; +#if BITS_PER_LONG < 64 + seqlock_t socket_pressure_seqlock; +#endif int kmemcg_id; /* * memcg->objcg is wiped out as a part of the objcg repaprenting @@ -1602,6 +1604,42 @@ extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); + +#if BITS_PER_LONG < 64 +static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) +{ + u64 val = get_jiffies_64() + HZ; + unsigned long flags; + + write_seqlock_irqsave(&memcg->socket_pressure_seqlock, flags); + memcg->socket_pressure = val; + write_sequnlock_irqrestore(&memcg->socket_pressure_seqlock, flags); +} + +static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg) +{ + unsigned int seq; + u64 val; + + do { + seq = read_seqbegin(&memcg->socket_pressure_seqlock); + val = memcg->socket_pressure; + } while (read_seqretry(&memcg->socket_pressure_seqlock, seq)); + + return val; +} +#else +static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) +{ + WRITE_ONCE(memcg->socket_pressure, jiffies + HZ); +} + +static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg) +{ + return READ_ONCE(memcg->socket_pressure); +} +#endif + static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { #ifdef CONFIG_MEMCG_V1 @@ -1609,7 +1647,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) return !!memcg->tcpmem_pressure; #endif /* CONFIG_MEMCG_V1 */ do { - if (time_before(jiffies, READ_ONCE(memcg->socket_pressure))) + if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) return true; } while ((memcg = parent_mem_cgroup(memcg))); return false; diff --git a/include/linux/memfd.h b/include/linux/memfd.h index 246daadbfde8..6f606d9573c3 100644 --- a/include/linux/memfd.h +++ b/include/linux/memfd.h @@ -14,7 +14,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx); * We also update VMA flags if appropriate by manipulating the VMA flags pointed * to by vm_flags_ptr. */ -int memfd_check_seals_mmap(struct file *file, unsigned long *vm_flags_ptr); +int memfd_check_seals_mmap(struct file *file, vm_flags_t *vm_flags_ptr); #else static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a) { @@ -25,7 +25,7 @@ static inline struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx) return ERR_PTR(-EINVAL); } static inline int memfd_check_seals_mmap(struct file *file, - unsigned long *vm_flags_ptr) + vm_flags_t *vm_flags_ptr) { return 0; } diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 0dc0cf2863e2..7a805796fcfd 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -18,7 +18,7 @@ * adistance value (slightly faster) than default DRAM adistance to be part of * the same memory tier. 
*/ -#define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1)) +#define MEMTIER_ADISTANCE_DRAM ((4L * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1)) struct memory_tier; struct memory_dev_type { diff --git a/include/linux/memory.h b/include/linux/memory.h index 5ec4e6d209b9..40eb70ccb09d 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -109,8 +109,6 @@ struct memory_notify { unsigned long altmap_nr_pages; unsigned long start_pfn; unsigned long nr_pages; - int status_change_nid_normal; - int status_change_nid; }; struct notifier_block; @@ -179,12 +177,30 @@ struct memory_group *memory_group_find_by_id(int mgid); typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *); int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, struct memory_group *excluded, void *arg); +struct memory_block *find_memory_block_by_id(unsigned long block_id); #define hotplug_memory_notifier(fn, pri) ({ \ static __meminitdata struct notifier_block fn##_mem_nb =\ { .notifier_call = fn, .priority = pri };\ register_memory_notifier(&fn##_mem_nb); \ }) +extern int sections_per_block; + +static inline unsigned long memory_block_id(unsigned long section_nr) +{ + return section_nr / sections_per_block; +} + +static inline unsigned long pfn_to_block_id(unsigned long pfn) +{ + return memory_block_id(pfn_to_section_nr(pfn)); +} + +static inline unsigned long phys_to_block_id(unsigned long phys) +{ + return pfn_to_block_id(PFN_DOWN(phys)); +} + #ifdef CONFIG_NUMA void memory_block_add_nid(struct memory_block *mem, int nid, enum meminit_context context); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index eaac5ae8c05c..23f038a16231 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -314,7 +314,8 @@ extern int add_memory_driver_managed(int nid, u64 start, u64 size, mhp_t mhp_flags); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, - struct vmem_altmap *altmap, int migratetype); + struct vmem_altmap *altmap, int migratetype, + bool isolate_pageblock); extern void remove_pfn_range_from_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index aaa2114498d6..acadd41e0b5c 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -35,8 +35,8 @@ struct migration_target_control; * @src page. The driver should copy the contents of the * @src page to the @dst page and set up the fields of @dst page. * Both pages are locked. - * If page migration is successful, the driver should call - * __ClearPageMovable(@src) and return MIGRATEPAGE_SUCCESS. + * If page migration is successful, the driver should + * return MIGRATEPAGE_SUCCESS. * If the driver cannot migrate the page at the moment, it can return * -EAGAIN. The VM interprets this as a temporary migration failure and * will retry it later. 
Any other error value is a permanent migration @@ -69,7 +69,7 @@ int migrate_pages(struct list_head *l, new_folio_t new, free_folio_t free, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded); struct folio *alloc_migration_target(struct folio *src, unsigned long private); -bool isolate_movable_page(struct page *page, isolate_mode_t mode); +bool isolate_movable_ops_page(struct page *page, isolate_mode_t mode); bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, @@ -90,7 +90,7 @@ static inline int migrate_pages(struct list_head *l, new_folio_t new, static inline struct folio *alloc_migration_target(struct folio *src, unsigned long private) { return NULL; } -static inline bool isolate_movable_page(struct page *page, isolate_mode_t mode) +static inline bool isolate_movable_ops_page(struct page *page, isolate_mode_t mode) { return false; } static inline bool isolate_folio_to_list(struct folio *folio, struct list_head *list) { return false; } @@ -103,44 +103,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, #endif /* CONFIG_MIGRATION */ -#ifdef CONFIG_COMPACTION -bool PageMovable(struct page *page); -void __SetPageMovable(struct page *page, const struct movable_operations *ops); -void __ClearPageMovable(struct page *page); -#else -static inline bool PageMovable(struct page *page) { return false; } -static inline void __SetPageMovable(struct page *page, - const struct movable_operations *ops) -{ -} -static inline void __ClearPageMovable(struct page *page) -{ -} -#endif - -static inline bool folio_test_movable(struct folio *folio) -{ - return PageMovable(&folio->page); -} - -static inline -const struct movable_operations *folio_movable_ops(struct folio *folio) -{ - VM_BUG_ON(!__folio_test_movable(folio)); - - return (const struct movable_operations *) - ((unsigned long)folio->mapping - PAGE_MAPPING_MOVABLE); -} - -static inline -const struct movable_operations *page_movable_ops(struct page *page) -{ - VM_BUG_ON(!__PageMovable(page)); - - return (const struct movable_operations *) - ((unsigned long)page->mapping - PAGE_MAPPING_MOVABLE); -} - #ifdef CONFIG_NUMA_BALANCING int migrate_misplaced_folio_prepare(struct folio *folio, struct vm_area_struct *vma, int node); diff --git a/include/linux/mm.h b/include/linux/mm.h index 0c44bb8ce544..349f0d9aad22 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1818,7 +1818,24 @@ static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot) { return pmd_mkhuge(pfn_pmd(folio_pfn(folio), pgprot)); } -#endif + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +/** + * folio_mk_pud - Create a PUD for this folio + * @folio: The folio to create a PUD for + * @pgprot: The page protection bits to use + * + * Create a page table entry for the first page of this folio. + * This is suitable for passing to set_pud_at(). + * + * Return: A page table entry suitable for mapping this folio. 
+ */ +static inline pud_t folio_mk_pud(struct folio *folio, pgprot_t pgprot) +{ + return pud_mkhuge(pfn_pud(folio_pfn(folio), pgprot)); +} +#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_MMU */ static inline bool folio_has_pincount(const struct folio *folio) @@ -2152,13 +2169,13 @@ static inline int folio_expected_ref_count(const struct folio *folio) const int order = folio_order(folio); int ref_count = 0; - if (WARN_ON_ONCE(folio_test_slab(folio))) + if (WARN_ON_ONCE(page_has_type(&folio->page) && !folio_test_hugetlb(folio))) return 0; if (folio_test_anon(folio)) { /* One reference per page from the swapcache. */ ref_count += folio_test_swapcache(folio) << order; - } else if (!((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS)) { + } else { /* One reference per page from the pagecache. */ ref_count += !!folio->mapping << order; /* One reference from PG_private. */ @@ -2549,7 +2566,7 @@ extern long change_protection(struct mmu_gather *tlb, unsigned long end, unsigned long cp_flags); extern int mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, struct vm_area_struct *vma, struct vm_area_struct **pprev, - unsigned long start, unsigned long end, unsigned long newflags); + unsigned long start, unsigned long end, vm_flags_t newflags); /* * doesn't attempt to fault and will return short. @@ -2694,13 +2711,6 @@ static inline pud_t pud_mkspecial(pud_t pud) } #endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ -#ifndef CONFIG_ARCH_HAS_PTE_DEVMAP -static inline int pte_devmap(pte_t pte) -{ - return 0; -} -#endif - extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, @@ -3313,9 +3323,9 @@ extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages); extern bool vma_is_special_mapping(const struct vm_area_struct *vma, const struct vm_special_mapping *sm); -extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, +struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, - unsigned long flags, + vm_flags_t vm_flags, const struct vm_special_mapping *spec); unsigned long randomize_stack_top(unsigned long stack_top); @@ -3479,10 +3489,10 @@ static inline bool range_in_vma(struct vm_area_struct *vma, } #ifdef CONFIG_MMU -pgprot_t vm_get_page_prot(unsigned long vm_flags); +pgprot_t vm_get_page_prot(vm_flags_t vm_flags); void vma_set_page_prot(struct vm_area_struct *vma); #else -static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) +static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags) { return __pgprot(0); } @@ -3519,9 +3529,9 @@ vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr, vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot); vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, - pfn_t pfn); + unsigned long pfn); vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, - unsigned long addr, pfn_t pfn); + unsigned long addr, unsigned long pfn); int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, @@ -4050,14 +4060,14 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping, #endif #ifdef CONFIG_ANON_VMA_NAME -int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, - 
unsigned long len_in, - struct anon_vma_name *anon_name); +int set_anon_vma_name(unsigned long addr, unsigned long size, + const char __user *uname); #else -static inline int -madvise_set_anon_name(struct mm_struct *mm, unsigned long start, - unsigned long len_in, struct anon_vma_name *anon_name) { - return 0; +static inline +int set_anon_vma_name(unsigned long addr, unsigned long size, + const char __user *uname) +{ + return -EINVAL; } #endif @@ -4190,4 +4200,23 @@ static inline bool page_pool_page_is_pp(const struct page *page) } #endif +#define PAGE_SNAPSHOT_FAITHFUL (1 << 0) +#define PAGE_SNAPSHOT_PG_BUDDY (1 << 1) +#define PAGE_SNAPSHOT_PG_IDLE (1 << 2) + +struct page_snapshot { + struct folio folio_snapshot; + struct page page_snapshot; + unsigned long pfn; + unsigned long idx; + unsigned long flags; +}; + +static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps) +{ + return ps->flags & PAGE_SNAPSHOT_FAITHFUL; +} + +void snapshot_page(struct page_snapshot *ps, const struct page *page); + #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0f0662157066..08bc2442db93 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -105,7 +105,6 @@ struct page { unsigned int order; }; }; - /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; union { pgoff_t __folio_index; /* Our offset within mapping. */ @@ -1086,7 +1085,7 @@ struct mm_struct { unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ unsigned long stack_vm; /* VM_STACK */ - unsigned long def_flags; + vm_flags_t def_flags; /** * @write_protect_seq: Locked when any thread is write diff --git a/include/linux/mman.h b/include/linux/mman.h index f4c6346a8fcd..de9e8e6229a4 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -137,7 +137,7 @@ static inline bool arch_validate_flags(unsigned long flags) /* * Combine the mmap "prot" argument into "vm_flags" used internally. */ -static inline unsigned long +static inline vm_flags_t calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { return _calc_vm_trans(prot, PROT_READ, VM_READ ) | @@ -149,7 +149,7 @@ calc_vm_prot_bits(unsigned long prot, unsigned long pkey) /* * Combine the mmap "flags" argument into "vm_flags" used internally. */ -static inline unsigned long +static inline vm_flags_t calc_vm_flag_bits(struct file *file, unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 5da384bd0a26..1f4f44951abe 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -309,6 +309,17 @@ void vma_mark_detached(struct vm_area_struct *vma); struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, unsigned long address); +/* + * Locks next vma pointed by the iterator. Confirms the locked vma has not + * been modified and will retry under mmap_lock protection if modification + * was detected. Should be called from read RCU section. + * Returns either a valid locked VMA, NULL if no more VMAs or -EINTR if the + * process was interrupted. 
+ */ +struct vm_area_struct *lock_next_vma(struct mm_struct *mm, + struct vma_iterator *iter, + unsigned long address); + #else /* CONFIG_PER_VMA_LOCK */ static inline void mm_lock_seqcount_init(struct mm_struct *mm) {} diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index a0a3894900ed..14a45979cccc 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -89,6 +89,17 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); } \ unlikely(__ret_warn_once); \ }) +#define VM_WARN_ON_ONCE_VMA(cond, vma) ({ \ + static bool __section(".data..once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + dump_vma(vma); \ + __warned = true; \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn_once); \ +}) #define VM_WARN_ON_VMG(cond, vmg) ({ \ int __ret_warn = !!(cond); \ \ @@ -115,6 +126,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); #define VM_WARN_ON_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_MM(cond, mm) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_ONCE_VMA(cond, vma) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_VMG(cond, vmg) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 283913d42d7b..0c5da9141983 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -38,19 +38,19 @@ #define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) /* Defines the order for the number of pages that have a migrate type. */ -#ifndef CONFIG_PAGE_BLOCK_ORDER -#define PAGE_BLOCK_ORDER MAX_PAGE_ORDER +#ifndef CONFIG_PAGE_BLOCK_MAX_ORDER +#define PAGE_BLOCK_MAX_ORDER MAX_PAGE_ORDER #else -#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER -#endif /* CONFIG_PAGE_BLOCK_ORDER */ +#define PAGE_BLOCK_MAX_ORDER CONFIG_PAGE_BLOCK_MAX_ORDER +#endif /* CONFIG_PAGE_BLOCK_MAX_ORDER */ /* * The MAX_PAGE_ORDER, which defines the max order of pages to be allocated - * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_ORDER, + * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_MAX_ORDER, * which defines the order for the number of pages that can have a migrate type */ -#if (PAGE_BLOCK_ORDER > MAX_PAGE_ORDER) -#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_ORDER +#if (PAGE_BLOCK_MAX_ORDER > MAX_PAGE_ORDER) +#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_MAX_ORDER #endif /* @@ -79,6 +79,9 @@ enum migratetype { * __free_pageblock_cma() function. */ MIGRATE_CMA, + __MIGRATE_TYPE_END = MIGRATE_CMA, +#else + __MIGRATE_TYPE_END = MIGRATE_HIGHATOMIC, #endif #ifdef CONFIG_MEMORY_ISOLATION MIGRATE_ISOLATE, /* can't allocate from here */ @@ -92,8 +95,12 @@ extern const char * const migratetype_names[MIGRATE_TYPES]; #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) # define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA) -# define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \ - get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK)) +/* + * __dump_folio() in mm/debug.c passes a folio pointer to on-stack struct folio, + * so folio_pfn() cannot be used and pfn is needed. 
+ */ +# define is_migrate_cma_folio(folio, pfn) \ + (get_pfnblock_migratetype(&folio->page, pfn) == MIGRATE_CMA) #else # define is_migrate_cma(migratetype) false # define is_migrate_cma_page(_page) false @@ -122,14 +129,12 @@ static inline bool migratetype_is_mergeable(int mt) extern int page_group_by_mobility_disabled; -#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1) +#define get_pageblock_migratetype(page) \ + get_pfnblock_migratetype(page, page_to_pfn(page)) -#define get_pageblock_migratetype(page) \ - get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK) +#define folio_migratetype(folio) \ + get_pageblock_migratetype(&folio->page) -#define folio_migratetype(folio) \ - get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \ - MIGRATETYPE_MASK) struct free_area { struct list_head free_list[MIGRATE_TYPES]; unsigned long nr_free; @@ -201,7 +206,6 @@ enum node_stat_item { NR_FILE_PAGES, NR_FILE_DIRTY, NR_WRITEBACK, - NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ NR_SHMEM_THPS, NR_SHMEM_PMDMAPPED, diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 1b56796f6cb3..288ef765a44e 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -8,15 +8,15 @@ #ifndef __LINUX_MTD_MAP_H__ #define __LINUX_MTD_MAP_H__ -#include <linux/types.h> -#include <linux/list.h> -#include <linux/string.h> #include <linux/bug.h> -#include <linux/kernel.h> #include <linux/io.h> - +#include <linux/ioport.h> +#include <linux/string.h> +#include <linux/types.h> #include <linux/unaligned.h> -#include <asm/barrier.h> + +struct device_node; +struct module; #ifdef CONFIG_MTD_MAP_BANK_WIDTH_1 #define map_bankwidth(map) 1 @@ -188,6 +188,7 @@ typedef union { of living. */ +struct mtd_chip_driver; struct map_info { const char *name; unsigned long size; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 15eaa09da998..27a45bdab7ec 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -62,30 +62,33 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(addr, ndummy, buf, len, ...) 
\ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1), \ - SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ - SPI_MEM_OP_ADDR(2, addr, 1), \ - SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 1)) + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 1), \ + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_1S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 1)) + SPI_MEM_OP_DATA_IN(len, buf, 1), \ + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_FAST_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_FAST_3A_1S_1S_1S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 1)) + SPI_MEM_OP_DATA_IN(len, buf, 1), \ + SPI_MEM_OP_MAX_FREQ(freq)) #define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_1D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0d, 1), \ @@ -94,17 +97,19 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 1), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_2S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_2S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(freq)) #define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_2D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3d, 1), \ @@ -113,18 +118,19 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len, ...) 
\ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ SPI_MEM_OP_ADDR(2, addr, 2), \ SPI_MEM_OP_DUMMY(ndummy, 2), \ SPI_MEM_OP_DATA_IN(len, buf, 2), \ - SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ SPI_MEM_OP_ADDR(3, addr, 2), \ SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(freq)) #define SPINAND_PAGE_READ_FROM_CACHE_1S_2D_2D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbd, 1), \ @@ -133,17 +139,19 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_4S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_4S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(freq)) #define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_4D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6d, 1), \ @@ -152,18 +160,19 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len, ...) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(2, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ SPI_MEM_OP_DATA_IN(len, buf, 4), \ - SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(3, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(freq)) #define SPINAND_PAGE_READ_FROM_CACHE_1S_4D_4D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xed, 1), \ @@ -484,6 +493,7 @@ struct spinand_user_otp { * @op_variants.update_cache: variants of the update-cache operation * @select_target: function used to select a target/die. Required only for * multi-die chips + * @configure_chip: Align the chip configuration with the core settings * @set_cont_read: enable/disable continuous cached reads * @fact_otp: SPI NAND factory OTP info. * @user_otp: SPI NAND user OTP info. 
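A brief illustrative aside on the configure_chip() hook documented in the struct spinand_info kernel-doc above: the sketch below shows one way a vendor driver might implement such a callback using only the helpers exported by this header (spinand_read_reg_op(), spinand_write_reg_op()). The function name, register address and bit are hypothetical placeholders and are not part of this change.

#include <linux/bits.h>
#include <linux/mtd/spinand.h>

/*
 * Hypothetical configure_chip() implementation. EXAMPLE_VENDOR_REG and
 * EXAMPLE_VENDOR_BIT are illustrative stand-ins; only the
 * spinand_read_reg_op()/spinand_write_reg_op() helpers are taken from the
 * interface in this header.
 */
#define EXAMPLE_VENDOR_REG      0xd0
#define EXAMPLE_VENDOR_BIT      BIT(3)

static int example_configure_chip(struct spinand_device *spinand)
{
        u8 val;
        int ret;

        ret = spinand_read_reg_op(spinand, EXAMPLE_VENDOR_REG, &val);
        if (ret)
                return ret;

        /* Align the (hypothetical) vendor setting with what the core expects. */
        if (!(val & EXAMPLE_VENDOR_BIT))
                ret = spinand_write_reg_op(spinand, EXAMPLE_VENDOR_REG,
                                           val | EXAMPLE_VENDOR_BIT);

        return ret;
}

A chip description would then wire such a callback up through the SPINAND_CONFIGURE_CHIP() initializer introduced in the hunk that follows, alongside the existing SPINAND_SELECT_TARGET()/SPINAND_CONT_READ() initializers.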
@@ -507,6 +517,7 @@ struct spinand_info { } op_variants; int (*select_target)(struct spinand_device *spinand, unsigned int target); + int (*configure_chip)(struct spinand_device *spinand); int (*set_cont_read)(struct spinand_device *spinand, bool enable); struct spinand_fact_otp fact_otp; @@ -539,6 +550,9 @@ struct spinand_info { #define SPINAND_SELECT_TARGET(__func) \ .select_target = __func +#define SPINAND_CONFIGURE_CHIP(__configure_chip) \ + .configure_chip = __configure_chip + #define SPINAND_CONT_READ(__set_cont_read) \ .set_cont_read = __set_cont_read @@ -607,6 +621,7 @@ struct spinand_dirmap { * passed in spi_mem_op be DMA-able, so we can't based the bufs on * the stack * @manufacturer: SPI NAND manufacturer information + * @configure_chip: Align the chip configuration with the core settings * @cont_read_possible: Field filled by the core once the whole system * configuration is known to tell whether continuous reads are * suitable to use or not in general with this chip/configuration. @@ -647,6 +662,7 @@ struct spinand_device { const struct spinand_manufacturer *manufacturer; void *priv; + int (*configure_chip)(struct spinand_device *spinand); bool cont_read_possible; int (*set_cont_read)(struct spinand_device *spinand, bool enable); @@ -723,7 +739,9 @@ int spinand_match_and_init(struct spinand_device *spinand, enum spinand_readid_method rdid_method); int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val); +int spinand_read_reg_op(struct spinand_device *spinand, u8 reg, u8 *val); int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val); +int spinand_write_enable_op(struct spinand_device *spinand); int spinand_select_target(struct spinand_device *spinand, unsigned int target); int spinand_wait(struct spinand_device *spinand, unsigned long initial_delay_us, diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 00afd341d293..847b81ca6436 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -227,7 +227,7 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) -DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) +DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T), _RET == 0) extern unsigned long mutex_get_owner(struct mutex *lock); diff --git a/include/linux/node.h b/include/linux/node.h index 2b7517892230..2c7529335b21 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -111,43 +111,64 @@ struct memory_block; extern struct node *node_devices[]; #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA) -void register_memory_blocks_under_node(int nid, unsigned long start_pfn, - unsigned long end_pfn, - enum meminit_context context); +void register_memory_blocks_under_node_hotplug(int nid, unsigned long start_pfn, + unsigned long end_pfn); #else -static inline void register_memory_blocks_under_node(int nid, unsigned long start_pfn, - unsigned long end_pfn, - enum meminit_context context) +static inline void register_memory_blocks_under_node_hotplug(int nid, + unsigned long start_pfn, + unsigned long end_pfn) +{ +} +static inline void register_memory_blocks_under_nodes(void) { } #endif extern void unregister_node(struct node *node); -#ifdef CONFIG_NUMA -extern void node_dev_init(void); -/* Core of the node registration - only memory hotplug should use this */ -extern int __register_one_node(int nid); - -/* Registers an online node */ -static inline int 
register_one_node(int nid) -{ - int error = 0; - if (node_online(nid)) { - struct pglist_data *pgdat = NODE_DATA(nid); - unsigned long start_pfn = pgdat->node_start_pfn; - unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; +struct node_notify { + int nid; +}; - error = __register_one_node(nid); - if (error) - return error; - register_memory_blocks_under_node(nid, start_pfn, end_pfn, - MEMINIT_EARLY); - } +#define NODE_ADDING_FIRST_MEMORY (1<<0) +#define NODE_ADDED_FIRST_MEMORY (1<<1) +#define NODE_CANCEL_ADDING_FIRST_MEMORY (1<<2) +#define NODE_REMOVING_LAST_MEMORY (1<<3) +#define NODE_REMOVED_LAST_MEMORY (1<<4) +#define NODE_CANCEL_REMOVING_LAST_MEMORY (1<<5) - return error; +#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA) +extern int register_node_notifier(struct notifier_block *nb); +extern void unregister_node_notifier(struct notifier_block *nb); +extern int node_notify(unsigned long val, void *v); + +#define hotplug_node_notifier(fn, pri) ({ \ + static __meminitdata struct notifier_block fn##_node_nb =\ + { .notifier_call = fn, .priority = pri };\ + register_node_notifier(&fn##_node_nb); \ +}) +#else +static inline int register_node_notifier(struct notifier_block *nb) +{ + return 0; +} +static inline void unregister_node_notifier(struct notifier_block *nb) +{ +} +static inline int node_notify(unsigned long val, void *v) +{ + return 0; +} +static inline int hotplug_node_notifier(notifier_fn_t fn, int pri) +{ + return 0; } +#endif +#ifdef CONFIG_NUMA +extern void node_dev_init(void); +/* Core of the node registration - only memory hotplug should use this */ +extern int register_one_node(int nid); extern void unregister_one_node(int nid); extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); @@ -160,10 +181,6 @@ extern int register_memory_node_under_compute_node(unsigned int mem_nid, static inline void node_dev_init(void) { } -static inline int __register_one_node(int nid) -{ - return 0; -} static inline int register_one_node(int nid) { return 0; diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index f08ae71585fa..7ad1f5c7407e 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -492,21 +492,9 @@ static __always_inline int num_node_state(enum node_states state) static __always_inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) - int w, bit; - - w = nodes_weight(*maskp); - switch (w) { - case 0: - bit = NUMA_NO_NODE; - break; - case 1: - bit = first_node(*maskp); - break; - default: - bit = find_nth_bit(maskp->bits, MAX_NUMNODES, get_random_u32_below(w)); - break; - } - return bit; + int node = find_random_bit(maskp->bits, MAX_NUMNODES); + + return node < MAX_NUMNODES ? node : NUMA_NO_NODE; #else return 0; #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 4fe5ee67535b..8e4d6eda8a8d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -167,8 +167,12 @@ enum pageflags { /* Remapped by swiotlb-xen. */ PG_xen_remapped = PG_owner_priv_1, - /* non-lru isolated movable page */ - PG_isolated = PG_reclaim, +#ifdef CONFIG_MIGRATION + /* movable_ops page that is isolated for migration */ + PG_movable_ops_isolated = PG_reclaim, + /* this is a movable_ops page (for selected typed pages only) */ + PG_movable_ops = PG_uptodate, +#endif /* Only valid for buddy pages. 
Used to track pages that are reported */ PG_reported = PG_uptodate, @@ -691,15 +695,12 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) /* * On an anonymous folio mapped into a user virtual memory area, * folio->mapping points to its anon_vma, not to a struct address_space; - * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. + * with the FOLIO_MAPPING_ANON bit set to distinguish it. See rmap.h. * - * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled, - * the PAGE_MAPPING_MOVABLE bit may be set along with the PAGE_MAPPING_ANON + * On an anonymous folio in a VM_MERGEABLE area, if CONFIG_KSM is enabled, + * the FOLIO_MAPPING_ANON_KSM bit may be set along with the FOLIO_MAPPING_ANON * bit; and then folio->mapping points, not to an anon_vma, but to a private - * structure which KSM associates with that merged page. See ksm.h. - * - * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable - * page and then folio->mapping points to a struct movable_operations. + * structure which KSM associates with that merged folio. See ksm.h. * * Please note that, confusingly, "folio_mapping" refers to the inode * address_space which maps the folio from disk; whereas "folio_mapped" @@ -712,50 +713,27 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) * false before calling the following functions (e.g., folio_test_anon). * See mm/slab.h. */ -#define PAGE_MAPPING_ANON 0x1 -#define PAGE_MAPPING_MOVABLE 0x2 -#define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) -#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) - -static __always_inline bool folio_mapping_flags(const struct folio *folio) -{ - return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; -} - -static __always_inline bool PageMappingFlags(const struct page *page) -{ - return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; -} +#define FOLIO_MAPPING_ANON 0x1 +#define FOLIO_MAPPING_ANON_KSM 0x2 +#define FOLIO_MAPPING_KSM (FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM) +#define FOLIO_MAPPING_FLAGS (FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM) static __always_inline bool folio_test_anon(const struct folio *folio) { - return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; + return ((unsigned long)folio->mapping & FOLIO_MAPPING_ANON) != 0; } static __always_inline bool PageAnonNotKsm(const struct page *page) { unsigned long flags = (unsigned long)page_folio(page)->mapping; - return (flags & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_ANON; + return (flags & FOLIO_MAPPING_FLAGS) == FOLIO_MAPPING_ANON; } static __always_inline bool PageAnon(const struct page *page) { return folio_test_anon(page_folio(page)); } - -static __always_inline bool __folio_test_movable(const struct folio *folio) -{ - return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == - PAGE_MAPPING_MOVABLE; -} - -static __always_inline bool __PageMovable(const struct page *page) -{ - return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == - PAGE_MAPPING_MOVABLE; -} - #ifdef CONFIG_KSM /* * A KSM page is one of those write-protected "shared pages" or "merged pages" @@ -765,8 +743,8 @@ static __always_inline bool __PageMovable(const struct page *page) */ static __always_inline bool folio_test_ksm(const struct folio *folio) { - return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == - PAGE_MAPPING_KSM; + return ((unsigned long)folio->mapping & FOLIO_MAPPING_FLAGS) == + FOLIO_MAPPING_KSM; } #else FOLIO_TEST_FLAG_FALSE(ksm) @@ -1137,7 +1115,53 @@ static inline 
bool folio_contain_hwpoisoned_page(struct folio *folio) bool is_free_buddy_page(const struct page *page); -PAGEFLAG(Isolated, isolated, PF_ANY); +#ifdef CONFIG_MIGRATION +/* + * This page is migratable through movable_ops (for selected typed pages + * only). + * + * Page migration of such pages might fail, for example, if the page is + * already isolated by somebody else, or if the page is about to get freed. + * + * While a subsystem might set selected typed pages that support page migration + * as being movable through movable_ops, it must never clear this flag. + * + * This flag is only cleared when the page is freed back to the buddy. + * + * Only selected page types support this flag (see page_movable_ops()) and + * the flag might be used in other context for other pages. Always use + * page_has_movable_ops() instead. + */ +TESTPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL); +SETPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL); +/* + * A movable_ops page has this flag set while it is isolated for migration. + * This flag primarily protects against concurrent migration attempts. + * + * Once migration ended (success or failure), the flag is cleared. The + * flag is managed by the migration core. + */ +PAGEFLAG(MovableOpsIsolated, movable_ops_isolated, PF_NO_TAIL); +#else /* !CONFIG_MIGRATION */ +TESTPAGEFLAG_FALSE(MovableOps, movable_ops); +SETPAGEFLAG_NOOP(MovableOps, movable_ops); +PAGEFLAG_FALSE(MovableOpsIsolated, movable_ops_isolated); +#endif /* CONFIG_MIGRATION */ + +/** + * page_has_movable_ops - test for a movable_ops page + * @page: The page to test. + * + * Test whether this is a movable_ops page. Such pages will stay that + * way until freed. + * + * Returns true if this is a movable_ops page, otherwise false. + */ +static inline bool page_has_movable_ops(const struct page *page) +{ + return PageMovableOps(page) && + (PageOffline(page) || PageZsmalloc(page)); +} static __always_inline int PageAnonExclusive(const struct page *page) { diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 898bb788243b..3e2f960e166c 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -11,6 +11,12 @@ static inline bool is_migrate_isolate(int migratetype) { return migratetype == MIGRATE_ISOLATE; } +#define get_pageblock_isolate(page) \ + get_pfnblock_bit(page, page_to_pfn(page), PB_migrate_isolate) +#define clear_pageblock_isolate(page) \ + clear_pfnblock_bit(page, page_to_pfn(page), PB_migrate_isolate) +#define set_pageblock_isolate(page) \ + set_pfnblock_bit(page, page_to_pfn(page), PB_migrate_isolate) #else static inline bool is_migrate_isolate_page(struct page *page) { @@ -20,22 +26,45 @@ static inline bool is_migrate_isolate(int migratetype) { return false; } +static inline bool get_pageblock_isolate(struct page *page) +{ + return false; +} +static inline void clear_pageblock_isolate(struct page *page) +{ +} +static inline void set_pageblock_isolate(struct page *page) +{ +} #endif -#define MEMORY_OFFLINE 0x1 -#define REPORT_FAILURE 0x2 +/* + * Pageblock isolation modes: + * PB_ISOLATE_MODE_MEM_OFFLINE - isolate to offline (!allocate) memory + * e.g., skip over PageHWPoison() pages and + * PageOffline() pages. Unmovable pages will be + * reported in this mode. 
+ * PB_ISOLATE_MODE_CMA_ALLOC - isolate for CMA allocations + * PB_ISOLATE_MODE_OTHER - isolate for other purposes + */ +enum pb_isolate_mode { + PB_ISOLATE_MODE_MEM_OFFLINE, + PB_ISOLATE_MODE_CMA_ALLOC, + PB_ISOLATE_MODE_OTHER, +}; -void set_pageblock_migratetype(struct page *page, int migratetype); +void __meminit init_pageblock_migratetype(struct page *page, + enum migratetype migratetype, + bool isolate); -bool move_freepages_block_isolate(struct zone *zone, struct page *page, - int migratetype); +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page); +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page); int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - int migratetype, int flags); + enum pb_isolate_mode mode); -void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - int migratetype); +void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, - int isol_flags); + enum pb_isolate_mode mode); #endif diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index debdc25f08b9..3328357f6dba 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -14,7 +14,7 @@ extern void __set_page_owner(struct page *page, extern void __split_page_owner(struct page *page, int old_order, int new_order); extern void __folio_copy_owner(struct folio *newfolio, struct folio *old); -extern void __set_page_owner_migrate_reason(struct page *page, int reason); +extern void __folio_set_owner_migrate_reason(struct folio *folio, int reason); extern void __dump_page_owner(const struct page *page); extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone); @@ -43,10 +43,10 @@ static inline void folio_copy_owner(struct folio *newfolio, struct folio *old) if (static_branch_unlikely(&page_owner_inited)) __folio_copy_owner(newfolio, old); } -static inline void set_page_owner_migrate_reason(struct page *page, int reason) +static inline void folio_set_owner_migrate_reason(struct folio *folio, int reason) { if (static_branch_unlikely(&page_owner_inited)) - __set_page_owner_migrate_reason(page, reason); + __folio_set_owner_migrate_reason(folio, reason); } static inline void dump_page_owner(const struct page *page) { @@ -68,7 +68,7 @@ static inline void split_page_owner(struct page *page, int old_order, static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio) { } -static inline void set_page_owner_migrate_reason(struct page *page, int reason) +static inline void folio_set_owner_migrate_reason(struct folio *folio, int reason) { } static inline void dump_page_owner(const struct page *page) diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e73a4292ef02..6a44be0f39f4 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -19,15 +19,33 @@ enum pageblock_bits { PB_migrate, PB_migrate_end = PB_migrate + PB_migratetype_bits - 1, /* 3 bits required for migrate types */ - PB_migrate_skip,/* If set the block is skipped by compaction */ + PB_compact_skip,/* If set the block is skipped by compaction */ +#ifdef CONFIG_MEMORY_ISOLATION + /* + * Pageblock isolation is represented with a separate bit, so that + * the migratetype of a block is not overwritten by isolation. 
+ */ + PB_migrate_isolate, /* If set the block is isolated */ +#endif /* * Assume the bits will always align on a word. If this assumption * changes then get/set pageblock needs updating. */ - NR_PAGEBLOCK_BITS + __NR_PAGEBLOCK_BITS }; +#define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS)) + +#define MIGRATETYPE_MASK ((1UL << (PB_migrate_end + 1)) - 1) + +#ifdef CONFIG_MEMORY_ISOLATION +#define MIGRATETYPE_AND_ISO_MASK \ + (((1UL << (PB_migrate_end + 1)) - 1) | BIT(PB_migrate_isolate)) +#else +#define MIGRATETYPE_AND_ISO_MASK MIGRATETYPE_MASK +#endif + #if defined(CONFIG_HUGETLB_PAGE) #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE @@ -41,18 +59,18 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. */ -#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_ORDER) +#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_MAX_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ #elif defined(CONFIG_TRANSPARENT_HUGEPAGE) -#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_ORDER) +#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_MAX_ORDER) #else /* CONFIG_TRANSPARENT_HUGEPAGE */ -/* If huge pages are not used, group by PAGE_BLOCK_ORDER */ -#define pageblock_order PAGE_BLOCK_ORDER +/* If huge pages are not used, group by PAGE_BLOCK_MAX_ORDER */ +#define pageblock_order PAGE_BLOCK_MAX_ORDER #endif /* CONFIG_HUGETLB_PAGE */ @@ -65,27 +83,23 @@ extern unsigned int pageblock_order; /* Forward declaration */ struct page; -unsigned long get_pfnblock_flags_mask(const struct page *page, - unsigned long pfn, - unsigned long mask); - -void set_pfnblock_flags_mask(struct page *page, - unsigned long flags, - unsigned long pfn, - unsigned long mask); +enum migratetype get_pfnblock_migratetype(const struct page *page, + unsigned long pfn); +bool get_pfnblock_bit(const struct page *page, unsigned long pfn, + enum pageblock_bits pb_bit); +void set_pfnblock_bit(const struct page *page, unsigned long pfn, + enum pageblock_bits pb_bit); +void clear_pfnblock_bit(const struct page *page, unsigned long pfn, + enum pageblock_bits pb_bit); /* Declarations for getting and setting flags. 
See mm/page_alloc.c */ #ifdef CONFIG_COMPACTION #define get_pageblock_skip(page) \ - get_pfnblock_flags_mask(page, page_to_pfn(page), \ - (1 << (PB_migrate_skip))) + get_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip) #define clear_pageblock_skip(page) \ - set_pfnblock_flags_mask(page, 0, page_to_pfn(page), \ - (1 << PB_migrate_skip)) + clear_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip) #define set_pageblock_skip(page) \ - set_pfnblock_flags_mask(page, (1 << PB_migrate_skip), \ - page_to_pfn(page), \ - (1 << PB_migrate_skip)) + set_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip) #else static inline bool get_pageblock_skip(struct page *page) { diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ce2bcdcadb73..12a12dae727d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -502,7 +502,7 @@ static inline pgoff_t mapping_align_index(struct address_space *mapping, static inline bool mapping_large_folio_support(struct address_space *mapping) { /* AS_FOLIO_ORDER is only reasonable for pagecache folios */ - VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON, + VM_WARN_ONCE((unsigned long)mapping & FOLIO_MAPPING_ANON, "Anonymous mapping always supports large folio"); return mapping_max_folio_order(mapping) > 0; @@ -905,7 +905,8 @@ static inline struct page *find_or_create_page(struct address_space *mapping, * @mapping: target address_space * @index: the page index * - * Same as grab_cache_page(), but do not wait if the page is unavailable. + * Returns locked page at given index in given cache, creating it if + * needed, but do not wait if the page is locked or to reclaim memory. * This is intended for speculative data generators, where the data can * be regenerated if the page couldn't be grabbed. This routine should * be safe to call while holding the lock for another page. @@ -969,15 +970,6 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch); -/* - * Returns locked page at given index in given cache, creating it if needed. 
- */ -static inline struct page *grab_cache_page(struct address_space *mapping, - pgoff_t index) -{ - return find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); -} - struct folio *read_cache_folio(struct address_space *, pgoff_t index, filler_t *filler, struct file *file); struct folio *mapping_read_folio_gfp(struct address_space *, pgoff_t index, diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index 9700a29f8afb..682472c15495 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -14,6 +14,8 @@ enum page_walk_lock { PGWALK_WRLOCK = 1, /* vma is expected to be already write-locked during the walk */ PGWALK_WRLOCK_VERIFY = 2, + /* vma is expected to be already read-locked during the walk */ + PGWALK_VMA_RDLOCK_VERIFY = 3, }; /** @@ -129,10 +131,9 @@ struct mm_walk { int walk_page_range(struct mm_struct *mm, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, void *private); -int walk_page_range_novma(struct mm_struct *mm, unsigned long start, - unsigned long end, const struct mm_walk_ops *ops, - pgd_t *pgd, - void *private); +int walk_kernel_page_table_range(unsigned long start, + unsigned long end, const struct mm_walk_ops *ops, + pgd_t *pgd, void *private); int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, void *private); diff --git a/include/linux/pci-ep-msi.h b/include/linux/pci-ep-msi.h new file mode 100644 index 000000000000..7c5db90f9620 --- /dev/null +++ b/include/linux/pci-ep-msi.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PCI Endpoint *Function* side MSI header file + * + * Copyright (C) 2024 NXP + * Author: Frank Li <Frank.Li@nxp.com> + */ + +#ifndef __PCI_EP_MSI__ +#define __PCI_EP_MSI__ + +struct pci_epf; + +#ifdef CONFIG_PCI_ENDPOINT_MSI_DOORBELL +int pci_epf_alloc_doorbell(struct pci_epf *epf, u16 nums); +void pci_epf_free_doorbell(struct pci_epf *epf); +#else +static inline int pci_epf_alloc_doorbell(struct pci_epf *epf, u16 nums) +{ + return -ENODATA; +} + +static inline void pci_epf_free_doorbell(struct pci_epf *epf) +{ +} +#endif /* CONFIG_GENERIC_MSI_IRQ */ + +#endif /* __PCI_EP_MSI__ */ diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 749cee0bcf2c..2e85504ba2ba 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -12,6 +12,7 @@ #include <linux/configfs.h> #include <linux/device.h> #include <linux/mod_devicetable.h> +#include <linux/msi.h> #include <linux/pci.h> struct pci_epf; @@ -129,6 +130,16 @@ struct pci_epf_bar { }; /** + * struct pci_epf_doorbell_msg - represents doorbell message + * @msg: MSI message + * @virq: IRQ number of this doorbell MSI message + */ +struct pci_epf_doorbell_msg { + struct msi_msg msg; + int virq; +}; + +/** * struct pci_epf - represents the PCI EPF device * @dev: the PCI EPF device * @name: the name of the PCI EPF device @@ -155,6 +166,8 @@ struct pci_epf_bar { * @vfunction_num_map: bitmap to manage virtual function number * @pci_vepf: list of virtual endpoint functions associated with this function * @event_ops: callbacks for capturing the EPC events + * @db_msg: data for MSI from RC side + * @num_db: number of doorbells */ struct pci_epf { struct device dev; @@ -185,6 +198,8 @@ struct pci_epf { unsigned long vfunction_num_map; struct list_head pci_vepf; const struct pci_epc_event_ops *event_ops; + struct pci_epf_doorbell_msg *db_msg; + u16 num_db; }; /** @@ -226,6 +241,9 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum 
pci_barno bar, enum pci_epc_interface_type type); void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar, enum pci_epc_interface_type type); + +int pci_epf_align_inbound_addr(struct pci_epf *epf, enum pci_barno bar, + u64 addr, dma_addr_t *base, size_t *off); int pci_epf_bind(struct pci_epf *epf); void pci_epf_unbind(struct pci_epf *epf); int pci_epf_add_vepf(struct pci_epf *epf_pf, struct pci_epf *epf_vf); diff --git a/include/linux/pci-pwrctrl.h b/include/linux/pci-pwrctrl.h index 7d439b0675e9..4aefc7901cd1 100644 --- a/include/linux/pci-pwrctrl.h +++ b/include/linux/pci-pwrctrl.h @@ -39,7 +39,7 @@ struct device_link; struct pci_pwrctrl { struct device *dev; - /* Private: don't use. */ + /* private: internal use only */ struct notifier_block nb; struct device_link *link; struct work_struct work; diff --git a/include/linux/pci.h b/include/linux/pci.h index 05e68f35f392..59876de13860 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -328,6 +328,11 @@ struct rcec_ea; * determined (e.g., for Root Complex Integrated * Endpoints without the relevant Capability * Registers). + * @is_hotplug_bridge: Hotplug bridge of any kind (e.g. PCIe Hot-Plug Capable, + * Conventional PCI Hot-Plug, ACPI slot). + * Such bridges are allocated additional MMIO and bus + * number resources to allow for hierarchy expansion. + * @is_pciehp: PCIe Hot-Plug Capable bridge. */ struct pci_dev { struct list_head bus_list; /* Node in per-bus list */ @@ -451,6 +456,7 @@ struct pci_dev { unsigned int is_physfn:1; unsigned int is_virtfn:1; unsigned int is_hotplug_bridge:1; + unsigned int is_pciehp:1; unsigned int shpc_managed:1; /* SHPC owned by shpchp */ unsigned int is_thunderbolt:1; /* Thunderbolt controller */ /* @@ -744,6 +750,21 @@ static inline bool pci_is_vga(struct pci_dev *pdev) return false; } +/** + * pci_is_display - check if the PCI device is a display controller + * @pdev: PCI device + * + * Determine whether the given PCI device corresponds to a display + * controller. Display controllers are typically used for graphical output + * and are identified based on their class code. + * + * Return: true if the PCI device is a display controller, false otherwise. 
+ */ +static inline bool pci_is_display(struct pci_dev *pdev) +{ + return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else @@ -2438,6 +2459,8 @@ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn); resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); +int pci_iov_vf_bar_set_size(struct pci_dev *dev, int resno, int size); +u32 pci_iov_vf_bar_get_sizes(struct pci_dev *dev, int resno, int num_vfs); void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe); /* Arch may override these (weak) */ @@ -2490,6 +2513,10 @@ static inline int pci_sriov_get_totalvfs(struct pci_dev *dev) #define pci_sriov_configure_simple NULL static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) { return 0; } +static inline int pci_iov_vf_bar_set_size(struct pci_dev *dev, int resno, int size) +{ return -ENODEV; } +static inline u32 pci_iov_vf_bar_get_sizes(struct pci_dev *dev, int resno, int num_vfs) +{ return 0; } static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } #endif diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index ec77ccf1fc4d..ddf79641917f 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -104,6 +104,7 @@ static inline bool shpchp_is_native(struct pci_dev *bridge) { return true; } static inline bool hotplug_is_native(struct pci_dev *bridge) { - return pciehp_is_native(bridge) || shpchp_is_native(bridge); + return (bridge->is_pciehp && pciehp_is_native(bridge)) || + shpchp_is_native(bridge); } #endif diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index c16cdeaa505e..12d90360f6db 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -63,14 +63,15 @@ * 1. The symbol must be globally unique, even the static ones. * 2. Static percpu variables cannot be defined inside a function. * - * Archs which need weak percpu definitions should define - * ARCH_NEEDS_WEAK_PER_CPU in asm/percpu.h when necessary. + * Archs which need weak percpu definitions should set + * CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU when necessary. * * To ensure that the generic code observes the above two * restrictions, if CONFIG_DEBUG_FORCE_WEAK_PER_CPU is set weak * definition is used for all cases. */ -#if defined(ARCH_NEEDS_WEAK_PER_CPU) || defined(CONFIG_DEBUG_FORCE_WEAK_PER_CPU) +#if (defined(CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU) && defined(MODULE)) || \ + defined(CONFIG_DEBUG_FORCE_WEAK_PER_CPU) /* * __pcpu_scope_* dummy variable is used to enforce scope. It * receives the static modifier when it's used in front of diff --git a/include/linux/pfn.h b/include/linux/pfn.h index 14bc053c53d8..b90ca0b6c331 100644 --- a/include/linux/pfn.h +++ b/include/linux/pfn.h @@ -4,15 +4,6 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> - -/* - * pfn_t: encapsulates a page-frame number that is optionally backed - * by memmap (struct page). Whether a pfn_t has a 'struct page' - * backing is indicated by flags in the high bits of the value. 
- */ -typedef struct { - u64 val; -} pfn_t; #endif #define PFN_ALIGN(x) (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK) diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h deleted file mode 100644 index 2d9148221e9a..000000000000 --- a/include/linux/pfn_t.h +++ /dev/null @@ -1,131 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_PFN_T_H_ -#define _LINUX_PFN_T_H_ -#include <linux/mm.h> - -/* - * PFN_FLAGS_MASK - mask of all the possible valid pfn_t flags - * PFN_SG_CHAIN - pfn is a pointer to the next scatterlist entry - * PFN_SG_LAST - pfn references a page and is the last scatterlist entry - * PFN_DEV - pfn is not covered by system memmap by default - * PFN_MAP - pfn has a dynamic page mapping established by a device driver - * PFN_SPECIAL - for CONFIG_FS_DAX_LIMITED builds to allow XIP, but not - * get_user_pages - */ -#define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT)) -#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1)) -#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2)) -#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) -#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4)) -#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5)) - -#define PFN_FLAGS_TRACE \ - { PFN_SPECIAL, "SPECIAL" }, \ - { PFN_SG_CHAIN, "SG_CHAIN" }, \ - { PFN_SG_LAST, "SG_LAST" }, \ - { PFN_DEV, "DEV" }, \ - { PFN_MAP, "MAP" } - -static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags) -{ - pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), }; - - return pfn_t; -} - -/* a default pfn to pfn_t conversion assumes that @pfn is pfn_valid() */ -static inline pfn_t pfn_to_pfn_t(unsigned long pfn) -{ - return __pfn_to_pfn_t(pfn, 0); -} - -static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags) -{ - return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags); -} - -static inline bool pfn_t_has_page(pfn_t pfn) -{ - return (pfn.val & PFN_MAP) == PFN_MAP || (pfn.val & PFN_DEV) == 0; -} - -static inline unsigned long pfn_t_to_pfn(pfn_t pfn) -{ - return pfn.val & ~PFN_FLAGS_MASK; -} - -static inline struct page *pfn_t_to_page(pfn_t pfn) -{ - if (pfn_t_has_page(pfn)) - return pfn_to_page(pfn_t_to_pfn(pfn)); - return NULL; -} - -static inline phys_addr_t pfn_t_to_phys(pfn_t pfn) -{ - return PFN_PHYS(pfn_t_to_pfn(pfn)); -} - -static inline pfn_t page_to_pfn_t(struct page *page) -{ - return pfn_to_pfn_t(page_to_pfn(page)); -} - -static inline int pfn_t_valid(pfn_t pfn) -{ - return pfn_valid(pfn_t_to_pfn(pfn)); -} - -#ifdef CONFIG_MMU -static inline pte_t pfn_t_pte(pfn_t pfn, pgprot_t pgprot) -{ - return pfn_pte(pfn_t_to_pfn(pfn), pgprot); -} -#endif - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot) -{ - return pfn_pmd(pfn_t_to_pfn(pfn), pgprot); -} - -#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot) -{ - return pfn_pud(pfn_t_to_pfn(pfn), pgprot); -} -#endif -#endif - -#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP -static inline bool pfn_t_devmap(pfn_t pfn) -{ - const u64 flags = PFN_DEV|PFN_MAP; - - return (pfn.val & flags) == flags; -} -#else -static inline bool pfn_t_devmap(pfn_t pfn) -{ - return false; -} -pte_t pte_mkdevmap(pte_t pte); -pmd_t pmd_mkdevmap(pmd_t pmd); -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ - defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) -pud_t pud_mkdevmap(pud_t pud); -#endif -#endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */ - -#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL -static inline bool pfn_t_special(pfn_t pfn) -{ - return (pfn.val & PFN_SPECIAL) == 
PFN_SPECIAL; -} -#else -static inline bool pfn_t_special(pfn_t pfn) -{ - return false; -} -#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ -#endif /* _LINUX_PFN_T_H_ */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 0b6e1f781d86..e3b99920be05 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -456,6 +456,17 @@ static inline bool arch_has_hw_pte_young(void) } #endif +#ifndef exec_folio_order +/* + * Returns preferred minimum folio order for executable file-backed memory. Must + * be in range [0, PMD_ORDER). Default to order-0. + */ +static inline unsigned int exec_folio_order(void) +{ + return 0; +} +#endif + #ifndef arch_check_zapped_pte static inline void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte) @@ -1320,7 +1331,9 @@ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, /* * Commit an update to a pte, leaving any hardware-controlled bits in - * the PTE unmodified. + * the PTE unmodified. The pte returned from ptep_modify_prot_start() may + * additionally have young and/or dirty bits set where previously they were not, + * so the updated pte may have these additional changes. */ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, @@ -1329,6 +1342,86 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, __ptep_modify_prot_commit(vma, addr, ptep, pte); } #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */ + +/** + * modify_prot_start_ptes - Start a pte protection read-modify-write transaction + * over a batch of ptes, which protects against asynchronous hardware + * modifications to the ptes. The intention is not to prevent the hardware from + * making pte updates, but to prevent any updates it may make from being lost. + * Please see the comment above ptep_modify_prot_start() for full description. + * + * @vma: The virtual memory area the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_modify_prot_start(), collecting the a/d bits from each pte + * in the batch. + * + * Note that PTE bits in the PTE batch besides the PFN can differ. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. All other PTE bits must be identical for + * all PTEs in the batch except for young and dirty bits. The PTEs are all in + * the same PMD. + */ +#ifndef modify_prot_start_ptes +static inline pte_t modify_prot_start_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned int nr) +{ + pte_t pte, tmp_pte; + + pte = ptep_modify_prot_start(vma, addr, ptep); + while (--nr) { + ptep++; + addr += PAGE_SIZE; + tmp_pte = ptep_modify_prot_start(vma, addr, ptep); + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } + return pte; +} +#endif + +/** + * modify_prot_commit_ptes - Commit an update to a batch of ptes, leaving any + * hardware-controlled bits in the PTE unmodified. + * + * @vma: The virtual memory area the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @old_pte: Old page table entry (for the first entry) which is now cleared. + * @pte: New page table entry to be set. + * @nr: Number of entries. 
+ * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_modify_prot_commit(). + * + * Context: The caller holds the page table lock. The PTEs are all in the same + * PMD. On exit, the set ptes in the batch map the same folio. The ptes set by + * ptep_modify_prot_start() may additionally have young and/or dirty bits set + * where previously they were not, so the updated ptes may have these + * additional changes. + */ +#ifndef modify_prot_commit_ptes +static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte, unsigned int nr) +{ + int i; + + for (i = 0; i < nr; ++i, ++ptep, addr += PAGE_SIZE) { + ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte); + + /* Advance PFN only, set same prot */ + old_pte = pte_next_pfn(old_pte); + pte = pte_next_pfn(pte); + } +} +#endif + #endif /* CONFIG_MMU */ /* @@ -1632,21 +1725,6 @@ static inline int pud_write(pud_t pud) } #endif /* pud_write */ -#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE) -static inline int pmd_devmap(pmd_t pmd) -{ - return 0; -} -static inline int pud_devmap(pud_t pud) -{ - return 0; -} -static inline int pgd_devmap(pgd_t pgd) -{ - return 0; -} -#endif - #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \ !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) static inline int pud_trans_huge(pud_t pud) @@ -1661,7 +1739,7 @@ static inline int pud_trans_unstable(pud_t *pud) defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) pud_t pudval = READ_ONCE(*pud); - if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval)) + if (pud_none(pudval) || pud_trans_huge(pudval)) return 1; if (unlikely(pud_bad(pudval))) { pud_clear_bad(pud); @@ -1901,8 +1979,8 @@ typedef unsigned int pgtbl_mod_mask; * - It should contain a huge PFN, which points to a huge page larger than * PAGE_SIZE of the platform. The PFN format isn't important here. * - * - It should cover all kinds of huge mappings (e.g., pXd_trans_huge(), - * pXd_devmap(), or hugetlb mappings). + * - It should cover all kinds of huge mappings (i.e. pXd_trans_huge() + * or hugetlb mappings). 
*/ #ifndef pgd_leaf #define pgd_leaf(x) false @@ -2005,7 +2083,7 @@ typedef unsigned int pgtbl_mod_mask; * x: (yes) yes */ #define DECLARE_VM_GET_PAGE_PROT \ -pgprot_t vm_get_page_prot(unsigned long vm_flags) \ +pgprot_t vm_get_page_prot(vm_flags_t vm_flags) \ { \ return protection_map[vm_flags & \ (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)]; \ diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 0cca01b5607b..f21f806bfb38 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -232,7 +232,6 @@ struct power_supply; /* Run-time specific power supply configuration */ struct power_supply_config { - struct device_node *of_node; struct fwnode_handle *fwnode; /* Driver private data */ @@ -808,19 +807,10 @@ static inline void power_supply_put(struct power_supply *psy) {} static inline struct power_supply *power_supply_get_by_name(const char *name) { return NULL; } #endif -#ifdef CONFIG_OF -extern struct power_supply *power_supply_get_by_phandle(struct device_node *np, - const char *property); -extern struct power_supply *devm_power_supply_get_by_phandle( +extern struct power_supply *power_supply_get_by_reference(struct fwnode_handle *fwnode, + const char *property); +extern struct power_supply *devm_power_supply_get_by_reference( struct device *dev, const char *property); -#else /* !CONFIG_OF */ -static inline struct power_supply * -power_supply_get_by_phandle(struct device_node *np, const char *property) -{ return NULL; } -static inline struct power_supply * -devm_power_supply_get_by_phandle(struct device *dev, const char *property) -{ return NULL; } -#endif /* CONFIG_OF */ extern const enum power_supply_property power_supply_battery_info_properties[]; extern const size_t power_supply_battery_info_properties_size; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index de1d24f19f76..f139377f4b31 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -27,6 +27,7 @@ enum { PROC_ENTRY_proc_read_iter = 1U << 1, PROC_ENTRY_proc_compat_ioctl = 1U << 2, + PROC_ENTRY_proc_lseek = 1U << 3, PROC_ENTRY_FORCE_LOOKUP = 1U << 7, }; diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c4f4903b1088..20803fcb49a7 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -893,7 +893,7 @@ static inline int folio_try_share_anon_rmap_pmd(struct folio *folio, * Called from mm/vmscan.c to handle paging out */ int folio_referenced(struct folio *, int is_locked, - struct mem_cgroup *memcg, unsigned long *vm_flags); + struct mem_cgroup *memcg, vm_flags_t *vm_flags); void try_to_migrate(struct folio *folio, enum ttu_flags flags); void try_to_unmap(struct folio *, enum ttu_flags flags); @@ -1025,7 +1025,7 @@ struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio, static inline int folio_referenced(struct folio *folio, int is_locked, struct mem_cgroup *memcg, - unsigned long *vm_flags) + vm_flags_t *vm_flags) { *vm_flags = 0; return 0; diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index c8b543d428b0..cbafdc12e743 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -240,10 +240,11 @@ extern void up_write(struct rw_semaphore *sem); DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) -DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) +DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T), _RET == 0) DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) 
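/*
 * Illustrative sketch, not part of the patch: with the conditional guard
 * classes defined above, a caller can take the rwsem interruptibly through
 * scoped_cond_guard() from <linux/cleanup.h>. The my_read_op() wrapper and
 * its -EINTR policy are hypothetical.
 */
static int my_read_op(struct rw_semaphore *sem)
{
	scoped_cond_guard(rwsem_read_intr, return -EINTR, sem) {
		/* sem is held for read here; it is released automatically on exit. */
	}
	return 0;
}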
DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) +DEFINE_GUARD_COND(rwsem_write, _kill, down_write_killable(_T), _RET == 0) /* * downgrade write lock to read lock diff --git a/include/linux/sched.h b/include/linux/sched.h index a9693f179c58..2b272382673d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -47,6 +47,7 @@ #include <linux/rv.h> #include <linux/uidgid_types.h> #include <linux/tracepoint-defs.h> +#include <linux/unwind_deferred_types.h> #include <asm/kmap_size.h> /* task_struct member predeclarations (sorted alphabetically): */ @@ -1646,6 +1647,10 @@ struct task_struct { struct user_event_mm *user_event_mm; #endif +#ifdef CONFIG_UNWIND_USER + struct unwind_task_info unwind_info; +#endif + /* CPU-specific state of this task: */ struct thread_struct thread; diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index f7545430a548..7047101dbf58 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -164,7 +164,7 @@ struct sched_ext_entity { /* * Runtime budget in nsecs. This is usually set through - * scx_bpf_dispatch() but can also be modified directly by the BPF + * scx_bpf_dsq_insert() but can also be modified directly by the BPF * scheduler. Automatically decreased by SCX as the task executes. On * depletion, a scheduling event is triggered. * @@ -176,10 +176,10 @@ struct sched_ext_entity { /* * Used to order tasks when dispatching to the vtime-ordered priority - * queue of a dsq. This is usually set through scx_bpf_dispatch_vtime() - * but can also be modified directly by the BPF scheduler. Modifying it - * while a task is queued on a dsq may mangle the ordering and is not - * recommended. + * queue of a dsq. This is usually set through + * scx_bpf_dsq_insert_vtime() but can also be modified directly by the + * BPF scheduler. Modifying it while a task is queued on a dsq may + * mangle the ordering and is not recommended. 
*/ u64 dsq_vtime; @@ -206,12 +206,25 @@ struct sched_ext_entity { void sched_ext_free(struct task_struct *p); void print_scx_info(const char *log_lvl, struct task_struct *p); void scx_softlockup(u32 dur_s); +bool scx_rcu_cpu_stall(void); #else /* !CONFIG_SCHED_CLASS_EXT */ static inline void sched_ext_free(struct task_struct *p) {} static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {} static inline void scx_softlockup(u32 dur_s) {} +static inline bool scx_rcu_cpu_stall(void) { return false; } #endif /* CONFIG_SCHED_CLASS_EXT */ + +struct scx_task_group { +#ifdef CONFIG_EXT_GROUP_SCHED + u32 flags; /* SCX_TG_* */ + u32 weight; + u64 bw_period_us; + u64 bw_quota_us; + u64 bw_burst_us; +#endif +}; + #endif /* _LINUX_SCHED_EXT_H */ diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 5f03a39a26f7..6d0f9c599ff7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -11,6 +11,8 @@ #include <linux/fs_parser.h> #include <linux/userfaultfd_k.h> +struct swap_iocb; + /* inode in-kernel data */ #ifdef CONFIG_TMPFS_QUOTA @@ -107,7 +109,8 @@ static inline bool shmem_mapping(struct address_space *mapping) void shmem_unlock_mapping(struct address_space *mapping); struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); -int shmem_writeout(struct folio *folio, struct writeback_control *wbc); +int shmem_writeout(struct folio *folio, struct swap_iocb **plug, + struct list_head *folio_list); void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 1a2c0e0838f9..fa28a8784d65 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -662,6 +662,14 @@ #define EXYNOS5433_PAD_RETENTION_UFS_OPTION (0x3268) #define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION (0x32A8) +/* For Exynos990 */ +#define EXYNOS990_PHY_CTRL_USB20 (0x72C) + +/* For Exynos7870 */ +#define EXYNOS7870_MIPI_PHY_CONTROL0 (0x070c) +#define EXYNOS7870_MIPI_PHY_CONTROL1 (0x0714) +#define EXYNOS7870_MIPI_PHY_CONTROL2 (0x0734) + /* For Tensor GS101 */ /* PMU ALIVE */ #define GS101_SYSIP_DAT0 (0x810) diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index 6b839987f14c..fe31773d5210 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -30,6 +30,7 @@ #define ACP63_PCI_REV_ID 0x63 #define ACP70_PCI_REV_ID 0x70 #define ACP71_PCI_REV_ID 0x71 +#define ACP72_PCI_REV_ID 0x72 struct acp_sdw_pdata { u16 instance; diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index c4830dfaff3d..82390712794c 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -424,7 +424,7 @@ bool spi_mem_default_supports_op(struct spi_mem *mem, int spi_mem_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op); void spi_mem_adjust_op_freq(struct spi_mem *mem, struct spi_mem_op *op); -u64 spi_mem_calc_op_duration(struct spi_mem_op *op); +u64 spi_mem_calc_op_duration(struct spi_mem *mem, struct spi_mem_op *op); bool spi_mem_supports_op(struct spi_mem *mem, const struct spi_mem_op *op); diff --git a/include/linux/swap.h b/include/linux/swap.h index bc0e1c275fc0..2fe6ed2cc3fd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -376,8 +376,9 @@ extern unsigned long totalreserve_pages; /* linux/mm/swap.c */ -void lru_note_cost(struct lruvec *lruvec, 
bool file, - unsigned int nr_io, unsigned int nr_rotated); +void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file, + unsigned int nr_io, unsigned int nr_rotated) + __releases(lruvec->lru_lock); void lru_note_cost_refault(struct folio *); void folio_add_lru(struct folio *); void folio_add_lru_vma(struct folio *, struct vm_area_struct *); @@ -415,7 +416,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #define MIN_SWAPPINESS 0 #define MAX_SWAPPINESS 200 -/* Just recliam from anon folios in proactive memory reclaim */ +/* Just reclaim from anon folios in proactive memory reclaim */ #define SWAPPINESS_ANON_ONLY (MAX_SWAPPINESS + 1) extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, @@ -431,6 +432,22 @@ extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; long remove_mapping(struct address_space *mapping, struct folio *folio); +#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) +extern int reclaim_register_node(struct node *node); +extern void reclaim_unregister_node(struct node *node); + +#else + +static inline int reclaim_register_node(struct node *node) +{ + return 0; +} + +static inline void reclaim_unregister_node(struct node *node) +{ +} +#endif /* CONFIG_SYSFS && CONFIG_NUMA */ + #ifdef CONFIG_NUMA extern int sysctl_min_unmapped_ratio; extern int sysctl_min_slab_ratio; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index fa9cf4292dff..04307a19cde3 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -480,7 +480,6 @@ enum { EVENT_FILE_FL_RECORDED_TGID_BIT, EVENT_FILE_FL_FILTERED_BIT, EVENT_FILE_FL_NO_SET_FILTER_BIT, - EVENT_FILE_FL_SOFT_MODE_BIT, EVENT_FILE_FL_SOFT_DISABLED_BIT, EVENT_FILE_FL_TRIGGER_MODE_BIT, EVENT_FILE_FL_TRIGGER_COND_BIT, @@ -618,7 +617,6 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...); * RECORDED_TGID - The tgids should be recorded at sched_switch * FILTERED - The event has a filter attached * NO_SET_FILTER - Set when filter has error and is to be ignored - * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED * SOFT_DISABLED - When set, do not trace the event (even though its * tracepoint may be enabled) * TRIGGER_MODE - When set, invoke the triggers associated with the event @@ -633,7 +631,6 @@ enum { EVENT_FILE_FL_RECORDED_TGID = (1 << EVENT_FILE_FL_RECORDED_TGID_BIT), EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT), EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT), - EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT), EVENT_FILE_FL_SOFT_DISABLED = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT), EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h new file mode 100644 index 000000000000..26122d00708a --- /dev/null +++ b/include/linux/unwind_deferred.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_DEFERRED_H +#define _LINUX_UNWIND_USER_DEFERRED_H + +#include <linux/task_work.h> +#include <linux/unwind_user.h> +#include <linux/unwind_deferred_types.h> + +struct unwind_work; + +typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stacktrace *trace, u64 cookie); + +struct unwind_work { + struct list_head list; + unwind_callback_t func; + int bit; +}; + +#ifdef CONFIG_UNWIND_USER + +enum { + UNWIND_PENDING_BIT = 0, + UNWIND_USED_BIT, +}; 
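/*
 * Illustrative sketch, not part of the patch: how a consumer of the deferred
 * unwind API declared in this header might register a callback and request a
 * user stacktrace. The my_tracer_* names are hypothetical and error handling
 * is omitted.
 */
static void my_tracer_unwind_cb(struct unwind_work *work,
				struct unwind_stacktrace *trace, u64 cookie)
{
	/* Consume trace->entries[0 .. trace->nr), correlated by cookie. */
}

static struct unwind_work my_tracer_unwind_work;

static int my_tracer_init(void)
{
	return unwind_deferred_init(&my_tracer_unwind_work, my_tracer_unwind_cb);
}

static void my_tracer_sample(void)
{
	u64 cookie;

	/* Request that the user stack be unwound before returning to user space. */
	unwind_deferred_request(&my_tracer_unwind_work, &cookie);
}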
+
+enum {
+	UNWIND_PENDING		= BIT(UNWIND_PENDING_BIT),
+
+	/* Set if the unwinding was used (directly or deferred) */
+	UNWIND_USED		= BIT(UNWIND_USED_BIT)
+};
+
+void unwind_task_init(struct task_struct *task);
+void unwind_task_free(struct task_struct *task);
+
+int unwind_user_faultable(struct unwind_stacktrace *trace);
+
+int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func);
+int unwind_deferred_request(struct unwind_work *work, u64 *cookie);
+void unwind_deferred_cancel(struct unwind_work *work);
+
+void unwind_deferred_task_exit(struct task_struct *task);
+
+static __always_inline void unwind_reset_info(void)
+{
+	struct unwind_task_info *info = &current->unwind_info;
+	unsigned long bits;
+
+	/* Was there any unwinding? */
+	if (unlikely(info->unwind_mask)) {
+		bits = info->unwind_mask;
+		do {
+			/* Is a task_work going to run again before going back */
+			if (bits & UNWIND_PENDING)
+				return;
+		} while (!try_cmpxchg(&info->unwind_mask, &bits, 0UL));
+		current->unwind_info.id.id = 0;
+
+		if (unlikely(info->cache)) {
+			info->cache->nr_entries = 0;
+			info->cache->unwind_completed = 0;
+		}
+	}
+}
+
+#else /* !CONFIG_UNWIND_USER */
+
+static inline void unwind_task_init(struct task_struct *task) {}
+static inline void unwind_task_free(struct task_struct *task) {}
+
+static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; }
+static inline int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) { return -ENOSYS; }
+static inline int unwind_deferred_request(struct unwind_work *work, u64 *timestamp) { return -ENOSYS; }
+static inline void unwind_deferred_cancel(struct unwind_work *work) {}
+
+static inline void unwind_deferred_task_exit(struct task_struct *task) {}
+static inline void unwind_reset_info(void) {}
+
+#endif /* !CONFIG_UNWIND_USER */
+
+#endif /* _LINUX_UNWIND_USER_DEFERRED_H */
diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
new file mode 100644
index 000000000000..33b62ac25c86
--- /dev/null
+++ b/include/linux/unwind_deferred_types.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H
+#define _LINUX_UNWIND_USER_DEFERRED_TYPES_H
+
+struct unwind_cache {
+	unsigned long		unwind_completed;
+	unsigned int		nr_entries;
+	unsigned long		entries[];
+};
+
+/*
+ * The unwind_task_id is a unique identifier that maps to a user space
+ * stacktrace. It is generated the first time a deferred user space
+ * stacktrace is requested after a task has entered the kernel and
+ * is cleared to zero when it exits. The mapped id will be a non-zero
+ * number.
+ *
+ * To simplify the generation of the 64 bit number, 32 bits will be
+ * the CPU it was generated on, and the other 32 bits will be a per
+ * cpu counter that gets incremented by two every time a new identifier
+ * is generated. The LSB will always be set to keep the value
+ * from being zero.
+ */ +union unwind_task_id { + struct { + u32 cpu; + u32 cnt; + }; + u64 id; +}; + +struct unwind_task_info { + unsigned long unwind_mask; + struct unwind_cache *cache; + struct callback_head work; + union unwind_task_id id; +}; + +#endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ diff --git a/include/linux/unwind_user.h b/include/linux/unwind_user.h new file mode 100644 index 000000000000..7f7282516bf5 --- /dev/null +++ b/include/linux/unwind_user.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_H +#define _LINUX_UNWIND_USER_H + +#include <linux/unwind_user_types.h> +#include <asm/unwind_user.h> + +#ifndef ARCH_INIT_USER_FP_FRAME + #define ARCH_INIT_USER_FP_FRAME +#endif + +int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries); + +#endif /* _LINUX_UNWIND_USER_H */ diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_types.h new file mode 100644 index 000000000000..a449f15be890 --- /dev/null +++ b/include/linux/unwind_user_types.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_TYPES_H +#define _LINUX_UNWIND_USER_TYPES_H + +#include <linux/types.h> + +/* + * Unwind types, listed in priority order: lower numbers are attempted first if + * available. + */ +enum unwind_user_type_bits { + UNWIND_USER_TYPE_FP_BIT = 0, + + NR_UNWIND_USER_TYPE_BITS, +}; + +enum unwind_user_type { + /* Type "none" for the start of stack walk iteration. */ + UNWIND_USER_TYPE_NONE = 0, + UNWIND_USER_TYPE_FP = BIT(UNWIND_USER_TYPE_FP_BIT), +}; + +struct unwind_stacktrace { + unsigned int nr; + unsigned long *entries; +}; + +struct unwind_user_frame { + s32 cfa_off; + s32 ra_off; + s32 fp_off; + bool use_fp; +}; + +struct unwind_user_state { + unsigned long ip; + unsigned long sp; + unsigned long fp; + enum unwind_user_type current_type; + unsigned int available_types; + bool done; +}; + +#endif /* _LINUX_UNWIND_USER_TYPES_H */ diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 75342022d144..c0e716aec26a 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -30,11 +30,7 @@ * from userfaultfd, in order to leave a free define-space for * shared O_* flags. 
*/ -#define UFFD_CLOEXEC O_CLOEXEC -#define UFFD_NONBLOCK O_NONBLOCK - #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) -#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) /* * Start with fault_pending_wqh and fault_wqh so they're more likely @@ -213,12 +209,12 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) } static inline bool vma_can_userfault(struct vm_area_struct *vma, - unsigned long vm_flags, + vm_flags_t vm_flags, bool wp_async) { vm_flags &= __VM_UFFD_FLAGS; - if (vm_flags & VM_DROPPABLE) + if (vma->vm_flags & VM_DROPPABLE) return false; if ((vm_flags & VM_UFFD_MINOR) && @@ -263,6 +259,7 @@ extern void mremap_userfaultfd_prep(struct vm_area_struct *, extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); +void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *); extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, @@ -285,7 +282,7 @@ struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, int userfaultfd_register_range(struct userfaultfd_ctx *ctx, struct vm_area_struct *vma, - unsigned long vm_flags, + vm_flags_t vm_flags, unsigned long start, unsigned long end, bool wp_async); @@ -375,6 +372,10 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, { } +static inline void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *ctx) +{ +} + static inline bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 04b90c88d164..db31fc6f4f1f 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -199,7 +199,7 @@ int virtio_device_reset_done(struct virtio_device *dev); size_t virtio_max_dma_size(const struct virtio_device *vdev); #define virtio_device_for_each_vq(vdev, vq) \ - list_for_each_entry(vq, &vdev->vqs, list) + list_for_each_entry(vq, &(vdev)->vqs, list) /** * struct virtio_driver - operations for a virtio I/O driver diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 36fb3edfa403..0c67543a45c8 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -47,31 +47,50 @@ static inline void virtio_vsock_skb_clear_tap_delivered(struct sk_buff *skb) VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = false; } -static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb) +static inline void virtio_vsock_skb_put(struct sk_buff *skb, u32 len) { - u32 len; + DEBUG_NET_WARN_ON_ONCE(skb->len); - len = le32_to_cpu(virtio_vsock_hdr(skb)->len); - - if (len > 0) + if (skb_is_nonlinear(skb)) + skb->len = len; + else skb_put(skb, len); } -static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask) +static inline struct sk_buff * +__virtio_vsock_alloc_skb_with_frags(unsigned int header_len, + unsigned int data_len, + gfp_t mask) { struct sk_buff *skb; + int err; - if (size < VIRTIO_VSOCK_SKB_HEADROOM) - return NULL; - - skb = alloc_skb(size, mask); + skb = alloc_skb_with_frags(header_len, data_len, + PAGE_ALLOC_COSTLY_ORDER, &err, mask); if (!skb) return NULL; skb_reserve(skb, VIRTIO_VSOCK_SKB_HEADROOM); + skb->data_len = data_len; return skb; } +static inline struct sk_buff * +virtio_vsock_alloc_linear_skb(unsigned int size, gfp_t mask) +{ + return __virtio_vsock_alloc_skb_with_frags(size, 0, mask); +} + +static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask) +{ + if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE << 
PAGE_ALLOC_COSTLY_ORDER)) + return virtio_vsock_alloc_linear_skb(size, mask); + + size -= VIRTIO_VSOCK_SKB_HEADROOM; + return __virtio_vsock_alloc_skb_with_frags(VIRTIO_VSOCK_SKB_HEADROOM, + size, mask); +} + static inline void virtio_vsock_skb_queue_head(struct sk_buff_head *list, struct sk_buff *skb) { @@ -111,7 +130,12 @@ static inline size_t virtio_vsock_skb_len(struct sk_buff *skb) return (size_t)(skb_end_pointer(skb) - skb->head); } -#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) +/* Dimension the RX SKB so that the entire thing fits exactly into + * a single 4KiB page. This avoids wasting memory due to alloc_skb() + * rounding up to the next page order and also means that we + * don't leave higher-order pages sitting around in the RX queue. + */ +#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE SKB_WITH_OVERHEAD(1024 * 4) #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index b2ccb6845595..c287998908bf 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -507,7 +507,7 @@ static inline const char *lru_list_name(enum lru_list lru) return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } -#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) +#if defined(CONFIG_VM_EVENT_COUNTERS) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + @@ -516,7 +516,7 @@ static inline const char *vm_event_name(enum vm_event_item item) NR_VM_STAT_ITEMS + item]; } -#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ +#endif /* CONFIG_VM_EVENT_COUNTERS */ #ifdef CONFIG_MEMCG diff --git a/include/linux/vringh.h b/include/linux/vringh.h index c3a8117dabe8..49e7cbc9697a 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -175,9 +175,6 @@ int vringh_complete_multi_user(struct vringh *vrh, const struct vring_used_elem used[], unsigned num_used); -/* Pretend we've never seen descriptor (for easy error handling). */ -void vringh_abandon_user(struct vringh *vrh, unsigned int num); - /* Do we need to fire the eventfd to notify the other side? 
*/ int vringh_need_notify_user(struct vringh *vrh); @@ -235,10 +232,6 @@ int vringh_getdesc_kern(struct vringh *vrh, u16 *head, gfp_t gfp); -ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len); -ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov, - const void *src, size_t len); -void vringh_abandon_kern(struct vringh *vrh, unsigned int num); int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len); bool vringh_notify_enable_kern(struct vringh *vrh); @@ -319,13 +312,8 @@ ssize_t vringh_iov_push_iotlb(struct vringh *vrh, struct vringh_kiov *wiov, const void *src, size_t len); -void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num); - int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len); -bool vringh_notify_enable_iotlb(struct vringh *vrh); -void vringh_notify_disable_iotlb(struct vringh *vrh); - int vringh_need_notify_iotlb(struct vringh *vrh); #endif /* CONFIG_VHOST_IOTLB */ diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 99660197a36c..8c60687a3e55 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -9,14 +9,18 @@ #ifndef _LINUX_WATCHDOG_H #define _LINUX_WATCHDOG_H - #include <linux/bitops.h> -#include <linux/cdev.h> -#include <linux/device.h> -#include <linux/kernel.h> +#include <linux/limits.h> #include <linux/notifier.h> +#include <linux/printk.h> +#include <linux/types.h> + #include <uapi/linux/watchdog.h> +struct attribute_group; +struct device; +struct module; + struct watchdog_ops; struct watchdog_device; struct watchdog_core_data; diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 6e30f275da77..45d5dd470ff6 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -6,6 +6,7 @@ #ifndef _LINUX_WORKQUEUE_H #define _LINUX_WORKQUEUE_H +#include <linux/alloc_tag.h> #include <linux/timer.h> #include <linux/linkage.h> #include <linux/bitops.h> @@ -401,6 +402,7 @@ enum wq_flags { * http://thread.gmane.org/gmane.linux.kernel/1480396 */ WQ_POWER_EFFICIENT = 1 << 7, + WQ_PERCPU = 1 << 8, /* bound to a specific cpu */ __WQ_DESTROYING = 1 << 15, /* internal: workqueue is destroying */ __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ @@ -427,7 +429,7 @@ enum wq_consts { /* * System-wide workqueues which are always present. * - * system_wq is the one used by schedule[_delayed]_work[_on](). + * system_percpu_wq is the one used by schedule[_delayed]_work[_on](). * Multi-CPU multi-threaded. There are users which expect relatively * short queue flush time. Don't queue works which can run for too * long. @@ -438,7 +440,7 @@ enum wq_consts { * system_long_wq is similar to system_wq but may host long running * works. Queue flushing might take relatively long. * - * system_unbound_wq is unbound workqueue. Workers are not bound to + * system_dfl_wq is unbound workqueue. Workers are not bound to * any specific CPU, not concurrency managed, and all queued works are * executed immediately as long as max_active limit is not reached and * resources are available. @@ -455,10 +457,12 @@ enum wq_consts { * system_bh[_highpri]_wq are convenience interface to softirq. BH work items * are executed in the queueing CPU's BH context in the queueing order. 
*/ -extern struct workqueue_struct *system_wq; +extern struct workqueue_struct *system_wq; /* use system_percpu_wq, this will be removed */ +extern struct workqueue_struct *system_percpu_wq; extern struct workqueue_struct *system_highpri_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_unbound_wq; +extern struct workqueue_struct *system_dfl_wq; extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; @@ -505,7 +509,8 @@ void workqueue_softirq_dead(unsigned int cpu); * Pointer to the allocated workqueue on success, %NULL on failure. */ __printf(1, 4) struct workqueue_struct * -alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); +alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...); +#define alloc_workqueue(...) alloc_hooks(alloc_workqueue_noprof(__VA_ARGS__)) #ifdef CONFIG_LOCKDEP /** @@ -544,8 +549,8 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) \ - alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), \ - 1, lockdep_map, ##args) + alloc_hooks(alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags),\ + 1, lockdep_map, ##args)) #endif /** @@ -577,7 +582,9 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, extern void destroy_workqueue(struct workqueue_struct *wq); -struct workqueue_attrs *alloc_workqueue_attrs(void); +struct workqueue_attrs *alloc_workqueue_attrs_noprof(void); +#define alloc_workqueue_attrs(...) alloc_hooks(alloc_workqueue_attrs_noprof(__VA_ARGS__)) + void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); @@ -840,19 +847,6 @@ long work_on_cpu_key(int cpu, long (*fn)(void *), work_on_cpu_key(_cpu, _fn, _arg, &__key); \ }) -long work_on_cpu_safe_key(int cpu, long (*fn)(void *), - void *arg, struct lock_class_key *key); - -/* - * A new key is defined for each caller to make sure the work - * associated with the function doesn't share its locking class. - */ -#define work_on_cpu_safe(_cpu, _fn, _arg) \ -({ \ - static struct lock_class_key __key; \ - \ - work_on_cpu_safe_key(_cpu, _fn, _arg, &__key); \ -}) #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER diff --git a/include/linux/writeback.h b/include/linux/writeback.h index eda4b62511f7..a2848d731a46 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -59,7 +59,6 @@ struct writeback_control { unsigned for_kupdate:1; /* A kupdate writeback */ unsigned for_background:1; /* A background writeback */ unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */ - unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ unsigned unpinned_netfs_wb:1; /* Cleared I_PINNING_NETFS_WB */ @@ -72,16 +71,6 @@ struct writeback_control { */ unsigned no_cgroup_owner:1; - /* To enable batching of swap writes to non-block-device backends, - * "plug" can be set point to a 'struct swap_iocb *'. When all swap - * writes have been submitted, if with swap_iocb is not NULL, - * swap_write_unplug() should be called. 
- */
-	struct swap_iocb **swap_plug;
-
-	/* Target list for splitting a large folio */
-	struct list_head *list;
-
 	/* internal fields used by the ->writepages implementation: */
 	struct folio_batch fbatch;
 	pgoff_t index;
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 13e9cc5490f7..f3ccff2d966c 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -46,4 +46,6 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
 void zs_obj_write(struct zs_pool *pool, unsigned long handle,
 		  void *handle_mem, size_t mem_len);
 
+extern const struct movable_operations zsmalloc_mops;
+
 #endif
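As a rough sketch (an assumption, not taken from this series), a driver following the
workqueue.h direction above might migrate from system_wq to the renamed queues and the
new WQ_PERCPU flag as follows; the my_driver/my_wq/my_work names are hypothetical and
my_work is assumed to be set up elsewhere with INIT_WORK():

#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;	/* hypothetical driver workqueue */
static struct work_struct my_work;	/* initialised elsewhere with INIT_WORK() */

static int my_driver_init(void)
{
	/* Explicitly request a per-CPU bound workqueue via the new flag. */
	my_wq = alloc_workqueue("my_wq", WQ_PERCPU, 0);
	if (!my_wq)
		return -ENOMEM;

	/* Short-lived work items go to system_percpu_wq (the old system_wq). */
	queue_work(system_percpu_wq, &my_work);
	return 0;
}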