Diffstat (limited to 'include/asm-generic')
47 files changed, 1112 insertions, 1636 deletions
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index d436bee4d129..8675b7b4ad23 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -9,7 +9,6 @@ mandatory-y += archrandom.h mandatory-y += barrier.h mandatory-y += bitops.h mandatory-y += bug.h -mandatory-y += bugs.h mandatory-y += cacheflush.h mandatory-y += cfi.h mandatory-y += checksum.h @@ -22,7 +21,6 @@ mandatory-y += dma-mapping.h mandatory-y += dma.h mandatory-y += emergency-restart.h mandatory-y += exec.h -mandatory-y += fb.h mandatory-y += ftrace.h mandatory-y += futex.h mandatory-y += hardirq.h @@ -47,6 +45,8 @@ mandatory-y += pci.h mandatory-y += percpu.h mandatory-y += pgalloc.h mandatory-y += preempt.h +mandatory-y += rqspinlock.h +mandatory-y += runtime-const.h mandatory-y += rwonce.h mandatory-y += sections.h mandatory-y += serial.h @@ -59,8 +59,8 @@ mandatory-y += tlbflush.h mandatory-y += topology.h mandatory-y += trace_clock.h mandatory-y += uaccess.h -mandatory-y += unaligned.h mandatory-y += vermagic.h mandatory-y += vga.h +mandatory-y += video.h mandatory-y += word-at-a-time.h mandatory-y += xor.h diff --git a/include/asm-generic/audit_change_attr.h b/include/asm-generic/audit_change_attr.h index 331670807cf0..cc840537885f 100644 --- a/include/asm-generic/audit_change_attr.h +++ b/include/asm-generic/audit_change_attr.h @@ -11,9 +11,15 @@ __NR_lchown, __NR_fchown, #endif __NR_setxattr, +#ifdef __NR_setxattrat +__NR_setxattrat, +#endif __NR_lsetxattr, __NR_fsetxattr, __NR_removexattr, +#ifdef __NR_removexattrat +__NR_removexattrat, +#endif __NR_lremovexattr, __NR_fremovexattr, #ifdef __NR_fchownat diff --git a/include/asm-generic/bitops/__ffs.h b/include/asm-generic/bitops/__ffs.h index 446fea6dda78..2d08c750c8a7 100644 --- a/include/asm-generic/bitops/__ffs.h +++ b/include/asm-generic/bitops/__ffs.h @@ -10,9 +10,9 @@ * * Undefined if no bit exists, so code should check against 0 first. */ -static __always_inline unsigned long generic___ffs(unsigned long word) +static __always_inline unsigned int generic___ffs(unsigned long word) { - int num = 0; + unsigned int num = 0; #if BITS_PER_LONG == 64 if ((word & 0xffffffff) == 0) { diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h index 54ccccf96e21..e974ec932ec1 100644 --- a/include/asm-generic/bitops/__fls.h +++ b/include/asm-generic/bitops/__fls.h @@ -10,9 +10,9 @@ * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned long generic___fls(unsigned long word) +static __always_inline unsigned int generic___fls(unsigned long word) { - int num = BITS_PER_LONG - 1; + unsigned int num = BITS_PER_LONG - 1; #if BITS_PER_LONG == 64 if (!(word & (~0ul << 32))) { diff --git a/include/asm-generic/bitops/builtin-__ffs.h b/include/asm-generic/bitops/builtin-__ffs.h index 87024da44d10..cf4b3d33bf96 100644 --- a/include/asm-generic/bitops/builtin-__ffs.h +++ b/include/asm-generic/bitops/builtin-__ffs.h @@ -8,7 +8,7 @@ * * Undefined if no bit exists, so code should check against 0 first. 
*/ -static __always_inline unsigned long __ffs(unsigned long word) +static __always_inline unsigned int __ffs(unsigned long word) { return __builtin_ctzl(word); } diff --git a/include/asm-generic/bitops/builtin-__fls.h b/include/asm-generic/bitops/builtin-__fls.h index 43a5aa9afbdb..6d72fc8a5259 100644 --- a/include/asm-generic/bitops/builtin-__fls.h +++ b/include/asm-generic/bitops/builtin-__fls.h @@ -8,7 +8,7 @@ * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned long __fls(unsigned long word) +static __always_inline unsigned int __fls(unsigned long word) { return (sizeof(word) * 8) - 1 - __builtin_clzl(word); } diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index b7de3a4eade1..387720933973 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -72,7 +72,7 @@ struct bug_entry { #endif /* - * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report + * WARN(), WARN_ON(), WARN_ON_ONCE(), and so on can be used to report * significant kernel issues that need prompt attention if they should ever * appear at runtime. * diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h new file mode 100644 index 000000000000..372c320c5043 --- /dev/null +++ b/include/asm-generic/codetag.lds.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GENERIC_CODETAG_LDS_H +#define __ASM_GENERIC_CODETAG_LDS_H + +#define SECTION_WITH_BOUNDARIES(_name) \ + . = ALIGN(8); \ + __start_##_name = .; \ + KEEP(*(_name)) \ + __stop_##_name = .; + +#define CODETAG_SECTIONS() \ + SECTION_WITH_BOUNDARIES(alloc_tags) + +/* + * Module codetags which aren't used after module unload, therefore have the + * same lifespan as the module and can be safely unloaded with the module. + */ +#define MOD_CODETAG_SECTIONS() + +#define MOD_SEPARATE_CODETAG_SECTION(_name) \ + .codetag.##_name : { \ + SECTION_WITH_BOUNDARIES(_name) \ + } + +/* + * For codetags which might be used after module unload, therefore might stay + * longer in memory. Each such codetag type has its own section so that we can + * unload them individually once unused. + */ +#define MOD_SEPARATE_CODETAG_SECTIONS() \ + MOD_SEPARATE_CODETAG_SECTION(alloc_tags) + +#endif /* __ASM_GENERIC_CODETAG_LDS_H */ diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h index e448ac61430c..03b0ec7afca6 100644 --- a/include/asm-generic/delay.h +++ b/include/asm-generic/delay.h @@ -2,6 +2,9 @@ #ifndef __ASM_GENERIC_DELAY_H #define __ASM_GENERIC_DELAY_H +#include <linux/math.h> +#include <vdso/time64.h> + /* Undefined functions to get compile-time errors */ extern void __bad_udelay(void); extern void __bad_ndelay(void); @@ -12,34 +15,73 @@ extern void __const_udelay(unsigned long xloops); extern void __delay(unsigned long loops); /* - * The weird n/20000 thing suppresses a "comparison is always false due to - * limited range of data type" warning with non-const 8-bit arguments. + * The microseconds/nanosecond delay multiplicators are used to convert a + * constant microseconds/nanoseconds value to a value which can be used by the + * architectures specific implementation to transform it into loops. + */ +#define UDELAY_CONST_MULT ((unsigned long)DIV_ROUND_UP(1ULL << 32, USEC_PER_SEC)) +#define NDELAY_CONST_MULT ((unsigned long)DIV_ROUND_UP(1ULL << 32, NSEC_PER_SEC)) + +/* + * The maximum constant udelay/ndelay value picked out of thin air to prevent + * too long constant udelays/ndelays. 
*/ +#define DELAY_CONST_MAX 20000 -/* 0x10c7 is 2**32 / 1000000 (rounded up) */ -#define udelay(n) \ - ({ \ - if (__builtin_constant_p(n)) { \ - if ((n) / 20000 >= 1) \ - __bad_udelay(); \ - else \ - __const_udelay((n) * 0x10c7ul); \ - } else { \ - __udelay(n); \ - } \ - }) - -/* 0x5 is 2**32 / 1000000000 (rounded up) */ -#define ndelay(n) \ - ({ \ - if (__builtin_constant_p(n)) { \ - if ((n) / 20000 >= 1) \ - __bad_ndelay(); \ - else \ - __const_udelay((n) * 5ul); \ - } else { \ - __ndelay(n); \ - } \ - }) +/** + * udelay - Inserting a delay based on microseconds with busy waiting + * @usec: requested delay in microseconds + * + * When delaying in an atomic context ndelay(), udelay() and mdelay() are the + * only valid variants of delaying/sleeping to go with. + * + * When inserting delays in non atomic context which are shorter than the time + * which is required to queue e.g. an hrtimer and to enter then the scheduler, + * it is also valuable to use udelay(). But it is not simple to specify a + * generic threshold for this which will fit for all systems. An approximation + * is a threshold for all delays up to 10 microseconds. + * + * When having a delay which is larger than the architecture specific + * %MAX_UDELAY_MS value, please make sure mdelay() is used. Otherwise a overflow + * risk is given. + * + * Please note that ndelay(), udelay() and mdelay() may return early for several + * reasons (https://lists.openwall.net/linux-kernel/2011/01/09/56): + * + * #. computed loops_per_jiffy too low (due to the time taken to execute the + * timer interrupt.) + * #. cache behaviour affecting the time it takes to execute the loop function. + * #. CPU clock rate changes. + */ +static __always_inline void udelay(unsigned long usec) +{ + if (__builtin_constant_p(usec)) { + if (usec >= DELAY_CONST_MAX) + __bad_udelay(); + else + __const_udelay(usec * UDELAY_CONST_MULT); + } else { + __udelay(usec); + } +} + +/** + * ndelay - Inserting a delay based on nanoseconds with busy waiting + * @nsec: requested delay in nanoseconds + * + * See udelay() for basic information about ndelay() and it's variants. + */ +static __always_inline void ndelay(unsigned long nsec) +{ + if (__builtin_constant_p(nsec)) { + if (nsec >= DELAY_CONST_MAX) + __bad_ndelay(); + else + __const_udelay(nsec * NDELAY_CONST_MULT); + } else { + __ndelay(nsec); + } +} +#define ndelay(x) ndelay(x) #endif /* __ASM_GENERIC_DELAY_H */ diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h index 13f5aa68a455..25e7b4b58dcf 100644 --- a/include/asm-generic/div64.h +++ b/include/asm-generic/div64.h @@ -74,7 +74,8 @@ * do the trick here). \ */ \ uint64_t ___res, ___x, ___t, ___m, ___n = (n); \ - uint32_t ___p, ___bias; \ + uint32_t ___p; \ + bool ___bias = false; \ \ /* determine MSB of b */ \ ___p = 1 << ilog2(___b); \ @@ -87,22 +88,14 @@ ___x = ~0ULL / ___b * ___b - 1; \ \ /* test our ___m with res = m * x / (p << 64) */ \ - ___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32; \ - ___t = ___res += (___m & 0xffffffff) * (___x >> 32); \ - ___res += (___x & 0xffffffff) * (___m >> 32); \ - ___t = (___res < ___t) ? 
(1ULL << 32) : 0; \ - ___res = (___res >> 32) + ___t; \ - ___res += (___m >> 32) * (___x >> 32); \ - ___res /= ___p; \ + ___res = (___m & 0xffffffff) * (___x & 0xffffffff); \ + ___t = (___m & 0xffffffff) * (___x >> 32) + (___res >> 32); \ + ___res = (___m >> 32) * (___x >> 32) + (___t >> 32); \ + ___t = (___m >> 32) * (___x & 0xffffffff) + (___t & 0xffffffff);\ + ___res = (___res + (___t >> 32)) / ___p; \ \ - /* Now sanitize and optimize what we've got. */ \ - if (~0ULL % (___b / (___b & -___b)) == 0) { \ - /* special case, can be simplified to ... */ \ - ___n /= (___b & -___b); \ - ___m = ~0ULL / (___b / (___b & -___b)); \ - ___p = 1; \ - ___bias = 1; \ - } else if (___res != ___x / ___b) { \ + /* Now validate what we've got. */ \ + if (___res != ___x / ___b) { \ /* \ * We can't get away without a bias to compensate \ * for bit truncation errors. To avoid it we'd need an \ @@ -111,45 +104,18 @@ * \ * Instead we do m = p / b and n / b = (n * m + m) / p. \ */ \ - ___bias = 1; \ + ___bias = true; \ /* Compute m = (p << 64) / b */ \ ___m = (~0ULL / ___b) * ___p; \ ___m += ((~0ULL % ___b + 1) * ___p) / ___b; \ - } else { \ - /* \ - * Reduce m / p, and try to clear bit 31 of m when \ - * possible, otherwise that'll need extra overflow \ - * handling later. \ - */ \ - uint32_t ___bits = -(___m & -___m); \ - ___bits |= ___m >> 32; \ - ___bits = (~___bits) << 1; \ - /* \ - * If ___bits == 0 then setting bit 31 is unavoidable. \ - * Simply apply the maximum possible reduction in that \ - * case. Otherwise the MSB of ___bits indicates the \ - * best reduction we should apply. \ - */ \ - if (!___bits) { \ - ___p /= (___m & -___m); \ - ___m /= (___m & -___m); \ - } else { \ - ___p >>= ilog2(___bits); \ - ___m >>= ilog2(___bits); \ - } \ - /* No bias needed. */ \ - ___bias = 0; \ } \ \ + /* Reduce m / p to help avoid overflow handling later. */ \ + ___p /= (___m & -___m); \ + ___m /= (___m & -___m); \ + \ /* \ - * Now we have a combination of 2 conditions: \ - * \ - * 1) whether or not we need to apply a bias, and \ - * \ - * 2) whether or not there might be an overflow in the cross \ - * product determined by (___m & ((1 << 63) | (1 << 31))). \ - * \ - * Select the best way to do (m_bias + m * n) / (1 << 64). \ + * Perform (m_bias + m * n) / (1 << 64). \ * From now on there will be actual runtime code generated. \ */ \ ___res = __arch_xprod_64(___m, ___n, ___bias); \ @@ -165,47 +131,42 @@ * Semantic: retval = ((bias ? m : 0) + m * n) >> 64 * * The product is a 128-bit value, scaled down to 64 bits. - * Assuming constant propagation to optimize away unused conditional code. + * Hoping for compile-time optimization of conditional code. * Architectures may provide their own optimized assembly implementation. */ -static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) +#ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE +static __always_inline +#else +static inline +#endif +uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) { uint32_t m_lo = m; uint32_t m_hi = m >> 32; uint32_t n_lo = n; uint32_t n_hi = n >> 32; - uint64_t res; - uint32_t res_lo, res_hi, tmp; - - if (!bias) { - res = ((uint64_t)m_lo * n_lo) >> 32; - } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) { - /* there can't be any overflow here */ - res = (m + (uint64_t)m_lo * n_lo) >> 32; + uint64_t x, y; + + /* Determine if overflow handling can be dispensed with. */ + bool no_ovf = __builtin_constant_p(m) && + ((m >> 32) + (m & 0xffffffff) < 0x100000000); + + if (no_ovf) { + x = (uint64_t)m_lo * n_lo + (bias ? 
m : 0); + x >>= 32; + x += (uint64_t)m_lo * n_hi; + x += (uint64_t)m_hi * n_lo; + x >>= 32; + x += (uint64_t)m_hi * n_hi; } else { - res = m + (uint64_t)m_lo * n_lo; - res_lo = res >> 32; - res_hi = (res_lo < m_hi); - res = res_lo | ((uint64_t)res_hi << 32); + x = (uint64_t)m_lo * n_lo + (bias ? m_lo : 0); + y = (uint64_t)m_lo * n_hi + (uint32_t)(x >> 32) + (bias ? m_hi : 0); + x = (uint64_t)m_hi * n_hi + (uint32_t)(y >> 32); + y = (uint64_t)m_hi * n_lo + (uint32_t)y; + x += (uint32_t)(y >> 32); } - if (!(m & ((1ULL << 63) | (1ULL << 31)))) { - /* there can't be any overflow here */ - res += (uint64_t)m_lo * n_hi; - res += (uint64_t)m_hi * n_lo; - res >>= 32; - } else { - res += (uint64_t)m_lo * n_hi; - tmp = res >> 32; - res += (uint64_t)m_hi * n_lo; - res_lo = res >> 32; - res_hi = (res_lo < tmp); - res = res_lo | ((uint64_t)res_hi << 32); - } - - res += (uint64_t)m_hi * n_hi; - - return res; + return x; } #endif diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h index 9d0479f50f97..5db59a1efb65 100644 --- a/include/asm-generic/early_ioremap.h +++ b/include/asm-generic/early_ioremap.h @@ -35,7 +35,7 @@ extern void early_ioremap_reset(void); /* * Early copy from unmapped memory to kernel mapped memory. */ -extern void copy_from_early_mem(void *dest, phys_addr_t src, +extern int copy_from_early_mem(void *dest, phys_addr_t src, unsigned long size); #else diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h index 8cc7b09c1bc7..29cab7947980 100644 --- a/include/asm-generic/fixmap.h +++ b/include/asm-generic/fixmap.h @@ -97,8 +97,5 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) #define set_fixmap_io(idx, phys) \ __set_fixmap(idx, phys, FIXMAP_PAGE_IO) -#define set_fixmap_offset_io(idx, phys) \ - __set_fixmap_offset(idx, phys, FIXMAP_PAGE_IO) - #endif /* __ASSEMBLY__ */ #endif /* __ASM_GENERIC_FIXMAP_H */ diff --git a/include/asm-generic/fprobe.h b/include/asm-generic/fprobe.h new file mode 100644 index 000000000000..8659a4dc6eb6 --- /dev/null +++ b/include/asm-generic/fprobe.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Generic arch dependent fprobe macros. + */ +#ifndef __ASM_GENERIC_FPROBE_H__ +#define __ASM_GENERIC_FPROBE_H__ + +#include <linux/bits.h> + +#ifdef CONFIG_64BIT +/* + * Encoding the size and the address of fprobe into one 64bit entry. + * The 32bit architectures should use 2 entries to store those info. + */ + +#define ARCH_DEFINE_ENCODE_FPROBE_HEADER + +#define FPROBE_HEADER_MSB_SIZE_SHIFT (BITS_PER_LONG - FPROBE_DATA_SIZE_BITS) +#define FPROBE_HEADER_MSB_MASK \ + GENMASK(FPROBE_HEADER_MSB_SIZE_SHIFT - 1, 0) + +/* + * By default, this expects the MSBs in the address of kprobe is 0xf. + * If any arch needs another fixed pattern (e.g. s390 is zero filled), + * override this. 
+ */ +#define FPROBE_HEADER_MSB_PATTERN \ + GENMASK(BITS_PER_LONG - 1, FPROBE_HEADER_MSB_SIZE_SHIFT) + +#define arch_fprobe_header_encodable(fp) \ + (((unsigned long)(fp) & ~FPROBE_HEADER_MSB_MASK) == \ + FPROBE_HEADER_MSB_PATTERN) + +#define arch_encode_fprobe_header(fp, size) \ + (((unsigned long)(fp) & FPROBE_HEADER_MSB_MASK) | \ + ((unsigned long)(size) << FPROBE_HEADER_MSB_SIZE_SHIFT)) + +#define arch_decode_fprobe_header_size(val) \ + ((unsigned long)(val) >> FPROBE_HEADER_MSB_SIZE_SHIFT) + +#define arch_decode_fprobe_header_fp(val) \ + ((struct fprobe *)(((unsigned long)(val) & FPROBE_HEADER_MSB_MASK) | \ + FPROBE_HEADER_MSB_PATTERN)) +#endif /* CONFIG_64BIT */ + +#endif /* __ASM_GENERIC_FPROBE_H__ */ diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 6dcf4d576970..3e0a8fe9b108 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -5,11 +5,6 @@ #include <linux/swap.h> #include <linux/swapops.h> -static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) -{ - return mk_pte(page, pgprot); -} - static inline unsigned long huge_pte_write(pte_t pte) { return pte_write(pte); @@ -42,20 +37,26 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) return pte_modify(pte, newprot); } +#ifndef __HAVE_ARCH_HUGE_PTE_MKUFFD_WP static inline pte_t huge_pte_mkuffd_wp(pte_t pte) { return huge_pte_wrprotect(pte_mkuffd_wp(pte)); } +#endif +#ifndef __HAVE_ARCH_HUGE_PTE_CLEAR_UFFD_WP static inline pte_t huge_pte_clear_uffd_wp(pte_t pte) { return pte_clear_uffd_wp(pte); } +#endif +#ifndef __HAVE_ARCH_HUGE_PTE_UFFD_WP static inline int huge_pte_uffd_wp(pte_t pte) { return pte_uffd_wp(pte); } +#endif #ifndef __HAVE_ARCH_HUGE_PTE_CLEAR static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, @@ -84,7 +85,7 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #ifndef __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, unsigned long sz) { return ptep_get_and_clear(mm, addr, ptep); } @@ -106,22 +107,17 @@ static inline int huge_pte_none(pte_t pte) #endif /* Please refer to comments above pte_none_mostly() for the usage */ +#ifndef __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY static inline int huge_pte_none_mostly(pte_t pte) { return huge_pte_none(pte) || is_pte_marker(pte); } +#endif #ifndef __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { - struct hstate *h = hstate_file(file); - - if (len & ~huge_page_mask(h)) - return -EINVAL; - if (addr & ~huge_page_mask(h)) - return -EINVAL; - return 0; } #endif @@ -144,7 +140,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_HUGE_PTEP_GET -static inline pte_t huge_ptep_get(pte_t *ptep) +static inline pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { return ptep_get(ptep); } diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h deleted file mode 100644 index 814207e7c37f..000000000000 --- a/include/asm-generic/hyperv-tlfs.h +++ /dev/null @@ -1,874 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* - * This file contains definitions from Hyper-V Hypervisor Top-Level Functional - * Specification (TLFS): - * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs - */ - -#ifndef _ASM_GENERIC_HYPERV_TLFS_H -#define 
_ASM_GENERIC_HYPERV_TLFS_H - -#include <linux/types.h> -#include <linux/bits.h> -#include <linux/time64.h> - -/* - * While not explicitly listed in the TLFS, Hyper-V always runs with a page size - * of 4096. These definitions are used when communicating with Hyper-V using - * guest physical pages and guest physical page addresses, since the guest page - * size may not be 4096 on all architectures. - */ -#define HV_HYP_PAGE_SHIFT 12 -#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) -#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) - -/* - * Hyper-V provides two categories of flags relevant to guest VMs. The - * "Features" category indicates specific functionality that is available - * to guests on this particular instance of Hyper-V. The "Features" - * are presented in four groups, each of which is 32 bits. The group A - * and B definitions are common across architectures and are listed here. - * However, not all flags are relevant on all architectures. - * - * Groups C and D vary across architectures and are listed in the - * architecture specific portion of hyperv-tlfs.h. Some of these flags exist - * on multiple architectures, but the bit positions are different so they - * cannot appear in the generic portion of hyperv-tlfs.h. - * - * The "Enlightenments" category provides recommendations on whether to use - * specific enlightenments that are available. The Enlighenments are a single - * group of 32 bits, but they vary across architectures and are listed in - * the architecture specific portion of hyperv-tlfs.h. - */ - -/* - * Group A Features. - */ - -/* VP Runtime register available */ -#define HV_MSR_VP_RUNTIME_AVAILABLE BIT(0) -/* Partition Reference Counter available*/ -#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1) -/* Basic SynIC register available */ -#define HV_MSR_SYNIC_AVAILABLE BIT(2) -/* Synthetic Timer registers available */ -#define HV_MSR_SYNTIMER_AVAILABLE BIT(3) -/* Virtual APIC assist and VP assist page registers available */ -#define HV_MSR_APIC_ACCESS_AVAILABLE BIT(4) -/* Hypercall and Guest OS ID registers available*/ -#define HV_MSR_HYPERCALL_AVAILABLE BIT(5) -/* Access virtual processor index register available*/ -#define HV_MSR_VP_INDEX_AVAILABLE BIT(6) -/* Virtual system reset register available*/ -#define HV_MSR_RESET_AVAILABLE BIT(7) -/* Access statistics page registers available */ -#define HV_MSR_STAT_PAGES_AVAILABLE BIT(8) -/* Partition reference TSC register is available */ -#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9) -/* Partition Guest IDLE register is available */ -#define HV_MSR_GUEST_IDLE_AVAILABLE BIT(10) -/* Partition local APIC and TSC frequency registers available */ -#define HV_ACCESS_FREQUENCY_MSRS BIT(11) -/* AccessReenlightenmentControls privilege */ -#define HV_ACCESS_REENLIGHTENMENT BIT(13) -/* AccessTscInvariantControls privilege */ -#define HV_ACCESS_TSC_INVARIANT BIT(15) - -/* - * Group B features. - */ -#define HV_CREATE_PARTITIONS BIT(0) -#define HV_ACCESS_PARTITION_ID BIT(1) -#define HV_ACCESS_MEMORY_POOL BIT(2) -#define HV_ADJUST_MESSAGE_BUFFERS BIT(3) -#define HV_POST_MESSAGES BIT(4) -#define HV_SIGNAL_EVENTS BIT(5) -#define HV_CREATE_PORT BIT(6) -#define HV_CONNECT_PORT BIT(7) -#define HV_ACCESS_STATS BIT(8) -#define HV_DEBUGGING BIT(11) -#define HV_CPU_MANAGEMENT BIT(12) -#define HV_ENABLE_EXTENDED_HYPERCALLS BIT(20) -#define HV_ISOLATION BIT(22) - -/* - * TSC page layout. 
- */ -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; -} __packed; - -union hv_reference_tsc_msr { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:11; - u64 pfn:52; - } __packed; -}; - -/* - * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. - * - * - * Bit(s) - * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 - * 55:48 - Distro specific identification - * 47:16 - Linux kernel version number - * 15:0 - Distro specific identification - * - * - */ - -#define HV_LINUX_VENDOR_ID 0x8100 - -/* - * Crash notification flags. - */ -#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62) -#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63) - -/* Declare the various hypercall operations. */ -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 -#define HVCALL_ENABLE_VP_VTL 0x000f -#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 -#define HVCALL_SEND_IPI 0x000b -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 -#define HVCALL_SEND_IPI_EX 0x0015 -#define HVCALL_GET_PARTITION_ID 0x0046 -#define HVCALL_DEPOSIT_MEMORY 0x0048 -#define HVCALL_CREATE_VP 0x004e -#define HVCALL_GET_VP_REGISTERS 0x0050 -#define HVCALL_SET_VP_REGISTERS 0x0051 -#define HVCALL_POST_MESSAGE 0x005c -#define HVCALL_SIGNAL_EVENT 0x005d -#define HVCALL_POST_DEBUG_DATA 0x0069 -#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a -#define HVCALL_RESET_DEBUG_SESSION 0x006b -#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 -#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c -#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d -#define HVCALL_RETARGET_INTERRUPT 0x007e -#define HVCALL_START_VP 0x0099 -#define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a -#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af -#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 -#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db -#define HVCALL_MMIO_READ 0x0106 -#define HVCALL_MMIO_WRITE 0x0107 - -/* Extended hypercalls */ -#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001 -#define HV_EXT_CALL_MEMORY_HEAT_HINT 0x8003 - -#define HV_FLUSH_ALL_PROCESSORS BIT(0) -#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) -#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) -#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) - -/* Extended capability bits */ -#define HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT BIT(8) - -enum HV_GENERIC_SET_FORMAT { - HV_GENERIC_SET_SPARSE_4K, - HV_GENERIC_SET_ALL, -}; - -#define HV_PARTITION_ID_SELF ((u64)-1) -#define HV_VP_INDEX_SELF ((u32)-2) - -#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) -#define HV_HYPERCALL_FAST_BIT BIT(16) -#define HV_HYPERCALL_VARHEAD_OFFSET 17 -#define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17) -#define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27) -#define HV_HYPERCALL_NESTED BIT_ULL(31) -#define HV_HYPERCALL_REP_COMP_OFFSET 32 -#define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32) -#define 
HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) -#define HV_HYPERCALL_RSVD1_MASK GENMASK_ULL(47, 44) -#define HV_HYPERCALL_REP_START_OFFSET 48 -#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) -#define HV_HYPERCALL_RSVD2_MASK GENMASK_ULL(63, 60) -#define HV_HYPERCALL_RSVD_MASK (HV_HYPERCALL_RSVD0_MASK | \ - HV_HYPERCALL_RSVD1_MASK | \ - HV_HYPERCALL_RSVD2_MASK) - -/* hypercall status code */ -#define HV_STATUS_SUCCESS 0 -#define HV_STATUS_INVALID_HYPERCALL_CODE 2 -#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 -#define HV_STATUS_INVALID_ALIGNMENT 4 -#define HV_STATUS_INVALID_PARAMETER 5 -#define HV_STATUS_ACCESS_DENIED 6 -#define HV_STATUS_OPERATION_DENIED 8 -#define HV_STATUS_INSUFFICIENT_MEMORY 11 -#define HV_STATUS_INVALID_PORT_ID 17 -#define HV_STATUS_INVALID_CONNECTION_ID 18 -#define HV_STATUS_INSUFFICIENT_BUFFERS 19 -#define HV_STATUS_TIME_OUT 120 -#define HV_STATUS_VTL_ALREADY_ENABLED 134 - -/* - * The Hyper-V TimeRefCount register and the TSC - * page provide a guest VM clock with 100ns tick rate - */ -#define HV_CLOCK_HZ (NSEC_PER_SEC/100) - -/* Define the number of synthetic interrupt sources. */ -#define HV_SYNIC_SINT_COUNT (16) -/* Define the expected SynIC version. */ -#define HV_SYNIC_VERSION_1 (0x1) -/* Valid SynIC vectors are 16-255. */ -#define HV_SYNIC_FIRST_VALID_VECTOR (16) - -#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) -#define HV_SYNIC_SIMP_ENABLE (1ULL << 0) -#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) -#define HV_SYNIC_SINT_MASKED (1ULL << 16) -#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) -#define HV_SYNIC_SINT_VECTOR_MASK (0xFF) - -#define HV_SYNIC_STIMER_COUNT (4) - -/* Define synthetic interrupt controller message constants. */ -#define HV_MESSAGE_SIZE (256) -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) - -/* - * Define hypervisor message types. Some of the message types - * are x86/x64 specific, but there's no good way to separate - * them out into the arch-specific version of hyperv-tlfs.h - * because C doesn't provide a way to extend enum types. - * Keeping them all in the arch neutral hyperv-tlfs.h seems - * the least messy compromise. - */ -enum hv_message_type { - HVMSG_NONE = 0x00000000, - - /* Memory access messages. */ - HVMSG_UNMAPPED_GPA = 0x80000000, - HVMSG_GPA_INTERCEPT = 0x80000001, - - /* Timer notification messages. */ - HVMSG_TIMER_EXPIRED = 0x80000010, - - /* Error messages. */ - HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, - HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, - - /* Trace buffer complete messages. */ - HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, - - /* Platform-specific processor intercept messages. */ - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, - HVMSG_X64_MSR_INTERCEPT = 0x80010001, - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, - HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, - HVMSG_X64_APIC_EOI = 0x80010004, - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 -}; - -/* Define synthetic interrupt controller message flags. */ -union hv_message_flags { - __u8 asu8; - struct { - __u8 msg_pending:1; - __u8 reserved:7; - } __packed; -}; - -/* Define port identifier type. */ -union hv_port_id { - __u32 asu32; - struct { - __u32 id:24; - __u32 reserved:8; - } __packed u; -}; - -/* Define synthetic interrupt controller message header. 
*/ -struct hv_message_header { - __u32 message_type; - __u8 payload_size; - union hv_message_flags message_flags; - __u8 reserved[2]; - union { - __u64 sender; - union hv_port_id port; - }; -} __packed; - -/* Define synthetic interrupt controller message format. */ -struct hv_message { - struct hv_message_header header; - union { - __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; - } u; -} __packed; - -/* Define the synthetic interrupt message page layout. */ -struct hv_message_page { - struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; -} __packed; - -/* Define timer message payload structure. */ -struct hv_timer_message_payload { - __u32 timer_index; - __u32 reserved; - __u64 expiration_time; /* When the timer expired */ - __u64 delivery_time; /* When the message was delivered */ -} __packed; - - -/* Define synthetic interrupt controller flag constants. */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) -#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long)) - -/* - * Synthetic timer configuration. - */ -union hv_stimer_config { - u64 as_uint64; - struct { - u64 enable:1; - u64 periodic:1; - u64 lazy:1; - u64 auto_enable:1; - u64 apic_vector:8; - u64 direct_mode:1; - u64 reserved_z0:3; - u64 sintx:4; - u64 reserved_z1:44; - } __packed; -}; - - -/* Define the synthetic interrupt controller event flags format. */ -union hv_synic_event_flags { - unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT]; -}; - -/* Define SynIC control register. */ -union hv_synic_scontrol { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:63; - } __packed; -}; - -/* Define synthetic interrupt source. */ -union hv_synic_sint { - u64 as_uint64; - struct { - u64 vector:8; - u64 reserved1:8; - u64 masked:1; - u64 auto_eoi:1; - u64 polling:1; - u64 reserved2:45; - } __packed; -}; - -/* Define the format of the SIMP register */ -union hv_synic_simp { - u64 as_uint64; - struct { - u64 simp_enabled:1; - u64 preserved:11; - u64 base_simp_gpa:52; - } __packed; -}; - -/* Define the format of the SIEFP register */ -union hv_synic_siefp { - u64 as_uint64; - struct { - u64 siefp_enabled:1; - u64 preserved:11; - u64 base_siefp_gpa:52; - } __packed; -}; - -struct hv_vpset { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[]; -} __packed; - -/* The maximum number of sparse vCPU banks which can be encoded by 'struct hv_vpset' */ -#define HV_MAX_SPARSE_VCPU_BANKS (64) -/* The number of vCPUs in one sparse bank */ -#define HV_VCPUS_PER_SPARSE_BANK (64) - -/* HvCallSendSyntheticClusterIpi hypercall */ -struct hv_send_ipi { - u32 vector; - u32 reserved; - u64 cpu_mask; -} __packed; - -/* HvCallSendSyntheticClusterIpiEx hypercall */ -struct hv_send_ipi_ex { - u32 vector; - u32 reserved; - struct hv_vpset vp_set; -} __packed; - -/* HvFlushGuestPhysicalAddressSpace hypercalls */ -struct hv_guest_mapping_flush { - u64 address_space; - u64 flags; -} __packed; - -/* - * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited - * by the bitwidth of "additional_pages" in union hv_gpa_page_range. - */ -#define HV_MAX_FLUSH_PAGES (2048) -#define HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB 0 -#define HV_GPA_PAGE_RANGE_PAGE_SIZE_1GB 1 - -/* HvFlushGuestPhysicalAddressList, HvExtCallMemoryHeatHint hypercall */ -union hv_gpa_page_range { - u64 address_space; - struct { - u64 additional_pages:11; - u64 largepage:1; - u64 basepfn:52; - } page; - struct { - u64 reserved:12; - u64 page_size:1; - u64 reserved1:8; - u64 base_large_pfn:43; - }; -}; - -/* - * All input flush parameters should be in single page. 
The max flush - * count is equal with how many entries of union hv_gpa_page_range can - * be populated into the input parameter page. - */ -#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ - sizeof(union hv_gpa_page_range)) - -struct hv_guest_mapping_flush_list { - u64 address_space; - u64 flags; - union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT]; -}; - -/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ -struct hv_tlb_flush { - u64 address_space; - u64 flags; - u64 processor_mask; - u64 gva_list[]; -} __packed; - -/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ -struct hv_tlb_flush_ex { - u64 address_space; - u64 flags; - struct hv_vpset hv_vp_set; - u64 gva_list[]; -} __packed; - -/* HvGetPartitionId hypercall (output only) */ -struct hv_get_partition_id { - u64 partition_id; -} __packed; - -/* HvDepositMemory hypercall */ -struct hv_deposit_memory { - u64 partition_id; - u64 gpa_page_list[]; -} __packed; - -struct hv_proximity_domain_flags { - u32 proximity_preferred : 1; - u32 reserved : 30; - u32 proximity_info_valid : 1; -} __packed; - -struct hv_proximity_domain_info { - u32 domain_id; - struct hv_proximity_domain_flags flags; -} __packed; - -struct hv_lp_startup_status { - u64 hv_status; - u64 substatus1; - u64 substatus2; - u64 substatus3; - u64 substatus4; - u64 substatus5; - u64 substatus6; -} __packed; - -/* HvAddLogicalProcessor hypercall */ -struct hv_input_add_logical_processor { - u32 lp_index; - u32 apic_id; - struct hv_proximity_domain_info proximity_domain_info; -} __packed; - -struct hv_output_add_logical_processor { - struct hv_lp_startup_status startup_status; -} __packed; - -enum HV_SUBNODE_TYPE -{ - HvSubnodeAny = 0, - HvSubnodeSocket = 1, - HvSubnodeAmdNode = 2, - HvSubnodeL3 = 3, - HvSubnodeCount = 4, - HvSubnodeInvalid = -1 -}; - -/* HvCreateVp hypercall */ -struct hv_create_vp { - u64 partition_id; - u32 vp_index; - u8 padding[3]; - u8 subnode_type; - u64 subnode_id; - struct hv_proximity_domain_info proximity_domain_info; - u64 flags; -} __packed; - -enum hv_interrupt_source { - HV_INTERRUPT_SOURCE_MSI = 1, /* MSI and MSI-X */ - HV_INTERRUPT_SOURCE_IOAPIC, -}; - -union hv_ioapic_rte { - u64 as_uint64; - - struct { - u32 vector:8; - u32 delivery_mode:3; - u32 destination_mode:1; - u32 delivery_status:1; - u32 interrupt_polarity:1; - u32 remote_irr:1; - u32 trigger_mode:1; - u32 interrupt_mask:1; - u32 reserved1:15; - - u32 reserved2:24; - u32 destination_id:8; - }; - - struct { - u32 low_uint32; - u32 high_uint32; - }; -} __packed; - -struct hv_interrupt_entry { - u32 source; - u32 reserved1; - union { - union hv_msi_entry msi_entry; - union hv_ioapic_rte ioapic_rte; - }; -} __packed; - -/* - * flags for hv_device_interrupt_target.flags - */ -#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1 -#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2 - -struct hv_device_interrupt_target { - u32 vector; - u32 flags; - union { - u64 vp_mask; - struct hv_vpset vp_set; - }; -} __packed; - -struct hv_retarget_device_interrupt { - u64 partition_id; /* use "self" */ - u64 device_id; - struct hv_interrupt_entry int_entry; - u64 reserved2; - struct hv_device_interrupt_target int_target; -} __packed __aligned(8); - -/* - * These Hyper-V registers provide information equivalent to the CPUID - * instruction on x86/x64. 
- */ -#define HV_REGISTER_HYPERVISOR_VERSION 0x00000100 /*CPUID 0x40000002 */ -#define HV_REGISTER_FEATURES 0x00000200 /*CPUID 0x40000003 */ -#define HV_REGISTER_ENLIGHTENMENTS 0x00000201 /*CPUID 0x40000004 */ - -/* - * Synthetic register definitions equivalent to MSRs on x86/x64 - */ -#define HV_REGISTER_GUEST_CRASH_P0 0x00000210 -#define HV_REGISTER_GUEST_CRASH_P1 0x00000211 -#define HV_REGISTER_GUEST_CRASH_P2 0x00000212 -#define HV_REGISTER_GUEST_CRASH_P3 0x00000213 -#define HV_REGISTER_GUEST_CRASH_P4 0x00000214 -#define HV_REGISTER_GUEST_CRASH_CTL 0x00000215 - -#define HV_REGISTER_GUEST_OS_ID 0x00090002 -#define HV_REGISTER_VP_INDEX 0x00090003 -#define HV_REGISTER_TIME_REF_COUNT 0x00090004 -#define HV_REGISTER_REFERENCE_TSC 0x00090017 - -#define HV_REGISTER_SINT0 0x000A0000 -#define HV_REGISTER_SCONTROL 0x000A0010 -#define HV_REGISTER_SIEFP 0x000A0012 -#define HV_REGISTER_SIMP 0x000A0013 -#define HV_REGISTER_EOM 0x000A0014 - -#define HV_REGISTER_STIMER0_CONFIG 0x000B0000 -#define HV_REGISTER_STIMER0_COUNT 0x000B0001 - -/* HvGetVpRegisters hypercall input with variable size reg name list*/ -struct hv_get_vp_registers_input { - struct { - u64 partitionid; - u32 vpindex; - u8 inputvtl; - u8 padding[3]; - } header; - struct input { - u32 name0; - u32 name1; - } element[]; -} __packed; - -/* HvGetVpRegisters returns an array of these output elements */ -struct hv_get_vp_registers_output { - union { - struct { - u32 a; - u32 b; - u32 c; - u32 d; - } as32 __packed; - struct { - u64 low; - u64 high; - } as64 __packed; - }; -}; - -/* HvSetVpRegisters hypercall with variable size reg name/value list*/ -struct hv_set_vp_registers_input { - struct { - u64 partitionid; - u32 vpindex; - u8 inputvtl; - u8 padding[3]; - } header; - struct { - u32 name; - u32 padding1; - u64 padding2; - u64 valuelow; - u64 valuehigh; - } element[]; -} __packed; - -enum hv_device_type { - HV_DEVICE_TYPE_LOGICAL = 0, - HV_DEVICE_TYPE_PCI = 1, - HV_DEVICE_TYPE_IOAPIC = 2, - HV_DEVICE_TYPE_ACPI = 3, -}; - -typedef u16 hv_pci_rid; -typedef u16 hv_pci_segment; -typedef u64 hv_logical_device_id; -union hv_pci_bdf { - u16 as_uint16; - - struct { - u8 function:3; - u8 device:5; - u8 bus; - }; -} __packed; - -union hv_pci_bus_range { - u16 as_uint16; - - struct { - u8 subordinate_bus; - u8 secondary_bus; - }; -} __packed; - -union hv_device_id { - u64 as_uint64; - - struct { - u64 reserved0:62; - u64 device_type:2; - }; - - /* HV_DEVICE_TYPE_LOGICAL */ - struct { - u64 id:62; - u64 device_type:2; - } logical; - - /* HV_DEVICE_TYPE_PCI */ - struct { - union { - hv_pci_rid rid; - union hv_pci_bdf bdf; - }; - - hv_pci_segment segment; - union hv_pci_bus_range shadow_bus_range; - - u16 phantom_function_bits:2; - u16 source_shadow:1; - - u16 rsvdz0:11; - u16 device_type:2; - } pci; - - /* HV_DEVICE_TYPE_IOAPIC */ - struct { - u8 ioapic_id; - u8 rsvdz0; - u16 rsvdz1; - u16 rsvdz2; - - u16 rsvdz3:14; - u16 device_type:2; - } ioapic; - - /* HV_DEVICE_TYPE_ACPI */ - struct { - u32 input_mapping_base; - u32 input_mapping_count:30; - u32 device_type:2; - } acpi; -} __packed; - -enum hv_interrupt_trigger_mode { - HV_INTERRUPT_TRIGGER_MODE_EDGE = 0, - HV_INTERRUPT_TRIGGER_MODE_LEVEL = 1, -}; - -struct hv_device_interrupt_descriptor { - u32 interrupt_type; - u32 trigger_mode; - u32 vector_count; - u32 reserved; - struct hv_device_interrupt_target target; -} __packed; - -struct hv_input_map_device_interrupt { - u64 partition_id; - u64 device_id; - u64 flags; - struct hv_interrupt_entry logical_interrupt_entry; - struct 
hv_device_interrupt_descriptor interrupt_descriptor; -} __packed; - -struct hv_output_map_device_interrupt { - struct hv_interrupt_entry interrupt_entry; -} __packed; - -struct hv_input_unmap_device_interrupt { - u64 partition_id; - u64 device_id; - struct hv_interrupt_entry interrupt_entry; -} __packed; - -#define HV_SOURCE_SHADOW_NONE 0x0 -#define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1 - -/* - * Version info reported by hypervisor - */ -union hv_hypervisor_version_info { - struct { - u32 build_number; - - u32 minor_version : 16; - u32 major_version : 16; - - u32 service_pack; - - u32 service_number : 24; - u32 service_branch : 8; - }; - struct { - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - }; -}; - -/* - * The whole argument should fit in a page to be able to pass to the hypervisor - * in one hypercall. - */ -#define HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES \ - ((HV_HYP_PAGE_SIZE - sizeof(struct hv_memory_hint)) / \ - sizeof(union hv_gpa_page_range)) - -/* HvExtCallMemoryHeatHint hypercall */ -#define HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD 2 -struct hv_memory_hint { - u64 type:2; - u64 reserved:62; - union hv_gpa_page_range ranges[]; -} __packed; - -/* Data structures for HVCALL_MMIO_READ and HVCALL_MMIO_WRITE */ -#define HV_HYPERCALL_MMIO_MAX_DATA_LENGTH 64 - -struct hv_mmio_read_input { - u64 gpa; - u32 size; - u32 reserved; -} __packed; - -struct hv_mmio_read_output { - u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH]; -} __packed; - -struct hv_mmio_write_input { - u64 gpa; - u32 size; - u32 reserved; - u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH]; -} __packed; - -#endif diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index bac63e874c7b..11abad6c87e1 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -9,6 +9,7 @@ #include <asm/page.h> /* I/O is all done through memory accesses */ #include <linux/string.h> /* for memset() and memcpy() */ +#include <linux/sizes.h> #include <linux/types.h> #include <linux/instruction_pointer.h> @@ -539,6 +540,7 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer, #if !defined(inb) && !defined(_inb) #define _inb _inb +#ifdef CONFIG_HAS_IOPORT static inline u8 _inb(unsigned long addr) { u8 val; @@ -548,10 +550,15 @@ static inline u8 _inb(unsigned long addr) __io_par(val); return val; } +#else +u8 _inb(unsigned long addr) + __compiletime_error("inb()) requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(inw) && !defined(_inw) #define _inw _inw +#ifdef CONFIG_HAS_IOPORT static inline u16 _inw(unsigned long addr) { u16 val; @@ -561,10 +568,15 @@ static inline u16 _inw(unsigned long addr) __io_par(val); return val; } +#else +u16 _inw(unsigned long addr) + __compiletime_error("inw() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(inl) && !defined(_inl) #define _inl _inl +#ifdef CONFIG_HAS_IOPORT static inline u32 _inl(unsigned long addr) { u32 val; @@ -574,36 +586,55 @@ static inline u32 _inl(unsigned long addr) __io_par(val); return val; } +#else +u32 _inl(unsigned long addr) + __compiletime_error("inl() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(outb) && !defined(_outb) #define _outb _outb +#ifdef CONFIG_HAS_IOPORT static inline void _outb(u8 value, unsigned long addr) { __io_pbw(); __raw_writeb(value, PCI_IOBASE + addr); __io_paw(); } +#else +void _outb(u8 value, unsigned long addr) + __compiletime_error("outb() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(outw) && !defined(_outw) #define _outw _outw +#ifdef CONFIG_HAS_IOPORT static inline void _outw(u16 value, 
unsigned long addr) { __io_pbw(); __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); __io_paw(); } +#else +void _outw(u16 value, unsigned long addr) + __compiletime_error("outw() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(outl) && !defined(_outl) #define _outl _outl +#ifdef CONFIG_HAS_IOPORT static inline void _outl(u32 value, unsigned long addr) { __io_pbw(); __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); __io_paw(); } +#else +void _outl(u32 value, unsigned long addr) + __compiletime_error("outl() requires CONFIG_HAS_IOPORT"); +#endif #endif #include <linux/logic_pio.h> @@ -687,53 +718,83 @@ static inline void outl_p(u32 value, unsigned long addr) #ifndef insb #define insb insb +#ifdef CONFIG_HAS_IOPORT static inline void insb(unsigned long addr, void *buffer, unsigned int count) { readsb(PCI_IOBASE + addr, buffer, count); } +#else +void insb(unsigned long addr, void *buffer, unsigned int count) + __compiletime_error("insb() requires HAS_IOPORT"); +#endif #endif #ifndef insw #define insw insw +#ifdef CONFIG_HAS_IOPORT static inline void insw(unsigned long addr, void *buffer, unsigned int count) { readsw(PCI_IOBASE + addr, buffer, count); } +#else +void insw(unsigned long addr, void *buffer, unsigned int count) + __compiletime_error("insw() requires HAS_IOPORT"); +#endif #endif #ifndef insl #define insl insl +#ifdef CONFIG_HAS_IOPORT static inline void insl(unsigned long addr, void *buffer, unsigned int count) { readsl(PCI_IOBASE + addr, buffer, count); } +#else +void insl(unsigned long addr, void *buffer, unsigned int count) + __compiletime_error("insl() requires HAS_IOPORT"); +#endif #endif #ifndef outsb #define outsb outsb +#ifdef CONFIG_HAS_IOPORT static inline void outsb(unsigned long addr, const void *buffer, unsigned int count) { writesb(PCI_IOBASE + addr, buffer, count); } +#else +void outsb(unsigned long addr, const void *buffer, unsigned int count) + __compiletime_error("outsb() requires HAS_IOPORT"); +#endif #endif #ifndef outsw #define outsw outsw +#ifdef CONFIG_HAS_IOPORT static inline void outsw(unsigned long addr, const void *buffer, unsigned int count) { writesw(PCI_IOBASE + addr, buffer, count); } +#else +void outsw(unsigned long addr, const void *buffer, unsigned int count) + __compiletime_error("outsw() requires HAS_IOPORT"); +#endif #endif #ifndef outsl #define outsl outsl +#ifdef CONFIG_HAS_IOPORT static inline void outsl(unsigned long addr, const void *buffer, unsigned int count) { writesl(PCI_IOBASE + addr, buffer, count); } +#else +void outsl(unsigned long addr, const void *buffer, unsigned int count) + __compiletime_error("outsl() requires HAS_IOPORT"); +#endif #endif #ifndef insb_p @@ -991,7 +1052,6 @@ static inline void iowrite64_rep(volatile void __iomem *addr, #ifdef __KERNEL__ -#include <linux/vmalloc.h> #define __io_virt(x) ((void __force *)(x)) /* @@ -1051,7 +1111,7 @@ void __iomem *generic_ioremap_prot(phys_addr_t phys_addr, size_t size, pgprot_t prot); void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot); + pgprot_t prot); void iounmap(volatile void __iomem *addr); void generic_iounmap(volatile void __iomem *addr); @@ -1060,7 +1120,7 @@ void generic_iounmap(volatile void __iomem *addr); static inline void __iomem *ioremap(phys_addr_t addr, size_t size) { /* _PAGE_IOREMAP needs to be supplied by the architecture */ - return ioremap_prot(addr, size, _PAGE_IOREMAP); + return ioremap_prot(addr, size, __pgprot(_PAGE_IOREMAP)); } #endif #endif /* !CONFIG_MMU || 
CONFIG_GENERIC_IOREMAP */ @@ -1151,55 +1211,39 @@ static inline void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) #endif #ifndef memset_io -#define memset_io memset_io /** - * memset_io Set a range of I/O memory to a constant value + * memset_io - Set a range of I/O memory to a constant value * @addr: The beginning of the I/O-memory range to set * @val: The value to set the memory to * @count: The number of bytes to set * * Set a range of I/O memory to a given value. */ -static inline void memset_io(volatile void __iomem *addr, int value, - size_t size) -{ - memset(__io_virt(addr), value, size); -} +void memset_io(volatile void __iomem *addr, int val, size_t count); #endif #ifndef memcpy_fromio -#define memcpy_fromio memcpy_fromio /** - * memcpy_fromio Copy a block of data from I/O memory + * memcpy_fromio - Copy a block of data from I/O memory * @dst: The (RAM) destination for the copy * @src: The (I/O memory) source for the data * @count: The number of bytes to copy * * Copy a block of data from I/O memory. */ -static inline void memcpy_fromio(void *buffer, - const volatile void __iomem *addr, - size_t size) -{ - memcpy(buffer, __io_virt(addr), size); -} +void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count); #endif #ifndef memcpy_toio -#define memcpy_toio memcpy_toio /** - * memcpy_toio Copy a block of data into I/O memory + * memcpy_toio - Copy a block of data into I/O memory * @dst: The (I/O memory) destination for the copy * @src: The (RAM) source for the data * @count: The number of bytes to copy * * Copy a block of data to I/O memory. */ -static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer, - size_t size) -{ - memcpy(__io_virt(addr), buffer, size); -} +void memcpy_toio(volatile void __iomem *dst, const void *src, size_t count); #endif extern int devmem_is_allowed(unsigned long pfn); diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 196087a8126e..9f3f25d7fc58 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -31,42 +31,22 @@ extern unsigned int ioread16(const void __iomem *); extern unsigned int ioread16be(const void __iomem *); extern unsigned int ioread32(const void __iomem *); extern unsigned int ioread32be(const void __iomem *); -#ifdef CONFIG_64BIT -extern u64 ioread64(const void __iomem *); -extern u64 ioread64be(const void __iomem *); -#endif -#ifdef readq -#define ioread64_lo_hi ioread64_lo_hi -#define ioread64_hi_lo ioread64_hi_lo -#define ioread64be_lo_hi ioread64be_lo_hi -#define ioread64be_hi_lo ioread64be_hi_lo -extern u64 ioread64_lo_hi(const void __iomem *addr); -extern u64 ioread64_hi_lo(const void __iomem *addr); -extern u64 ioread64be_lo_hi(const void __iomem *addr); -extern u64 ioread64be_hi_lo(const void __iomem *addr); -#endif +extern u64 __ioread64_lo_hi(const void __iomem *addr); +extern u64 __ioread64_hi_lo(const void __iomem *addr); +extern u64 __ioread64be_lo_hi(const void __iomem *addr); +extern u64 __ioread64be_hi_lo(const void __iomem *addr); extern void iowrite8(u8, void __iomem *); extern void iowrite16(u16, void __iomem *); extern void iowrite16be(u16, void __iomem *); extern void iowrite32(u32, void __iomem *); extern void iowrite32be(u32, void __iomem *); -#ifdef CONFIG_64BIT -extern void iowrite64(u64, void __iomem *); -extern void iowrite64be(u64, void __iomem *); -#endif -#ifdef writeq -#define iowrite64_lo_hi iowrite64_lo_hi -#define iowrite64_hi_lo iowrite64_hi_lo -#define iowrite64be_lo_hi iowrite64be_lo_hi -#define iowrite64be_hi_lo 
iowrite64be_hi_lo -extern void iowrite64_lo_hi(u64 val, void __iomem *addr); -extern void iowrite64_hi_lo(u64 val, void __iomem *addr); -extern void iowrite64be_lo_hi(u64 val, void __iomem *addr); -extern void iowrite64be_hi_lo(u64 val, void __iomem *addr); -#endif +extern void __iowrite64_lo_hi(u64 val, void __iomem *addr); +extern void __iowrite64_hi_lo(u64 val, void __iomem *addr); +extern void __iowrite64be_lo_hi(u64 val, void __iomem *addr); +extern void __iowrite64be_hi_lo(u64 val, void __iomem *addr); /* * "string" versions of the above. Note that they diff --git a/include/asm-generic/mcs_spinlock.h b/include/asm-generic/mcs_spinlock.h index 10cd4ffc6ba2..39c94012b88a 100644 --- a/include/asm-generic/mcs_spinlock.h +++ b/include/asm-generic/mcs_spinlock.h @@ -1,6 +1,12 @@ #ifndef __ASM_MCS_SPINLOCK_H #define __ASM_MCS_SPINLOCK_H +struct mcs_spinlock { + struct mcs_spinlock *next; + int locked; /* 1 if lock acquired */ + int count; /* nesting count, see qspinlock.c */ +}; + /* * Architectures can define their own: * diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 6796abe1900e..74d0077cc5fa 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -19,17 +19,26 @@ #define __page_to_pfn(page) ((unsigned long)((page) - mem_map) + \ ARCH_PFN_OFFSET) +/* avoid <linux/mm.h> include hell */ +extern unsigned long max_mapnr; + #ifndef pfn_valid static inline int pfn_valid(unsigned long pfn) { - /* avoid <linux/mm.h> include hell */ - extern unsigned long max_mapnr; unsigned long pfn_offset = ARCH_PFN_OFFSET; return pfn >= pfn_offset && (pfn - pfn_offset) < max_mapnr; } #define pfn_valid pfn_valid -#endif + +#ifndef for_each_valid_pfn +#define for_each_valid_pfn(pfn, start_pfn, end_pfn) \ + for ((pfn) = max_t(unsigned long, (start_pfn), ARCH_PFN_OFFSET); \ + (pfn) < min_t(unsigned long, (end_pfn), \ + ARCH_PFN_OFFSET + max_mapnr); \ + (pfn)++) +#endif /* for_each_valid_pfn */ +#endif /* valid_pfn */ #elif defined(CONFIG_SPARSEMEM_VMEMMAP) @@ -64,6 +73,19 @@ static inline int pfn_valid(unsigned long pfn) #define page_to_pfn __page_to_pfn #define pfn_to_page __pfn_to_page +#ifdef CONFIG_DEBUG_VIRTUAL +#define page_to_phys(page) \ +({ \ + unsigned long __pfn = page_to_pfn(page); \ + \ + WARN_ON_ONCE(!pfn_valid(__pfn)); \ + PFN_PHYS(__pfn); \ +}) +#else +#define page_to_phys(page) PFN_PHYS(page_to_pfn(page)) +#endif /* CONFIG_DEBUG_VIRTUAL */ +#define phys_to_page(phys) pfn_to_page(PHYS_PFN(phys)) + #endif /* __ASSEMBLY__ */ #endif diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index 4dbb177d1150..6eea3b3c1e65 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Define generic no-op hooks for arch_dup_mmap, arch_exit_mmap - * and arch_unmap to be included in asm-FOO/mmu_context.h for any - * arch FOO which doesn't need to hook these. + * Define generic no-op hooks for arch_dup_mmap and arch_exit_mmap + * to be included in asm-FOO/mmu_context.h for any arch FOO which + * doesn't need to hook these. 
*/ #ifndef _ASM_GENERIC_MM_HOOKS_H #define _ASM_GENERIC_MM_HOOKS_H @@ -17,11 +17,6 @@ static inline void arch_exit_mmap(struct mm_struct *mm) { } -static inline void arch_unmap(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ -} - static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { diff --git a/include/asm-generic/mmzone.h b/include/asm-generic/mmzone.h new file mode 100644 index 000000000000..2ab5193e8394 --- /dev/null +++ b/include/asm-generic/mmzone.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_MMZONE_H +#define _ASM_GENERIC_MMZONE_H + +#endif diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h index 98e1541b72b7..a8622501b975 100644 --- a/include/asm-generic/module.h +++ b/include/asm-generic/module.h @@ -19,12 +19,8 @@ struct mod_arch_specific #define Elf_Dyn Elf64_Dyn #define Elf_Ehdr Elf64_Ehdr #define Elf_Addr Elf64_Addr -#ifdef CONFIG_MODULES_USE_ELF_REL #define Elf_Rel Elf64_Rel -#endif -#ifdef CONFIG_MODULES_USE_ELF_RELA #define Elf_Rela Elf64_Rela -#endif #define ELF_R_TYPE(X) ELF64_R_TYPE(X) #define ELF_R_SYM(X) ELF64_R_SYM(X) @@ -36,12 +32,8 @@ struct mod_arch_specific #define Elf_Dyn Elf32_Dyn #define Elf_Ehdr Elf32_Ehdr #define Elf_Addr Elf32_Addr -#ifdef CONFIG_MODULES_USE_ELF_REL #define Elf_Rel Elf32_Rel -#endif -#ifdef CONFIG_MODULES_USE_ELF_RELA #define Elf_Rela Elf32_Rela -#endif #define ELF_R_TYPE(X) ELF32_R_TYPE(X) #define ELF_R_SYM(X) ELF32_R_SYM(X) #endif diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 8fe7aaab2599..ccccb1cbf7df 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -6,9 +6,8 @@ * independent. See arch/<arch>/include/asm/mshyperv.h for definitions * that are specific to architecture <arch>. * - * Definitions that are specified in the Hyper-V Top Level Functional - * Spec (TLFS) should not go in this file, but should instead go in - * hyperv-tlfs.h. + * Definitions that are derived from Hyper-V code or headers should not go in + * this file, but should instead go in the relevant files in include/hyperv. * * Copyright (C) 2019, Microsoft, Inc. * @@ -25,13 +24,19 @@ #include <linux/cpumask.h> #include <linux/nmi.h> #include <asm/ptrace.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> #define VTPM_BASE_ADDRESS 0xfed40000 +enum hv_partition_type { + HV_PARTITION_TYPE_GUEST, + HV_PARTITION_TYPE_ROOT, +}; + struct ms_hyperv_info { u32 features; u32 priv_high; + u32 ext_features; u32 misc_features; u32 hints; u32 nested_features; @@ -59,15 +64,32 @@ struct ms_hyperv_info { }; extern struct ms_hyperv_info ms_hyperv; extern bool hv_nested; +extern u64 hv_current_partition_id; +extern enum hv_partition_type hv_curr_partition_type; extern void * __percpu *hyperv_pcpu_input_arg; extern void * __percpu *hyperv_pcpu_output_arg; -extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); -extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); +u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); +u64 hv_do_fast_hypercall8(u16 control, u64 input8); +u64 hv_do_fast_hypercall16(u16 control, u64 input1, u64 input2); + bool hv_isolation_type_snp(void); bool hv_isolation_type_tdx(void); +/* + * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64), + * it doesn't provide a recommendation flag and AEOI must be disabled. 
+ */ +static inline bool hv_recommend_using_aeoi(void) +{ +#ifdef HV_DEPRECATING_AEOI_RECOMMENDED + return !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED); +#else + return false; +#endif +} + static inline struct hv_proximity_domain_info hv_numa_node_to_pxm_info(int node) { struct hv_proximity_domain_info pxm_info = {}; @@ -186,12 +208,11 @@ void hv_setup_kexec_handler(void (*handler)(void)); void hv_remove_kexec_handler(void); void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); void hv_remove_crash_handler(void); +void hv_setup_mshv_handler(void (*handler)(void)); extern int vmbus_interrupt; extern int vmbus_irq; -extern bool hv_root_partition; - #if IS_ENABLED(CONFIG_HYPERV) /* * Hypervisor's notion of virtual processor ID is different from @@ -208,10 +229,12 @@ extern u64 (*hv_read_reference_counter)(void); #define VP_INVAL U32_MAX int __init hv_common_init(void); +void __init hv_get_partition_id(void); void __init hv_common_free(void); void __init ms_hyperv_late_init(void); int hv_common_cpu_init(unsigned int cpu); int hv_common_cpu_die(unsigned int cpu); +void hv_identify_partition_type(void); void *hv_alloc_hyperv_page(void); void *hv_alloc_hyperv_zeroed_page(void); @@ -292,6 +315,20 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset, return __cpumask_to_vpset(vpset, cpus, func); } +#define _hv_status_fmt(fmt) "%s: Hyper-V status: %#x = %s: " fmt +#define hv_status_printk(level, status, fmt, ...) \ +do { \ + u64 __status = (status); \ + pr_##level(_hv_status_fmt(fmt), __func__, hv_result(__status), \ + hv_result_to_string(__status), ##__VA_ARGS__); \ +} while (0) +#define hv_status_err(status, fmt, ...) \ + hv_status_printk(err, status, fmt, ##__VA_ARGS__) +#define hv_status_debug(status, fmt, ...) \ + hv_status_printk(debug, status, fmt, ##__VA_ARGS__) + +const char *hv_result_to_string(u64 hv_status); +int hv_result_to_errno(u64 status); void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die); bool hv_is_hyperv_initialized(void); bool hv_is_hibernation_supported(void); @@ -304,6 +341,7 @@ void hyperv_cleanup(void); bool hv_query_ext_cap(u64 cap_query); void hv_setup_dma_ops(struct device *dev, bool coherent); #else /* CONFIG_HYPERV */ +static inline void hv_identify_partition_type(void) {} static inline bool hv_is_hyperv_initialized(void) { return false; } static inline bool hv_is_hibernation_supported(void) { return false; } static inline void hyperv_cleanup(void) {} @@ -315,4 +353,29 @@ static inline enum hv_isolation_type hv_get_isolation_type(void) } #endif /* CONFIG_HYPERV */ +#if IS_ENABLED(CONFIG_MSHV_ROOT) +static inline bool hv_root_partition(void) +{ + return hv_curr_partition_type == HV_PARTITION_TYPE_ROOT; +} +int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); +int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); +int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); + +#else /* CONFIG_MSHV_ROOT */ +static inline bool hv_root_partition(void) { return false; } +static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) +{ + return -EOPNOTSUPP; +} +static inline int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id) +{ + return -EOPNOTSUPP; +} +static inline int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_MSHV_ROOT */ + #endif diff --git a/include/asm-generic/numa.h b/include/asm-generic/numa.h index c32e0cf23c90..e063d6487f66 100644 --- 
a/include/asm-generic/numa.h +++ b/include/asm-generic/numa.h @@ -32,10 +32,8 @@ static inline const struct cpumask *cpumask_of_node(int node) void __init arch_numa_init(void); int __init numa_add_memblk(int nodeid, u64 start, u64 end); -void __init numa_set_distance(int from, int to, int distance); -void __init numa_free_distance(void); void __init early_map_cpu_to_node(unsigned int cpu, int nid); -int __init early_cpu_to_node(int cpu); +int early_cpu_to_node(int cpu); void numa_store_cpu_info(unsigned int cpu); void numa_add_cpu(unsigned int cpu); void numa_remove_cpu(unsigned int cpu); @@ -51,4 +49,8 @@ static inline int early_cpu_to_node(int cpu) { return 0; } #endif /* CONFIG_NUMA */ +#ifdef CONFIG_NUMA_EMU +void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable); +#endif + #endif /* __ASM_GENERIC_NUMA_H */ diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h deleted file mode 100644 index 9773582fd96e..000000000000 --- a/include/asm-generic/page.h +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_GENERIC_PAGE_H -#define __ASM_GENERIC_PAGE_H -/* - * Generic page.h implementation, for NOMMU architectures. - * This provides the dummy definitions for the memory management. - */ - -#ifdef CONFIG_MMU -#error need to provide a real asm/page.h -#endif - - -/* PAGE_SHIFT determines the page size */ - -#define PAGE_SHIFT 12 -#ifdef __ASSEMBLY__ -#define PAGE_SIZE (1 << PAGE_SHIFT) -#else -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#endif -#define PAGE_MASK (~(PAGE_SIZE-1)) - -#include <asm/setup.h> - -#ifndef __ASSEMBLY__ - -#define clear_page(page) memset((page), 0, PAGE_SIZE) -#define copy_page(to,from) memcpy((to), (from), PAGE_SIZE) - -#define clear_user_page(page, vaddr, pg) clear_page(page) -#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) - -/* - * These are used to make use of C type-checking.. 
- */ -typedef struct { - unsigned long pte; -} pte_t; -typedef struct { - unsigned long pmd[16]; -} pmd_t; -typedef struct { - unsigned long pgd; -} pgd_t; -typedef struct { - unsigned long pgprot; -} pgprot_t; -typedef struct page *pgtable_t; - -#define pte_val(x) ((x).pte) -#define pmd_val(x) ((&x)->pmd[0]) -#define pgd_val(x) ((x).pgd) -#define pgprot_val(x) ((x).pgprot) - -#define __pte(x) ((pte_t) { (x) } ) -#define __pmd(x) ((pmd_t) { (x) } ) -#define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -extern unsigned long memory_start; -extern unsigned long memory_end; - -#endif /* !__ASSEMBLY__ */ - -#define PAGE_OFFSET (0) - -#ifndef ARCH_PFN_OFFSET -#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) -#endif - -#ifndef __ASSEMBLY__ - -#define __va(x) ((void *)((unsigned long) (x))) -#define __pa(x) ((unsigned long) (x)) - -static inline unsigned long virt_to_pfn(const void *kaddr) -{ - return __pa(kaddr) >> PAGE_SHIFT; -} -#define virt_to_pfn virt_to_pfn -static inline void *pfn_to_virt(unsigned long pfn) -{ - return __va(pfn) << PAGE_SHIFT; -} -#define pfn_to_virt pfn_to_virt - -#define virt_to_page(addr) pfn_to_page(virt_to_pfn(addr)) -#define page_to_virt(page) pfn_to_virt(page_to_pfn(page)) - -#ifndef page_to_phys -#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) -#endif - -#define virt_addr_valid(kaddr) (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \ - ((void *)(kaddr) < (void *)memory_end)) - -#endif /* __ASSEMBLY__ */ - -#include <asm-generic/memory_model.h> -#include <asm-generic/getorder.h> - -#endif /* __ASM_GENERIC_PAGE_H */ diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 94cbd50cc870..02aeca21479a 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -6,6 +6,19 @@ #include <linux/threads.h> #include <linux/percpu-defs.h> +/* + * __percpu_qual is the qualifier for the percpu named address space. + * + * Most arches use generic named address space for percpu variables but + * some arches define percpu variables in different named address space + * (on the x86 arch, percpu variable may be declared as being relative + * to the %fs or %gs segments using __seg_fs or __seg_gs named address + * space qualifier). 
+ */ +#ifndef __percpu_qual +# define __percpu_qual +#endif + #ifdef CONFIG_SMP /* @@ -74,7 +87,7 @@ do { \ #define raw_cpu_generic_add_return(pcp, val) \ ({ \ - typeof(pcp) *__p = raw_cpu_ptr(&(pcp)); \ + TYPEOF_UNQUAL(pcp) *__p = raw_cpu_ptr(&(pcp)); \ \ *__p += val; \ *__p; \ @@ -82,8 +95,8 @@ do { \ #define raw_cpu_generic_xchg(pcp, nval) \ ({ \ - typeof(pcp) *__p = raw_cpu_ptr(&(pcp)); \ - typeof(pcp) __ret; \ + TYPEOF_UNQUAL(pcp) *__p = raw_cpu_ptr(&(pcp)); \ + TYPEOF_UNQUAL(pcp) __ret; \ __ret = *__p; \ *__p = nval; \ __ret; \ @@ -91,7 +104,7 @@ do { \ #define __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, _cmpxchg) \ ({ \ - typeof(pcp) __val, __old = *(ovalp); \ + TYPEOF_UNQUAL(pcp) __val, __old = *(ovalp); \ __val = _cmpxchg(pcp, __old, nval); \ if (__val != __old) \ *(ovalp) = __val; \ @@ -100,8 +113,8 @@ do { \ #define raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) \ ({ \ - typeof(pcp) *__p = raw_cpu_ptr(&(pcp)); \ - typeof(pcp) __val = *__p, ___old = *(ovalp); \ + TYPEOF_UNQUAL(pcp) *__p = raw_cpu_ptr(&(pcp)); \ + TYPEOF_UNQUAL(pcp) __val = *__p, ___old = *(ovalp); \ bool __ret; \ if (__val == ___old) { \ *__p = nval; \ @@ -115,14 +128,14 @@ do { \ #define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ - typeof(pcp) __old = (oval); \ + TYPEOF_UNQUAL(pcp) __old = (oval); \ raw_cpu_generic_try_cmpxchg(pcp, &__old, nval); \ __old; \ }) #define __this_cpu_generic_read_nopreempt(pcp) \ ({ \ - typeof(pcp) ___ret; \ + TYPEOF_UNQUAL(pcp) ___ret; \ preempt_disable_notrace(); \ ___ret = READ_ONCE(*raw_cpu_ptr(&(pcp))); \ preempt_enable_notrace(); \ @@ -131,7 +144,7 @@ do { \ #define __this_cpu_generic_read_noirq(pcp) \ ({ \ - typeof(pcp) ___ret; \ + TYPEOF_UNQUAL(pcp) ___ret; \ unsigned long ___flags; \ raw_local_irq_save(___flags); \ ___ret = raw_cpu_generic_read(pcp); \ @@ -141,7 +154,7 @@ do { \ #define this_cpu_generic_read(pcp) \ ({ \ - typeof(pcp) __ret; \ + TYPEOF_UNQUAL(pcp) __ret; \ if (__native_word(pcp)) \ __ret = __this_cpu_generic_read_nopreempt(pcp); \ else \ @@ -160,7 +173,7 @@ do { \ #define this_cpu_generic_add_return(pcp, val) \ ({ \ - typeof(pcp) __ret; \ + TYPEOF_UNQUAL(pcp) __ret; \ unsigned long __flags; \ raw_local_irq_save(__flags); \ __ret = raw_cpu_generic_add_return(pcp, val); \ @@ -170,7 +183,7 @@ do { \ #define this_cpu_generic_xchg(pcp, nval) \ ({ \ - typeof(pcp) __ret; \ + TYPEOF_UNQUAL(pcp) __ret; \ unsigned long __flags; \ raw_local_irq_save(__flags); \ __ret = raw_cpu_generic_xchg(pcp, nval); \ @@ -190,7 +203,7 @@ do { \ #define this_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ - typeof(pcp) __ret; \ + TYPEOF_UNQUAL(pcp) __ret; \ unsigned long __flags; \ raw_local_irq_save(__flags); \ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \ diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 879e5f8aa5e9..3c8ec3bfea44 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -16,15 +16,21 @@ * * Return: pointer to the allocated memory or %NULL on error */ -static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm) +static inline pte_t *__pte_alloc_one_kernel_noprof(struct mm_struct *mm) { - struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & + struct ptdesc *ptdesc = pagetable_alloc_noprof(GFP_PGTABLE_KERNEL & ~__GFP_HIGHMEM, 0); if (!ptdesc) return NULL; + if (!pagetable_pte_ctor(mm, ptdesc)) { + pagetable_free(ptdesc); + return NULL; + } + return ptdesc_address(ptdesc); } +#define __pte_alloc_one_kernel(...) 
alloc_hooks(__pte_alloc_one_kernel_noprof(__VA_ARGS__)) #ifndef __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL /** @@ -33,10 +39,11 @@ static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm) * * Return: pointer to the allocated memory or %NULL on error */ -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) +static inline pte_t *pte_alloc_one_kernel_noprof(struct mm_struct *mm) { - return __pte_alloc_one_kernel(mm); + return __pte_alloc_one_kernel_noprof(mm); } +#define pte_alloc_one_kernel(...) alloc_hooks(pte_alloc_one_kernel_noprof(__VA_ARGS__)) #endif /** @@ -46,7 +53,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) */ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - pagetable_free(virt_to_ptdesc(pte)); + pagetable_dtor_free(virt_to_ptdesc(pte)); } /** @@ -61,20 +68,21 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) * * Return: `struct page` referencing the ptdesc or %NULL on error */ -static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp) +static inline pgtable_t __pte_alloc_one_noprof(struct mm_struct *mm, gfp_t gfp) { struct ptdesc *ptdesc; - ptdesc = pagetable_alloc(gfp, 0); + ptdesc = pagetable_alloc_noprof(gfp, 0); if (!ptdesc) return NULL; - if (!pagetable_pte_ctor(ptdesc)) { + if (!pagetable_pte_ctor(mm, ptdesc)) { pagetable_free(ptdesc); return NULL; } return ptdesc_page(ptdesc); } +#define __pte_alloc_one(...) alloc_hooks(__pte_alloc_one_noprof(__VA_ARGS__)) #ifndef __HAVE_ARCH_PTE_ALLOC_ONE /** @@ -85,10 +93,11 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp) * * Return: `struct page` referencing the ptdesc or %NULL on error */ -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) +static inline pgtable_t pte_alloc_one_noprof(struct mm_struct *mm) { - return __pte_alloc_one(mm, GFP_PGTABLE_USER); + return __pte_alloc_one_noprof(mm, GFP_PGTABLE_USER); } +#define pte_alloc_one(...) alloc_hooks(pte_alloc_one_noprof(__VA_ARGS__)) #endif /* @@ -105,8 +114,7 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte_page) { struct ptdesc *ptdesc = page_ptdesc(pte_page); - pagetable_pte_dtor(ptdesc); - pagetable_free(ptdesc); + pagetable_dtor_free(ptdesc); } @@ -124,22 +132,23 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte_page) * * Return: pointer to the allocated memory or %NULL on error */ -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +static inline pmd_t *pmd_alloc_one_noprof(struct mm_struct *mm, unsigned long addr) { struct ptdesc *ptdesc; gfp_t gfp = GFP_PGTABLE_USER; if (mm == &init_mm) gfp = GFP_PGTABLE_KERNEL; - ptdesc = pagetable_alloc(gfp, 0); + ptdesc = pagetable_alloc_noprof(gfp, 0); if (!ptdesc) return NULL; - if (!pagetable_pmd_ctor(ptdesc)) { + if (!pagetable_pmd_ctor(mm, ptdesc)) { pagetable_free(ptdesc); return NULL; } return ptdesc_address(ptdesc); } +#define pmd_alloc_one(...) 
alloc_hooks(pmd_alloc_one_noprof(__VA_ARGS__)) #endif #ifndef __HAVE_ARCH_PMD_FREE @@ -148,8 +157,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) struct ptdesc *ptdesc = virt_to_ptdesc(pmd); BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); - pagetable_pmd_dtor(ptdesc); - pagetable_free(ptdesc); + pagetable_dtor_free(ptdesc); } #endif @@ -157,7 +165,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #if CONFIG_PGTABLE_LEVELS > 3 -static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) +static inline pud_t *__pud_alloc_one_noprof(struct mm_struct *mm, unsigned long addr) { gfp_t gfp = GFP_PGTABLE_USER; struct ptdesc *ptdesc; @@ -166,13 +174,14 @@ static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) gfp = GFP_PGTABLE_KERNEL; gfp &= ~__GFP_HIGHMEM; - ptdesc = pagetable_alloc(gfp, 0); + ptdesc = pagetable_alloc_noprof(gfp, 0); if (!ptdesc) return NULL; pagetable_pud_ctor(ptdesc); return ptdesc_address(ptdesc); } +#define __pud_alloc_one(...) alloc_hooks(__pud_alloc_one_noprof(__VA_ARGS__)) #ifndef __HAVE_ARCH_PUD_ALLOC_ONE /** @@ -184,10 +193,11 @@ static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) * * Return: pointer to the allocated memory or %NULL on error */ -static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +static inline pud_t *pud_alloc_one_noprof(struct mm_struct *mm, unsigned long addr) { - return __pud_alloc_one(mm, addr); + return __pud_alloc_one_noprof(mm, addr); } +#define pud_alloc_one(...) alloc_hooks(pud_alloc_one_noprof(__VA_ARGS__)) #endif static inline void __pud_free(struct mm_struct *mm, pud_t *pud) @@ -195,8 +205,7 @@ static inline void __pud_free(struct mm_struct *mm, pud_t *pud) struct ptdesc *ptdesc = virt_to_ptdesc(pud); BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); - pagetable_pud_dtor(ptdesc); - pagetable_free(ptdesc); + pagetable_dtor_free(ptdesc); } #ifndef __HAVE_ARCH_PUD_FREE @@ -208,10 +217,82 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) #endif /* CONFIG_PGTABLE_LEVELS > 3 */ +#if CONFIG_PGTABLE_LEVELS > 4 + +static inline p4d_t *__p4d_alloc_one_noprof(struct mm_struct *mm, unsigned long addr) +{ + gfp_t gfp = GFP_PGTABLE_USER; + struct ptdesc *ptdesc; + + if (mm == &init_mm) + gfp = GFP_PGTABLE_KERNEL; + gfp &= ~__GFP_HIGHMEM; + + ptdesc = pagetable_alloc_noprof(gfp, 0); + if (!ptdesc) + return NULL; + + pagetable_p4d_ctor(ptdesc); + return ptdesc_address(ptdesc); +} +#define __p4d_alloc_one(...) alloc_hooks(__p4d_alloc_one_noprof(__VA_ARGS__)) + +#ifndef __HAVE_ARCH_P4D_ALLOC_ONE +static inline p4d_t *p4d_alloc_one_noprof(struct mm_struct *mm, unsigned long addr) +{ + return __p4d_alloc_one_noprof(mm, addr); +} +#define p4d_alloc_one(...) 
alloc_hooks(p4d_alloc_one_noprof(__VA_ARGS__)) +#endif + +static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + struct ptdesc *ptdesc = virt_to_ptdesc(p4d); + + BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); + pagetable_dtor_free(ptdesc); +} + +#ifndef __HAVE_ARCH_P4D_FREE +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + if (!mm_p4d_folded(mm)) + __p4d_free(mm, p4d); +} +#endif + +#endif /* CONFIG_PGTABLE_LEVELS > 4 */ + +static inline pgd_t *__pgd_alloc_noprof(struct mm_struct *mm, unsigned int order) +{ + gfp_t gfp = GFP_PGTABLE_USER; + struct ptdesc *ptdesc; + + if (mm == &init_mm) + gfp = GFP_PGTABLE_KERNEL; + gfp &= ~__GFP_HIGHMEM; + + ptdesc = pagetable_alloc_noprof(gfp, order); + if (!ptdesc) + return NULL; + + pagetable_pgd_ctor(ptdesc); + return ptdesc_address(ptdesc); +} +#define __pgd_alloc(...) alloc_hooks(__pgd_alloc_noprof(__VA_ARGS__)) + +static inline void __pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + struct ptdesc *ptdesc = virt_to_ptdesc(pgd); + + BUG_ON((unsigned long)pgd & (PAGE_SIZE-1)); + pagetable_dtor_free(ptdesc); +} + #ifndef __HAVE_ARCH_PGD_FREE static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - pagetable_free(virt_to_ptdesc(pgd)); + __pgd_free(mm, pgd); } #endif diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 0655aa5b57b2..bf47cca2c375 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock) } #endif +#ifndef __no_arch_spinlock_redefine /* * Remapping spinlock architecture specific functions to the corresponding * queued spinlock functions. @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock) #define arch_spin_lock(l) queued_spin_lock(l) #define arch_spin_trylock(l) queued_spin_trylock(l) #define arch_spin_unlock(l) queued_spin_unlock(l) +#endif #endif /* __ASM_GENERIC_QSPINLOCK_H */ diff --git a/include/asm-generic/rqspinlock.h b/include/asm-generic/rqspinlock.h new file mode 100644 index 000000000000..6d4244d643df --- /dev/null +++ b/include/asm-generic/rqspinlock.h @@ -0,0 +1,250 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Resilient Queued Spin Lock + * + * (C) Copyright 2024-2025 Meta Platforms, Inc. and affiliates. + * + * Authors: Kumar Kartikeya Dwivedi <memxor@gmail.com> + */ +#ifndef __ASM_GENERIC_RQSPINLOCK_H +#define __ASM_GENERIC_RQSPINLOCK_H + +#include <linux/types.h> +#include <vdso/time64.h> +#include <linux/percpu.h> +#ifdef CONFIG_QUEUED_SPINLOCKS +#include <asm/qspinlock.h> +#endif + +struct rqspinlock { + union { + atomic_t val; + u32 locked; + }; +}; + +/* Even though this is same as struct rqspinlock, we need to emit a distinct + * type in BTF for BPF programs. 
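The pgalloc.h changes above route every page-table allocation through a *_noprof variant wrapped in alloc_hooks(), pass the mm into the constructors, and add __pgd_alloc()/__pgd_free(). A minimal sketch, not from the patch, of what an architecture-side pgd_alloc() can shrink to on top of the new helper; the arch name and the order-0 choice are illustrative assumptions:

/* arch/foo/include/asm/pgalloc.h -- hypothetical */
#include <asm-generic/pgalloc.h>

static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
	/* One page of PGD; the generic pgd_free() pairs with __pgd_free(). */
	return __pgd_alloc(mm, 0);
}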
+ */ +struct bpf_res_spin_lock { + u32 val; +}; + +struct qspinlock; +#ifdef CONFIG_QUEUED_SPINLOCKS +typedef struct qspinlock rqspinlock_t; +#else +typedef struct rqspinlock rqspinlock_t; +#endif + +extern int resilient_tas_spin_lock(rqspinlock_t *lock); +#ifdef CONFIG_QUEUED_SPINLOCKS +extern int resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val); +#endif + +#ifndef resilient_virt_spin_lock_enabled +static __always_inline bool resilient_virt_spin_lock_enabled(void) +{ + return false; +} +#endif + +#ifndef resilient_virt_spin_lock +static __always_inline int resilient_virt_spin_lock(rqspinlock_t *lock) +{ + return 0; +} +#endif + +/* + * Default timeout for waiting loops is 0.25 seconds + */ +#define RES_DEF_TIMEOUT (NSEC_PER_SEC / 4) + +/* + * Choose 31 as it makes rqspinlock_held cacheline-aligned. + */ +#define RES_NR_HELD 31 + +struct rqspinlock_held { + int cnt; + void *locks[RES_NR_HELD]; +}; + +DECLARE_PER_CPU_ALIGNED(struct rqspinlock_held, rqspinlock_held_locks); + +static __always_inline void grab_held_lock_entry(void *lock) +{ + int cnt = this_cpu_inc_return(rqspinlock_held_locks.cnt); + + if (unlikely(cnt > RES_NR_HELD)) { + /* Still keep the inc so we decrement later. */ + return; + } + + /* + * Implied compiler barrier in per-CPU operations; otherwise we can have + * the compiler reorder inc with write to table, allowing interrupts to + * overwrite and erase our write to the table (as on interrupt exit it + * will be reset to NULL). + * + * It is fine for cnt inc to be reordered wrt remote readers though, + * they won't observe our entry until the cnt update is visible, that's + * all. + */ + this_cpu_write(rqspinlock_held_locks.locks[cnt - 1], lock); +} + +/* + * We simply don't support out-of-order unlocks, and keep the logic simple here. + * The verifier prevents BPF programs from unlocking out-of-order, and the same + * holds for in-kernel users. + * + * It is possible to run into misdetection scenarios of AA deadlocks on the same + * CPU, and missed ABBA deadlocks on remote CPUs if this function pops entries + * out of order (due to lock A, lock B, unlock A, unlock B) pattern. The correct + * logic to preserve right entries in the table would be to walk the array of + * held locks and swap and clear out-of-order entries, but that's too + * complicated and we don't have a compelling use case for out of order unlocking. + */ +static __always_inline void release_held_lock_entry(void) +{ + struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks); + + if (unlikely(rqh->cnt > RES_NR_HELD)) + goto dec; + WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL); +dec: + /* + * Reordering of clearing above with inc and its write in + * grab_held_lock_entry that came before us (in same acquisition + * attempt) is ok, we either see a valid entry or NULL when it's + * visible. + * + * But this helper is invoked when we unwind upon failing to acquire the + * lock. Unlike the unlock path which constitutes a release store after + * we clear the entry, we need to emit a write barrier here. Otherwise, + * we may have a situation as follows: + * + * <error> for lock B + * release_held_lock_entry + * + * try_cmpxchg_acquire for lock A + * grab_held_lock_entry + * + * Lack of any ordering means reordering may occur such that dec, inc + * are done before entry is overwritten. 
This permits a remote lock + * holder of lock B (which this CPU failed to acquire) to now observe it + * as being attempted on this CPU, and may lead to misdetection (if this + * CPU holds a lock it is attempting to acquire, leading to false ABBA + * diagnosis). + * + * In case of unlock, we will always do a release on the lock word after + * releasing the entry, ensuring that other CPUs cannot hold the lock + * (and make conclusions about deadlocks) until the entry has been + * cleared on the local CPU, preventing any anomalies. Reordering is + * still possible there, but a remote CPU cannot observe a lock in our + * table which it is already holding, since visibility entails our + * release store for the said lock has not retired. + * + * In theory we don't have a problem if the dec and WRITE_ONCE above get + * reordered with each other, we either notice an empty NULL entry on + * top (if dec succeeds WRITE_ONCE), or a potentially stale entry which + * cannot be observed (if dec precedes WRITE_ONCE). + * + * Emit the write barrier _before_ the dec, this permits dec-inc + * reordering but that is harmless as we'd have new entry set to NULL + * already, i.e. they cannot precede the NULL store above. + */ + smp_wmb(); + this_cpu_dec(rqspinlock_held_locks.cnt); +} + +#ifdef CONFIG_QUEUED_SPINLOCKS + +/** + * res_spin_lock - acquire a queued spinlock + * @lock: Pointer to queued spinlock structure + * + * Return: + * * 0 - Lock was acquired successfully. + * * -EDEADLK - Lock acquisition failed because of AA/ABBA deadlock. + * * -ETIMEDOUT - Lock acquisition failed because of timeout. + */ +static __always_inline int res_spin_lock(rqspinlock_t *lock) +{ + int val = 0; + + if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) { + grab_held_lock_entry(lock); + return 0; + } + return resilient_queued_spin_lock_slowpath(lock, val); +} + +#else + +#define res_spin_lock(lock) resilient_tas_spin_lock(lock) + +#endif /* CONFIG_QUEUED_SPINLOCKS */ + +static __always_inline void res_spin_unlock(rqspinlock_t *lock) +{ + struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks); + + if (unlikely(rqh->cnt > RES_NR_HELD)) + goto unlock; + WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL); +unlock: + /* + * Release barrier, ensures correct ordering. See release_held_lock_entry + * for details. Perform release store instead of queued_spin_unlock, + * since we use this function for test-and-set fallback as well. When we + * have CONFIG_QUEUED_SPINLOCKS=n, we clear the full 4-byte lockword. + * + * Like release_held_lock_entry, we can do the release before the dec. + * We simply care about not seeing the 'lock' in our table from a remote + * CPU once the lock has been released, which doesn't rely on the dec. + * + * Unlike smp_wmb(), release is not a two way fence, hence it is + * possible for a inc to move up and reorder with our clearing of the + * entry. This isn't a problem however, as for a misdiagnosis of ABBA, + * the remote CPU needs to hold this lock, which won't be released until + * the store below is done, which would ensure the entry is overwritten + * to NULL, etc. 
+ */ + smp_store_release(&lock->locked, 0); + this_cpu_dec(rqspinlock_held_locks.cnt); +} + +#ifdef CONFIG_QUEUED_SPINLOCKS +#define raw_res_spin_lock_init(lock) ({ *(lock) = (rqspinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; }) +#else +#define raw_res_spin_lock_init(lock) ({ *(lock) = (rqspinlock_t){0}; }) +#endif + +#define raw_res_spin_lock(lock) \ + ({ \ + int __ret; \ + preempt_disable(); \ + __ret = res_spin_lock(lock); \ + if (__ret) \ + preempt_enable(); \ + __ret; \ + }) + +#define raw_res_spin_unlock(lock) ({ res_spin_unlock(lock); preempt_enable(); }) + +#define raw_res_spin_lock_irqsave(lock, flags) \ + ({ \ + int __ret; \ + local_irq_save(flags); \ + __ret = raw_res_spin_lock(lock); \ + if (__ret) \ + local_irq_restore(flags); \ + __ret; \ + }) + +#define raw_res_spin_unlock_irqrestore(lock, flags) ({ raw_res_spin_unlock(lock); local_irq_restore(flags); }) + +#endif /* __ASM_GENERIC_RQSPINLOCK_H */ diff --git a/include/asm-generic/runtime-const.h b/include/asm-generic/runtime-const.h new file mode 100644 index 000000000000..670499459514 --- /dev/null +++ b/include/asm-generic/runtime-const.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +/* + * This is the fallback for when the architecture doesn't + * support the runtime const operations. + * + * We just use the actual symbols as-is. + */ +#define runtime_const_ptr(sym) (sym) +#define runtime_const_shift_right_32(val, sym) ((u32)(val)>>(sym)) +#define runtime_const_init(type,sym) do { } while (0) + +#endif diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h index 8d0a6280e982..52b969c7cef9 100644 --- a/include/asm-generic/rwonce.h +++ b/include/asm-generic/rwonce.h @@ -79,10 +79,18 @@ unsigned long __read_once_word_nocheck(const void *addr) (typeof(x))__read_once_word_nocheck(&(x)); \ }) -static __no_kasan_or_inline +static __no_sanitize_or_inline unsigned long read_word_at_a_time(const void *addr) { + /* open-coded instrument_read(addr, 1) */ kasan_check_read(addr, 1); + kcsan_check_read(addr, 1); + + /* + * This load can race with concurrent stores to out-of-bounds memory, + * but READ_ONCE() can't be used because it requires higher alignment + * than plain loads in arm64 builds with LTO. 
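The raw_res_spin_lock*() wrappers above complete the resilient-spinlock API. A sketch of the expected caller pattern, with made-up names and an assumed <asm/rqspinlock.h> include path; unlike an ordinary raw spinlock, acquisition can fail, so the return value must be checked before touching the protected data:

#include <asm/rqspinlock.h>	/* assumed arch wrapper over the generic header */

static rqspinlock_t my_lock;	/* raw_res_spin_lock_init(&my_lock) once at init time */

static int my_update(void)
{
	unsigned long flags;
	int ret;

	ret = raw_res_spin_lock_irqsave(&my_lock, flags);
	if (ret)		/* -EDEADLK or -ETIMEDOUT: the lock is NOT held */
		return ret;

	/* ... critical section ... */

	raw_res_spin_unlock_irqrestore(&my_lock, flags);
	return 0;
}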
+ */ return *(unsigned long *)addr; } diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index db13bb620f52..0755bc39b0d8 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -39,7 +39,7 @@ extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; extern char __start_ro_after_init[], __end_ro_after_init[]; extern char _end[]; -extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; +extern char __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __entry_text_start[], __entry_text_end[]; extern char __start_rodata[], __end_rodata[]; @@ -180,6 +180,11 @@ static inline bool is_kernel_rodata(unsigned long addr) addr < (unsigned long)__end_rodata; } +static inline bool is_kernel_ro_after_init(unsigned long addr) +{ + return addr >= (unsigned long)__start_ro_after_init && + addr < (unsigned long)__end_ro_after_init; +} /** * is_kernel_inittext - checks if the pointer address is located in the * .init.text section diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h index d0343d58a74a..ac29a22eb7cf 100644 --- a/include/asm-generic/simd.h +++ b/include/asm-generic/simd.h @@ -1,6 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_SIMD_H +#define _ASM_GENERIC_SIMD_H -#include <linux/hardirq.h> +#include <linux/compiler_attributes.h> +#include <linux/preempt.h> +#include <linux/types.h> /* * may_use_simd - whether it is allowable at this time to issue SIMD @@ -13,3 +17,5 @@ static __must_check inline bool may_use_simd(void) { return !in_interrupt(); } + +#endif /* _ASM_GENERIC_SIMD_H */ diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h index 90803a826ba0..970590baf61b 100644 --- a/include/asm-generic/spinlock.h +++ b/include/asm-generic/spinlock.h @@ -1,94 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * 'Generic' ticket-lock implementation. - * - * It relies on atomic_fetch_add() having well defined forward progress - * guarantees under contention. If your architecture cannot provide this, stick - * to a test-and-set lock. - * - * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a - * sub-word of the value. This is generally true for anything LL/SC although - * you'd be hard pressed to find anything useful in architecture specifications - * about this. If your architecture cannot do this you might be better off with - * a test-and-set. - * - * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence - * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with - * a full fence after the spin to upgrade the otherwise-RCpc - * atomic_cond_read_acquire(). - * - * The implementation uses smp_cond_load_acquire() to spin, so if the - * architecture has WFE like instructions to sleep instead of poll for word - * modifications be sure to implement that (see ARM64 for example). - * - */ - #ifndef __ASM_GENERIC_SPINLOCK_H #define __ASM_GENERIC_SPINLOCK_H -#include <linux/atomic.h> -#include <asm-generic/spinlock_types.h> - -static __always_inline void arch_spin_lock(arch_spinlock_t *lock) -{ - u32 val = atomic_fetch_add(1<<16, lock); - u16 ticket = val >> 16; - - if (ticket == (u16)val) - return; - - /* - * atomic_cond_read_acquire() is RCpc, but rather than defining a - * custom cond_read_rcsc() here we just emit a full fence. 
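may_use_simd() from the reworked asm-generic/simd.h above keeps its meaning: SIMD is usable only outside interrupt context. A hedged sketch of the usual dispatch pattern, not from the patch; the two helpers are placeholders and the <asm/simd.h> include assumes the architecture wraps the generic header:

#include <linux/types.h>
#include <asm/simd.h>		/* assumed arch wrapper over asm-generic/simd.h */

void xor_block(u8 *dst, const u8 *src, size_t len);		/* placeholder */
void xor_block_simd(u8 *dst, const u8 *src, size_t len);	/* placeholder */

static void xor_dispatch(u8 *dst, const u8 *src, size_t len)
{
	if (may_use_simd()) {
		/* Bracket with the arch's kernel-mode SIMD begin/end calls. */
		xor_block_simd(dst, src, len);
	} else {
		xor_block(dst, src, len);
	}
}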
We only - * need the prior reads before subsequent writes ordering from - * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we - * have no outstanding writes due to the atomic_fetch_add() the extra - * orderings are free. - */ - atomic_cond_read_acquire(lock, ticket == (u16)VAL); - smp_mb(); -} - -static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock) -{ - u32 old = atomic_read(lock); - - if ((old >> 16) != (old & 0xffff)) - return false; - - return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */ -} - -static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); - u32 val = atomic_read(lock); - - smp_store_release(ptr, (u16)val + 1); -} - -static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) -{ - u32 val = lock.counter; - - return ((val >> 16) == (val & 0xffff)); -} - -static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock) -{ - arch_spinlock_t val = READ_ONCE(*lock); - - return !arch_spin_value_unlocked(val); -} - -static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock) -{ - u32 val = atomic_read(lock); - - return (s16)((val >> 16) - (val & 0xffff)) > 1; -} - +#include <asm-generic/ticket_spinlock.h> #include <asm/qrwlock.h> #endif /* __ASM_GENERIC_SPINLOCK_H */ diff --git a/include/asm-generic/spinlock_types.h b/include/asm-generic/spinlock_types.h index 8962bb730945..f534aa5de394 100644 --- a/include/asm-generic/spinlock_types.h +++ b/include/asm-generic/spinlock_types.h @@ -3,15 +3,7 @@ #ifndef __ASM_GENERIC_SPINLOCK_TYPES_H #define __ASM_GENERIC_SPINLOCK_TYPES_H -#include <linux/types.h> -typedef atomic_t arch_spinlock_t; - -/* - * qrwlock_types depends on arch_spinlock_t, so we must typedef that before the - * include. - */ -#include <asm/qrwlock_types.h> - -#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0) +#include <asm-generic/qspinlock_types.h> +#include <asm-generic/qrwlock_types.h> #endif /* __ASM_GENERIC_SPINLOCK_TYPES_H */ diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 5a80fe728dc8..c5a3ad53beec 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -5,7 +5,7 @@ * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. * * This file is a stub providing documentation for what functions - * asm-ARCH/syscall.h files need to define. Most arch definitions + * arch/ARCH/include/asm/syscall.h files need to define. Most arch definitions * will be simple inlines. * * All of these functions expect to be called with no locks, @@ -38,6 +38,20 @@ struct pt_regs; int syscall_get_nr(struct task_struct *task, struct pt_regs *regs); /** + * syscall_set_nr - change the system call a task is executing + * @task: task of interest, must be blocked + * @regs: task_pt_regs() of @task + * @nr: system call number + * + * Changes the system call number @task is about to execute. + * + * It's only valid to call this when @task is stopped for tracing on + * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_AUDIT. 
+ */ +void syscall_set_nr(struct task_struct *task, struct pt_regs *regs, int nr); + +/** * syscall_rollback - roll back registers after an aborted system call * @task: task of interest, must be in system call exit tracing * @regs: task_pt_regs() of @task @@ -118,6 +132,22 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args); /** + * syscall_set_arguments - change system call parameter value + * @task: task of interest, must be in system call entry tracing + * @regs: task_pt_regs() of @task + * @args: array of argument values to store + * + * Changes 6 arguments to the system call. + * The first argument gets value @args[0], and so on. + * + * It's only valid to call this when @task is stopped for tracing on + * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_AUDIT. + */ +void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, + const unsigned long *args); + +/** * syscall_get_arch - return the AUDIT_ARCH for the current system call * @task: task of interest, must be blocked * diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h index 933ca6581aba..fabcefe8a80a 100644 --- a/include/asm-generic/syscalls.h +++ b/include/asm-generic/syscalls.h @@ -19,7 +19,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, #ifndef sys_mmap asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long fd, off_t pgoff); + unsigned long fd, unsigned long off); #endif #ifndef sys_rt_sigreturn diff --git a/include/asm-generic/text-patching.h b/include/asm-generic/text-patching.h new file mode 100644 index 000000000000..2245c641b741 --- /dev/null +++ b/include/asm-generic/text-patching.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_TEXT_PATCHING_H +#define _ASM_GENERIC_TEXT_PATCHING_H + +#endif /* _ASM_GENERIC_TEXT_PATCHING_H */ diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h new file mode 100644 index 000000000000..325779970d8a --- /dev/null +++ b/include/asm-generic/ticket_spinlock.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * 'Generic' ticket-lock implementation. + * + * It relies on atomic_fetch_add() having well defined forward progress + * guarantees under contention. If your architecture cannot provide this, stick + * to a test-and-set lock. + * + * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a + * sub-word of the value. This is generally true for anything LL/SC although + * you'd be hard pressed to find anything useful in architecture specifications + * about this. If your architecture cannot do this you might be better off with + * a test-and-set. + * + * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence + * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with + * a full fence after the spin to upgrade the otherwise-RCpc + * atomic_cond_read_acquire(). + * + * The implementation uses smp_cond_load_acquire() to spin, so if the + * architecture has WFE like instructions to sleep instead of poll for word + * modifications be sure to implement that (see ARM64 for example). 
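syscall_set_nr() and syscall_set_arguments() above give tracers a sanctioned way to rewrite a stopped task's pending system call. A sketch of both, with hypothetical helper names; passing -1 as the number is the conventional way to make the kernel skip dispatch:

#include <asm/syscall.h>

/* Hypothetical: rewrite the first argument of a traced task's pending call. */
static void redirect_dirfd(struct task_struct *task, struct pt_regs *regs,
			   unsigned long new_dfd)
{
	unsigned long args[6];

	syscall_get_arguments(task, regs, args);
	args[0] = new_dfd;
	syscall_set_arguments(task, regs, args);
}

/* Hypothetical: cancel the pending call outright. */
static void cancel_syscall(struct task_struct *task, struct pt_regs *regs)
{
	syscall_set_nr(task, regs, -1);
}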
+ * + */ + +#ifndef __ASM_GENERIC_TICKET_SPINLOCK_H +#define __ASM_GENERIC_TICKET_SPINLOCK_H + +#include <linux/atomic.h> +#include <asm-generic/spinlock_types.h> + +static __always_inline void ticket_spin_lock(arch_spinlock_t *lock) +{ + u32 val = atomic_fetch_add(1<<16, &lock->val); + u16 ticket = val >> 16; + + if (ticket == (u16)val) + return; + + /* + * atomic_cond_read_acquire() is RCpc, but rather than defining a + * custom cond_read_rcsc() here we just emit a full fence. We only + * need the prior reads before subsequent writes ordering from + * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we + * have no outstanding writes due to the atomic_fetch_add() the extra + * orderings are free. + */ + atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL); + smp_mb(); +} + +static __always_inline bool ticket_spin_trylock(arch_spinlock_t *lock) +{ + u32 old = atomic_read(&lock->val); + + if ((old >> 16) != (old & 0xffff)) + return false; + + return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */ +} + +static __always_inline void ticket_spin_unlock(arch_spinlock_t *lock) +{ + u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + u32 val = atomic_read(&lock->val); + + smp_store_release(ptr, (u16)val + 1); +} + +static __always_inline int ticket_spin_value_unlocked(arch_spinlock_t lock) +{ + u32 val = lock.val.counter; + + return ((val >> 16) == (val & 0xffff)); +} + +static __always_inline int ticket_spin_is_locked(arch_spinlock_t *lock) +{ + arch_spinlock_t val = READ_ONCE(*lock); + + return !ticket_spin_value_unlocked(val); +} + +static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock) +{ + u32 val = atomic_read(&lock->val); + + return (s16)((val >> 16) - (val & 0xffff)) > 1; +} + +#ifndef __no_arch_spinlock_redefine +/* + * Remapping spinlock architecture specific functions to the corresponding + * ticket spinlock functions. + */ +#define arch_spin_is_locked(l) ticket_spin_is_locked(l) +#define arch_spin_is_contended(l) ticket_spin_is_contended(l) +#define arch_spin_value_unlocked(l) ticket_spin_value_unlocked(l) +#define arch_spin_lock(l) ticket_spin_lock(l) +#define arch_spin_trylock(l) ticket_spin_trylock(l) +#define arch_spin_unlock(l) ticket_spin_unlock(l) +#endif + +#endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */ diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 709830274b75..88a42973fa47 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -67,22 +67,21 @@ * * See also MMU_GATHER_TABLE_FREE and MMU_GATHER_RCU_TABLE_FREE. * - * - tlb_remove_page() / __tlb_remove_page() - * - tlb_remove_page_size() / __tlb_remove_page_size() - * - __tlb_remove_folio_pages() + * - tlb_remove_page() / tlb_remove_page_size() + * - __tlb_remove_folio_pages() / __tlb_remove_page_size() + * - __tlb_remove_folio_pages_size() * - * __tlb_remove_page_size() is the basic primitive that queues a page for - * freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a - * boolean indicating if the queue is (now) full and a call to - * tlb_flush_mmu() is required. + * __tlb_remove_folio_pages_size() is the basic primitive that queues pages + * for freeing. It will return a boolean indicating if the queue is (now) + * full and a call to tlb_flush_mmu() is required. * * tlb_remove_page() and tlb_remove_page_size() imply the call to * tlb_flush_mmu() when required and has no return value. 
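The ticket-lock code above is moved verbatim out of asm-generic/spinlock.h into its own header, and the __no_arch_spinlock_redefine guard exists so an architecture can even pull in both generic implementations and pick one at runtime. A sketch, not from the patch, of the simpler compile-time selection in a hypothetical arch header:

/* arch/foo/include/asm/spinlock.h -- hypothetical */
#ifndef _ASM_FOO_SPINLOCK_H
#define _ASM_FOO_SPINLOCK_H

#ifdef CONFIG_QUEUED_SPINLOCKS
#include <asm/qspinlock.h>
#else
#include <asm-generic/ticket_spinlock.h>
#endif
#include <asm/qrwlock.h>

#endif /* _ASM_FOO_SPINLOCK_H */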
* - * __tlb_remove_folio_pages() is similar to __tlb_remove_page(), however, - * instead of removing a single page, remove the given number of consecutive - * pages that are all part of the same (large) folio: just like calling - * __tlb_remove_page() on each page individually. + * __tlb_remove_folio_pages() is similar to __tlb_remove_page_size(), + * however, instead of removing a single page, assume PAGE_SIZE and remove + * the given number of consecutive pages that are all part of the + * same (large) folio. * * - tlb_change_page_size() * @@ -153,8 +152,9 @@ * * Useful if your architecture has non-page page directories. * - * When used, an architecture is expected to provide __tlb_remove_table() - * which does the actual freeing of these pages. + * When used, an architecture is expected to provide __tlb_remove_table() or + * use the generic __tlb_remove_table(), which does the actual freeing of these + * pages. * * MMU_GATHER_RCU_TABLE_FREE * @@ -207,16 +207,31 @@ struct mmu_table_batch { #define MAX_TABLE_BATCH \ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) +#ifndef __HAVE_ARCH_TLB_REMOVE_TABLE +static inline void __tlb_remove_table(void *table) +{ + struct ptdesc *ptdesc = (struct ptdesc *)table; + + pagetable_dtor_free(ptdesc); +} +#endif + extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ +#else /* !CONFIG_MMU_GATHER_TABLE_FREE */ +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page); /* * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have page based * page directories and we can use the normal page batching to free them. */ -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) +static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct ptdesc *ptdesc = (struct ptdesc *)table; + pagetable_dtor(ptdesc); + tlb_remove_page(tlb, ptdesc_page(ptdesc)); +} #endif /* CONFIG_MMU_GATHER_TABLE_FREE */ #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE @@ -473,32 +488,16 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, tlb_flush_mmu(tlb); } -static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb, - struct page *page, bool delay_rmap) -{ - return __tlb_remove_page_size(tlb, page, delay_rmap, PAGE_SIZE); -} - -/* tlb_remove_page - * Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when - * required. - */ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { return tlb_remove_page_size(tlb, page, PAGE_SIZE); } -static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) +static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) { tlb_remove_table(tlb, pt); } -/* Like tlb_remove_ptdesc, but for page-like page directories. 
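With tlb_remove_ptdesc() above now taking a struct ptdesc * and tlb_remove_table() performing the pagetable_dtor() itself, an architecture's table-freeing hook can reduce to a one-liner. A sketch under the assumption of page-sized PTE tables; the exact macro shape varies per architecture and is not taken from this patch:

/* Hypothetical arch hook; pte is a pgtable_t, i.e. a struct page pointer. */
#define __pte_free_tlb(tlb, pte, addr)			\
	tlb_remove_ptdesc((tlb), page_ptdesc(pte))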
*/ -static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) -{ - tlb_remove_page(tlb, ptdesc_page(pt)); -} - static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index a5be9e61a2a2..b276f783494c 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -11,7 +11,7 @@ #include <asm-generic/access_ok.h> #ifdef CONFIG_UACCESS_MEMCPY -#include <asm/unaligned.h> +#include <linux/unaligned.h> static __always_inline int __get_user_fn(size_t size, const void __user *from, void *to) diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h deleted file mode 100644 index a84c64e5f11e..000000000000 --- a/include/asm-generic/unaligned.h +++ /dev/null @@ -1,155 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_GENERIC_UNALIGNED_H -#define __ASM_GENERIC_UNALIGNED_H - -/* - * This is the most generic implementation of unaligned accesses - * and should work almost anywhere. - */ -#include <linux/unaligned/packed_struct.h> -#include <asm/byteorder.h> - -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ -}) - -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ -} while (0) - -#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) -#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr)) - -static inline u16 get_unaligned_le16(const void *p) -{ - return le16_to_cpu(__get_unaligned_t(__le16, p)); -} - -static inline u32 get_unaligned_le32(const void *p) -{ - return le32_to_cpu(__get_unaligned_t(__le32, p)); -} - -static inline u64 get_unaligned_le64(const void *p) -{ - return le64_to_cpu(__get_unaligned_t(__le64, p)); -} - -static inline void put_unaligned_le16(u16 val, void *p) -{ - __put_unaligned_t(__le16, cpu_to_le16(val), p); -} - -static inline void put_unaligned_le32(u32 val, void *p) -{ - __put_unaligned_t(__le32, cpu_to_le32(val), p); -} - -static inline void put_unaligned_le64(u64 val, void *p) -{ - __put_unaligned_t(__le64, cpu_to_le64(val), p); -} - -static inline u16 get_unaligned_be16(const void *p) -{ - return be16_to_cpu(__get_unaligned_t(__be16, p)); -} - -static inline u32 get_unaligned_be32(const void *p) -{ - return be32_to_cpu(__get_unaligned_t(__be32, p)); -} - -static inline u64 get_unaligned_be64(const void *p) -{ - return be64_to_cpu(__get_unaligned_t(__be64, p)); -} - -static inline void put_unaligned_be16(u16 val, void *p) -{ - __put_unaligned_t(__be16, cpu_to_be16(val), p); -} - -static inline void put_unaligned_be32(u32 val, void *p) -{ - __put_unaligned_t(__be32, cpu_to_be32(val), p); -} - -static inline void put_unaligned_be64(u64 val, void *p) -{ - __put_unaligned_t(__be64, cpu_to_be64(val), p); -} - -static inline u32 __get_unaligned_be24(const u8 *p) -{ - return p[0] << 16 | p[1] << 8 | p[2]; -} - -static inline u32 get_unaligned_be24(const void *p) -{ - return __get_unaligned_be24(p); -} - -static inline u32 __get_unaligned_le24(const u8 *p) -{ - return p[0] | p[1] << 8 | p[2] << 16; -} - -static inline u32 get_unaligned_le24(const void *p) -{ - return __get_unaligned_le24(p); -} - -static inline void __put_unaligned_be24(const u32 val, u8 *p) -{ - *p++ = (val >> 16) & 0xff; - *p++ = (val >> 8) & 0xff; - *p++ = val & 0xff; -} - -static inline void put_unaligned_be24(const u32 val, 
void *p) -{ - __put_unaligned_be24(val, p); -} - -static inline void __put_unaligned_le24(const u32 val, u8 *p) -{ - *p++ = val & 0xff; - *p++ = (val >> 8) & 0xff; - *p++ = (val >> 16) & 0xff; -} - -static inline void put_unaligned_le24(const u32 val, void *p) -{ - __put_unaligned_le24(val, p); -} - -static inline void __put_unaligned_be48(const u64 val, u8 *p) -{ - *p++ = (val >> 40) & 0xff; - *p++ = (val >> 32) & 0xff; - *p++ = (val >> 24) & 0xff; - *p++ = (val >> 16) & 0xff; - *p++ = (val >> 8) & 0xff; - *p++ = val & 0xff; -} - -static inline void put_unaligned_be48(const u64 val, void *p) -{ - __put_unaligned_be48(val, p); -} - -static inline u64 __get_unaligned_be48(const u8 *p) -{ - return (u64)p[0] << 40 | (u64)p[1] << 32 | (u64)p[2] << 24 | - p[3] << 16 | p[4] << 8 | p[5]; -} - -static inline u64 get_unaligned_be48(const void *p) -{ - return __get_unaligned_be48(p); -} - -#endif /* __ASM_GENERIC_UNALIGNED_H */ diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index c835607f78ae..b550afa15ecd 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -4,25 +4,35 @@ #ifndef __ASSEMBLY__ -#ifndef __arch_get_k_vdso_data -static __always_inline struct vdso_data *__arch_get_k_vdso_data(void) +#ifdef CONFIG_GENERIC_VDSO_DATA_STORE + +#ifndef __arch_get_vdso_u_time_data +static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) { - return NULL; + return &vdso_u_time_data; } -#endif /* __arch_get_k_vdso_data */ +#endif + +#ifndef __arch_get_vdso_u_rng_data +static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(void) +{ + return &vdso_u_rng_data; +} +#endif + +#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ #ifndef __arch_update_vsyscall -static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata, - struct timekeeper *tk) +static __always_inline void __arch_update_vsyscall(struct vdso_time_data *vdata) { } #endif /* __arch_update_vsyscall */ -#ifndef __arch_sync_vdso_data -static __always_inline void __arch_sync_vdso_data(struct vdso_data *vdata) +#ifndef __arch_sync_vdso_time_data +static __always_inline void __arch_sync_vdso_time_data(struct vdso_time_data *vdata) { } -#endif /* __arch_sync_vdso_data */ +#endif /* __arch_sync_vdso_time_data */ #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-generic/vga.h b/include/asm-generic/vga.h index adf91a783b5c..5dcaf4ae904a 100644 --- a/include/asm-generic/vga.h +++ b/include/asm-generic/vga.h @@ -1,25 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * Access to VGA videoram - * - * (c) 1998 Martin Mares <mj@ucw.cz> - */ #ifndef __ASM_GENERIC_VGA_H #define __ASM_GENERIC_VGA_H - -/* - * On most architectures that support VGA, we can just - * recalculate addresses and then access the videoram - * directly without any black magic. - * - * Everyone else needs to ioremap the address and use - * proper I/O accesses. 
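The unaligned helpers deleted above now live solely in <linux/unaligned.h>, which the uaccess.h hunk already switched to. A sketch of a caller after the move; the wire-format offset is invented for illustration:

#include <linux/types.h>
#include <linux/unaligned.h>

/* Hypothetical parse: a 4-byte little-endian length field at byte offset 4. */
static u32 frame_payload_len(const u8 *hdr)
{
	return get_unaligned_le32(hdr + 4);
}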
- */ -#ifndef VGA_MAP_MEM -#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x) -#endif - -#define vga_readb(x) (*(x)) -#define vga_writeb(x, y) (*(y) = (x)) - -#endif /* _ASM_GENERIC_VGA_H */ +#endif /* __ASM_GENERIC_VGA_H */ diff --git a/include/asm-generic/fb.h b/include/asm-generic/video.h index 6ccabb400aa6..b1da2309d943 100644 --- a/include/asm-generic/fb.h +++ b/include/asm-generic/video.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_GENERIC_FB_H_ -#define __ASM_GENERIC_FB_H_ +#ifndef __ASM_GENERIC_VIDEO_H_ +#define __ASM_GENERIC_VIDEO_H_ /* * Only include this header file from your architecture's <asm/fb.h>. @@ -10,8 +10,9 @@ #include <linux/io.h> #include <linux/mm_types.h> #include <linux/pgtable.h> +#include <linux/types.h> -struct fb_info; +struct device; #ifndef pgprot_framebuffer #define pgprot_framebuffer pgprot_framebuffer @@ -23,11 +24,11 @@ static inline pgprot_t pgprot_framebuffer(pgprot_t prot, } #endif -#ifndef fb_is_primary_device -#define fb_is_primary_device fb_is_primary_device -static inline int fb_is_primary_device(struct fb_info *info) +#ifndef video_is_primary_device +#define video_is_primary_device video_is_primary_device +static inline bool video_is_primary_device(struct device *dev) { - return 0; + return false; } #endif @@ -132,4 +133,4 @@ static inline void fb_memset_io(volatile void __iomem *addr, int c, size_t n) #define fb_memset fb_memset_io #endif -#endif /* __ASM_GENERIC_FB_H_ */ +#endif /* __ASM_GENERIC_VIDEO_H_ */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f7749d0f2562..fa5f19b8d53a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -3,7 +3,7 @@ * linker scripts. * * A minimal linker scripts has following content: - * [This is a sample, architectures may have special requiriements] + * [This is a sample, architectures may have special requirements] * * OUTPUT_FORMAT(...) * OUTPUT_ARCH(...) @@ -50,6 +50,8 @@ * [__nosave_begin, __nosave_end] for the nosave data */ +#include <asm-generic/codetag.lds.h> + #ifndef LOAD_OFFSET #define LOAD_OFFSET 0 #endif @@ -93,19 +95,26 @@ * With LTO_CLANG, the linker also splits sections by default, so we need * these macros to combine the sections during the final link. * + * With AUTOFDO_CLANG and PROPELLER_CLANG, by default, the linker splits + * text sections and regroups functions into subsections. + * * RODATA_MAIN is not used because existing code already defines .rodata.x * sections to be brought in with rodata. 
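The fb.h to video.h rename above turns fb_is_primary_device(struct fb_info *) into video_is_primary_device(struct device *), with the generic stub returning false. A sketch of a driver-side check built on it; the <asm/video.h> include path and the helper name are assumptions:

#include <linux/device.h>
#include <asm/video.h>		/* assumed arch wrapper over asm-generic/video.h */

static bool is_boot_display(struct device *dev)
{
	/* Generic fallback says false; architectures may override the hook. */
	return video_is_primary_device(dev);
}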
*/ -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ +defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* -#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* +#else +#define TEXT_MAIN .text +#endif +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data.rel.* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* -#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral* +#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* #else -#define TEXT_MAIN .text -#define DATA_MAIN .data +#define DATA_MAIN .data .data.rel .data.rel.local #define SDATA_MAIN .sdata #define RODATA_MAIN .rodata #define BSS_MAIN .bss @@ -131,6 +140,7 @@ *(__dl_sched_class) \ *(__rt_sched_class) \ *(__fair_sched_class) \ + *(__ext_sched_class) \ *(__idle_sched_class) \ __sched_class_lowest = .; @@ -139,14 +149,6 @@ * often happens at runtime) */ -#if defined(CONFIG_MEMORY_HOTPLUG) -#define MEM_KEEP(sec) *(.mem##sec) -#define MEM_DISCARD(sec) -#else -#define MEM_KEEP(sec) -#define MEM_DISCARD(sec) *(.mem##sec) -#endif - #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE #define KEEP_PATCHABLE KEEP(*(__patchable_function_entries)) #define PATCHABLE_DISCARDS @@ -355,10 +357,9 @@ *(.data..decrypted) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ - MEM_KEEP(init.data*) \ - *(.data.unlikely) \ + *(.data..unlikely) \ __start_once = .; \ - *(.data.once) \ + *(.data..once) \ __end_once = .; \ STRUCT_ALIGN(); \ *(__tracepoints) \ @@ -366,6 +367,7 @@ . = ALIGN(8); \ BOUNDED_SECTION_BY(__dyndbg_classes, ___dyndbg_classes) \ BOUNDED_SECTION_BY(__dyndbg, ___dyndbg) \ + CODETAG_SECTIONS() \ LIKELY_PROFILE() \ BRANCH_PROFILE() \ TRACE_PRINTKS() \ @@ -383,6 +385,11 @@ . = ALIGN(PAGE_SIZE); \ __nosave_end = .; +#define CACHE_HOT_DATA(align) \ + . = ALIGN(align); \ + *(SORT_BY_ALIGNMENT(.data..hot.*)) \ + . = ALIGN(align); + #define PAGE_ALIGNED_DATA(page_align) \ . = ALIGN(page_align); \ *(.data..page_aligned) \ @@ -399,13 +406,12 @@ #define INIT_TASK_DATA(align) \ . = ALIGN(align); \ - __start_init_task = .; \ + __start_init_stack = .; \ init_thread_union = .; \ init_stack = .; \ - KEEP(*(.data..init_task)) \ KEEP(*(.data..init_thread_info)) \ - . = __start_init_task + THREAD_SIZE; \ - __end_init_task = .; + . = __start_init_stack + THREAD_SIZE; \ + __end_init_stack = .; #define JUMP_TABLE_DATA \ . = ALIGN(8); \ @@ -455,7 +461,7 @@ . = ALIGN((align)); \ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ __start_rodata = .; \ - *(.rodata) *(.rodata.*) \ + *(.rodata) *(.rodata.*) *(.data.rel.ro*) \ SCHED_DATA \ RO_AFTER_INIT_DATA /* Read only after init */ \ . = ALIGN(8); \ @@ -520,7 +526,6 @@ /* __*init sections */ \ __init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \ *(.ref.rodata) \ - MEM_KEEP(init.rodata) \ } \ \ /* Built-in module parameters. 
*/ \ @@ -555,25 +560,44 @@ __cpuidle_text_end = .; \ __noinstr_text_end = .; +#define TEXT_SPLIT \ + __split_text_start = .; \ + *(.text.split .text.split.[0-9a-zA-Z_]*) \ + __split_text_end = .; + +#define TEXT_UNLIKELY \ + __unlikely_text_start = .; \ + *(.text.unlikely .text.unlikely.*) \ + __unlikely_text_end = .; + +#define TEXT_HOT \ + __hot_text_start = .; \ + *(.text.hot .text.hot.*) \ + __hot_text_end = .; + /* * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map * - * TEXT_MAIN here will match .text.fixup and .text.unlikely if dead - * code elimination is enabled, so these sections should be converted - * to use ".." first. + * TEXT_MAIN here will match symbols with a fixed pattern (for example, + * .text.hot or .text.unlikely) if dead code elimination or + * function-section is enabled. Match these symbols first before + * TEXT_MAIN to ensure they are grouped together. + * + * Also placing .text.hot section at the beginning of a page, this + * would help the TLB performance. */ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ - *(.text.hot .text.hot.*) \ - *(TEXT_MAIN .text.fixup) \ - *(.text.unlikely .text.unlikely.*) \ + *(.text.asan.* .text.tsan.*) \ *(.text.unknown .text.unknown.*) \ + TEXT_SPLIT \ + TEXT_UNLIKELY \ + . = ALIGN(PAGE_SIZE); \ + TEXT_HOT \ + *(TEXT_MAIN .text.fixup) \ NOINSTR_TEXT \ - *(.ref.text) \ - *(.text.asan.* .text.tsan.*) \ - MEM_KEEP(init.text*) \ - + *(.ref.text) /* sched.text is aling to function alignment to secure we have same * address even at second ld pass when generating System.map */ @@ -643,10 +667,11 @@ */ #ifdef CONFIG_DEBUG_INFO_BTF #define BTF \ + . = ALIGN(PAGE_SIZE); \ .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \ BOUNDED_SECTION_BY(.BTF, _BTF) \ } \ - . = ALIGN(4); \ + . = ALIGN(PAGE_SIZE); \ .BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \ *(.BTF_ids) \ } @@ -679,7 +704,6 @@ #define INIT_DATA \ KEEP(*(SORT(___kentry+*))) \ *(.init.data .init.data.*) \ - MEM_DISCARD(init.data*) \ KERNEL_CTORS() \ MCOUNT_REC() \ *(.init.rodata .init.rodata.*) \ @@ -687,7 +711,6 @@ TRACE_SYSCALLS() \ KPROBE_BLACKLIST() \ ERROR_INJECT_WHITELIST() \ - MEM_DISCARD(init.rodata) \ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ TIMER_OF_TABLES() \ @@ -705,8 +728,7 @@ #define INIT_TEXT \ *(.init.text .init.text.*) \ - *(.text.startup) \ - MEM_DISCARD(init.text*) + *(.text.startup) #define EXIT_DATA \ *(.exit.data .exit.data.*) \ @@ -922,6 +944,17 @@ #define CON_INITCALL \ BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end) +#define NAMED_SECTION(name) \ + . = ALIGN(8); \ + name : AT(ADDR(name) - LOAD_OFFSET) \ + { BOUNDED_SECTION_PRE_LABEL(name, name, __start_, __stop_) } + +#define RUNTIME_CONST(t,x) NAMED_SECTION(runtime_##t##_##x) + +#define RUNTIME_CONST_VARIABLES \ + RUNTIME_CONST(shift, d_hash_shift) \ + RUNTIME_CONST(ptr, dentry_hashtable) + /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ . = ALIGN(8); \ @@ -1010,6 +1043,7 @@ *(.discard) \ *(.discard.*) \ *(.export_symbol) \ + *(.no_trim_symbol) \ *(.modinfo) \ /* ld.bfd warns about .gnu.version* even when not emitted */ \ *(.gnu.version*) \ @@ -1033,10 +1067,13 @@ */ #define PERCPU_INPUT(cacheline) \ __per_cpu_start = .; \ - *(.data..percpu..first) \ . = ALIGN(PAGE_SIZE); \ *(.data..percpu..page_aligned) \ . = ALIGN(cacheline); \ + __per_cpu_hot_start = .; \ + *(SORT_BY_ALIGNMENT(.data..percpu..hot.*)) \ + __per_cpu_hot_end = .; \ + . 
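
The TEXT_HOT, TEXT_UNLIKELY and TEXT_SPLIT rules above collect sections that the compiler already names by temperature, whether from __attribute__((hot))/__attribute__((cold)) or from profile feedback, and the ALIGN(PAGE_SIZE) in TEXT_TEXT then starts the hot group on a page boundary so frequently executed code shares fewer iTLB entries. A small sketch of where those input sections come from, assuming GCC or Clang with -ffunction-sections; names are illustrative:

/* hot_cold_demo.c - illustrative only.
 *
 * Build:    cc -c -O2 -ffunction-sections hot_cold_demo.c
 * Inspect:  readelf -SW hot_cold_demo.o
 */

/* Typically placed in .text.unlikely.report_error, matched by the
 * *(.text.unlikely .text.unlikely.*) rule in TEXT_UNLIKELY. */
__attribute__((cold)) int report_error(int err)
{
	return -err;
}

/* Typically placed in .text.hot.fast_path, matched by the
 * *(.text.hot .text.hot.*) rule in TEXT_HOT and therefore laid out
 * right after the ALIGN(PAGE_SIZE) in TEXT_TEXT. */
__attribute__((hot)) int fast_path(int x)
{
	return x * 2;
}

/* Everything else stays in a plain .text.<name> section and is swept
 * up by the TEXT_MAIN pattern. */
int ordinary(int x)
{
	return x + 1;
}
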
= ALIGN(cacheline); \ *(.data..percpu..read_mostly) \ . = ALIGN(cacheline); \ *(.data..percpu) \ @@ -1045,52 +1082,17 @@ __per_cpu_end = .; /** - * PERCPU_VADDR - define output section for percpu area + * PERCPU_SECTION - define output section for percpu area @cacheline: cacheline size - * @vaddr: explicit base address (optional) - * @phdr: destination PHDR (optional) * * Macro which expands to output section for percpu area. * * @cacheline is used to align subsections to avoid false cacheline * sharing between subsections for different purposes. - * - * If @vaddr is not blank, it specifies explicit base address and all - * percpu symbols will be offset from the given address. If blank, - * @vaddr always equals @laddr + LOAD_OFFSET. - * - * @phdr defines the output PHDR to use if not blank. Be warned that - * output PHDR is sticky. If @phdr is specified, the next output - * section in the linker script will go there too. @phdr should have - * a leading colon. - * - * Note that this macros defines __per_cpu_load as an absolute symbol. - * If there is no need to put the percpu section at a predetermined - * address, use PERCPU_SECTION. - */ -#define PERCPU_VADDR(cacheline, vaddr, phdr) \ - __per_cpu_load = .; \ - .data..percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \ - PERCPU_INPUT(cacheline) \ - } phdr \ - . = __per_cpu_load + SIZEOF(.data..percpu); - -/** - * PERCPU_SECTION - define output section for percpu area, simple version - * @cacheline: cacheline size - * - * Align to PAGE_SIZE and outputs output section for percpu area. This - * macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and - * __per_cpu_start will be identical. - * - * This macro is equivalent to ALIGN(PAGE_SIZE); PERCPU_VADDR(@cacheline,,) - * except that __per_cpu_load is defined as a relative symbol against - * .data..percpu which is required for relocatable x86_32 configuration. */ #define PERCPU_SECTION(cacheline) \ . = ALIGN(PAGE_SIZE); \ .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ - __per_cpu_load = .; \ PERCPU_INPUT(cacheline) \ } @@ -1119,6 +1121,7 @@ INIT_TASK_DATA(inittask) \ NOSAVE_DATA \ PAGE_ALIGNED_DATA(pagealigned) \ + CACHE_HOT_DATA(cacheline) \ CACHELINE_ALIGNED_DATA(cacheline) \ READ_MOSTLY_DATA(cacheline) \ DATA_DATA \ diff --git a/include/asm-generic/vtime.h b/include/asm-generic/vtime.h deleted file mode 100644 index b1a49677fe25..000000000000 --- a/include/asm-generic/vtime.h +++ /dev/null @@ -1 +0,0 @@ -/* no content, but patch(1) dislikes empty files */
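
For the consumer side of PERCPU_INPUT() and PERCPU_SECTION(): a per-CPU variable is an ordinary object emitted into .data..percpu (or one of the ..page_aligned/..hot/..read_mostly subsections gathered above), the script brackets the whole area with __per_cpu_start/__per_cpu_end, and each CPU then receives its own copy of that region at boot. A minimal sketch, assuming a kernel build context; the demo_ names are illustrative:

/* percpu_demo.c - minimal sketch, kernel build context assumed;
 * the demo_ names are illustrative. */
#include <linux/percpu.h>
#include <linux/cpumask.h>

/* Emitted into .data..percpu, gathered by PERCPU_INPUT() between
 * __per_cpu_start and __per_cpu_end. */
static DEFINE_PER_CPU(unsigned long, demo_hits);

/* The read-mostly variant lands in .data..percpu..read_mostly so it
 * shares cachelines only with other rarely written per-CPU data. */
static DEFINE_PER_CPU_READ_MOSTLY(unsigned int, demo_enabled);

static void demo_hit(void)
{
	if (this_cpu_read(demo_enabled))
		this_cpu_inc(demo_hits);	/* touches only this CPU's copy */
}

static unsigned long demo_total(void)
{
	unsigned long sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += per_cpu(demo_hits, cpu);	/* walk every CPU's copy */
	return sum;
}
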