From 14d72af7ab00d9c2eb5b473b4fdf3b751b8cf8e2 Mon Sep 17 00:00:00 2001
From: "周琰杰 (Zhou Yanjie)"
Date: Fri, 16 Jul 2021 00:57:07 +0800
Subject: MIPS: Ingenic: Add system type for new Ingenic SoCs.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add system type entries for the JZ4730, JZ4750, JZ4755, JZ4760,
JZ4760B, X2000H, and X2100, so that "cat /proc/cpuinfo" reports the
matching SoC name.

Signed-off-by: 周琰杰 (Zhou Yanjie)
Reviewed-by: Paul Cercueil
Signed-off-by: Thomas Bogendoerfer
---
 arch/mips/generic/board-ingenic.c | 21 +++++++++++++++++++++
 arch/mips/include/asm/bootinfo.h  |  3 +++
 arch/mips/include/asm/cpu.h       |  4 ++--
 3 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/generic/board-ingenic.c b/arch/mips/generic/board-ingenic.c
index 0cec0bea13d6..e86e0016c548 100644
--- a/arch/mips/generic/board-ingenic.c
+++ b/arch/mips/generic/board-ingenic.c
@@ -21,6 +21,10 @@ static __init char *ingenic_get_system_type(unsigned long machtype)
 {
 	switch (machtype) {
+	case MACH_INGENIC_X2100:
+		return "X2100";
+	case MACH_INGENIC_X2000H:
+		return "X2000H";
 	case MACH_INGENIC_X2000E:
 		return "X2000E";
 	case MACH_INGENIC_X2000:
 		return "X2000";
@@ -37,8 +41,18 @@ static __init char *ingenic_get_system_type(unsigned long machtype)
 		return "JZ4775";
 	case MACH_INGENIC_JZ4770:
 		return "JZ4770";
+	case MACH_INGENIC_JZ4760B:
+		return "JZ4760B";
+	case MACH_INGENIC_JZ4760:
+		return "JZ4760";
+	case MACH_INGENIC_JZ4755:
+		return "JZ4755";
+	case MACH_INGENIC_JZ4750:
+		return "JZ4750";
 	case MACH_INGENIC_JZ4725B:
 		return "JZ4725B";
+	case MACH_INGENIC_JZ4730:
+		return "JZ4730";
 	default:
 		return "JZ4740";
 	}
@@ -61,8 +75,13 @@ static __init const void *ingenic_fixup_fdt(const void *fdt, const void *match_d
 }
 
 static const struct of_device_id ingenic_of_match[] __initconst = {
+	{ .compatible = "ingenic,jz4730", .data = (void *)MACH_INGENIC_JZ4730 },
 	{ .compatible = "ingenic,jz4740", .data = (void *)MACH_INGENIC_JZ4740 },
 	{ .compatible = "ingenic,jz4725b", .data = (void *)MACH_INGENIC_JZ4725B },
+	{ .compatible = "ingenic,jz4750", .data = (void *)MACH_INGENIC_JZ4750 },
+	{ .compatible = "ingenic,jz4755", .data = (void *)MACH_INGENIC_JZ4755 },
+	{ .compatible = "ingenic,jz4760", .data = (void *)MACH_INGENIC_JZ4760 },
+	{ .compatible = "ingenic,jz4760b", .data = (void *)MACH_INGENIC_JZ4760B },
 	{ .compatible = "ingenic,jz4770", .data = (void *)MACH_INGENIC_JZ4770 },
 	{ .compatible = "ingenic,jz4775", .data = (void *)MACH_INGENIC_JZ4775 },
 	{ .compatible = "ingenic,jz4780", .data = (void *)MACH_INGENIC_JZ4780 },
@@ -71,6 +90,8 @@ static const struct of_device_id ingenic_of_match[] __initconst = {
 	{ .compatible = "ingenic,x1830", .data = (void *)MACH_INGENIC_X1830 },
 	{ .compatible = "ingenic,x2000", .data = (void *)MACH_INGENIC_X2000 },
 	{ .compatible = "ingenic,x2000e", .data = (void *)MACH_INGENIC_X2000E },
+	{ .compatible = "ingenic,x2000h", .data = (void *)MACH_INGENIC_X2000H },
+	{ .compatible = "ingenic,x2100", .data = (void *)MACH_INGENIC_X2100 },
 	{}
 };

diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index 4c2e8173e6ec..2128ba903391 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -75,6 +75,7 @@ enum ingenic_machine_type {
 	MACH_INGENIC_JZ4750,
 	MACH_INGENIC_JZ4755,
 	MACH_INGENIC_JZ4760,
+	MACH_INGENIC_JZ4760B,
 	MACH_INGENIC_JZ4770,
 	MACH_INGENIC_JZ4775,
 	MACH_INGENIC_JZ4780,
@@ -83,6 +84,8 @@ enum ingenic_machine_type {
 	MACH_INGENIC_X1830,
 	MACH_INGENIC_X2000,
MACH_INGENIC_X2000E, + MACH_INGENIC_X2000H, + MACH_INGENIC_X2100, }; extern char *system_type; diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 9e6211e6d76b..d45a52f65b7a 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -46,8 +46,8 @@ #define PRID_COMP_NETLOGIC 0x0c0000 #define PRID_COMP_CAVIUM 0x0d0000 #define PRID_COMP_LOONGSON 0x140000 -#define PRID_COMP_INGENIC_13 0x130000 /* X2000 */ -#define PRID_COMP_INGENIC_D0 0xd00000 /* JZ4740, JZ4750, X1830 */ +#define PRID_COMP_INGENIC_13 0x130000 /* X2000, X2100 */ +#define PRID_COMP_INGENIC_D0 0xd00000 /* JZ4730, JZ4740, JZ4750, JZ4755, JZ4760, X1830 */ #define PRID_COMP_INGENIC_D1 0xd10000 /* JZ4770, JZ4775, X1000 */ #define PRID_COMP_INGENIC_E1 0xe10000 /* JZ4780 */ -- cgit From e98b461bb057aaea6fa766260788c08825213837 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 13 Jul 2021 10:49:15 -0700 Subject: MIPS: octeon: Remove vestiges of CONFIG_CAVIUM_RESERVE32 The config option CAVIUM_RESERVE32 is not used. Remove the dead code controlled by it. Signed-off-by: Joe Perches Signed-off-by: Thomas Bogendoerfer --- arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c | 21 +++--------- arch/mips/cavium-octeon/setup.c | 38 +--------------------- 2 files changed, 5 insertions(+), 54 deletions(-) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c index 3839feba68f2..fb42e8e21ea0 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c +++ b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c @@ -57,27 +57,14 @@ EXPORT_SYMBOL_GPL(__cvmx_cmd_queue_state_ptr); static cvmx_cmd_queue_result_t __cvmx_cmd_queue_init_state_ptr(void) { char *alloc_name = "cvmx_cmd_queues"; -#if defined(CONFIG_CAVIUM_RESERVE32) && CONFIG_CAVIUM_RESERVE32 - extern uint64_t octeon_reserve32_memory; -#endif if (likely(__cvmx_cmd_queue_state_ptr)) return CVMX_CMD_QUEUE_SUCCESS; -#if defined(CONFIG_CAVIUM_RESERVE32) && CONFIG_CAVIUM_RESERVE32 - if (octeon_reserve32_memory) - __cvmx_cmd_queue_state_ptr = - cvmx_bootmem_alloc_named_range(sizeof(*__cvmx_cmd_queue_state_ptr), - octeon_reserve32_memory, - octeon_reserve32_memory + - (CONFIG_CAVIUM_RESERVE32 << - 20) - 1, 128, alloc_name); - else -#endif - __cvmx_cmd_queue_state_ptr = - cvmx_bootmem_alloc_named(sizeof(*__cvmx_cmd_queue_state_ptr), - 128, - alloc_name); + __cvmx_cmd_queue_state_ptr = + cvmx_bootmem_alloc_named(sizeof(*__cvmx_cmd_queue_state_ptr), + 128, + alloc_name); if (__cvmx_cmd_queue_state_ptr) memset(__cvmx_cmd_queue_state_ptr, 0, sizeof(*__cvmx_cmd_queue_state_ptr)); diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index ce4e2806159b..0ddd3cc16ee4 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -284,11 +284,6 @@ void octeon_crash_smp_send_stop(void) #endif /* CONFIG_KEXEC */ -#ifdef CONFIG_CAVIUM_RESERVE32 -uint64_t octeon_reserve32_memory; -EXPORT_SYMBOL(octeon_reserve32_memory); -#endif - #ifdef CONFIG_KEXEC /* crashkernel cmdline parameter is parsed _after_ memory setup * we also parse it here (workaround for EHB5200) */ @@ -665,9 +660,7 @@ void __init prom_init(void) int i; u64 t; int argc; -#ifdef CONFIG_CAVIUM_RESERVE32 - int64_t addr = -1; -#endif + /* * The bootloader passes a pointer to the boot descriptor in * $a3, this is available as fw_arg3. 
@@ -782,25 +775,6 @@ void __init prom_init(void) cvmx_write_csr(CVMX_LED_UDD_DATX(1), 0); cvmx_write_csr(CVMX_LED_EN, 1); } -#ifdef CONFIG_CAVIUM_RESERVE32 - /* - * We need to temporarily allocate all memory in the reserve32 - * region. This makes sure the kernel doesn't allocate this - * memory when it is getting memory from the - * bootloader. Later, after the memory allocations are - * complete, the reserve32 will be freed. - * - * Allocate memory for RESERVED32 aligned on 2MB boundary. This - * is in case we later use hugetlb entries with it. - */ - addr = cvmx_bootmem_phy_named_block_alloc(CONFIG_CAVIUM_RESERVE32 << 20, - 0, 0, 2 << 20, - "CAVIUM_RESERVE32", 0); - if (addr < 0) - pr_err("Failed to allocate CAVIUM_RESERVE32 memory area\n"); - else - octeon_reserve32_memory = addr; -#endif #ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2 if (cvmx_read_csr(CVMX_L2D_FUS3) & (3ull << 34)) { @@ -1078,16 +1052,6 @@ void __init plat_mem_setup(void) cvmx_bootmem_unlock(); #endif /* CONFIG_CRASH_DUMP */ -#ifdef CONFIG_CAVIUM_RESERVE32 - /* - * Now that we've allocated the kernel memory it is safe to - * free the reserved region. We free it here so that builtin - * drivers can use the memory. - */ - if (octeon_reserve32_memory) - cvmx_bootmem_free_named("CAVIUM_RESERVE32"); -#endif /* CONFIG_CAVIUM_RESERVE32 */ - if (total == 0) panic("Unable to allocate memory from " "cvmx_bootmem_phy_alloc"); -- cgit From b7d2be48cc08a9d42e347d944efa9f37ab9b83d2 Mon Sep 17 00:00:00 2001 From: Chen Lifu Date: Tue, 29 Jun 2021 10:34:54 +0800 Subject: riscv: kprobes: implement the auipc instruction This has been tested by probing a module that contains an auipc instruction. Signed-off-by: Chen Lifu [Palmer: commit message] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/decode-insn.c | 2 +- arch/riscv/kernel/probes/simulate-insn.c | 34 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/probes/decode-insn.c b/arch/riscv/kernel/probes/decode-insn.c index 0ed043acc882..5eb03fb61450 100644 --- a/arch/riscv/kernel/probes/decode-insn.c +++ b/arch/riscv/kernel/probes/decode-insn.c @@ -38,11 +38,11 @@ riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api) RISCV_INSN_REJECTED(c_ebreak, insn); #endif - RISCV_INSN_REJECTED(auipc, insn); RISCV_INSN_REJECTED(branch, insn); RISCV_INSN_SET_SIMULATE(jal, insn); RISCV_INSN_SET_SIMULATE(jalr, insn); + RISCV_INSN_SET_SIMULATE(auipc, insn); return INSN_GOOD; } diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c index 2519ce26377d..b81719522d5c 100644 --- a/arch/riscv/kernel/probes/simulate-insn.c +++ b/arch/riscv/kernel/probes/simulate-insn.c @@ -83,3 +83,37 @@ bool __kprobes simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *reg return ret; } + +#define auipc_rd_idx(opcode) \ + ((opcode >> 7) & 0x1f) + +#define auipc_imm(opcode) \ + ((((opcode) >> 12) & 0xfffff) << 12) + +#if __riscv_xlen == 64 +#define auipc_offset(opcode) sign_extend64(auipc_imm(opcode), 31) +#elif __riscv_xlen == 32 +#define auipc_offset(opcode) auipc_imm(opcode) +#else +#error "Unexpected __riscv_xlen" +#endif + +bool __kprobes simulate_auipc(u32 opcode, unsigned long addr, struct pt_regs *regs) +{ + /* + * auipc instruction: + * 31 12 11 7 6 0 + * | imm[31:12] | rd | opcode | + * 20 5 7 + */ + + u32 rd_idx = auipc_rd_idx(opcode); + unsigned long rd_val = addr + auipc_offset(opcode); + + if (!rv_insn_reg_set_val(regs, rd_idx, rd_val)) + return false; + + 
instruction_pointer_set(regs, addr + 4);
+
+	return true;
+}
-- cgit
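To see the auipc field extraction above in action, here is a standalone
user-space sketch (illustrative only: the two opcodes, which encode
"auipc a0, 0x12345" and "auipc a0, 0xfffff", and the pc value are
invented for the example, and the kernel's sign_extend64() is
open-coded). The second opcode shows why the RV64 path needs the sign
extension.

#include <stdint.h>
#include <stdio.h>

/* same field extraction as the patch above */
#define auipc_rd_idx(opcode) ((opcode >> 7) & 0x1f)
#define auipc_imm(opcode)    ((((opcode) >> 12) & 0xfffff) << 12)

/* open-coded equivalent of the kernel's sign_extend64(value, 31) */
static int64_t sign_extend64(uint64_t value, int index)
{
	int shift = 63 - index;

	return (int64_t)(value << shift) >> shift;
}

int main(void)
{
	uint32_t insns[] = { 0x12345517, 0xfffff517 };
	uint64_t pc = 0x1000;	/* arbitrary example address */
	int i;

	for (i = 0; i < 2; i++) {
		uint32_t op = insns[i];
		uint64_t rd_val = pc + sign_extend64(auipc_imm(op), 31);

		/* prints rd=x10 <- 0x12346000, then rd=x10 <- 0 */
		printf("opcode %08x: rd=x%u <- %#llx\n", op,
		       (unsigned int)auipc_rd_idx(op),
		       (unsigned long long)rd_val);
	}
	return 0;
}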
From 67979e927dd053bde3b71128495f651256b3161c Mon Sep 17 00:00:00 2001
From: Chen Lifu
Date: Tue, 29 Jun 2021 10:34:55 +0800
Subject: riscv: kprobes: implement the branch instructions

This has been tested by probing a module that contains each of the
flavors of branches we have.

Signed-off-by: Chen Lifu
[Palmer: commit message, fix kconfig errors]
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/kernel/probes/decode-insn.c   |  3 +-
 arch/riscv/kernel/probes/simulate-insn.c | 78 ++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/riscv/kernel/probes/decode-insn.c b/arch/riscv/kernel/probes/decode-insn.c
index 5eb03fb61450..64f6183b4717 100644
--- a/arch/riscv/kernel/probes/decode-insn.c
+++ b/arch/riscv/kernel/probes/decode-insn.c
@@ -38,11 +38,10 @@ riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api)
 	RISCV_INSN_REJECTED(c_ebreak, insn);
 #endif
 
-	RISCV_INSN_REJECTED(branch, insn);
-
 	RISCV_INSN_SET_SIMULATE(jal, insn);
 	RISCV_INSN_SET_SIMULATE(jalr, insn);
 	RISCV_INSN_SET_SIMULATE(auipc, insn);
+	RISCV_INSN_SET_SIMULATE(branch, insn);
 
 	return INSN_GOOD;
 }
diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c
index b81719522d5c..d73e96f6ed7c 100644
--- a/arch/riscv/kernel/probes/simulate-insn.c
+++ b/arch/riscv/kernel/probes/simulate-insn.c
@@ -117,3 +117,81 @@ bool __kprobes simulate_auipc(u32 opcode, unsigned long addr, struct pt_regs *re
 
 	return true;
 }
+
+#define branch_rs1_idx(opcode) \
+	(((opcode) >> 15) & 0x1f)
+
+#define branch_rs2_idx(opcode) \
+	(((opcode) >> 20) & 0x1f)
+
+#define branch_funct3(opcode) \
+	(((opcode) >> 12) & 0x7)
+
+#define branch_imm(opcode) \
+	(((((opcode) >> 8) & 0xf ) << 1) | \
+	 ((((opcode) >> 25) & 0x3f) << 5) | \
+	 ((((opcode) >> 7) & 0x1 ) << 11) | \
+	 ((((opcode) >> 31) & 0x1 ) << 12))
+
+#define branch_offset(opcode) \
+	sign_extend32((branch_imm(opcode)), 12)
+
+#define BRANCH_BEQ	0x0
+#define BRANCH_BNE	0x1
+#define BRANCH_BLT	0x4
+#define BRANCH_BGE	0x5
+#define BRANCH_BLTU	0x6
+#define BRANCH_BGEU	0x7
+
+bool __kprobes simulate_branch(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+	/*
+	 * branch instructions:
+	 *      31    30       25 24 20 19 15 14    12 11       8    7      6    0
+	 * | imm[12] | imm[10:5] | rs2 | rs1 | funct3 | imm[4:1] | imm[11] | opcode |
+	 *     1           6        5     5      3         4          1         7
+	 * imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ
+	 * imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE
+	 * imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT
+	 * imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE
+	 * imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU
+	 * imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU
+	 */
+
+	s32 offset;
+	s32 offset_tmp;
+	unsigned long rs1_val;
+	unsigned long rs2_val;
+
+	if (!rv_insn_reg_get_val(regs, branch_rs1_idx(opcode), &rs1_val) ||
+	    !rv_insn_reg_get_val(regs, branch_rs2_idx(opcode), &rs2_val))
+		return false;
+
+	offset_tmp = branch_offset(opcode);
+	switch (branch_funct3(opcode)) {
+	case BRANCH_BEQ:
+		offset = (rs1_val == rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BNE:
+		offset = (rs1_val != rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BLT:
+		offset = ((long)rs1_val < (long)rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BGE:
+		offset = ((long)rs1_val >= (long)rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BLTU:
+		offset = (rs1_val < rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BGEU:
+		offset = (rs1_val >= rs2_val) ? offset_tmp : 4;
+		break;
+	default:
+		return false;
+	}
+
+	instruction_pointer_set(regs, addr + offset);
+
+	return true;
+}
-- cgit

From faff43da31ae469ff28d71af4aa47f0c08b0b26c Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Mon, 19 Jul 2021 21:16:17 -0700
Subject: mips: cavium-octeon: clean up kernel-doc in cvmx-interrupt-decodes.c

Fix kernel-doc warnings reported by kernel test robot:

arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c:49: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * __cvmx_interrupt_gmxx_rxx_int_en_enable enables all interrupt bits in cvmx_gmxx_rxx_int_en_t
arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c:230: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * __cvmx_interrupt_pcsx_intx_en_reg_enable enables all interrupt bits in cvmx_pcsx_intx_en_reg_t
arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c:271: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * __cvmx_interrupt_pcsxx_int_en_reg_enable enables all interrupt bits in cvmx_pcsxx_int_en_reg_t
arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c:301: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * __cvmx_interrupt_spxx_int_msk_enable enables all interrupt bits in cvmx_spxx_int_msk_t
arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c:340: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * __cvmx_interrupt_stxx_int_msk_enable enables all interrupt bits in cvmx_stxx_int_msk_t

Signed-off-by: Randy Dunlap
Reported-by: kernel test robot
Cc: Aditya Srivastava
Cc: David Daney
Cc: Thomas Bogendoerfer
Cc: linux-mips@vger.kernel.org
Signed-off-by: Thomas Bogendoerfer
---
 .../cavium-octeon/executive/cvmx-interrupt-decodes.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c b/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c
index 2f415d9d0f3c..67d6da21d49f 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c
@@ -46,7 +46,9 @@
 
 /**
- * __cvmx_interrupt_gmxx_rxx_int_en_enable enables all interrupt bits in cvmx_gmxx_rxx_int_en_t
+ * __cvmx_interrupt_gmxx_rxx_int_en_enable - enable all interrupt bits in cvmx_gmxx_rxx_int_en_t
+ * @index: interrupt register offset
+ * @block: interrupt register block_id
  */
 void __cvmx_interrupt_gmxx_rxx_int_en_enable(int index, int block)
 {
@@ -227,7 +229,9 @@ void __cvmx_interrupt_gmxx_rxx_int_en_enable(int index, int block)
 	cvmx_write_csr(CVMX_GMXX_RXX_INT_EN(index, block), gmx_rx_int_en.u64);
 }
 /**
- * __cvmx_interrupt_pcsx_intx_en_reg_enable enables all interrupt bits in cvmx_pcsx_intx_en_reg_t
+ * __cvmx_interrupt_pcsx_intx_en_reg_enable - enable all interrupt bits in cvmx_pcsx_intx_en_reg_t
+ * @index: interrupt register offset
+ * @block: interrupt register block_id
  */
 void __cvmx_interrupt_pcsx_intx_en_reg_enable(int index, int block)
 {
@@ -268,7 +272,8 @@ void __cvmx_interrupt_pcsx_intx_en_reg_enable(int index, int block)
 	cvmx_write_csr(CVMX_PCSX_INTX_EN_REG(index, block), pcs_int_en_reg.u64);
 }
 /**
- * __cvmx_interrupt_pcsxx_int_en_reg_enable enables all interrupt bits in
cvmx_pcsxx_int_en_reg_t + * __cvmx_interrupt_pcsxx_int_en_reg_enable - enable all interrupt bits in cvmx_pcsxx_int_en_reg_t + * @index: interrupt register block_id */ void __cvmx_interrupt_pcsxx_int_en_reg_enable(int index) { @@ -298,7 +303,8 @@ void __cvmx_interrupt_pcsxx_int_en_reg_enable(int index) } /** - * __cvmx_interrupt_spxx_int_msk_enable enables all interrupt bits in cvmx_spxx_int_msk_t + * __cvmx_interrupt_spxx_int_msk_enable - enable all interrupt bits in cvmx_spxx_int_msk_t + * @index: interrupt register block_id */ void __cvmx_interrupt_spxx_int_msk_enable(int index) { @@ -337,7 +343,8 @@ void __cvmx_interrupt_spxx_int_msk_enable(int index) cvmx_write_csr(CVMX_SPXX_INT_MSK(index), spx_int_msk.u64); } /** - * __cvmx_interrupt_stxx_int_msk_enable enables all interrupt bits in cvmx_stxx_int_msk_t + * __cvmx_interrupt_stxx_int_msk_enable - enable all interrupt bits in cvmx_stxx_int_msk_t + * @index: interrupt register block_id */ void __cvmx_interrupt_stxx_int_msk_enable(int index) { -- cgit From 73b9919f3c17851b4c1d90fb6c343d9c438c554c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 19 Jul 2021 21:34:32 -0700 Subject: mips: netlogic: fix kernel-doc complaints in fmn-config.c Clean up kernel-doc warnings in netlogic/xlr/fmn-config.c by using correct kernel-doc format. Fixes these warnings: arch/mips/netlogic/xlr/fmn-config.c:106: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Configure bucket size and credits for a device. 'size' is the size of arch/mips/netlogic/xlr/fmn-config.c:181: warning: expecting prototype for Setup the FMN details for each devices according to the device available(). Prototype was for xlr_board_info_setup() instead Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Ganesan Ramalingam Cc: John Crispin Cc: Aditya Srivastava Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Thomas Bogendoerfer --- arch/mips/netlogic/xlr/fmn-config.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/mips/netlogic/xlr/fmn-config.c b/arch/mips/netlogic/xlr/fmn-config.c index c7622c6e5f67..15483537e8cf 100644 --- a/arch/mips/netlogic/xlr/fmn-config.c +++ b/arch/mips/netlogic/xlr/fmn-config.c @@ -103,18 +103,19 @@ static void check_credit_distribution(void) } /** - * Configure bucket size and credits for a device. 'size' is the size of - * the buckets for the device. This size is distributed among all the CPUs - * so that all of them can send messages to the device. - * - * The device is also given 'cpu_credits' to send messages to the CPUs - * + * setup_fmn_cc - Configure bucket size and credits for a device. * @dev_info: FMN information structure for each devices * @start_stn_id: Starting station id of dev_info * @end_stn_id: End station id of dev_info * @num_buckets: Total number of buckets for den_info * @cpu_credits: Allowed credits to cpu for each devices pointing by dev_info * @size: Size of the each buckets in the device station + * + * 'size' is the size of the buckets for the device. This size is + * distributed among all the CPUs + * so that all of them can send messages to the device. 
+ *
+ * The device is also given 'cpu_credits' to send messages to the CPUs
  */
 static void setup_fmn_cc(struct xlr_fmn_info *dev_info, int start_stn_id,
 		int end_stn_id, int num_buckets, int cpu_credits, int size)
@@ -174,6 +175,8 @@ static void setup_cpu_fmninfo(struct xlr_fmn_info *cpu, int num_core)
 }
 
 /**
+ * xlr_board_info_setup - Setup FMN details
+ *
  * Setup the FMN details for each devices according to the device available
  * in each variant of XLR/XLS processor
  */
-- cgit

From d17eef2767d84b7dc4e1b8029be1eacb1d8679f6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Thu, 1 Apr 2021 00:06:56 +0900
Subject: mips: replace deprecated EXTRA_CFLAGS with ccflags-y

As Documentation/kbuild/makefiles.rst says, EXTRA_CFLAGS is deprecated.
Replace it with ccflags-y.

Signed-off-by: Masahiro Yamada
Signed-off-by: Thomas Bogendoerfer
---
 arch/mips/kvm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index c67250a956b8..18c69eff1d3f 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -4,7 +4,7 @@
 
 common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
 
-EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm
+ccflags-y += -Ivirt/kvm -Iarch/mips/kvm
 
 common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
-- cgit

From d656132d2a2abc06917d822f7adcda86fd6dd192 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Thu, 1 Apr 2021 00:06:57 +0900
Subject: mips: clean up kvm Makefile

You can use kvm-y instead of kvm-objs to create the composite module.
kvm-$(CONFIG_...) looks cleaner.

Signed-off-by: Masahiro Yamada
Signed-off-by: Thomas Bogendoerfer
---
 arch/mips/kvm/Makefile | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index 18c69eff1d3f..d3710959da55 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -2,21 +2,18 @@
 # Makefile for KVM support for MIPS
 #
 
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
-
 ccflags-y += -Ivirt/kvm -Iarch/mips/kvm
 
-common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
+kvm-y := $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
+kvm-$(CONFIG_CPU_HAS_MSA) += msa.o
 
-kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \
+kvm-y += mips.o emulate.o entry.o \
 	    interrupt.o stats.o \
 	    fpu.o
-kvm-objs += hypcall.o
-kvm-objs += mmu.o
-ifdef CONFIG_CPU_LOONGSON64
-kvm-objs += loongson_ipi.o
-endif
+kvm-y += hypcall.o
+kvm-y += mmu.o
+kvm-$(CONFIG_CPU_LOONGSON64) += loongson_ipi.o
 
-kvm-objs += vz.o
+kvm-y += vz.o
 obj-$(CONFIG_KVM) += kvm.o
 obj-y += callback.o tlb.o
-- cgit

From 93ebb6828723b8aef114415c4dc3518342f7dcad Mon Sep 17 00:00:00 2001
From: Halil Pasic
Date: Sat, 24 Jul 2021 01:17:46 +0200
Subject: s390/pv: fix the forcing of the swiotlb

Since commit 903cd0f315fe ("swiotlb: Use is_swiotlb_force_bounce for
swiotlb data bouncing") if code sets swiotlb_force it needs to do so
before the swiotlb is initialised. Otherwise
io_tlb_default_mem->force_bounce will not get set to true, and devices
that use (the default) swiotlb will not bounce despite swiotlb_force
having the value of SWIOTLB_FORCE.

Let us restore swiotlb functionality for PV by fulfilling this new
requirement.
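The requirement boils down to an ordering constraint; condensed from
the pv_init() hunk quoted below:

	/* make sure bounce buffers are shared */
	swiotlb_force = SWIOTLB_FORCE;	/* must be set first ... */
	swiotlb_init(1);		/* ... so force_bounce is derived from it here */
	swiotlb_update_mem_attributes();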
This change addresses what turned out to be a fragility in commit 64e1f0c531d1 ("s390/mm: force swiotlb for protected virtualization"), which ain't exactly broken in its original context, but could give us some more headache if people backport the broken change and forget this fix. Signed-off-by: Halil Pasic Tested-by: Christian Borntraeger Reviewed-by: Christian Borntraeger Fixes: 903cd0f315fe ("swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing") Fixes: 64e1f0c531d1 ("s390/mm: force swiotlb for protected virtualization") Cc: stable@vger.kernel.org #5.3+ Signed-off-by: Konrad Rzeszutek Wilk --- arch/s390/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8ac710de1ab1..07bbee9b7320 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -186,9 +186,9 @@ static void pv_init(void) return; /* make sure bounce buffers are shared */ + swiotlb_force = SWIOTLB_FORCE; swiotlb_init(1); swiotlb_update_mem_attributes(); - swiotlb_force = SWIOTLB_FORCE; } void __init mem_init(void) -- cgit From 8119cefd9a29b71997e62b762932d23499ba4896 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 14 Jul 2021 18:17:58 +0530 Subject: powerpc/kexec: blacklist functions called in real mode for kprobe As kprobe does not handle events happening in real mode, blacklist the functions that only get called in real mode or in kexec sequence with MMU turned off. Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/162626687834.155313.4692863392927831843.stgit@hbathini-workstation.ibm.com --- arch/powerpc/kernel/head_64.S | 2 ++ arch/powerpc/kexec/core_64.c | 6 ++++-- arch/powerpc/mm/book3s64/hash_native.c | 2 +- arch/powerpc/mm/book3s64/pgtable.c | 4 ++-- arch/powerpc/mm/book3s64/radix_pgtable.c | 3 ++- arch/powerpc/platforms/ps3/htab.c | 3 ++- arch/powerpc/platforms/ps3/mm.c | 8 ++++++-- arch/powerpc/platforms/pseries/lpar.c | 9 ++++++--- 8 files changed, 25 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 79930b0bc781..f17ae2083733 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -712,6 +712,8 @@ _GLOBAL(copy_and_flush) isync blr +_ASM_NOKPROBE_SYMBOL(copy_and_flush); /* Called in real mode */ + .align 8 copy_to_here: diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 8a449b2d8715..84618d3c8013 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -72,7 +72,8 @@ int default_machine_kexec_prepare(struct kimage *image) return 0; } -static void copy_segments(unsigned long ind) +/* Called during kexec sequence with MMU off */ +static notrace void copy_segments(unsigned long ind) { unsigned long entry; unsigned long *ptr; @@ -105,7 +106,8 @@ static void copy_segments(unsigned long ind) } } -void kexec_copy_flush(struct kimage *image) +/* Called during kexec sequence with MMU off */ +notrace void kexec_copy_flush(struct kimage *image) { long i, nr_segments = image->nr_segments; struct kexec_segment ranges[KEXEC_SEGMENT_MAX]; diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 52e170bd95ae..d8279bfe68ea 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -787,7 +787,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, * TODO: add batching support when enabled. 
remember, no dynamic memory here, * although there is the control page available... */ -static void native_hpte_clear(void) +static notrace void native_hpte_clear(void) { unsigned long vpn = 0; unsigned long slot, slots; diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 9ffa65074cb0..300099de553b 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -172,8 +172,8 @@ pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -/* For use by kexec */ -void mmu_cleanup_all(void) +/* For use by kexec, called with MMU off */ +notrace void mmu_cleanup_all(void) { if (radix_enabled()) radix__mmu_cleanup_all(); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index e50ddf129c15..ae20add7954a 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -679,7 +679,8 @@ void radix__early_init_mmu_secondary(void) mtspr(SPRN_UAMOR, 0); } -void radix__mmu_cleanup_all(void) +/* Called during kexec sequence with MMU off */ +notrace void radix__mmu_cleanup_all(void) { unsigned long lpcr; diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 7ddc7ec6a7c0..ef710a715903 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -169,7 +169,8 @@ static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, spin_unlock_irqrestore(&ps3_htab_lock, flags); } -static void ps3_hpte_clear(void) +/* Called during kexec sequence with MMU off */ +static notrace void ps3_hpte_clear(void) { unsigned long hpte_count = (1UL << ppc64_pft_size) >> 4; u64 i; diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index a81eac35d900..9c44f335c0b9 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -195,9 +195,11 @@ fail: /** * ps3_mm_vas_destroy - + * + * called during kexec sequence with MMU off. */ -void ps3_mm_vas_destroy(void) +notrace void ps3_mm_vas_destroy(void) { int result; @@ -1243,9 +1245,11 @@ void __init ps3_mm_init(void) /** * ps3_mm_shutdown - final cleanup of address space + * + * called during kexec sequence with MMU off. 
 */
-void ps3_mm_shutdown(void)
+notrace void ps3_mm_shutdown(void)
 {
 	ps3_mm_region_destroy(&map.r1);
 }
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index dab356e3ff87..869ef638698a 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -801,7 +801,8 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
 	return -1;
 }
 
-static void manual_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace void manual_hpte_clear_all(void)
 {
 	unsigned long size_bytes = 1UL << ppc64_pft_size;
 	unsigned long hpte_count = size_bytes >> 4;
@@ -834,7 +835,8 @@ static void manual_hpte_clear_all(void)
 	}
 }
 
-static int hcall_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace int hcall_hpte_clear_all(void)
 {
 	int rc;
 
@@ -845,7 +847,8 @@ static int hcall_hpte_clear_all(void)
 	return rc;
 }
 
-static void pseries_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace void pseries_hpte_clear_all(void)
 {
 	int rc;
-- cgit
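The same pattern applies to any C function that can run with the MMU
off. A hedged sketch with an invented helper name (the patch above uses
notrace on its C entry points and _ASM_NOKPROBE_SYMBOL() on the
assembly routine; NOKPROBE_SYMBOL() is the generic way to blacklist a C
function from kprobes explicitly):

#include <linux/kprobes.h>

/*
 * Invented example, not from the patch above.  notrace removes the
 * ftrace hook (one of the ways kprobes attaches on powerpc), and
 * NOKPROBE_SYMBOL() puts the function on the kprobe blacklist so a
 * probe can never be armed inside it.
 */
static notrace void my_realmode_helper(void)
{
	/* MMU is off here: a kprobe trap could not be serviced */
}
NOKPROBE_SYMBOL(my_realmode_helper);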
From 16df55ce104125b4bf1070467973070fb6fc16ff Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Sun, 25 Jul 2021 09:02:56 -0700
Subject: mips: clean up (remove) kernel-doc in cavium-octeon/executive/

Remove all kernel-doc notation in arch/mips/cavium-octeon/executive/.
This removes dozens of kernel-doc warnings. Most of these functions
are static and don't need to be documented with kernel-doc. The
function comments are still present for anyone who wants to read them.
These files are part of the OCTEON SDK so presumably they are
documented there as well.

arch/mips/cavium-octeon/executive/cvmx-bootmem.c:61: warning: Function parameter or member 'addr' not described in 'CVMX_BOOTMEM_NAMED_GET_FIELD'
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:61: warning: Function parameter or member 'field' not described in 'CVMX_BOOTMEM_NAMED_GET_FIELD'
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:61: warning: expecting prototype for This macro returns a member of the(). Prototype was for CVMX_BOOTMEM_NAMED_GET_FIELD() instead
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:77: warning: Function parameter or member 'base' not described in '__cvmx_bootmem_desc_get'
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:77: warning: Function parameter or member 'offset' not described in '__cvmx_bootmem_desc_get'
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:77: warning: Function parameter or member 'size' not described in '__cvmx_bootmem_desc_get'
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:77: warning: expecting prototype for This function is the implementation of the get macros defined(). Prototype was for __cvmx_bootmem_desc_get() instead
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:133: warning: expecting prototype for Allocate a block of memory from the free list that was(). Prototype was for cvmx_bootmem_alloc_range() instead
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:554: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Finds a named memory block by name.
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:661: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Frees a named block.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/mips/cavium-octeon/executive/cvmx-helper-board.c:64: warning: expecting prototype for Return the MII PHY address associated with the given IPD(). Prototype was for cvmx_helper_board_get_mii_address() instead
arch/mips/cavium-octeon/executive/cvmx-helper-board.c:211: warning: expecting prototype for This function is the board specific method of determining an(). Prototype was for __cvmx_helper_board_link_get() instead
arch/mips/cavium-octeon/executive/cvmx-helper-board.c:278: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * This function is called by cvmx_helper_interface_probe() after it
arch/mips/cavium-octeon/executive/cvmx-helper-board.c:324: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Get the clock type used for the USB block based on board type.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/mips/cavium-octeon/executive/cvmx-spi.c:77: warning: expecting prototype for Get current SPI4 initialization callbacks(). Prototype was for cvmx_spi_get_callbacks() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:87: warning: expecting prototype for Set new SPI4 initialization callbacks(). Prototype was for cvmx_spi_set_callbacks() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:92: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Initialize and start the SPI interface.
arch/mips/cavium-octeon/executive/cvmx-spi.c:151: warning: expecting prototype for This routine restarts the SPI interface after it has lost synchronization(). Prototype was for cvmx_spi_restart_interface() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:196: warning: expecting prototype for Callback to perform SPI4 reset(). Prototype was for cvmx_spi_reset_cb() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:313: warning: expecting prototype for Callback to setup calendar and miscellaneous settings before clock detection(). Prototype was for cvmx_spi_calendar_setup_cb() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:431: warning: expecting prototype for Callback to perform clock detection(). Prototype was for cvmx_spi_clock_detect_cb() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:509: warning: expecting prototype for Callback to perform link training(). Prototype was for cvmx_spi_training_cb() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:578: warning: expecting prototype for Callback to perform calendar data synchronization(). Prototype was for cvmx_spi_calendar_sync_cb() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:634: warning: expecting prototype for Callback to handle interface up(). Prototype was for cvmx_spi_interface_up_cb() instead
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c:67: warning: expecting prototype for Probe a XAUI interface and determine the number of ports(). Prototype was for __cvmx_helper_xaui_probe() instead
arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c:106: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Bringup and enable a XAUI interface. After this call packet
arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c:253: warning: This comment starts with '/**', but isn't a kernel-doc comment.
Refer Documentation/doc-guide/kernel-doc.rst * Return the link state of an IPD/PKO port as returned by arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c:292: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Configure an IPD/PKO port for the specified link state. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/mips/cavium-octeon/executive/cvmx-pko.c:43: warning: Function parameter or member 'interface' not described in '__cvmx_pko_int' arch/mips/cavium-octeon/executive/cvmx-pko.c:43: warning: Function parameter or member 'index' not described in '__cvmx_pko_int' arch/mips/cavium-octeon/executive/cvmx-pko.c:43: warning: expecting prototype for Internal state of packet output(). Prototype was for __cvmx_pko_int() instead arch/mips/cavium-octeon/executive/cvmx-pko.c:186: warning: expecting prototype for Call before any other calls to initialize the packet(). Prototype was for cvmx_pko_initialize_global() instead arch/mips/cavium-octeon/executive/cvmx-pko.c:241: warning: expecting prototype for This function does per(). Prototype was for cvmx_pko_initialize_local() instead arch/mips/cavium-octeon/executive/cvmx-pko.c:247: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Enables the packet output hardware. It must already be arch/mips/cavium-octeon/executive/cvmx-pko.c:270: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Disables the packet output. Does not affect any configuration. arch/mips/cavium-octeon/executive/cvmx-pko.c:282: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Reset the packet output. arch/mips/cavium-octeon/executive/cvmx-pko.c:293: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Shutdown and free resources required by packet output. arch/mips/cavium-octeon/executive/cvmx-pko.c:324: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Configure a output port and the associated queues for use. arch/mips/cavium-octeon/executive/cvmx-pko.c:555: warning: expecting prototype for Show map of ports(). Prototype was for cvmx_pko_show_queue_map() instead arch/mips/cavium-octeon/executive/cvmx-pko.c:577: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Rate limit a PKO port to a max packets/sec. This function is only arch/mips/cavium-octeon/executive/cvmx-pko.c:610: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Rate limit a PKO port to a max bits/sec. This function is only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c:49: warning: cannot understand function prototype: '__cvmx_cmd_queue_all_state_t *__cvmx_cmd_queue_state_ptr; ' arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c:53: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Initialize the Global queue state pointer. arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c:101: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Initialize a command queue for use. 
The initial FPA buffer is arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c:199: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Shutdown a queue a free it's command buffers to the FPA. The arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c:235: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Return the number of command words pending in the queue. This arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c:291: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Return the command buffer to be written to. The purpose of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c:53: warning: expecting prototype for Probe RGMII ports and determine the number present(). Prototype was for __cvmx_helper_rgmii_probe() instead arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c:92: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Put an RGMII interface in loopback mode. Internal packets sent arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c:135: warning: expecting prototype for Workaround ASX setup errata with CN38XX pass1(). Prototype was for __cvmx_helper_errata_asx_pass1() instead arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c:152: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Configure all of the ASX, GMX, and PKO registers required arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c:255: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Return the link state of an IPD/PKO port as returned by arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c:284: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Configure an IPD/PKO port for the specified link state. This -- arch/mips/cavium-octeon/executive/cvmx-l2c.c:768: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Return log base 2 of the number of sets in the L2 cache arch/mips/cavium-octeon/executive/cvmx-l2c.c:861: warning: This comment starts with '/**', but isn't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst * Flush a line from the L2 cache Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Aditya Srivastava Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Thomas Bogendoerfer --- arch/mips/cavium-octeon/executive/cvmx-bootmem.c | 10 +++++----- arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c | 12 ++++++------ .../cavium-octeon/executive/cvmx-helper-board.c | 8 ++++---- .../cavium-octeon/executive/cvmx-helper-rgmii.c | 12 ++++++------ .../cavium-octeon/executive/cvmx-helper-xaui.c | 8 ++++---- arch/mips/cavium-octeon/executive/cvmx-l2c.c | 9 ++++----- arch/mips/cavium-octeon/executive/cvmx-pko.c | 22 +++++++++++----------- arch/mips/cavium-octeon/executive/cvmx-spi.c | 20 ++++++++++---------- 8 files changed, 50 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c index e794b2d53adf..b63ad5d42cc7 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c +++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c @@ -44,7 +44,7 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc; /* See header file for descriptions of functions */ -/** +/* * This macro returns a member of the * cvmx_bootmem_named_block_desc_t structure. These members can't * be directly addressed as they might be in memory not directly @@ -60,7 +60,7 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc; offsetof(struct cvmx_bootmem_named_block_desc, field), \ sizeof_field(struct cvmx_bootmem_named_block_desc, field)) -/** +/* * This function is the implementation of the get macros defined * for individual structure members. The argument are generated * by the macros inorder to read only the needed memory. @@ -115,7 +115,7 @@ static uint64_t cvmx_bootmem_phy_get_next(uint64_t addr) return cvmx_read64_uint64((addr + NEXT_OFFSET) | (1ull << 63)); } -/** +/* * Allocate a block of memory from the free list that was * passed to the application by the bootloader within a specified * address range. This is an allocate-only algorithm, so @@ -550,7 +550,7 @@ bootmem_free_done: } -/** +/* * Finds a named memory block by name. * Also used for finding an unused entry in the named block table. * @@ -657,7 +657,7 @@ struct cvmx_bootmem_named_block_desc *cvmx_bootmem_find_named_block(char *name) } EXPORT_SYMBOL(cvmx_bootmem_find_named_block); -/** +/* * Frees a named block. * * @name: name of block to free diff --git a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c index fb42e8e21ea0..20189e9ad94d 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c +++ b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c @@ -42,14 +42,14 @@ #include #include -/** +/* * This application uses this pointer to access the global queue * state. It points to a bootmem named block. */ __cvmx_cmd_queue_all_state_t *__cvmx_cmd_queue_state_ptr; EXPORT_SYMBOL_GPL(__cvmx_cmd_queue_state_ptr); -/** +/* * Initialize the Global queue state pointer. * * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code @@ -84,7 +84,7 @@ static cvmx_cmd_queue_result_t __cvmx_cmd_queue_init_state_ptr(void) return CVMX_CMD_QUEUE_SUCCESS; } -/** +/* * Initialize a command queue for use. The initial FPA buffer is * allocated and the hardware unit is configured to point to the * new command queue. 
@@ -182,7 +182,7 @@ cvmx_cmd_queue_result_t cvmx_cmd_queue_initialize(cvmx_cmd_queue_id_t queue_id, } } -/** +/* * Shutdown a queue a free it's command buffers to the FPA. The * hardware connected to the queue must be stopped before this * function is called. @@ -218,7 +218,7 @@ cvmx_cmd_queue_result_t cvmx_cmd_queue_shutdown(cvmx_cmd_queue_id_t queue_id) return CVMX_CMD_QUEUE_SUCCESS; } -/** +/* * Return the number of command words pending in the queue. This * function may be relatively slow for some hardware units. * @@ -274,7 +274,7 @@ int cvmx_cmd_queue_length(cvmx_cmd_queue_id_t queue_id) return CVMX_CMD_QUEUE_INVALID_PARAM; } -/** +/* * Return the command buffer to be written to. The purpose of this * function is to allow CVMX routine access t othe low level buffer * for initial hardware setup. User applications should not call this diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c index abd11b7af22f..1daa0c6b6f4e 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c @@ -44,7 +44,7 @@ #include #include -/** +/* * Return the MII PHY address associated with the given IPD * port. A result of -1 means there isn't a MII capable PHY * connected to this port. On chips supporting multiple MII @@ -189,7 +189,7 @@ int cvmx_helper_board_get_mii_address(int ipd_port) return -1; } -/** +/* * This function is the board specific method of determining an * ethernet ports link speed. Most Octeon boards have Marvell PHYs * and are handled by the fall through case. This function must be @@ -274,7 +274,7 @@ union cvmx_helper_link_info __cvmx_helper_board_link_get(int ipd_port) return result; } -/** +/* * This function is called by cvmx_helper_interface_probe() after it * determines the number of ports Octeon can support on a specific * interface. This function is the per board location to override @@ -320,7 +320,7 @@ int __cvmx_helper_board_interface_probe(int interface, int supported_ports) return supported_ports; } -/** +/* * Get the clock type used for the USB block based on board type. * Used by the USB code for auto configuration of clock type. * diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c b/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c index c4b58598aa9d..a8c3be4eb6f0 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c @@ -42,7 +42,7 @@ #include #include -/** +/* * Probe RGMII ports and determine the number present * * @interface: Interface to probe @@ -88,7 +88,7 @@ int __cvmx_helper_rgmii_probe(int interface) return num_ports; } -/** +/* * Put an RGMII interface in loopback mode. Internal packets sent * out will be received back again on the same port. Externally * received packets will echo back out. @@ -120,7 +120,7 @@ void cvmx_helper_rgmii_internal_loopback(int port) cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface), gmx_cfg.u64); } -/** +/* * Workaround ASX setup errata with CN38XX pass1 * * @interface: Interface to setup @@ -148,7 +148,7 @@ static int __cvmx_helper_errata_asx_pass1(int interface, int port, return 0; } -/** +/* * Configure all of the ASX, GMX, and PKO registers required * to get RGMII to function on the supplied interface. * @@ -251,7 +251,7 @@ int __cvmx_helper_rgmii_enable(int interface) return 0; } -/** +/* * Return the link state of an IPD/PKO port as returned by * auto negotiation. 
The result of this function may not match * Octeon's link config if auto negotiation has changed since @@ -280,7 +280,7 @@ union cvmx_helper_link_info __cvmx_helper_rgmii_link_get(int ipd_port) return __cvmx_helper_board_link_get(ipd_port); } -/** +/* * Configure an IPD/PKO port for the specified link state. This * function does not influence auto negotiation at the PHY level. * The passed link state must always match the link state returned diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c b/arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c index 842990e8404f..fea71a85bb29 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c @@ -54,7 +54,7 @@ int __cvmx_helper_xaui_enumerate(int interface) return 1; } -/** +/* * Probe a XAUI interface and determine the number of ports * connected to it. The XAUI interface should still be down * after this call. @@ -102,7 +102,7 @@ int __cvmx_helper_xaui_probe(int interface) return __cvmx_helper_xaui_enumerate(interface); } -/** +/* * Bringup and enable a XAUI interface. After this call packet * I/O should be fully functional. This is called with IPD * enabled but PKO disabled. @@ -249,7 +249,7 @@ int __cvmx_helper_xaui_enable(int interface) return 0; } -/** +/* * Return the link state of an IPD/PKO port as returned by * auto negotiation. The result of this function may not match * Octeon's link config if auto negotiation has changed since @@ -288,7 +288,7 @@ union cvmx_helper_link_info __cvmx_helper_xaui_link_get(int ipd_port) return result; } -/** +/* * Configure an IPD/PKO port for the specified link state. This * function does not influence auto negotiation at the PHY level. * The passed link state must always match the link state returned diff --git a/arch/mips/cavium-octeon/executive/cvmx-l2c.c b/arch/mips/cavium-octeon/executive/cvmx-l2c.c index 83df0a963a8b..33b303691bc2 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-l2c.c +++ b/arch/mips/cavium-octeon/executive/cvmx-l2c.c @@ -281,7 +281,7 @@ uint64_t cvmx_l2c_read_perf(uint32_t counter) } } -/** +/* * @INTERNAL * Helper function use to fault in cache lines for L2 cache locking * @@ -575,7 +575,7 @@ union __cvmx_l2c_tag { }; -/** +/* * @INTERNAL * Function to read a L2C tag. This code make the current core * the 'debug core' for the L2. This code must only be executed by @@ -764,9 +764,8 @@ int cvmx_l2c_get_cache_size_bytes(void) CVMX_CACHE_LINE_SIZE; } -/** +/* * Return log base 2 of the number of sets in the L2 cache - * Returns */ int cvmx_l2c_get_set_bits(void) { @@ -857,7 +856,7 @@ int cvmx_l2c_get_num_assoc(void) return l2_assoc; } -/** +/* * Flush a line from the L2 cache * This should only be called from one core at a time, as this routine * sets the core to the 'debug' core in order to flush the line. diff --git a/arch/mips/cavium-octeon/executive/cvmx-pko.c b/arch/mips/cavium-octeon/executive/cvmx-pko.c index b0efc35e95c4..7c4879e74318 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-pko.c +++ b/arch/mips/cavium-octeon/executive/cvmx-pko.c @@ -35,7 +35,7 @@ #include #include -/** +/* * Internal state of packet output */ @@ -176,7 +176,7 @@ static void __cvmx_pko_chip_init(void) } } -/** +/* * Call before any other calls to initialize the packet * output system. This does chip global config, and should only be * done by one core. @@ -229,7 +229,7 @@ void cvmx_pko_initialize_global(void) } } -/** +/* * This function does per-core initialization required by the PKO routines. 
* This must be called on all cores that will do packet output, and must * be called after the FPA has been initialized and filled with pages. @@ -243,7 +243,7 @@ int cvmx_pko_initialize_local(void) return 0; } -/** +/* * Enables the packet output hardware. It must already be * configured. */ @@ -266,7 +266,7 @@ void cvmx_pko_enable(void) cvmx_write_csr(CVMX_PKO_REG_FLAGS, flags.u64); } -/** +/* * Disables the packet output. Does not affect any configuration. */ void cvmx_pko_disable(void) @@ -278,7 +278,7 @@ void cvmx_pko_disable(void) } EXPORT_SYMBOL_GPL(cvmx_pko_disable); -/** +/* * Reset the packet output. */ static void __cvmx_pko_reset(void) @@ -289,7 +289,7 @@ static void __cvmx_pko_reset(void) cvmx_write_csr(CVMX_PKO_REG_FLAGS, pko_reg_flags.u64); } -/** +/* * Shutdown and free resources required by packet output. */ void cvmx_pko_shutdown(void) @@ -320,7 +320,7 @@ void cvmx_pko_shutdown(void) } EXPORT_SYMBOL_GPL(cvmx_pko_shutdown); -/** +/* * Configure a output port and the associated queues for use. * * @port: Port to configure. @@ -548,7 +548,7 @@ cvmx_pko_status_t cvmx_pko_config_port(uint64_t port, uint64_t base_queue, } #ifdef PKO_DEBUG -/** +/* * Show map of ports -> queues for different cores. */ void cvmx_pko_show_queue_map() @@ -573,7 +573,7 @@ void cvmx_pko_show_queue_map() } #endif -/** +/* * Rate limit a PKO port to a max packets/sec. This function is only * supported on CN51XX and higher, excluding CN58XX. * @@ -606,7 +606,7 @@ int cvmx_pko_rate_limit_packets(int port, int packets_s, int burst) return 0; } -/** +/* * Rate limit a PKO port to a max bits/sec. This function is only * supported on CN51XX and higher, excluding CN58XX. * diff --git a/arch/mips/cavium-octeon/executive/cvmx-spi.c b/arch/mips/cavium-octeon/executive/cvmx-spi.c index f51957a3e915..eb9333e84a6b 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-spi.c +++ b/arch/mips/cavium-octeon/executive/cvmx-spi.c @@ -66,7 +66,7 @@ static cvmx_spi_callbacks_t cvmx_spi_callbacks = { .interface_up_cb = cvmx_spi_interface_up_cb }; -/** +/* * Get current SPI4 initialization callbacks * * @callbacks: Pointer to the callbacks structure.to fill @@ -78,7 +78,7 @@ void cvmx_spi_get_callbacks(cvmx_spi_callbacks_t *callbacks) memcpy(callbacks, &cvmx_spi_callbacks, sizeof(cvmx_spi_callbacks)); } -/** +/* * Set new SPI4 initialization callbacks * * @new_callbacks: Pointer to an updated callbacks structure. @@ -88,7 +88,7 @@ void cvmx_spi_set_callbacks(cvmx_spi_callbacks_t *new_callbacks) memcpy(&cvmx_spi_callbacks, new_callbacks, sizeof(cvmx_spi_callbacks)); } -/** +/* * Initialize and start the SPI interface. * * @interface: The identifier of the packet interface to configure and @@ -133,7 +133,7 @@ int cvmx_spi_start_interface(int interface, cvmx_spi_mode_t mode, int timeout, return res; } -/** +/* * This routine restarts the SPI interface after it has lost synchronization * with its correspondent system. 
* @@ -179,7 +179,7 @@ int cvmx_spi_restart_interface(int interface, cvmx_spi_mode_t mode, int timeout) } EXPORT_SYMBOL_GPL(cvmx_spi_restart_interface); -/** +/* * Callback to perform SPI4 reset * * @interface: The identifier of the packet interface to configure and @@ -294,7 +294,7 @@ int cvmx_spi_reset_cb(int interface, cvmx_spi_mode_t mode) return 0; } -/** +/* * Callback to setup calendar and miscellaneous settings before clock detection * * @interface: The identifier of the packet interface to configure and @@ -413,7 +413,7 @@ int cvmx_spi_calendar_setup_cb(int interface, cvmx_spi_mode_t mode, return 0; } -/** +/* * Callback to perform clock detection * * @interface: The identifier of the packet interface to configure and @@ -491,7 +491,7 @@ int cvmx_spi_clock_detect_cb(int interface, cvmx_spi_mode_t mode, int timeout) return 0; } -/** +/* * Callback to perform link training * * @interface: The identifier of the packet interface to configure and @@ -560,7 +560,7 @@ int cvmx_spi_training_cb(int interface, cvmx_spi_mode_t mode, int timeout) return 0; } -/** +/* * Callback to perform calendar data synchronization * * @interface: The identifier of the packet interface to configure and @@ -617,7 +617,7 @@ int cvmx_spi_calendar_sync_cb(int interface, cvmx_spi_mode_t mode, int timeout) return 0; } -/** +/* * Callback to handle interface up * * @interface: The identifier of the packet interface to configure and -- cgit From 64c888ce33602c66f1a043c1f7943e3d453186c2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 25 Jul 2021 09:02:57 -0700 Subject: mips: clean up kernel-doc in cavium-octeon/*.c Convert function comments to kernel-doc notation to remove kernel-doc warnings in arch/mips/cavium-octeon/*.c. Also clean up the comments in a few places. arch/mips/cavium-octeon/flash_setup.c:66: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Module/ driver initialization. -- arch/mips/cavium-octeon/setup.c:308: warning: expecting prototype for Return non zero if we are currently running in the Octeon simulator(). Prototype was for octeon_is_simulation() instead arch/mips/cavium-octeon/setup.c:314: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Return true if Octeon is in PCI Host mode. This means arch/mips/cavium-octeon/setup.c:334: warning: expecting prototype for Get the clock rate of Octeon(). Prototype was for octeon_get_clock_rate() instead arch/mips/cavium-octeon/setup.c:351: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Write to the LCD display connected to the bootbus. This display arch/mips/cavium-octeon/setup.c:380: warning: expecting prototype for Return the console uart passed by the bootloader(). Prototype was for octeon_get_boot_uart() instead arch/mips/cavium-octeon/setup.c:386: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Get the coremask Linux was booted on. arch/mips/cavium-octeon/setup.c:399: warning: expecting prototype for Check the hardware BIST results for a CPU(). Prototype was for octeon_check_cpu_bist() instead arch/mips/cavium-octeon/setup.c:432: warning: expecting prototype for Reboot Octeon(). Prototype was for octeon_restart() instead arch/mips/cavium-octeon/setup.c:452: warning: This comment starts with '/**', but isn't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst * Permanently stop a core. arch/mips/cavium-octeon/setup.c:475: warning: expecting prototype for Halt the system(). Prototype was for octeon_halt() instead arch/mips/cavium-octeon/setup.c:520: warning: expecting prototype for Return a string representing the system type(). Prototype was for octeon_board_type_string() instead arch/mips/cavium-octeon/setup.c:661: warning: expecting prototype for Early entry point for arch setup(). Prototype was for prom_init() instead -- arch/mips/cavium-octeon/smp.c:100: warning: Function parameter or member 'cpu' not described in 'octeon_send_ipi_single' arch/mips/cavium-octeon/smp.c:100: warning: Function parameter or member 'action' not described in 'octeon_send_ipi_single' arch/mips/cavium-octeon/smp.c:100: warning: expecting prototype for Cause the function described by call_data to be executed on the passed(). Prototype was for octeon_send_ipi_single() instead arch/mips/cavium-octeon/smp.c:119: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Detect available CPUs, populate cpu_possible_mask arch/mips/cavium-octeon/smp.c:210: warning: Function parameter or member 'cpu' not described in 'octeon_boot_secondary' arch/mips/cavium-octeon/smp.c:210: warning: Function parameter or member 'idle' not described in 'octeon_boot_secondary' arch/mips/cavium-octeon/smp.c:210: warning: expecting prototype for Firmware CPU startup hook(). Prototype was for octeon_boot_secondary() instead arch/mips/cavium-octeon/smp.c:236: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * After we've done initial boot, this function is called to allow the arch/mips/cavium-octeon/smp.c:258: warning: Function parameter or member 'max_cpus' not described in 'octeon_prepare_cpus' arch/mips/cavium-octeon/smp.c:258: warning: expecting prototype for Callout to firmware before smp_init(). Prototype was for octeon_prepare_cpus() instead arch/mips/cavium-octeon/smp.c:276: warning: expecting prototype for Last chance for the board code to finish SMP initialization before(). Prototype was for octeon_smp_finish() instead Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Aditya Srivastava Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Thomas Bogendoerfer --- arch/mips/cavium-octeon/flash_setup.c | 2 +- arch/mips/cavium-octeon/setup.c | 43 ++++++++++++++++++----------------- arch/mips/cavium-octeon/smp.c | 14 +++++------- 3 files changed, 29 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/flash_setup.c b/arch/mips/cavium-octeon/flash_setup.c index a5e8f4a784af..c8a8c6d359b9 100644 --- a/arch/mips/cavium-octeon/flash_setup.c +++ b/arch/mips/cavium-octeon/flash_setup.c @@ -62,7 +62,7 @@ static void octeon_flash_map_copy_to(struct map_info *map, unsigned long to, up(&octeon_bootbus_sem); } -/** +/* * Module/ driver initialization. 
* * Returns Zero on success diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 0ddd3cc16ee4..00bf269763cf 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -295,9 +295,10 @@ static int octeon_uart; extern asmlinkage void handle_int(void); /** - * Return non zero if we are currently running in the Octeon simulator + * octeon_is_simulation - Return non-zero if we are currently running + * in the Octeon simulator * - * Returns + * Return: non-0 if running in the Octeon simulator, 0 otherwise */ int octeon_is_simulation(void) { @@ -306,10 +307,10 @@ int octeon_is_simulation(void) EXPORT_SYMBOL(octeon_is_simulation); /** - * Return true if Octeon is in PCI Host mode. This means + * octeon_is_pci_host - Return true if Octeon is in PCI Host mode. This means * Linux can control the PCI bus. * - * Returns Non zero if Octeon in host mode. + * Return: Non-zero if Octeon is in host mode. */ int octeon_is_pci_host(void) { @@ -321,9 +322,9 @@ int octeon_is_pci_host(void) } /** - * Get the clock rate of Octeon + * octeon_get_clock_rate - Get the clock rate of Octeon * - * Returns Clock rate in HZ + * Return: Clock rate in HZ */ uint64_t octeon_get_clock_rate(void) { @@ -343,11 +344,11 @@ EXPORT_SYMBOL(octeon_get_io_clock_rate); /** - * Write to the LCD display connected to the bootbus. This display - * exists on most Cavium evaluation boards. If it doesn't exist, then - * this function doesn't do anything. - * + * octeon_write_lcd - Write to the LCD display connected to the bootbus. * @s: String to write + * + * This display exists on most Cavium evaluation boards. If it doesn't exist, + * then this function doesn't do anything. */ static void octeon_write_lcd(const char *s) { @@ -367,9 +368,9 @@ static void octeon_write_lcd(const char *s) } /** - * Return the console uart passed by the bootloader + * octeon_get_boot_uart - Return the console uart passed by the bootloader * - * Returns uart (0 or 1) + * Return: uart number (0 or 1) */ static int octeon_get_boot_uart(void) { @@ -378,9 +379,9 @@ static int octeon_get_boot_uart(void) } /** - * Get the coremask Linux was booted on. + * octeon_get_boot_coremask - Get the coremask Linux was booted on. * - * Returns Core mask + * Return: Core mask */ int octeon_get_boot_coremask(void) { @@ -388,7 +389,7 @@ int octeon_get_boot_coremask(void) } /** - * Check the hardware BIST results for a CPU + * octeon_check_cpu_bist - Check the hardware BIST results for a CPU */ void octeon_check_cpu_bist(void) { @@ -419,7 +420,7 @@ void octeon_check_cpu_bist(void) } /** - * Reboot Octeon + * octeon_restart - Reboot Octeon * * @command: Command to pass to the bootloader. Currently ignored. */ @@ -444,7 +445,7 @@ static void octeon_restart(char *command) /** - * Permanently stop a core. + * octeon_kill_core - Permanently stop a core. * * @arg: Ignored. 
*/ @@ -464,7 +465,7 @@ static void octeon_kill_core(void *arg) /** - * Halt the system + * octeon_halt - Halt the system */ static void octeon_halt(void) { @@ -507,9 +508,9 @@ static void __init init_octeon_system_type(void) } /** - * Return a string representing the system type + * octeon_board_type_string - Return a string representing the system type * - * Returns + * Return: system type string */ const char *octeon_board_type_string(void) { @@ -650,7 +651,7 @@ void octeon_user_io_init(void) } /** - * Early entry point for arch setup + * prom_init - Early entry point for arch setup */ void __init prom_init(void) { diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 66ce5527da54..89954f5f87fb 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -91,7 +91,7 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -/** +/* * Cause the function described by call_data to be executed on the passed * cpu. When the function has finished, increment the finished field of * call_data. @@ -115,7 +115,7 @@ static inline void octeon_send_ipi_mask(const struct cpumask *mask, octeon_send_ipi_single(i, action); } -/** +/* * Detect available CPUs, populate cpu_possible_mask */ static void octeon_smp_hotplug_setup(void) @@ -202,9 +202,8 @@ int plat_post_relocation(long offset) } #endif /* CONFIG_RELOCATABLE */ -/** +/* * Firmware CPU startup hook - * */ static int octeon_boot_secondary(int cpu, struct task_struct *idle) { @@ -232,7 +231,7 @@ static int octeon_boot_secondary(int cpu, struct task_struct *idle) return 0; } -/** +/* * After we've done initial boot, this function is called to allow the * board code to clean up state, if needed */ @@ -250,9 +249,8 @@ static void octeon_init_secondary(void) octeon_irq_setup_secondary(); } -/** +/* * Callout to firmware before smp_init - * */ static void __init octeon_prepare_cpus(unsigned int max_cpus) { @@ -268,7 +266,7 @@ static void __init octeon_prepare_cpus(unsigned int max_cpus) } } -/** +/* * Last chance for the board code to finish SMP initialization before * the CPU is "online". */ -- cgit From d2ac3a11cba23454516ba17f03e078a6831b8be5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 25 Jul 2021 09:02:58 -0700 Subject: mips: clean up kernel-doc in mm/c-octeon.c Clean up kernel-doc warnings in arch/mips/mm/c-octeon.c. arch/mips/mm/c-octeon.c:34: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Octeon automatically flushes the dcache on tlb changes, so arch/mips/mm/c-octeon.c:65: warning: expecting prototype for Flush caches as necessary for all cores affected by a(). Prototype was for octeon_flush_icache_all_cores() instead arch/mips/mm/c-octeon.c:99: warning: expecting prototype for Called to flush the icache on all cores(). Prototype was for octeon_flush_icache_all() instead arch/mips/mm/c-octeon.c:111: warning: expecting prototype for Called to flush all memory associated with a memory(). Prototype was for octeon_flush_cache_mm() instead arch/mips/mm/c-octeon.c:124: warning: Function parameter or member 'start' not described in 'octeon_flush_icache_range' arch/mips/mm/c-octeon.c:124: warning: Function parameter or member 'end' not described in 'octeon_flush_icache_range' arch/mips/mm/c-octeon.c:124: warning: expecting prototype for Flush a range of kernel addresses out of the icache(). 
Prototype was for octeon_flush_icache_range() instead arch/mips/mm/c-octeon.c:138: warning: expecting prototype for Flush a range out of a vma(). Prototype was for octeon_flush_cache_range() instead arch/mips/mm/c-octeon.c:153: warning: expecting prototype for Flush a specific page of a vma(). Prototype was for octeon_flush_cache_page() instead arch/mips/mm/c-octeon.c:164: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Probe Octeon's caches arch/mips/mm/c-octeon.c:264: warning: expecting prototype for Setup the Octeon cache flush routines(). Prototype was for octeon_cache_init() instead arch/mips/mm/c-octeon.c:349: warning: expecting prototype for Called when the the exception is not recoverable(). Prototype was for cache_parity_error_octeon_non_recoverable() instead Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Aditya Srivastava Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Thomas Bogendoerfer --- arch/mips/mm/c-octeon.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index 8ae181e08311..ec2ae501539a 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -30,7 +30,7 @@ unsigned long long cache_err_dcache[NR_CPUS]; EXPORT_SYMBOL_GPL(cache_err_dcache); -/** +/* * Octeon automatically flushes the dcache on tlb changes, so * from Linux's viewpoint it acts much like a physically * tagged cache. No flushing is needed @@ -56,8 +56,8 @@ static void local_octeon_flush_icache_range(unsigned long start, } /** - * Flush caches as necessary for all cores affected by a - * vma. If no vma is supplied, all cores are flushed. + * octeon_flush_icache_all_cores - Flush caches as necessary for all cores + * affected by a vma. If no vma is supplied, all cores are flushed. * * @vma: VMA to flush or NULL to flush all icaches. */ @@ -92,7 +92,7 @@ static void octeon_flush_icache_all_cores(struct vm_area_struct *vma) } -/** +/* * Called to flush the icache on all cores */ static void octeon_flush_icache_all(void) @@ -102,8 +102,7 @@ static void octeon_flush_icache_all(void) /** - * Called to flush all memory associated with a memory - * context. + * octeon_flush_cache_mm - flush all memory associated with a memory context. 
* * @mm: Memory context to flush */ @@ -116,7 +115,7 @@ static void octeon_flush_cache_mm(struct mm_struct *mm) } -/** +/* * Flush a range of kernel addresses out of the icache * */ @@ -127,11 +126,11 @@ static void octeon_flush_icache_range(unsigned long start, unsigned long end) /** - * Flush a range out of a vma + * octeon_flush_cache_range - Flush a range out of a vma * * @vma: VMA to flush - * @start: - * @end: + * @start: beginning address for flush + * @end: ending address for flush */ static void octeon_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) @@ -142,11 +141,11 @@ static void octeon_flush_cache_range(struct vm_area_struct *vma, /** - * Flush a specific page of a vma + * octeon_flush_cache_page - Flush a specific page of a vma * * @vma: VMA to flush page for * @page: Page to flush - * @pfn: + * @pfn: Page frame number */ static void octeon_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn) @@ -160,7 +159,7 @@ static void octeon_flush_kernel_vmap_range(unsigned long vaddr, int size) BUG(); } -/** +/* * Probe Octeon's caches * */ @@ -256,7 +255,7 @@ static void octeon_cache_error_setup(void) set_handler(0x100, &except_vec2_octeon, 0x80); } -/** +/* * Setup the Octeon cache flush routines * */ @@ -341,7 +340,7 @@ asmlinkage void cache_parity_error_octeon_recoverable(void) co_cache_error_call_notifiers(0); } -/** +/* * Called when the the exception is not recoverable */ -- cgit From a86aadeff2fe9203d9575f1c157d09b3c557d1ce Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 28 Jul 2021 11:26:12 +0100 Subject: MIPS: Alchemy: Fix spelling contraction "cant" -> "can't" There is a spelling mistake in a pr_warn message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Thomas Bogendoerfer --- arch/mips/alchemy/devboards/db1200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c index 421d651433b6..1864eb935ca5 100644 --- a/arch/mips/alchemy/devboards/db1200.c +++ b/arch/mips/alchemy/devboards/db1200.c @@ -835,7 +835,7 @@ int __init db1200_dev_setup(void) if (!IS_ERR(c)) { pfc = clk_round_rate(c, 50000000); if ((pfc < 1) || (abs(50000000 - pfc) > 2500000)) - pr_warn("DB1200: cant get I2C close to 50MHz\n"); + pr_warn("DB1200: can't get I2C close to 50MHz\n"); else clk_set_rate(c, pfc); clk_prepare_enable(c); -- cgit From a449ffaf9181b5a2dc705d8a06b13e0068207fd4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Jul 2021 12:42:31 +0100 Subject: powerpc/svm: Don't issue ultracalls if !mem_encrypt_active() Commit ad6c00283163 ("swiotlb: Free tbl memory in swiotlb_exit()") introduced a set_memory_encrypted() call to swiotlb_exit() so that the buffer pages are returned to an encrypted state prior to being freed. Sachin reports that this leads to the following crash on a Power server: [ 0.010799] software IO TLB: tearing down default memory pool [ 0.010805] ------------[ cut here ]------------ [ 0.010808] kernel BUG at arch/powerpc/kernel/interrupt.c:98! Nick spotted that this is because set_memory_encrypted() is issuing an ultracall which doesn't exist for the processor, and should therefore be gated by mem_encrypt_active() to mirror the x86 implementation. 
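For illustration, the shape of the fix is the common "feature-inactive fast exit" guard: return before any ultracall can be issued when memory encryption is not active. A condensed sketch (not the patch itself; the uv_unshare_page() call shown is assumed from the surrounding svm.c code):

	int set_memory_encrypted(unsigned long addr, int numpages)
	{
		if (!mem_encrypt_active())	/* no ultravisor: must be a no-op */
			return 0;

		if (!PAGE_ALIGNED(addr))
			return -EINVAL;

		uv_unshare_page(PHYS_PFN(__pa(addr)), numpages);
		return 0;
	}

The actual diff follows.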
Cc: Konrad Rzeszutek Wilk Cc: Claire Chang Cc: Christoph Hellwig Cc: Robin Murphy Fixes: ad6c00283163 ("swiotlb: Free tbl memory in swiotlb_exit()") Suggested-by: Nicholas Piggin Reported-by: Sachin Sant Tested-by: Sachin Sant Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/1905CD70-7656-42AE-99E2-A31FC3812EAC@linux.vnet.ibm.com/ Signed-off-by: Will Deacon Signed-off-by: Konrad Rzeszutek Wilk --- arch/powerpc/platforms/pseries/svm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index 1d829e257996..87f001b4c4e4 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -63,6 +63,9 @@ void __init svm_swiotlb_init(void) int set_memory_encrypted(unsigned long addr, int numpages) { + if (!mem_encrypt_active()) + return 0; + if (!PAGE_ALIGNED(addr)) return -EINVAL; @@ -73,6 +76,9 @@ int set_memory_encrypted(unsigned long addr, int numpages) int set_memory_decrypted(unsigned long addr, int numpages) { + if (!mem_encrypt_active()) + return 0; + if (!PAGE_ALIGNED(addr)) return -EINVAL; -- cgit From 6011cf68c88545e16cb32039c2cecfdae6a32315 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Jul 2021 16:35:48 +0100 Subject: KVM: arm64: Walk userspace page tables to compute the THP mapping size We currently rely on the kvm_is_transparent_hugepage() helper to discover whether a given page has the potential to be mapped as a block mapping. However, this API doesn't really give us everything we want: - we don't get the size: this is not crucial today as we only support PMD-sized THPs, but we'd like to have larger sizes in the future - we're the only user left of the API, and there is a will to remove it altogether To address the above, implement a simple walker using the existing page table infrastructure, and plumb it into transparent_hugepage_adjust(). No new page sizes are supported in the process. Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210726153552.1535838-3-maz@kernel.org --- arch/arm64/kvm/mmu.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0625bf2353c2..183c107c06b2 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -433,6 +433,32 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, return 0; } +static struct kvm_pgtable_mm_ops kvm_user_mm_ops = { + /* We shouldn't need any other callback to walk the PT */ + .phys_to_virt = kvm_host_va, +}; + +static int get_user_mapping_size(struct kvm *kvm, u64 addr) +{ + struct kvm_pgtable pgt = { + .pgd = (kvm_pte_t *)kvm->mm->pgd, + .ia_bits = VA_BITS, + .start_level = (KVM_PGTABLE_MAX_LEVELS - + CONFIG_PGTABLE_LEVELS), + .mm_ops = &kvm_user_mm_ops, + }; + kvm_pte_t pte = 0; /* Keep GCC quiet... */ + u32 level = ~0; + int ret; + + ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level); + VM_BUG_ON(ret); + VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS); + VM_BUG_ON(!(pte & PTE_VALID)); + + return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)); +} + static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { .zalloc_page = stage2_memcache_zalloc_page, .zalloc_pages_exact = kvm_host_zalloc_pages_exact, @@ -780,7 +806,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot, * Returns the size of the mapping.
*/ static unsigned long -transparent_hugepage_adjust(struct kvm_memory_slot *memslot, +transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long hva, kvm_pfn_t *pfnp, phys_addr_t *ipap) { @@ -791,8 +817,8 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot, * sure that the HVA and IPA are sufficiently aligned and that the * block map is contained within the memslot. */ - if (kvm_is_transparent_hugepage(pfn) && - fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { + if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) && + get_user_mapping_size(kvm, hva) >= PMD_SIZE) { /* * The address we faulted on is backed by a transparent huge * page. However, because we map the compound huge page and @@ -1051,7 +1077,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * backed by a THP and thus use block mapping if possible. */ if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) - vma_pagesize = transparent_hugepage_adjust(memslot, hva, + vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, &pfn, &fault_ipa); if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { -- cgit From f2cc327303b13a70311e823bd52aa0bca8c7ddbc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Jul 2021 16:35:49 +0100 Subject: KVM: arm64: Avoid mapping size adjustment on permission fault Since we only support PMD-sized mappings for THP, getting a permission fault on a level that results in a mapping being larger than PAGE_SIZE is a sure indication that we have already upgraded our mapping to a PMD. In this case, there is no need to try and parse userspace page tables, as the fault information already tells us everything. Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210726153552.1535838-4-maz@kernel.org --- arch/arm64/kvm/mmu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 183c107c06b2..116a2910a4a4 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1076,9 +1076,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * If we are not forced to use page mapping, check if we are * backed by a THP and thus use block mapping if possible. */ - if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) - vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, - &pfn, &fault_ipa); + if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) { + if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE) + vma_pagesize = fault_granule; + else + vma_pagesize = transparent_hugepage_adjust(kvm, memslot, + hva, &pfn, + &fault_ipa); + } if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { /* Check the VMM hasn't introduced a new VM_SHARED VMA */ -- cgit From 0fe49630101b3ce23bd21a2788440ac719ec868a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Jul 2021 16:35:51 +0100 Subject: KVM: arm64: Use get_page() instead of kvm_get_pfn() When mapping a THP, we are guaranteed that the page isn't reserved, and we can safely avoid the kvm_is_reserved_pfn() call. Replace kvm_get_pfn() with get_page(pfn_to_page()). 
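As a hedged aside, the generic helper being bypassed looked roughly like this in contemporary KVM (paraphrased, not part of this patch):

	void kvm_get_pfn(kvm_pfn_t pfn)
	{
		/* skip the refcount for reserved PFNs without a refcounted page */
		if (!kvm_is_reserved_pfn(pfn))
			get_page(pfn_to_page(pfn));
	}

A THP-backed PFN always has a regular, refcounted struct page behind it, so the reserved-PFN check is dead weight and get_page(pfn_to_page(pfn)) can be called directly.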
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210726153552.1535838-6-maz@kernel.org --- arch/arm64/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 116a2910a4a4..6e002d30a478 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -840,7 +840,7 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, *ipap &= PMD_MASK; kvm_release_pfn_clean(pfn); pfn &= ~(PTRS_PER_PMD - 1); - kvm_get_pfn(pfn); + get_page(pfn_to_page(pfn)); *pfnp = pfn; return PMD_SIZE; -- cgit From 63db506e07622c344a3c748a1c06293d48780f83 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Jul 2021 16:35:47 +0100 Subject: KVM: arm64: Introduce helper to retrieve a PTE and its level It is becoming a common need to fetch the PTE for a given address together with its level. Add such a helper. Signed-off-by: Marc Zyngier Reviewed-by: Quentin Perret Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210726153552.1535838-2-maz@kernel.org --- arch/arm64/include/asm/kvm_pgtable.h | 20 ++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 39 ++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index f004c0115d89..e42b55bd50a2 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -432,6 +432,26 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker *walker); +/** + * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry + * with its level. + * @pgt: Page-table structure initialised by kvm_pgtable_*_init() + * or a similar initialiser. + * @addr: Input address for the start of the walk. + * @ptep: Pointer to storage for the retrieved PTE. + * @level: Pointer to storage for the level of the retrieved PTE. + * + * The offset of @addr within a page is ignored. + * + * The walker will walk the page-table entries corresponding to the input + * address specified, retrieving the leaf corresponding to this address. + * Invalid entries are treated as leaf entries. + * + * Return: 0 on success, negative error code on failure. 
+ */ +int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, + kvm_pte_t *ptep, u32 *level); + /** * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical * Addresses with compatible permission diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 05321f4165e3..78f36bd5df6c 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -326,6 +326,45 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, return _kvm_pgtable_walk(&walk_data); } +struct leaf_walk_data { + kvm_pte_t pte; + u32 level; +}; + +static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, void * const arg) +{ + struct leaf_walk_data *data = arg; + + data->pte = *ptep; + data->level = level; + + return 0; +} + +int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, + kvm_pte_t *ptep, u32 *level) +{ + struct leaf_walk_data data; + struct kvm_pgtable_walker walker = { + .cb = leaf_walker, + .flags = KVM_PGTABLE_WALK_LEAF, + .arg = &data, + }; + int ret; + + ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE), + PAGE_SIZE, &walker); + if (!ret) { + if (ptep) + *ptep = data.pte; + if (level) + *level = data.level; + } + + return ret; +} + struct hyp_map_data { u64 phys; kvm_pte_t attr; -- cgit From 0ab410a93d627ae73136d1a52c096262360b7992 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Jul 2021 13:38:59 +0100 Subject: KVM: arm64: Narrow PMU sysreg reset values to architectural requirements A number of the PMU sysregs expose reset values that are not compliant with the architecture (set bits in the RES0 ranges, for example). This in turn has the effect that we need to pointlessly mask some register fields when using them. Let's start by making sure we don't have illegal values in the shadow registers at reset time. This affects all the registers that dedicate one bit per counter, the counters themselves, PMEVTYPERn_EL0 and PMSELR_EL0. Reported-by: Alexandre Chartre Reviewed-by: Alexandre Chartre Acked-by: Russell King (Oracle) Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210719123902.1493805-2-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f6f126eb6ac1..96bdfa0e68b2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -603,6 +603,41 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN; } +static void reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 n, mask = BIT(ARMV8_PMU_CYCLE_IDX); + + /* No PMU available, any PMU reg may UNDEF... 
*/ + if (!kvm_arm_support_pmu_v3()) + return; + + n = read_sysreg(pmcr_el0) >> ARMV8_PMU_PMCR_N_SHIFT; + n &= ARMV8_PMU_PMCR_N_MASK; + if (n) + mask |= GENMASK(n - 1, 0); + + reset_unknown(vcpu, r); + __vcpu_sys_reg(vcpu, r->reg) &= mask; +} + +static void reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + reset_unknown(vcpu, r); + __vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0); +} + +static void reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + reset_unknown(vcpu, r); + __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_EVTYPE_MASK; +} + +static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + reset_unknown(vcpu, r); + __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_COUNTER_MASK; +} + static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 pmcr, val; @@ -944,16 +979,18 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr } #define PMU_SYS_REG(r) \ - SYS_DESC(r), .reset = reset_unknown, .visibility = pmu_visibility + SYS_DESC(r), .reset = reset_pmu_reg, .visibility = pmu_visibility /* Macro to expand the PMEVCNTRn_EL0 register */ #define PMU_PMEVCNTR_EL0(n) \ { PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \ + .reset = reset_pmevcntr, \ .access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), } /* Macro to expand the PMEVTYPERn_EL0 register */ #define PMU_PMEVTYPER_EL0(n) \ { PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)), \ + .reset = reset_pmevtyper, \ .access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), } static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p, @@ -1595,13 +1632,13 @@ static const struct sys_reg_desc sys_reg_descs[] = { { PMU_SYS_REG(SYS_PMSWINC_EL0), .access = access_pmswinc, .reg = PMSWINC_EL0 }, { PMU_SYS_REG(SYS_PMSELR_EL0), - .access = access_pmselr, .reg = PMSELR_EL0 }, + .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 }, { PMU_SYS_REG(SYS_PMCEID0_EL0), .access = access_pmceid, .reset = NULL }, { PMU_SYS_REG(SYS_PMCEID1_EL0), .access = access_pmceid, .reset = NULL }, { PMU_SYS_REG(SYS_PMCCNTR_EL0), - .access = access_pmu_evcntr, .reg = PMCCNTR_EL0 }, + .access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 }, { PMU_SYS_REG(SYS_PMXEVTYPER_EL0), .access = access_pmu_evtyper, .reset = NULL }, { PMU_SYS_REG(SYS_PMXEVCNTR_EL0), -- cgit From f5eff40058a856c23c5ec2f31756f107a2b1ef84 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Jul 2021 13:39:00 +0100 Subject: KVM: arm64: Drop unnecessary masking of PMU registers We always sanitise our PMU sysreg on the write side, so there is no need to do it on the read side as well. Drop the unnecessary masking. 
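A minimal sketch of the invariant being relied on, with 'mask' standing for kvm_pmu_valid_counter_mask(vcpu) as used elsewhere in this file (illustrative only, not the kernel source):

	/* Write side: the guest-supplied value is sanitised once, at store time... */
	val = p->regval & mask;
	__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;

	/* ...so the read side may return the shadow register verbatim: */
	p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);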
Acked-by: Russell King (Oracle) Reviewed-by: Alexandre Chartre Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210719123902.1493805-3-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 3 +-- arch/arm64/kvm/sys_regs.c | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index f33825c995cb..fae4e95b586c 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -373,7 +373,6 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); - reg &= kvm_pmu_valid_counter_mask(vcpu); } return reg; @@ -569,7 +568,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); } else { kvm_pmu_disable_counter_mask(vcpu, mask); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 96bdfa0e68b2..f22139658e48 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -880,7 +880,7 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, kvm_pmu_disable_counter_mask(vcpu, val); } } else { - p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); } return true; @@ -904,7 +904,7 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, /* accessing PMINTENCLR_EL1 */ __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; } else { - p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1); } return true; @@ -926,7 +926,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, /* accessing PMOVSCLR_EL0 */ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); } else { - p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); } return true; -- cgit From ca4f202d08ba7f24cc97dce14c6d20ec7a679135 Mon Sep 17 00:00:00 2001 From: Alexandre Chartre Date: Mon, 19 Jul 2021 13:39:01 +0100 Subject: KVM: arm64: Disabling disabled PMU counters wastes a lot of time In a KVM guest on arm64, performance counter interrupts have an unnecessary overhead which slows down execution when using the "perf record" command and limits the "perf record" sampling period. The problem is that when a guest VM disables counters by clearing the PMCR_EL0.E bit (bit 0), KVM will disable all counters defined in PMCR_EL0 even if they are not enabled in PMCNTENSET_EL0. KVM disables a counter by calling into the perf framework, in particular by calling perf_event_create_kernel_counter() which is a time-consuming operation. So, for example, with a Neoverse N1 CPU core which has 6 event counters and one cycle counter, KVM will always disable all 7 counters even if only one is enabled. This typically happens when using the "perf record" command in a guest VM: perf will disable all event counters with PMCNTENSET_EL0 and only use the cycle counter. And when using the "perf record" -F option with a high profiling frequency, the overhead of KVM disabling all counters instead of one on every counter interrupt becomes very noticeable. The problem is fixed by having KVM disable only counters which are enabled in PMCNTENSET_EL0.
If a counter is not enabled in PMCNTENSET_EL0 then KVM will not enable it when setting PMCR_EL0.E and it will remain disabled as long as it is not enabled in PMCNTENSET_EL0. So there is effectively no need to disable a counter when clearing PMCR_EL0.E if it is not enabled in PMCNTENSET_EL0. Acked-by: Russell King (Oracle) Reviewed-by: Alexandru Elisei Signed-off-by: Alexandre Chartre [maz: moved 'mask' close to the actual user, simplifying the patch] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210712170345.660272-1-alexandre.chartre@oracle.com Link: https://lore.kernel.org/r/20210719123902.1493805-4-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index fae4e95b586c..dc65b58dc68f 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -563,20 +563,21 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) */ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) { - unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); int i; if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); } else { - kvm_pmu_disable_counter_mask(vcpu, mask); + kvm_pmu_disable_counter_mask(vcpu, + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); } if (val & ARMV8_PMU_PMCR_C) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); if (val & ARMV8_PMU_PMCR_P) { + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) kvm_pmu_set_counter_value(vcpu, i, 0); -- cgit From 7a3ba3095a32f9c4ec8f30d680fea5150e12c3f3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Jul 2021 13:39:02 +0100 Subject: KVM: arm64: Remove PMSWINC_EL0 shadow register We keep an entry for the PMSWINC_EL0 register in the vcpu structure, while *never* writing anything there outside of reset. Given that the register is defined as write-only and that we always trap when it is accessed, there is little point in saving anything anyway. Get rid of the entry, and save a mighty 8 bytes per vcpu structure. We still need to keep it exposed to userspace in order to preserve backward compatibility with previously saved VMs. Since userspace cannot expect any effect of writing to PMSWINC_EL0, treat the register as RAZ/WI for the purpose of userspace access. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210719123902.1493805-5-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/sys_regs.c | 21 ++++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 41911585ae0c..afc169630884 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -185,7 +185,6 @@ enum vcpu_sysreg { PMCNTENSET_EL0, /* Count Enable Set Register */ PMINTENSET_EL1, /* Interrupt Enable Set Register */ PMOVSSET_EL0, /* Overflow Flag Status Set Register */ - PMSWINC_EL0, /* Software Increment Register */ PMUSERENR_EL0, /* User Enable Register */ /* Pointer Authentication Registers in a strict increasing order.
*/ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f22139658e48..a1f5101f49a3 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1286,6 +1286,20 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return __set_id_reg(vcpu, rd, uaddr, true); } +static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + int err; + u64 val; + + /* Perform the access even if we are going to ignore the value */ + err = reg_from_user(&val, uaddr, sys_reg_to_index(rd)); + if (err) + return err; + + return 0; +} + static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { @@ -1629,8 +1643,13 @@ static const struct sys_reg_desc sys_reg_descs[] = { .access = access_pmcnten, .reg = PMCNTENSET_EL0 }, { PMU_SYS_REG(SYS_PMOVSCLR_EL0), .access = access_pmovs, .reg = PMOVSSET_EL0 }, + /* + * PM_SWINC_EL0 is exposed to userspace as RAZ/WI, as it was + * previously (and pointlessly) advertised in the past... + */ { PMU_SYS_REG(SYS_PMSWINC_EL0), - .access = access_pmswinc, .reg = PMSWINC_EL0 }, + .get_user = get_raz_id_reg, .set_user = set_wi_reg, + .access = access_pmswinc, .reset = NULL }, { PMU_SYS_REG(SYS_PMSELR_EL0), .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 }, { PMU_SYS_REG(SYS_PMCEID0_EL0), -- cgit From 673692735fdc40ed7da32c0cb3517adaf4227b2b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Jul 2021 15:04:25 -0700 Subject: KVM: x86: Use KVM_BUG/KVM_BUG_ON to handle bugs that are fatal to the VM Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Reviewed-by: Paolo Bonzini Message-Id: <0e8760a26151f47dc47052b25ca8b84fffe0641e.1625186503.git.isaku.yamahata@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/vmx/vmx.c | 23 ++++++++++++++--------- arch/x86/kvm/x86.c | 4 ++++ 3 files changed, 19 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e8ccab50ebf6..4ce6d827fccd 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1560,7 +1560,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); break; default: - WARN_ON_ONCE(1); + KVM_BUG_ON(1, vcpu->kvm); } } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 927a552393b9..fb1ac33a9902 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2274,7 +2274,7 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits; break; default: - WARN_ON_ONCE(1); + KVM_BUG_ON(1, vcpu->kvm); break; } } @@ -4996,6 +4996,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) return kvm_complete_insn_gp(vcpu, err); case 3: WARN_ON_ONCE(enable_unrestricted_guest); + err = kvm_set_cr3(vcpu, val); return kvm_complete_insn_gp(vcpu, err); case 4: @@ -5021,14 +5022,13 @@ static int handle_cr(struct kvm_vcpu *vcpu) } break; case 2: /* clts */ - WARN_ONCE(1, "Guest should always own CR0.TS"); - vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); - trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); - return kvm_skip_emulated_instruction(vcpu); + KVM_BUG(1, vcpu->kvm, "Guest always owns CR0.TS"); + return -EIO; case 1: /*mov from cr*/ switch (cr) { case 3: WARN_ON_ONCE(enable_unrestricted_guest); + val = kvm_read_cr3(vcpu); kvm_register_write(vcpu, reg, val); 
trace_kvm_cr_read(cr, val); @@ -5338,7 +5338,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) static int handle_nmi_window(struct kvm_vcpu *vcpu) { - WARN_ON_ONCE(!enable_vnmi); + if (KVM_BUG_ON(!enable_vnmi, vcpu->kvm)) + return -EIO; + exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); ++vcpu->stat.nmi_window_exits; kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -5896,7 +5898,8 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) * below) should never happen as that means we incorrectly allowed a * nested VM-Enter with an invalid vmcs12. */ - WARN_ON_ONCE(vmx->nested.nested_run_pending); + if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm)) + return -EIO; /* If guest state is invalid, start emulating */ if (vmx->emulation_required) @@ -6274,7 +6277,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) int max_irr; bool max_irr_updated; - WARN_ON(!vcpu->arch.apicv_active); + if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm)) + return -EIO; + if (pi_test_on(&vmx->pi_desc)) { pi_clear_on(&vmx->pi_desc); /* @@ -6357,7 +6362,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK; gate_desc *desc = (gate_desc *)host_idt_base + vector; - if (WARN_ONCE(!is_external_intr(intr_info), + if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm, "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) return; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e5d5c5ed7dd4..338844b82c4c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9395,6 +9395,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) { + r = -EIO; + goto out; + } if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { r = 0; -- cgit From 19025e7bc5977b35c73bea99841245f93470105e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Jul 2021 15:04:26 -0700 Subject: KVM: x86/mmu: Mark VM as bugged if page fault returns RET_PF_INVALID Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Reviewed-by: Paolo Bonzini Message-Id: <298980aa5fc5707184ac082287d13a800cd9c25f.1625186503.git.isaku.yamahata@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 66f7f5bc3482..ce1d955a0536 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5134,7 +5134,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, if (r == RET_PF_INVALID) { r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, lower_32_bits(error_code), false); - if (WARN_ON_ONCE(r == RET_PF_INVALID)) + if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm)) return -EIO; } -- cgit From e489a4a6bddb83385cb3d896776c5a7b2ec653d8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Jul 2021 15:04:29 -0700 Subject: KVM: x86: Hoist kvm_dirty_regs check out of sync_regs() Move the kvm_dirty_regs vs. KVM_SYNC_X86_VALID_FIELDS check out of sync_regs() and into its sole caller, kvm_arch_vcpu_ioctl_run(). This allows a future patch to allow synchronizing select state for protected VMs. 
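A sketch of the resulting combined check at the top of KVM_RUN (field names as in the diff below; error handling condensed to a bare return):

	/*
	 * Reject undefined bits in both bitmaps in one place, before the
	 * guest is entered, so sync_regs() no longer has to validate
	 * kvm_dirty_regs itself.
	 */
	if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) ||
	    (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS))
		return -EINVAL;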
Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Reviewed-by: Paolo Bonzini Message-Id: <889017a8d31cea46472e0c64b234ef5919278ed9.1625186503.git.isaku.yamahata@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 338844b82c4c..e906c05e530e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9980,7 +9980,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) goto out; } - if (kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) { + if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) || + (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)) { r = -EINVAL; goto out; } @@ -10585,9 +10586,6 @@ static void store_regs(struct kvm_vcpu *vcpu) static int sync_regs(struct kvm_vcpu *vcpu) { - if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS) - return -EINVAL; - if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) { __set_regs(vcpu, &vcpu->run->s.regs.regs); vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS; -- cgit From 03fffc5493c8c8d850586df5740c985026c313bf Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Jul 2021 15:04:50 -0700 Subject: KVM: x86/mmu: Refactor shadow walk in __direct_map() to reduce indentation Employ a 'continue' to reduce the indentation for linking a new shadow page during __direct_map() in preparation for linking private pages. Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Reviewed-by: Paolo Bonzini Message-Id: <702419686d5700373123f6ea84e7a946c2cad8b4.1625186503.git.isaku.yamahata@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index ce1d955a0536..85c70492f863 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2939,15 +2939,16 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, break; drop_large_spte(vcpu, it.sptep); - if (!is_shadow_present_pte(*it.sptep)) { - sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr, - it.level - 1, true, ACC_ALL); - - link_shadow_page(vcpu, it.sptep, sp); - if (is_tdp && huge_page_disallowed && - req_level >= it.level) - account_huge_nx_page(vcpu->kvm, sp); - } + if (is_shadow_present_pte(*it.sptep)) + continue; + + sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr, + it.level - 1, true, ACC_ALL); + + link_shadow_page(vcpu, it.sptep, sp); + if (is_tdp && huge_page_disallowed && + req_level >= it.level) + account_huge_nx_page(vcpu->kvm, sp); } ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL, -- cgit From 7fa2a347512ab06ea1558302564879c060d52b0c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Jul 2021 15:04:51 -0700 Subject: KVM: x86/mmu: Return old SPTE from mmu_spte_clear_track_bits() Return the old SPTE when clearing a SPTE and push the "old SPTE present" check to the caller. Private shadow page support will use the old SPTE in rmap_remove() to determine whether or not there is a linked private shadow page. 
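Sketched with the names used in the diff below, the new caller contract is:

	/*
	 * The helper now hands back the old PTE instead of a
	 * present/absent flag; the caller keys rmap bookkeeping off it.
	 */
	u64 old_spte = mmu_spte_clear_track_bits(sptep);

	if (is_shadow_present_pte(old_spte))
		rmap_remove(kvm, sptep);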
Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Reviewed-by: Paolo Bonzini Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 85c70492f863..d9e3ac6dda05 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -592,9 +592,9 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) * Rules for using mmu_spte_clear_track_bits: * It sets the sptep from present to nonpresent, and track the * state bits, it is used to clear the last level sptep. - * Returns non-zero if the PTE was previously valid. + * Returns the old PTE. */ -static int mmu_spte_clear_track_bits(u64 *sptep) +static u64 mmu_spte_clear_track_bits(u64 *sptep) { kvm_pfn_t pfn; u64 old_spte = *sptep; @@ -605,7 +605,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep) old_spte = __update_clear_spte_slow(sptep, 0ull); if (!is_shadow_present_pte(old_spte)) - return 0; + return old_spte; pfn = spte_to_pfn(old_spte); @@ -622,7 +622,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep) if (is_dirty_spte(old_spte)) kvm_set_pfn_dirty(pfn); - return 1; + return old_spte; } /* @@ -1119,7 +1119,9 @@ out: static void drop_spte(struct kvm *kvm, u64 *sptep) { - if (mmu_spte_clear_track_bits(sptep)) + u64 old_spte = mmu_spte_clear_track_bits(sptep); + + if (is_shadow_present_pte(old_spte)) rmap_remove(kvm, sptep); } -- cgit From ec1cf69c376970f42761e641cf5074b84f8db243 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 25 Jun 2021 11:32:06 -0400 Subject: KVM: X86: Add per-vm stat for max rmap list size Add a new statistic max_mmu_rmap_size, which stores the maximum size of rmap for the vm. Signed-off-by: Peter Xu Message-Id: <20210625153214.43106-2-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu/mmu.c | 2 ++ arch/x86/kvm/x86.c | 1 + 3 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 974cbfb1eefe..d798650ad793 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1209,6 +1209,7 @@ struct kvm_vm_stat { u64 lpages; u64 nx_lpage_splits; u64 max_mmu_page_hash_collisions; + u64 max_mmu_rmap_size; }; struct kvm_vcpu_stat { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d9e3ac6dda05..0b2954b5fbc6 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2698,6 +2698,8 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (is_shadow_present_pte(*sptep)) { if (!was_rmapped) { rmap_count = rmap_add(vcpu, sptep, gfn); + if (rmap_count > vcpu->kvm->stat.max_mmu_rmap_size) + vcpu->kvm->stat.max_mmu_rmap_size = rmap_count; if (rmap_count > RMAP_RECYCLE_THRESHOLD) rmap_recycle(vcpu, sptep, gfn); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e906c05e530e..ab200f2cb1f0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -235,6 +235,7 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_ICOUNTER(VM, mmu_unsync), STATS_DESC_ICOUNTER(VM, lpages), STATS_DESC_ICOUNTER(VM, nx_lpage_splits), + STATS_DESC_PCOUNTER(VM, max_mmu_rmap_size), STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions) }; static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == -- cgit From 38f703663d4c82ead5b51b8860deeef19d6dcb6d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 27 Jul 2021 12:32:51 +0200 Subject: KVM: arm64: Count VMID-wide TLB 
invalidations KVM/ARM has an architecture-specific implementation of kvm_flush_remote_tlbs; however, unlike the generic one, it does not count the flushes in kvm->stat.remote_tlb_flush, so it inexorably remained stuck at zero. Signed-off-by: Paolo Bonzini Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210727103251.16561-1-pbonzini@redhat.com --- arch/arm64/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0625bf2353c2..2ca0a494a111 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -81,6 +81,7 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) void kvm_flush_remote_tlbs(struct kvm *kvm) { kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); + ++kvm->stat.generic.remote_tlb_flush; } static bool kvm_is_device_pfn(unsigned long pfn) -- cgit From 013cc4c6788f1ce9885d3c0281904f93ee8f2271 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 28 Jul 2021 21:06:23 +0800 Subject: KVM: arm64: Fix comments related to GICv2 PMR reporting Remove the repeated word 'the' from two comments. Signed-off-by: Jason Wang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210728130623.12017-1-wangborong@cdjrlc.com --- arch/arm64/kvm/vgic/vgic-mmio-v2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index a016f07adc28..5f9014ae595b 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -282,7 +282,7 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, case GIC_CPU_PRIMASK: /* * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the - * the PMR field as GICH_VMCR.VMPriMask rather than + * PMR field as GICH_VMCR.VMPriMask rather than * GICC_PMR.Priority, so we expose the upper five bits of * priority mask to userspace using the lower bits in the * unsigned long. @@ -329,7 +329,7 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, case GIC_CPU_PRIMASK: /* * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the - * the PMR field as GICH_VMCR.VMPriMask rather than + * PMR field as GICH_VMCR.VMPriMask rather than * GICC_PMR.Priority, so we expose the upper five bits of * priority mask to userspace using the lower bits in the * unsigned long. -- cgit From 1694caef426247bb406c2b04672336ef77b5fb87 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 1 Jul 2021 17:41:01 +0200 Subject: x86/kvm: remove non-x86 stuff from arch/x86/kvm/ioapic.h The file has been moved to arch/x86 a long time ago. Time to get rid of non-x86 stuff. Signed-off-by: Juergen Gross Message-Id: <20210701154105.23215-3-jgross@suse.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 11e4065e1617..bbd4a5d18b5d 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -35,11 +35,7 @@ struct kvm_vcpu; #define IOAPIC_INIT 0x5 #define IOAPIC_EXTINT 0x7 -#ifdef CONFIG_X86 #define RTC_GSI 8 -#else #define RTC_GSI -1U -#endif struct dest_map { /* vcpu bitmap where IRQ has been sent */ -- cgit From 76cd325ea75bb2a84c329782d7b8015b6a970c34 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 13 Jul 2021 22:09:52 +0000 Subject: KVM: x86/mmu: Rename cr2_or_gpa to gpa in fast_page_fault fast_page_fault is only called from direct_page_fault where we know the address is a gpa.
Fixes: 736c291c9f36 ("KVM: x86: Use gpa_t for cr2/gpa to fix TDP support on 32-bit KVM") Reviewed-by: Ben Gardon Reviewed-by: Sean Christopherson Signed-off-by: David Matlack Message-Id: <20210713220957.3493520-2-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 0b2954b5fbc6..6f5910b7b9bc 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3103,8 +3103,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte) /* * Returns one of RET_PF_INVALID, RET_PF_FIXED or RET_PF_SPURIOUS. */ -static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - u32 error_code) +static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code) { struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; @@ -3120,7 +3119,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, do { u64 new_spte; - for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte) + for_each_shadow_entry_lockless(vcpu, gpa, iterator, spte) if (!is_shadow_present_pte(spte)) break; @@ -3199,8 +3198,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, } while (true); - trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep, - spte, ret); + trace_fast_page_fault(vcpu, gpa, error_code, iterator.sptep, spte, ret); walk_shadow_page_lockless_end(vcpu); return ret; -- cgit From 61bcd360aa9851cec969b7b40f23fd1faa7f85a4 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 13 Jul 2021 22:09:53 +0000 Subject: KVM: x86/mmu: Fix use of enums in trace_fast_page_fault Enum values have to be exported to userspace since the formatting is not done in the kernel. Without doing this perf maps RET_PF_FIXED and RET_PF_SPURIOUS to 0, which results in incorrect output: $ perf record -a -e kvmmmu:fast_page_fault --filter "ret==3" -- ./access_tracking_perf_test $ perf script | head -1 [...] new 610006048d25877 spurious 0 fixed 0 <------ should be 1 Fix this by exporting the enum values to userspace with TRACE_DEFINE_ENUM. Fixes: c4371c2a682e ("KVM: x86/mmu: Return unique RET_PF_* values if the fault was fixed") Signed-off-by: David Matlack Message-Id: <20210713220957.3493520-3-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu_internal.h | 3 +++ arch/x86/kvm/mmu/mmutrace.h | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 35567293c1fd..626cb848dab4 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -140,6 +140,9 @@ void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, * RET_PF_INVALID: the spte is invalid, let the real page fault path update it. * RET_PF_FIXED: The faulting entry has been fixed. * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU. 
+ * + * Any names added to this enum should be exported to userspace for use in + * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h */ enum { RET_PF_RETRY = 0, diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h index efbad33a0645..2924a4081a19 100644 --- a/arch/x86/kvm/mmu/mmutrace.h +++ b/arch/x86/kvm/mmu/mmutrace.h @@ -54,6 +54,12 @@ { PFERR_RSVD_MASK, "RSVD" }, \ { PFERR_FETCH_MASK, "F" } +TRACE_DEFINE_ENUM(RET_PF_RETRY); +TRACE_DEFINE_ENUM(RET_PF_EMULATE); +TRACE_DEFINE_ENUM(RET_PF_INVALID); +TRACE_DEFINE_ENUM(RET_PF_FIXED); +TRACE_DEFINE_ENUM(RET_PF_SPURIOUS); + /* * A pagetable walk has started */ -- cgit From c5c8c7c53004cb70715320018c3b4287071c1cfd Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 13 Jul 2021 22:09:54 +0000 Subject: KVM: x86/mmu: Make walk_shadow_page_lockless_{begin,end} interoperate with the TDP MMU Acquire the RCU read lock in walk_shadow_page_lockless_begin and release it in walk_shadow_page_lockless_end when the TDP MMU is enabled. This should not introduce any functional changes but is used in the following commit to make fast_page_fault interoperate with the TDP MMU. Signed-off-by: David Matlack Message-Id: <20210713220957.3493520-4-dmatlack@google.com> [Use if...else instead of if(){return;}] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 52 +++++++++++++++++++++++++++------------------- arch/x86/kvm/mmu/tdp_mmu.c | 6 ++---- arch/x86/kvm/mmu/tdp_mmu.h | 10 +++++++++ 3 files changed, 43 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6f5910b7b9bc..d5b0c8b0e9e9 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -686,28 +686,36 @@ static bool mmu_spte_age(u64 *sptep) static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) { - /* - * Prevent page table teardown by making any free-er wait during - * kvm_flush_remote_tlbs() IPI to all active vcpus. - */ - local_irq_disable(); + if (is_tdp_mmu(vcpu->arch.mmu)) { + kvm_tdp_mmu_walk_lockless_begin(); + } else { + /* + * Prevent page table teardown by making any free-er wait during + * kvm_flush_remote_tlbs() IPI to all active vcpus. + */ + local_irq_disable(); - /* - * Make sure a following spte read is not reordered ahead of the write - * to vcpu->mode. - */ - smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); + /* + * Make sure a following spte read is not reordered ahead of the write + * to vcpu->mode. + */ + smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); + } } static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) { - /* - * Make sure the write to vcpu->mode is not reordered in front of - * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us - * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. - */ - smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); - local_irq_enable(); + if (is_tdp_mmu(vcpu->arch.mmu)) { + kvm_tdp_mmu_walk_lockless_end(); + } else { + /* + * Make sure the write to vcpu->mode is not reordered in front of + * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us + * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. + */ + smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); + local_irq_enable(); + } } static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect) @@ -3617,6 +3625,8 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) /* * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. 
+ * + * Must be called between walk_shadow_page_lockless_{begin,end}. */ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) { @@ -3624,8 +3634,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level int leaf = -1; u64 spte; - walk_shadow_page_lockless_begin(vcpu); - for (shadow_walk_init(&iterator, vcpu, addr), *root_level = iterator.level; shadow_walk_okay(&iterator); @@ -3639,8 +3647,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level break; } - walk_shadow_page_lockless_end(vcpu); - return leaf; } @@ -3652,11 +3658,15 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) int root, leaf, level; bool reserved = false; + walk_shadow_page_lockless_begin(vcpu); + if (is_tdp_mmu(vcpu->arch.mmu)) leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root); else leaf = get_walk(vcpu, addr, sptes, &root); + walk_shadow_page_lockless_end(vcpu); + if (unlikely(leaf < 0)) { *sptep = 0ull; return reserved; diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 0853370bd811..228f0cc5e2cf 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1516,6 +1516,8 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, /* * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. + * + * Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}. */ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) @@ -1527,14 +1529,10 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, *root_level = vcpu->arch.mmu->shadow_root_level; - rcu_read_lock(); - tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; sptes[leaf] = iter.old_spte; } - rcu_read_unlock(); - return leaf; } diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 1cae4485b3bc..93e1bf5089c4 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -77,6 +77,16 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, int min_level); +static inline void kvm_tdp_mmu_walk_lockless_begin(void) +{ + rcu_read_lock(); +} + +static inline void kvm_tdp_mmu_walk_lockless_end(void) +{ + rcu_read_unlock(); +} + int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level); -- cgit From 6e8eb2060cc7fbc4d388d0ab70502e265aec98c6 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 13 Jul 2021 22:09:55 +0000 Subject: KVM: x86/mmu: fast_page_fault support for the TDP MMU Make fast_page_fault interoperate with the TDP MMU by leveraging walk_shadow_page_lockless_{begin,end} to acquire the RCU read lock and introducing a new helper function kvm_tdp_mmu_fast_pf_get_last_sptep to grab the lowest level sptep. 
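To make the lockless-walk contract concrete, a minimal caller sketch (illustrative only; example_read_last_spte() is not part of this series): the begin/end pair brackets the walk and bounds the lifetime of the returned sptep.

    static u64 example_read_last_spte(struct kvm_vcpu *vcpu, gpa_t gpa)
    {
            u64 spte = 0ull;
            u64 *sptep;

            walk_shadow_page_lockless_begin(vcpu);  /* IRQs off, or RCU for TDP MMU */
            if (is_tdp_mmu(vcpu->arch.mmu))
                    sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, gpa, &spte);
            else
                    sptep = fast_pf_get_last_sptep(vcpu, gpa, &spte);
            /* sptep may only be dereferenced here, before _end() drops protection. */
            if (sptep)
                    spte = *sptep;
            walk_shadow_page_lockless_end(vcpu);

            return spte;
    }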
Suggested-by: Ben Gardon Signed-off-by: David Matlack Message-Id: <20210713220957.3493520-5-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 50 ++++++++++++++++++++++++++++++++++------------ arch/x86/kvm/mmu/tdp_mmu.c | 41 +++++++++++++++++++++++++++++++++++++ arch/x86/kvm/mmu/tdp_mmu.h | 2 ++ 3 files changed, 80 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d5b0c8b0e9e9..29010abb659c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3108,15 +3108,41 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte) return spte & PT_PRESENT_MASK; } +/* + * Returns the last level spte pointer of the shadow page walk for the given + * gpa, and sets *spte to the spte value. This spte may be non-present. If no + * walk could be performed, returns NULL and *spte does not contain valid data. + * + * Contract: + * - Must be called between walk_shadow_page_lockless_{begin,end}. + * - The returned sptep must not be used after walk_shadow_page_lockless_end. + */ +static u64 *fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte) +{ + struct kvm_shadow_walk_iterator iterator; + u64 old_spte; + u64 *sptep = NULL; + + for_each_shadow_entry_lockless(vcpu, gpa, iterator, old_spte) { + sptep = iterator.sptep; + *spte = old_spte; + + if (!is_shadow_present_pte(old_spte)) + break; + } + + return sptep; +} + /* * Returns one of RET_PF_INVALID, RET_PF_FIXED or RET_PF_SPURIOUS. */ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code) { - struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; int ret = RET_PF_INVALID; u64 spte = 0ull; + u64 *sptep = NULL; uint retry_count = 0; if (!page_fault_can_be_fast(error_code)) @@ -3127,14 +3153,15 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code) do { u64 new_spte; - for_each_shadow_entry_lockless(vcpu, gpa, iterator, spte) - if (!is_shadow_present_pte(spte)) - break; + if (is_tdp_mmu(vcpu->arch.mmu)) + sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, gpa, &spte); + else + sptep = fast_pf_get_last_sptep(vcpu, gpa, &spte); if (!is_shadow_present_pte(spte)) break; - sp = sptep_to_sp(iterator.sptep); + sp = sptep_to_sp(sptep); if (!is_last_spte(spte, sp->role.level)) break; @@ -3192,8 +3219,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code) * since the gfn is not stable for indirect shadow page. See * Documentation/virt/kvm/locking.rst to get more detail.
*/ - if (fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte, - new_spte)) { + if (fast_pf_fix_direct_spte(vcpu, sp, sptep, spte, new_spte)) { ret = RET_PF_FIXED; break; } @@ -3206,7 +3232,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code) } while (true); - trace_fast_page_fault(vcpu, gpa, error_code, iterator.sptep, spte, ret); + trace_fast_page_fault(vcpu, gpa, error_code, sptep, spte, ret); walk_shadow_page_lockless_end(vcpu); return ret; @@ -3841,11 +3867,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, if (page_fault_handle_page_track(vcpu, error_code, gfn)) return RET_PF_EMULATE; - if (!is_tdp_mmu_fault) { - r = fast_page_fault(vcpu, gpa, error_code); - if (r != RET_PF_INVALID) - return r; - } + r = fast_page_fault(vcpu, gpa, error_code); + if (r != RET_PF_INVALID) + return r; r = mmu_topup_memory_caches(vcpu, false); if (r) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 228f0cc5e2cf..88aff5d0512c 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -527,6 +527,10 @@ static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm, if (is_removed_spte(iter->old_spte)) return false; + /* + * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and + * does not hold the mmu_lock. + */ if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte, new_spte) != iter->old_spte) return false; @@ -1536,3 +1540,40 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, return leaf; } + +/* + * Returns the last level spte pointer of the shadow page walk for the given + * gpa, and sets *spte to the spte value. This spte may be non-present. If no + * walk could be performed, returns NULL and *spte does not contain valid data. + * + * Contract: + * - Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}. + * - The returned sptep must not be used after kvm_tdp_mmu_walk_lockless_end. + * + * WARNING: This function is only intended to be called during fast_page_fault. + */ +u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr, + u64 *spte) +{ + struct tdp_iter iter; + struct kvm_mmu *mmu = vcpu->arch.mmu; + gfn_t gfn = addr >> PAGE_SHIFT; + tdp_ptep_t sptep = NULL; + + tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { + *spte = iter.old_spte; + sptep = iter.sptep; + } + + /* + * Perform the rcu_dereference to get the raw spte pointer value since + * we are passing it up to fast_page_fault, which is shared with the + * legacy MMU and thus does not retain the TDP MMU-specific __rcu + * annotation. + * + * This is safe since fast_page_fault obeys the contracts of this + * function as well as all TDP MMU contracts around modifying SPTEs + * outside of mmu_lock.
+ */ + return rcu_dereference(sptep); } diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 93e1bf5089c4..361b47f98cc5 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -89,6 +89,8 @@ static inline void kvm_tdp_mmu_walk_lockless_end(void) int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level); +u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr, + u64 *spte); #ifdef CONFIG_X86_64 bool kvm_mmu_init_tdp_mmu(struct kvm *kvm); -- cgit From 71ba3f3189c78f756a659568fb473600fd78f207 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 26 Jul 2021 12:30:52 -0400 Subject: KVM: x86: enable TDP MMU by default With the addition of fast page fault support, the TDP-specific MMU has reached feature parity with the original MMU. All my testing in the last few months has been done with the TDP MMU; switch the default on 64-bit machines. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 88aff5d0512c..41cee1d22918 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -10,7 +10,7 @@ #include #include -static bool __read_mostly tdp_mmu_enabled = false; +static bool __read_mostly tdp_mmu_enabled = true; module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644); /* Initializes the TDP MMU for the VM, if enabled. */ -- cgit From df63202fe52bd1583ce1418c755a81d6d41af3b9 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 13 Jul 2021 17:20:19 +0300 Subject: KVM: x86: APICv: drop immediate APICv disablement on current vCPU Special-casing the disabling of APICv on the current vCPU right away in kvm_request_apicv_update doesn't bring much benefit vs. raising KVM_REQ_APICV_UPDATE on it instead, since the request will be processed on the next entry to the guest. (The comment about having another #VMEXIT is wrong.) It also hides various assumptions that the APICv enable state matches the APICv inhibit state, as this special case only makes those states match on the current vCPU. Previous patches fixed a few such assumptions, so now it should be safe to drop this special case. Signed-off-by: Maxim Levitsky Message-Id: <20210713142023.106183-5-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ab200f2cb1f0..54fdadacb27f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9264,7 +9264,6 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); */ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) { - struct kvm_vcpu *except; unsigned long old, new, expected; if (!kvm_x86_ops.check_apicv_inhibit_reasons || @@ -9290,16 +9289,7 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) if (kvm_x86_ops.pre_update_apicv_exec_ctrl) static_call(kvm_x86_pre_update_apicv_exec_ctrl)(kvm, activate); - /* - * Sending request to update APICV for all other vcpus, - * while update the calling vcpu immediately instead of - * waiting for another #VMEXIT to handle the request.
- */ - except = kvm_get_running_vcpu(); - kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE, - except); - if (except) - kvm_vcpu_update_apicv(except); + kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); } EXPORT_SYMBOL_GPL(kvm_request_apicv_update); -- cgit From df37ed38e6c26064d5f97ebbe5cacc75eeb89153 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:39 -0700 Subject: KVM: x86: Flush the guest's TLB on INIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flush the guest's TLB on INIT, as required by Intel's SDM. Although AMD's APM states that the TLBs are unchanged by INIT, it's not clear that that's correct, as the APM also states that the TLB is flushed on "External initialization of the processor." Regardless, relying on the guest to be paranoid is unnecessarily risky, while an unnecessary flush is benign from a functional perspective and likely has no measurable impact on guest performance. Note, as of the April 2021 version of Intel's SDM, it also contradicts itself with respect to TLB flushing. The overview of INIT explicitly calls out the TLBs as being invalidated, while a table later in the same section says they are unchanged. 9.1 INITIALIZATION OVERVIEW: The major difference is that during an INIT, the internal caches, MSRs, MTRRs, and x87 FPU state are left unchanged (although, the TLBs and BTB are invalidated as with a hardware reset) Table 9-1: Register Power up Reset INIT Data and Code Cache, TLBs: Invalid[6] Invalid[6] Unchanged Given Core2's erratum[*] about global TLB entries not being flushed on INIT, it's safe to assume that the table is simply wrong. AZ28. INIT Does Not Clear Global Entries in the TLB Problem: INIT may not flush a TLB entry when: • The processor is in protected mode with paging enabled and the page global enable flag is set (PGE bit of CR4 register) • G bit for the page table entry is set • TLB entry is present in TLB when INIT occurs • Software may encounter unexpected page fault or incorrect address translation due to a TLB entry erroneously left in TLB after INIT. Workaround: Write to CR3, CR4 (setting bits PSE, PGE or PAE) or CR0 (setting bits PG or PE) registers before writing to memory early in BIOS code to clear all the global entries from TLB. Status: For the steppings affected, see the Summary Tables of Changes. [*] https://www.intel.com/content/dam/support/us/en/documents/processors/mobile/celeron/sb/320121.pdf Fixes: 6aa8b732ca01 ("[PATCH] kvm: userspace interface") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 54fdadacb27f..164ecca7769e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10872,6 +10872,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) */ if (old_cr0 & X86_CR0_PG) kvm_mmu_reset_context(vcpu); + + /* + * Intel's SDM states that all TLB entries are flushed on INIT. AMD's + * APM states the TLBs are untouched by INIT, but it also states that + * the TLBs are flushed on "External initialization of the processor." + * Flush the guest TLB regardless of vendor, there is no meaningful + * benefit in relying on the guest to flush the TLB immediately after + * INIT. A spurious TLB flush is benign and likely negligible from a + * performance perspective.
+ */ + if (init_event) + kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); } void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) -- cgit From afc8de0118be84f4058b9977d481aeb3e0758dbb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:40 -0700 Subject: KVM: nVMX: Set LDTR to its architecturally defined value on nested VM-Exit Set L1's LDTR on VM-Exit per the Intel SDM: The host-state area does not contain a selector field for LDTR. LDTR is established as follows on all VM exits: the selector is cleared to 0000H, the segment is marked unusable and is otherwise undefined (although the base address is always canonical). This is likely a benign bug since the LDTR is unusable, as it means the L1 VMM is conditioned to reload its LDTR in order to function properly on bare metal. Fixes: 4704d0befb07 ("KVM: nVMX: Exiting from L2 to L1") Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1a52134b0c42..7f8184f432b4 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4298,6 +4298,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, }; vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); + memset(&seg, 0, sizeof(seg)); + seg.unusable = 1; + vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR); + kvm_set_dr(vcpu, 7, 0x400); vmcs_write64(GUEST_IA32_DEBUGCTL, 0); -- cgit From 4f117ce4aefca0e90cd44680219d4c261c1381b9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:41 -0700 Subject: KVM: SVM: Zero out GDTR.base and IDTR.base on INIT Explicitly set GDTR.base and IDTR.base to zero when initializing the VMCB. Functionally this only affects INIT, as the bases are implicitly set to zero on RESET by virtue of the VMCB being zero allocated. Per AMD's APM, GDTR.base and IDTR.base are zeroed after RESET and INIT. Fixes: 04d2cc7780d4 ("KVM: Move main vcpu loop into subarch independent code") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4ce6d827fccd..7845232b6fb6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1241,7 +1241,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu) SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; save->cs.limit = 0xffff; + save->gdtr.base = 0; save->gdtr.limit = 0xffff; + save->idtr.base = 0; save->idtr.limit = 0xffff; init_sys_seg(&save->ldtr, SEG_TYPE_LDT); -- cgit From 2a24be79b6b7061a486239c3a3489eb67b9587f6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:42 -0700 Subject: KVM: VMX: Set EDX at INIT with CPUID.0x1, Family-Model-Stepping Set EDX at RESET/INIT based on the userspace-defined CPUID model when possible, i.e. when CPUID.0x1.EAX is defined by userspace. At RESET/INIT, all CPUs that support CPUID set EDX to the FMS enumerated in CPUID.0x1.EAX. If no CPUID match is found, fall back to KVM's default of 0x600 (Family '6'), which is the least awful approximation of KVM's virtual CPU model.
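As background, the Family-Model-Stepping layout of CPUID.0x1.EAX is architectural (documented in both the SDM and the APM); the helper below is purely illustrative and not part of the patch. KVM's fallback of 0x600 decodes to family 6, model 0, stepping 0.

    /* Illustrative FMS decode: stepping is bits 3:0, model bits 7:4,
     * family bits 11:8, extended model bits 19:16, extended family 27:20. */
    static unsigned int example_cpuid_family(u32 fms)
    {
            unsigned int family = (fms >> 8) & 0xf;

            /* The extended family field is added in only when family == 0xf. */
            if (family == 0xf)
                    family += (fms >> 20) & 0xff;
            return family;
    }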
Fixes: 6aa8b732ca01 ("[PATCH] kvm: userspace interface") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fb1ac33a9902..6946a5161d38 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4394,6 +4394,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct msr_data apic_base_msr; + u32 eax, dummy; u64 cr0; vmx->rmode.vm86_active = 0; @@ -4401,7 +4402,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->msr_ia32_umwait_control = 0; - vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); + eax = 1; + if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true)) + eax = get_rdx_init_val(); + kvm_rdx_write(vcpu, eax); + vmx->hv_deadline_tsc = -1; kvm_set_cr8(vcpu, 0); -- cgit From 067a456d091d05fdae32cae350410d905968b645 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:43 -0700 Subject: KVM: SVM: Require exact CPUID.0x1 match when stuffing EDX at INIT Do not allow an inexact CPUID "match" when querying the guest's CPUID.0x1 to stuff EDX during INIT. In the common case, where the guest CPU model is an AMD variant, allowing an inexact match is a nop since KVM doesn't emulate Intel's goofy "out-of-range" logic for AMD and Hygon. If the vCPU model happens to be an Intel variant, an inexact match is possible if and only if the max CPUID leaf is precisely '0'. Aside from the fact that there's probably no CPU in existence with a single CPUID leaf, if the max CPUID leaf is '0', that means that CPUID.0.EAX is '0', and thus an inexact match for CPUID.0x1.EAX will also yield '0'. So, with lots of twisty logic, no functional change intended. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7845232b6fb6..f4b6fec6252f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1346,7 +1346,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) } init_vmcb(vcpu); - kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, false); + kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); kvm_rdx_write(vcpu, eax); if (kvm_vcpu_apicv_active(vcpu) && !init_event) -- cgit From 665f4d9238ad83c36dd4e078ccab45b3ddec211d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:44 -0700 Subject: KVM: SVM: Fall back to KVM's hardcoded value for EDX at RESET/INIT At vCPU RESET/INIT (mostly RESET), stuff EDX with KVM's hardcoded, default Family-Model-Stepping ID of 0x600 if CPUID.0x1 isn't defined. At RESET, the CPUID lookup is guaranteed to "miss" because KVM emulates RESET before exposing the vCPU to userspace, i.e. userspace can't possibly have set the vCPU's CPUID model, and thus KVM will always write '0'. At INIT, using 0x600 is less bad than using '0'. While initializing EDX to '0' is _extremely_ unlikely to be noticed by the guest, let alone break the guest, and can be overridden by userspace for the RESET case, using 0x600 is preferable as it will allow consolidating the relevant VMX and SVM RESET/INIT logic in the future.
And, digging through old specs suggests that neither Intel nor AMD has ever shipped a CPU that initialized EDX to '0' at RESET. Regarding 0x600 as KVM's default Family, it is a sane default and in many ways the most appropriate. Prior to the 386 implementations, DX was undefined at RESET. With the 386, 486, 586/P5, and 686/P6/Athlon, both Intel and AMD set EDX to 3, 4, 5, and 6 respectively. AMD switched to using '15' as its primary Family with the introduction of AMD64, but Intel has continued using '6' for the last few decades. So, '6' is a valid Family for both Intel and AMD CPUs, is compatible with both 32-bit and 64-bit CPUs (albeit not a perfect fit for 64-bit AMD), and of the common Families (3 - 6), is the best fit with respect to KVM's virtual CPU model. E.g. prior to the P6, Intel CPUs did not have a STI window. Modern operating systems, Linux included, rely on the STI window, e.g. for "safe halt", and KVM unconditionally assumes the virtual CPU has an STI window. Thus enumerating a Family ID of 3, 4, or 5 would be provably wrong. Opportunistically remove a stale comment. Fixes: 66f7b72e1171 ("KVM: x86: Make register state after reset conform to specification") Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f4b6fec6252f..05c1e60d829a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1264,7 +1264,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); save->cr4 = X86_CR4_PAE; - /* rdx = ?? */ if (npt_enabled) { /* Setup VMCB for Nested Paging */ @@ -1346,7 +1345,15 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) } init_vmcb(vcpu); - kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); + /* + * Fall back to KVM's default Family/Model/Stepping if no CPUID match + * is found. Note, it's impossible to get a match at RESET since KVM + * emulates RESET before exposing the vCPU to userspace, i.e. it's + * impossible for kvm_cpuid() to find a valid entry on RESET. But, go + * through the motions in case that's ever remedied, and to be pedantic. + */ + if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true)) + eax = get_rdx_init_val(); + kvm_rdx_write(vcpu, eax); if (kvm_vcpu_apicv_active(vcpu) && !init_event) -- cgit From 61152cd907d59ffd6b0a9479b2fa3b3b7b080409 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:45 -0700 Subject: KVM: VMX: Remove explicit MMU reset in enter_rmode() Drop an explicit MMU reset when entering emulated real mode now that the vCPU INIT/RESET path correctly handles conditional MMU resets, e.g. if INIT arrives while the vCPU is in 64-bit mode. Note, while there are multiple other direct calls to vmx_set_cr0(), i.e. paths that change CR0 without invoking kvm_post_set_cr0(), only the INIT emulation can reach enter_rmode(). CLTS emulation only toggles CR0.TS, VM-Exit (and late VM-Fail) emulation cannot architecturally transition to Real Mode, and VM-Enter to Real Mode is possible if and only if Unrestricted Guest is enabled (exposed to L1).
This effectively reverts commit 8668a3c468ed ("KVM: VMX: Reset mmu context when entering real mode") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6946a5161d38..207393a429d1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2852,8 +2852,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu) fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); - - kvm_mmu_reset_context(vcpu); } int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) -- cgit From 5d2d7e41e3b80f37ec8673825fae07ffe9f140c3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:46 -0700 Subject: KVM: SVM: Drop explicit MMU reset at RESET/INIT Drop an explicit MMU reset in SVM's vCPU RESET/INIT flow now that the common x86 path correctly handles conditional MMU resets, e.g. if INIT arrives while the vCPU is in 64-bit mode. This reverts commit ebae871a509d ("kvm: svm: reset mmu on VCPU reset"). Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 05c1e60d829a..a9af9bff4755 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1261,7 +1261,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) * It also updates the guest-visible cr0 value. */ svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); - kvm_mmu_reset_context(vcpu); save->cr4 = X86_CR4_PAE; -- cgit From c2f79a65b4b66681894ef7d7e3912ba55acc20d5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:49 -0700 Subject: KVM: x86: WARN if the APIC map is dirty without an in-kernel local APIC WARN if KVM ends up in a state where it thinks its APIC map needs to be recalculated, but KVM is not emulating the local APIC. This is mostly to document KVM's "rules" in order to provide clarity in future cleanups. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-12-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ba5a27879f1d..add4dd1e3528 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -192,6 +192,9 @@ void kvm_recalculate_apic_map(struct kvm *kvm) if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN) return; + WARN_ONCE(!irqchip_in_kernel(kvm), + "Dirty APIC map without an in-kernel local APIC"); + mutex_lock(&kvm->arch.apic_map_lock); /* * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map -- cgit From 549240e8e09e063b7ba44c9f8497e8499562a34c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:50 -0700 Subject: KVM: x86: Remove defunct BSP "update" in local APIC reset Remove a BSP APIC update in kvm_lapic_reset() that is a glorified and confusing nop. When the code was originally added, kvm_vcpu_is_bsp() queried kvm->arch.bsp_vcpu, i.e. the intent was to set the BSP bit in the BSP vCPU's APIC. 
But, stuffing the BSP bit at INIT was wrong since the guest can change its BSP(s); this was fixed by commit 58d269d8cccc ("KVM: x86: BSP in MSR_IA32_APICBASE is writable"). In other words, kvm_vcpu_is_bsp() is now purely a reflection of vcpu->arch.apic_base.MSR_IA32_APICBASE_BSP, thus the update will always set the current value and kvm_lapic_set_base() is effectively a nop if the new and old values match. The RESET case, which does need to stuff the BSP for the reset vCPU, is handled by vendor code (though this will soon be moved to common code). No functional change intended. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-13-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index add4dd1e3528..a24ce8fe93e5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2367,9 +2367,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) apic->highest_isr_cache = -1; update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); - if (kvm_vcpu_is_bsp(vcpu)) - kvm_lapic_set_base(vcpu, - vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP); + vcpu->arch.pv_eoi.msr_val = 0; apic_update_ppr(apic); if (vcpu->arch.apicv_active) { -- cgit From 0214f6bbe564632adba299e38023d681c1bd68c5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:51 -0700 Subject: KVM: x86: Migrate the PIT only if vcpu0 is migrated, not any BSP Make vcpu0 the arbitrary owner of the PIT, as was intended when PIT migration was added by commit 2f5997140f22 ("KVM: migrate PIT timer"). The PIT was unintentionally turned into being owned by the BSP by commit c5af89b68abb ("KVM: Introduce kvm_vcpu_is_bsp() function."), and was then unintentionally converted to a shared ownership model when kvm_vcpu_is_bsp() was modified to check the APIC base MSR instead of hardcoding vcpu0 as the BSP. Functionally, this just means the PIT's hrtimer is migrated less often. The real motivation is to remove the usage of kvm_vcpu_is_bsp(), so that more legacy/broken crud can be removed in a future patch. Fixes: 58d269d8cccc ("KVM: x86: BSP in MSR_IA32_APICBASE is writable") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-14-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/i8254.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index a6e218c6140d..5a69cce4d72d 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -220,7 +220,8 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) struct kvm_pit *pit = vcpu->kvm->arch.vpit; struct hrtimer *timer; - if (!kvm_vcpu_is_bsp(vcpu) || !pit) + /* Somewhat arbitrarily make vcpu0 the owner of the PIT. */ + if (vcpu->vcpu_id || !pit) return; timer = &pit->pit_state.timer; -- cgit From 01913c57c225a8301e9a53447507f310a4be22b6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:52 -0700 Subject: KVM: x86: Don't force set BSP bit when local APIC is managed by userspace Don't set the BSP bit in vcpu->arch.apic_base when the local APIC is managed by userspace. Forcing all vCPUs to be BSPs is non-sensical, and was dead code when it was added by commit 97222cc83163 ("KVM: Emulate local APIC in kernel"). 
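For reference while following the apic_base manipulations in this and the next several patches, the architectural MSR_IA32_APICBASE layout is summarized below; the helper is an illustrative sketch, not KVM code.

    /*
     * MSR_IA32_APICBASE (architectural layout):
     *   bit 8      BSP   - set on the bootstrap processor
     *   bit 10     EXTD  - x2APIC mode enable
     *   bit 11     EN    - xAPIC global enable
     *   bits 12+   base  - APIC MMIO base address, 0xfee00000 by default
     */
    static bool example_apic_base_is_bsp(u64 apic_base)
    {
            return !!(apic_base & MSR_IA32_APICBASE_BSP);
    }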
At the time, kvm_lapic_set_base() was invoked if and only if the local APIC was in-kernel (and it couldn't be called before the vCPU created its APIC). kvm_lapic_set_base() eventually gained generic usage, but the latent bug escaped notice because the only true consumer would be the guest itself in the form of explicit RDMSRs on APs. Out of Linux, SeaBIOS, and EDK2/OVMF, only OVMF consumes the BSP bit from the APIC_BASE MSR. For the vast majority of usage in OVMF, BSP confusion would be benign. OVMF's BSP election upon SMI rendezvous might be broken, but practically no one runs KVM with an out-of-kernel local APIC, let alone does so while utilizing SMIs with OVMF. Fixes: 97222cc83163 ("KVM: Emulate local APIC in kernel") Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-15-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a24ce8fe93e5..acb201d16b5e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2268,9 +2268,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) u64 old_value = vcpu->arch.apic_base; struct kvm_lapic *apic = vcpu->arch.apic; - if (!apic) - value |= MSR_IA32_APICBASE_BSP; - vcpu->arch.apic_base = value; if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) -- cgit From 503bc49424df4802ca34e4e1a024381fd7ced80e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:53 -0700 Subject: KVM: x86: Set BSP bit in reset BSP vCPU's APIC base by default Set the BSP bit appropriately during local APIC "reset" instead of relying on vendor code to clean up at a later point. This is a step towards consolidating the local APIC, VMX, and SVM xAPIC initialization code. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-16-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index acb201d16b5e..0fb282b64c8f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2321,6 +2321,7 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { struct kvm_lapic *apic = vcpu->arch.apic; + u64 msr_val; int i; if (!apic) @@ -2330,8 +2331,10 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) hrtimer_cancel(&apic->lapic_timer.timer); if (!init_event) { - kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE); + msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; + if (kvm_vcpu_is_reset_bsp(vcpu)) + msr_val |= MSR_IA32_APICBASE_BSP; + kvm_lapic_set_base(vcpu, msr_val); kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); } kvm_apic_set_version(apic->vcpu); -- cgit From f0428b3dcb2d7efe4b2a2304841645b48f0b6499 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:54 -0700 Subject: KVM: VMX: Stuff vcpu->arch.apic_base directly at vCPU RESET Write vcpu->arch.apic_base directly instead of bouncing through kvm_set_apic_base(). This is a glorified nop, and is a step towards cleaning up the mess that is local APIC creation. When using an in-kernel APIC, kvm_create_lapic() explicitly sets vcpu->arch.apic_base to MSR_IA32_APICBASE_ENABLE to prevent its own kvm_lapic_set_base() call in kvm_lapic_reset() from triggering state changes.
That call during RESET exists purely to set apic->base_address to the default base value. As a result, by the time VMX gets control, the only missing piece is the BSP bit being set for the reset BSP. For a userspace APIC, there are no side effects to process (for the APIC). In both cases, the call to kvm_update_cpuid_runtime() is a nop because the vCPU hasn't yet been exposed to userspace, i.e. there can't be any CPUID entries. No functional change intended. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-17-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 207393a429d1..2fc232e1bc20 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4391,7 +4391,6 @@ static void init_vmcs(struct vcpu_vmx *vmx) static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct msr_data apic_base_msr; u32 eax, dummy; u64 cr0; @@ -4409,12 +4408,10 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_set_cr8(vcpu, 0); if (!init_event) { - apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; + vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_reset_bsp(vcpu)) - apic_base_msr.data |= MSR_IA32_APICBASE_BSP; - apic_base_msr.host_initiated = true; - kvm_set_apic_base(vcpu, &apic_base_msr); + vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; } vmx_segment_cache_clear(vmx); -- cgit From 421221234ada41b4a9f0beeb08e30b07388bd4bd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:55 -0700 Subject: KVM: x86: Open code necessary bits of kvm_lapic_set_base() at vCPU RESET Stuff vcpu->arch.apic_base and apic->base_address directly during APIC reset, as opposed to bouncing through kvm_set_apic_base() while fudging the ENABLE bit during creation to avoid the other, unwanted side effects. This is a step towards consolidating the APIC RESET logic across x86, VMX, and SVM. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-18-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0fb282b64c8f..295a9d02a9a5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2321,7 +2321,6 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { struct kvm_lapic *apic = vcpu->arch.apic; - u64 msr_val; int i; if (!apic) @@ -2331,10 +2330,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) hrtimer_cancel(&apic->lapic_timer.timer); if (!init_event) { - msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; + vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_reset_bsp(vcpu)) - msr_val |= MSR_IA32_APICBASE_BSP; - kvm_lapic_set_base(vcpu, msr_val); + vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; + + apic->base_address = APIC_DEFAULT_PHYS_BASE; + kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); } kvm_apic_set_version(apic->vcpu); @@ -2477,11 +2479,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) lapic_timer_advance_dynamic = false; } - /* - * APIC is created enabled. 
This will prevent kvm_lapic_set_base from - * thinking that APIC state has changed. - */ - vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */ kvm_iodevice_init(&apic->dev, &apic_mmio_ops); -- cgit From 4547700a4d190ac419abffa317069aaccf1ac118 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:56 -0700 Subject: KVM: x86: Consolidate APIC base RESET initialization code Consolidate the APIC base RESET logic, which is currently spread out across both x86 and vendor code. For an in-kernel APIC, the vendor code is redundant. But for a userspace APIC, KVM relies on the vendor code to initialize vcpu->arch.apic_base. Hoist the vcpu->arch.apic_base initialization above the !apic check so that it applies to both flavors of APIC emulation, and delete the vendor code. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-19-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 12 +++++++----- arch/x86/kvm/svm/svm.c | 6 ------ arch/x86/kvm/vmx/vmx.c | 7 ------- 3 files changed, 7 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 295a9d02a9a5..76fb00921203 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2323,6 +2323,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) struct kvm_lapic *apic = vcpu->arch.apic; int i; + if (!init_event) { + vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; + if (kvm_vcpu_is_reset_bsp(vcpu)) + vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; + } + if (!apic) return; @@ -2330,11 +2337,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) hrtimer_cancel(&apic->lapic_timer.timer); if (!init_event) { - vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_reset_bsp(vcpu)) - vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; - apic->base_address = APIC_DEFAULT_PHYS_BASE; kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a9af9bff4755..acb7bd14110b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1336,12 +1336,6 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) svm->spec_ctrl = 0; svm->virt_spec_ctrl = 0; - if (!init_event) { - vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_reset_bsp(vcpu)) - vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; - } init_vmcb(vcpu); /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2fc232e1bc20..e487f10e2877 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4407,13 +4407,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->hv_deadline_tsc = -1; kvm_set_cr8(vcpu, 0); - if (!init_event) { - vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_reset_bsp(vcpu)) - vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; - } - vmx_segment_cache_clear(vmx); seg_setup(VCPU_SREG_CS); -- cgit From 49d8665cc20b973995b5f519222d3270daa82f3f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:57 -0700 Subject: KVM: x86: Move EDX initialization at vCPU RESET to common code Move the EDX initialization at vCPU RESET, which is now identical between VMX and SVM, into common code. No functional change intended. 
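A greatly simplified sketch of the common RESET flow once this part of the series lands (most of the real kvm_vcpu_reset() is elided; this mirrors the diffs that follow rather than quoting them):

    void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
    {
            u32 eax = 1, dummy;

            kvm_lapic_reset(vcpu, init_event);      /* APIC base, incl. BSP bit */

            /* Common EDX stuffing: FMS from CPUID.0x1, 0x600 if undefined. */
            if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true))
                    eax = 0x600;
            kvm_rdx_write(vcpu, eax);

            static_call(kvm_x86_vcpu_reset)(vcpu, init_event);      /* vendor bits */
    }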
Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-20-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 ----- arch/x86/kvm/svm/svm.c | 13 ------------- arch/x86/kvm/vmx/vmx.c | 6 ------ arch/x86/kvm/x86.c | 13 +++++++++++++ 4 files changed, 13 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d798650ad793..ec8e4aca69c8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1773,11 +1773,6 @@ static inline unsigned long read_msr(unsigned long msr) } #endif -static inline u32 get_rdx_init_val(void) -{ - return 0x600; /* P6 family */ -} - static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) { kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index acb7bd14110b..f42ec7d4b0cd 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1330,25 +1330,12 @@ static void init_vmcb(struct kvm_vcpu *vcpu) static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_svm *svm = to_svm(vcpu); - u32 dummy; - u32 eax = 1; svm->spec_ctrl = 0; svm->virt_spec_ctrl = 0; init_vmcb(vcpu); - /* - * Fall back to KVM's default Family/Model/Stepping if no CPUID match - * is found. Note, it's impossible to get a match at RESET since KVM - * emulates RESET before exposing the vCPU to userspace, i.e. it's - * impossible for kvm_cpuid() to find a valid entry on RESET. But, go - * through the motions in case that's ever remedied, and to be pedantic. - */ - if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true)) - eax = get_rdx_init_val(); - kvm_rdx_write(vcpu, eax); - if (kvm_vcpu_apicv_active(vcpu) && !init_event) avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE); } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e487f10e2877..ad41f3423acf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4391,7 +4391,6 @@ static void init_vmcs(struct vcpu_vmx *vmx) static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 eax, dummy; u64 cr0; vmx->rmode.vm86_active = 0; @@ -4399,11 +4398,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->msr_ia32_umwait_control = 0; - eax = 1; - if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true)) - eax = get_rdx_init_val(); - kvm_rdx_write(vcpu, eax); - vmx->hv_deadline_tsc = -1; kvm_set_cr8(vcpu, 0); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 164ecca7769e..ed8b6d1b4e85 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10792,6 +10792,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { unsigned long old_cr0 = kvm_read_cr0(vcpu); + u32 eax, dummy; kvm_lapic_reset(vcpu, init_event); @@ -10858,6 +10859,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.regs_avail = ~0; vcpu->arch.regs_dirty = ~0; + /* + * Fall back to KVM's default Family/Model/Stepping of 0x600 (P6/Athlon) + * if no CPUID match is found. Note, it's impossible to get a match at + * RESET since KVM emulates RESET before exposing the vCPU to userspace, + * i.e. it's impossible for kvm_cpuid() to find a valid entry on RESET. + * But, go through the motions in case that's ever remedied.
+ */ + eax = 1; + if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true)) + eax = 0x600; + kvm_rdx_write(vcpu, eax); + vcpu->arch.ia32_xss = 0; static_call(kvm_x86_vcpu_reset)(vcpu, init_event); -- cgit From 9e90e215d9c9f013cc354c4fe1a1313531a97a05 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:58 -0700 Subject: KVM: SVM: Don't bother writing vmcb->save.rip at vCPU RESET/INIT Drop unnecessary initialization of vmcb->save.rip during vCPU RESET/INIT, as svm_vcpu_run() unconditionally propagates VCPU_REGS_RIP to save.rip. No true functional change intended. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-21-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f42ec7d4b0cd..9007adeb01fb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1253,8 +1253,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_set_efer(vcpu, 0); save->dr6 = 0xffff0ff0; kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); - save->rip = 0x0000fff0; - vcpu->arch.regs[VCPU_REGS_RIP] = save->rip; + vcpu->arch.regs[VCPU_REGS_RIP] = 0x0000fff0; /* * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. -- cgit From ee5a5584cba316bc90bc2fad0c6d10b71f1791cb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:59 -0700 Subject: KVM: VMX: Invert handling of CR0.WP for EPT without unrestricted guest Opt in to forcing CR0.WP=1 for shadow paging, and stop lying about WP being "always on" for unrestricted guest. In addition to making KVM a wee bit more honest, this paves the way for additional cleanup. No functional change intended.
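The net hardware CR0 computation after this change can be sketched as follows (an illustrative helper mirroring the constants in vmx.c; example_hw_cr0() is not code from the patch):

    static unsigned long example_hw_cr0(unsigned long cr0, bool urg, bool ept)
    {
            unsigned long hw_cr0 = cr0 & ~(X86_CR0_NW | X86_CR0_CD);

            if (urg) {
                    hw_cr0 |= X86_CR0_NE;                   /* unrestricted guest */
            } else {
                    hw_cr0 |= X86_CR0_NE | X86_CR0_PG | X86_CR0_PE;
                    if (!ept)                               /* shadow paging */
                            hw_cr0 |= X86_CR0_WP;           /* KVM needs WP=1 */
            }
            return hw_cr0;
    }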
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-22-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ad41f3423acf..2e84c31efb81 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -136,8 +136,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO); #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE #define KVM_VM_CR0_ALWAYS_ON \ - (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ - X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) + (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) #define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) @@ -2995,9 +2994,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } -static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, - unsigned long cr0, - struct kvm_vcpu *vcpu) +static void ept_update_paging_mode_cr0(unsigned long cr0, struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3016,9 +3013,6 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, vcpu->arch.cr0 = cr0; vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); } - - if (!(cr0 & X86_CR0_WP)) - *hw_cr0 &= ~X86_CR0_WP; } void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) @@ -3031,6 +3025,8 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; else { hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; + if (!enable_ept) + hw_cr0 |= X86_CR0_WP; if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) enter_pmode(vcpu); @@ -3049,7 +3045,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #endif if (enable_ept && !is_unrestricted_guest(vcpu)) - ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); + ept_update_paging_mode_cr0(cr0, vcpu); vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); -- cgit From 4f0dcb544038e016277fb691f1e60d52d7448cf6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:00 -0700 Subject: KVM: VMX: Remove direct write to vcpu->arch.cr0 during vCPU RESET/INIT Remove a bogus write to vcpu->arch.cr0 that immediately precedes vmx_set_cr0() during vCPU RESET/INIT. For RESET, this is a nop since the "old" CR0 value is meaningless. But for INIT, if the vCPU is coming from paging enabled mode, crushing vcpu->arch.cr0 will cause the various is_paging() checks in vmx_set_cr0() to get false negatives. For the exit_lmode() case, the false negative is benign as vmx_set_efer() is called immediately after vmx_set_cr0(). For EPT without unrestricted guest, the false negative will cause KVM to unnecessarily run with CR3 load/store exiting. But again, this is benign, albeit sub-optimal. 
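The false negatives arise because is_paging() reads the *cached* CR0; roughly (a sketch of the existing helper's behavior, not new code):

    static bool example_is_paging(struct kvm_vcpu *vcpu)
    {
            /* Reads vcpu->arch.cr0 via the register cache, so zeroing
             * vcpu->arch.cr0 ahead of vmx_set_cr0() makes "was paging
             * enabled?" queries spuriously return false. */
            return !!kvm_read_cr0_bits(vcpu, X86_CR0_PG);
    }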
Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-23-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2e84c31efb81..1e555fb732bf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4387,7 +4387,6 @@ static void init_vmcs(struct vcpu_vmx *vmx) static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u64 cr0; vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; @@ -4455,9 +4454,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; - vmx->vcpu.arch.cr0 = cr0; - vmx_set_cr0(vcpu, cr0); /* enter rmode */ + vmx_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); vmx_set_cr4(vcpu, 0); vmx_set_efer(vcpu, 0); -- cgit From c834fd7fc1308a0e0429d203a6c3af528cd902fa Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:01 -0700 Subject: KVM: VMX: Fold ept_update_paging_mode_cr0() back into vmx_set_cr0() Move the CR0/CR3/CR4 shenanigans for EPT without unrestricted guest back into vmx_set_cr0(). This will allow a future patch to eliminate the rather gross stuffing of vcpu->arch.cr0 in the paging transition cases by snapshotting the old CR0. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-24-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1e555fb732bf..e4b1c24ad079 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2994,27 +2994,6 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } -static void ept_update_paging_mode_cr0(unsigned long cr0, struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) - vmx_cache_reg(vcpu, VCPU_EXREG_CR3); - if (!(cr0 & X86_CR0_PG)) { - /* From paging/starting to nonpaging */ - exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } else if (!is_paging(vcpu)) { - /* From nonpaging to paging */ - exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } -} - void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3044,8 +3023,23 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif - if (enable_ept && !is_unrestricted_guest(vcpu)) - ept_update_paging_mode_cr0(cr0, vcpu); + if (enable_ept && !is_unrestricted_guest(vcpu)) { + if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) + vmx_cache_reg(vcpu, VCPU_EXREG_CR3); + if (!(cr0 & X86_CR0_PG)) { + /* From paging/starting to nonpaging */ + exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); + vcpu->arch.cr0 = cr0; + vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); + } else if (!is_paging(vcpu)) { + /* From nonpaging to paging */ + exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); + vcpu->arch.cr0 = cr0; + 
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); + } + } vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); -- cgit From 470750b3425513b9f63f176e564e63e0e7998afc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:02 -0700 Subject: KVM: nVMX: Do not clear CR3 load/store exiting bits if L1 wants 'em Keep CR3 load/store exiting enabled as needed when running L2 in order to honor L1's desires. This fixes a largely theoretical bug where L1 could intercept CR3 but not CR0.PG and end up not getting the desired CR3 exits when L2 enables paging. In other words, the existing !is_paging() check inadvertently handles the normal case for L2 where vmx_set_cr0() is called during VM-Enter, which is guaranteed to run with paging enabled, and thus will never clear the bits. Removing the !is_paging() check will also allow future consolidation and cleanup of the related code. From a performance perspective, this is all a nop, as the VMCS controls shadow will optimize away the VMWRITE when the controls are in the desired state. Add a comment explaining why CR3 is intercepted, with a big disclaimer about not querying the old CR3. Because vmx_set_cr0() is used for flows that are not directly tied to MOV CR3, e.g. vCPU RESET/INIT and nested VM-Enter, it's possible that is_paging() is not synchronized with CR3 load/store exiting. This is actually guaranteed in the current code, as KVM starts with CR3 interception disabled. Obviously that can be fixed, but there's no good reason to play whack-a-mole, and it tends to end poorly, e.g. descriptor table exiting for UMIP emulation attempted to be precise in the past and ended up botching the interception toggling. Fixes: fe3ef05c7572 ("KVM: nVMX: Prepare vmcs02 from vmcs01 and vmcs12") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-25-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 46 +++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e4b1c24ad079..9f69ccc096a3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2994,10 +2994,14 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } +#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \ + CPU_BASED_CR3_STORE_EXITING) + void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long hw_cr0; + u32 tmp; hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF); if (is_unrestricted_guest(vcpu)) @@ -3024,18 +3028,42 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #endif if (enable_ept && !is_unrestricted_guest(vcpu)) { + /* + * Ensure KVM has an up-to-date snapshot of the guest's CR3. If + * the below code _enables_ CR3 exiting, vmx_cache_reg() will + * (correctly) stop reading vmcs.GUEST_CR3 because it thinks + * KVM's CR3 is installed. + */ if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) vmx_cache_reg(vcpu, VCPU_EXREG_CR3); + + /* + * When running with EPT but not unrestricted guest, KVM must + * intercept CR3 accesses when paging is _disabled_. This is + * necessary because restricted guests can't actually run with + * paging disabled, and so KVM stuffs its own CR3 in order to + * run the guest with identity mapped page tables. + * + * Do _NOT_ check the old CR0.PG, e.g. to optimize away the + * update, it may be stale with respect to CR3 interception, + * e.g. after nested VM-Enter.
+ * + * Lastly, honor L1's desires, i.e. intercept CR3 loads and/or + * stores to forward them to L1, even if KVM does not need to + * intercept them to preserve its identity mapped page tables. + */ if (!(cr0 & X86_CR0_PG)) { - /* From paging/starting to nonpaging */ - exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } else if (!is_paging(vcpu)) { - /* From nonpaging to paging */ - exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); + exec_controls_setbit(vmx, CR3_EXITING_BITS); + } else if (!is_guest_mode(vcpu)) { + exec_controls_clearbit(vmx, CR3_EXITING_BITS); + } else { + tmp = exec_controls_get(vmx); + tmp &= ~CR3_EXITING_BITS; + tmp |= get_vmcs12(vcpu)->cpu_based_vm_exec_control & CR3_EXITING_BITS; + exec_controls_set(vmx, tmp); + } + + if (!is_paging(vcpu) != !(cr0 & X86_CR0_PG)) { vcpu->arch.cr0 = cr0; vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); } -- cgit From 81ca0e7340eedcbe67911d97f292837e0cf39709 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:03 -0700 Subject: KVM: VMX: Pull GUEST_CR3 from the VMCS iff CR3 load exiting is disabled Tweak the logic for grabbing vmcs.GUEST_CR3 in vmx_cache_reg() to look directly at the execution controls, as opposed to effectively inferring the controls based on vCPUs. Inferring the controls isn't wrong, but it creates a very subtle dependency between the caching logic, the state of vcpu->arch.cr0 (via is_paging()), and the behavior of vmx_set_cr0(). Using the execution controls doesn't completely eliminate the dependency in vmx_set_cr0(), e.g. neglecting to cache CR3 before enabling interception would still break the guest, but it does reduce the code dependency and mostly eliminate the logical dependency (that CR3 loads are intercepted in certain scenarios). Eliminating the subtle read of vcpu->arch.cr0 will also allow for additional cleanup in vmx_set_cr0(). Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-26-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9f69ccc096a3..0c1e578967ff 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2262,8 +2262,11 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits; break; case VCPU_EXREG_CR3: - if (is_unrestricted_guest(vcpu) || - (enable_ept && is_paging(vcpu))) + /* + * When intercepting CR3 loads, e.g. for shadowing paging, KVM's + * CR3 is loaded into hardware, not the guest's CR3. + */ + if (!(exec_controls_get(to_vmx(vcpu)) & CPU_BASED_CR3_LOAD_EXITING)) vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); break; case VCPU_EXREG_CR4: -- cgit From 908b7d43c02c5f1f95beff66ce4ca30dfc3b134c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:04 -0700 Subject: KVM: x86/mmu: Skip the permission_fault() check on MMIO if CR0.PG=0 Skip the MMU permission_fault() check if paging is disabled when verifying the cached MMIO GVA is usable. The check is unnecessary and can theoretically get a false positive since the MMU doesn't zero out "permissions" or "pkru_mask" when guest paging is disabled. 
The obvious alternative is to zero out all the bitmasks when configuring nonpaging MMUs, but that's unnecessary work and doesn't align with the MMU's general approach of doing as little as possible for flows that are supposed to be unreachable. This is nearly a nop as the false positive is nothing more than an insignificant performance blip, and more or less limited to string MMIO when L1 is running with paging disabled. KVM doesn't cache MMIO if L2 is active with nested TDP since the "GVA" is really an L2 GPA. If L2 is active without nested TDP, then paging can't be disabled as neither VMX nor SVM allows entering the guest without paging of some form. Jumping back to L1 with paging disabled, in that case direct_map is true and so KVM will use CR2 as a GPA; the only time it doesn't is if the fault from the emulator doesn't match or emulator_can_use_gpa(), and that fails only on string MMIO and other instructions with multiple memory operands. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-27-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ed8b6d1b4e85..c56788f8d180 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6568,9 +6568,9 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, * there is no pkey in EPT page table for L1 guest or EPT * shadow page table for L2 guest. */ - if (vcpu_match_mmio_gva(vcpu, gva) - && !permission_fault(vcpu, vcpu->arch.walk_mmu, - vcpu->arch.mmio_access, 0, access)) { + if (vcpu_match_mmio_gva(vcpu, gva) && (!is_paging(vcpu) || + !permission_fault(vcpu, vcpu->arch.walk_mmu, + vcpu->arch.mmio_access, 0, access))) { *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | (gva & (PAGE_SIZE - 1)); trace_vcpu_match_mmio(gva, *gpa, write, false); -- cgit From 32437c2aea428f9c3479904e342b02f5eee3a7f6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:05 -0700 Subject: KVM: VMX: Process CR0.PG side effects after setting CR0 assets Move the long mode and EPT w/o unrestricted guest side effect processing down in vmx_set_cr0() so that the EPT && !URG case doesn't have to stuff vcpu->arch.cr0 early. This also fixes an oddity where CR0 might not be marked available, i.e. the early vcpu->arch.cr0 write would appear to be in danger of being overwritten, though that can't actually happen in the current code since CR0.TS is the only guest-owned bit, and CR0.TS is not read by vmx_set_cr4(). 
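For reference, this is roughly how a guest-owned CR0 bit is pulled out of the VMCS on demand, paraphrased from vmx_cache_reg() (illustration only, not part of this diff):

	case VCPU_EXREG_CR0:
		guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
		vcpu->arch.cr0 &= ~guest_owned_bits;
		vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits;
		break;

Since CR0.TS is the only guest-owned bit and vmx_set_cr4() never reads it, marking CR0 available before the side effects run cannot surface a stale value.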
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-28-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 0c1e578967ff..aa547100f0df 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3003,9 +3003,11 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long hw_cr0; + unsigned long hw_cr0, old_cr0_pg; u32 tmp; + old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG); + hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF); if (is_unrestricted_guest(vcpu)) hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; @@ -3021,11 +3023,16 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) enter_rmode(vcpu); } + vmcs_writel(CR0_READ_SHADOW, cr0); + vmcs_writel(GUEST_CR0, hw_cr0); + vcpu->arch.cr0 = cr0; + kvm_register_mark_available(vcpu, VCPU_EXREG_CR0); + #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME) { - if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) + if (!old_cr0_pg && (cr0 & X86_CR0_PG)) enter_lmode(vcpu); - if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) + else if (old_cr0_pg && !(cr0 & X86_CR0_PG)) exit_lmode(vcpu); } #endif @@ -3066,17 +3073,11 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) exec_controls_set(vmx, tmp); } - if (!is_paging(vcpu) != !(cr0 & X86_CR0_PG)) { - vcpu->arch.cr0 = cr0; + /* Note, vmx_set_cr4() consumes the new vcpu->arch.cr0. */ + if ((old_cr0_pg ^ cr0) & X86_CR0_PG) vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } } - vmcs_writel(CR0_READ_SHADOW, cr0); - vmcs_writel(GUEST_CR0, hw_cr0); - vcpu->arch.cr0 = cr0; - kvm_register_mark_available(vcpu, VCPU_EXREG_CR0); - /* depends on vcpu->arch.cr0 to be set to a new value */ vmx->emulation_required = emulation_required(vcpu); } -- cgit From 1dd7a4f18fbcc0db1acc07df5c1ae8ba4ce10593 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:06 -0700 Subject: KVM: VMX: Skip emulation required checks during pmode/rmode transitions Don't refresh "emulation required" when stuffing segments during transitions to/from real mode when running without unrestricted guest. The checks are unnecessary as vmx_set_cr0() unconditionally rechecks "emulation required". They also happen to be broken, as enter_pmode() and enter_rmode() run with a stale vcpu->arch.cr0. 
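For context, "emulation required" is cheap to recompute; the helper is roughly (quoted loosely from vmx.c, illustration only):

	static bool emulation_required(struct kvm_vcpu *vcpu)
	{
		return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu);
	}

so unconditionally refreshing it at the end of vmx_set_cr0() costs next to nothing.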
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-29-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aa547100f0df..ff3f75105ae9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2719,6 +2719,8 @@ static __init int alloc_kvm_area(void) return 0; } +static void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); + static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { @@ -2735,7 +2737,7 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, save->dpl = save->selector & SEGMENT_RPL_MASK; save->s = 1; } - vmx_set_segment(vcpu, save, seg); + __vmx_set_segment(vcpu, save, seg); } static void enter_pmode(struct kvm_vcpu *vcpu) @@ -2756,7 +2758,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx->rmode.vm86_active = 0; - vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); + __vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); flags = vmcs_readl(GUEST_RFLAGS); flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; @@ -3291,7 +3293,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) return ar; } -void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) +static void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -3304,7 +3306,7 @@ void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) vmcs_write16(sf->selector, var->selector); else if (var->s) fix_rmode_seg(seg, &vmx->rmode.segs[seg]); - goto out; + return; } vmcs_writel(sf->base, var->base); @@ -3326,9 +3328,13 @@ void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) var->type |= 0x1; /* Accessed */ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); +} -out: - vmx->emulation_required = emulation_required(vcpu); +void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) +{ + __vmx_set_segment(vcpu, var, seg); + + to_vmx(vcpu)->emulation_required = emulation_required(vcpu); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -- cgit From 816be9e9be8d2e19dcea35fbec06f00cd5749fed Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:07 -0700 Subject: KVM: nVMX: Don't evaluate "emulation required" on nested VM-Exit Use the "internal" variants of setting segment registers when stuffing state on nested VM-Exit in order to skip the "emulation required" updates. VM-Exit must always go to protected mode, and all segments are mostly hardcoded (to valid values) on VM-Exit. The bits of the segments that aren't hardcoded are explicitly checked during VM-Enter, e.g. the selector RPLs must all be zero. 
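Illustrative call chain for the affected path (sketch only):

	nested_vmx_vmexit
	  load_vmcs12_host_state
	    __vmx_set_segment(vcpu, &seg, VCPU_SREG_CS)  // no "emulation required" refresh

Since VM-Exit architecturally lands in protected mode with valid segment state, skipping the refresh is safe.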
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-30-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 16 ++++++++-------- arch/x86/kvm/vmx/vmx.c | 6 ++---- arch/x86/kvm/vmx/vmx.h | 2 +- 3 files changed, 11 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 7f8184f432b4..a77cfc8bcf11 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4267,7 +4267,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, seg.l = 1; else seg.db = 1; - vmx_set_segment(vcpu, &seg, VCPU_SREG_CS); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_CS); seg = (struct kvm_segment) { .base = 0, .limit = 0xFFFFFFFF, @@ -4278,17 +4278,17 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, .g = 1 }; seg.selector = vmcs12->host_ds_selector; - vmx_set_segment(vcpu, &seg, VCPU_SREG_DS); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_DS); seg.selector = vmcs12->host_es_selector; - vmx_set_segment(vcpu, &seg, VCPU_SREG_ES); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_ES); seg.selector = vmcs12->host_ss_selector; - vmx_set_segment(vcpu, &seg, VCPU_SREG_SS); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_SS); seg.selector = vmcs12->host_fs_selector; seg.base = vmcs12->host_fs_base; - vmx_set_segment(vcpu, &seg, VCPU_SREG_FS); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_FS); seg.selector = vmcs12->host_gs_selector; seg.base = vmcs12->host_gs_base; - vmx_set_segment(vcpu, &seg, VCPU_SREG_GS); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_GS); seg = (struct kvm_segment) { .base = vmcs12->host_tr_base, .limit = 0x67, @@ -4296,11 +4296,11 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, .type = 11, .present = 1 }; - vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); memset(&seg, 0, sizeof(seg)); seg.unusable = 1; - vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR); kvm_set_dr(vcpu, 7, 0x400); vmcs_write64(GUEST_IA32_DEBUGCTL, 0); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ff3f75105ae9..359c30be62bb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2719,8 +2719,6 @@ static __init int alloc_kvm_area(void) return 0; } -static void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); - static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { @@ -3293,7 +3291,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) return ar; } -static void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) +void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -3330,7 +3328,7 @@ static void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, in vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); } -void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) +static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { __vmx_set_segment(vcpu, var, seg); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index db88ed4f2121..fd076bd26676 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -371,7 +371,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void set_cr4_guest_host_mask(struct vcpu_vmx *vmx); void ept_save_pdptrs(struct kvm_vcpu *vcpu); void 
vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); -void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level); bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu); -- cgit From ef8a0fa59be78e7b80bdf35c3391d60c061c7b24 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:08 -0700 Subject: KVM: SVM: Tweak order of cr0/cr4/efer writes at RESET/INIT Hoist svm_set_cr0() up in the sequence of register initialization during vCPU RESET/INIT, purely to match VMX so that a future patch can move the sequences to common x86. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-31-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9007adeb01fb..0afa060798a5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1249,18 +1249,13 @@ static void init_vmcb(struct kvm_vcpu *vcpu) init_sys_seg(&save->ldtr, SEG_TYPE_LDT); init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); + svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); svm_set_cr4(vcpu, 0); svm_set_efer(vcpu, 0); save->dr6 = 0xffff0ff0; kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); vcpu->arch.regs[VCPU_REGS_RIP] = 0x0000fff0; - /* * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. * It also updates the guest-visible cr0 value. */ - svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); - save->cr4 = X86_CR4_PAE; if (npt_enabled) { -- cgit From 6cfe7b83acdcdd4d4ea9354f007a88bc0ccb16b9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:09 -0700 Subject: KVM: SVM: Drop redundant writes to vmcb->save.cr4 at RESET/INIT Drop direct writes to vmcb->save.cr4 during vCPU RESET/INIT, as the values being written are fully redundant with respect to svm_set_cr4(vcpu, 0) a few lines earlier. Note, svm_set_cr4() also correctly forces X86_CR4_PAE when NPT is disabled. No functional change intended. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-32-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0afa060798a5..1038bfdd7bbb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1256,8 +1256,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); vcpu->arch.regs[VCPU_REGS_RIP] = 0x0000fff0; - save->cr4 = X86_CR4_PAE; - if (npt_enabled) { /* Setup VMCB for Nested Paging */ control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE; @@ -1267,7 +1265,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_clr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = vcpu->arch.pat; save->cr3 = 0; - save->cr4 = 0; } svm->current_vmcb->asid_generation = 0; svm->asid = 0; -- cgit From d0f9f826d8ac06446391ceb3d4a440f5b48b3134 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:10 -0700 Subject: KVM: SVM: Stuff save->dr6 during VMSA sync, not at RESET/INIT Move code to stuff vmcb->save.dr6 to its architectural init value from svm_vcpu_reset() into sev_es_sync_vmsa(). Except for protected guests, a.k.a. SEV-ES guests, vmcb->save.dr6 is set during VM-Enter, i.e. 
the extra write is unnecessary. For SEV-ES, stuffing save->dr6 handles a theoretical case where the VMSA could be encrypted before the first KVM_RUN. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-33-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 1 + arch/x86/kvm/svm/svm.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6710d9ee2e4b..9f1585f40c85 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -584,6 +584,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->xcr0 = svm->vcpu.arch.xcr0; save->pkru = svm->vcpu.arch.pkru; save->xss = svm->vcpu.arch.ia32_xss; + save->dr6 = svm->vcpu.arch.dr6; /* * SEV-ES will use a VMSA that is pointed to by the VMCB, not diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1038bfdd7bbb..64563f876fdf 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1252,7 +1252,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); svm_set_cr4(vcpu, 0); svm_set_efer(vcpu, 0); - save->dr6 = 0xffff0ff0; kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); vcpu->arch.regs[VCPU_REGS_RIP] = 0x0000fff0; -- cgit From 400dd54b37172b64e4ceaa6bfaf2729585840375 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:11 -0700 Subject: KVM: VMX: Skip pointless MSR bitmap update when setting EFER Split setup_msrs() into vmx_setup_uret_msrs() and an open coded refresh of the MSR bitmap, and skip the latter when refreshing the user return MSRs during an EFER load. Only the x2APIC MSRs are dynamically exposed and hidden, and those are not affected by a change in EFER. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-34-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 359c30be62bb..d5d9fc5df259 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1647,11 +1647,12 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr, } /* - * Set up the vmcs to automatically save and restore system - * msrs. Don't touch the 64-bit msrs if the guest is in legacy - * mode, as fiddling with msrs is very expensive. + * Configuring user return MSRs to automatically save, load, and restore MSRs + * that need to be shoved into hardware when running the guest. Note, omitting + * an MSR here does _NOT_ mean it's not emulated, only that it will not be + * loaded into hardware when running the guest. */ -static void setup_msrs(struct vcpu_vmx *vmx) +static void vmx_setup_uret_msrs(struct vcpu_vmx *vmx) { #ifdef CONFIG_X86_64 bool load_syscall_msrs; @@ -1681,9 +1682,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) */ vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM)); - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(&vmx->vcpu); - /* * The set of MSRs to load may have changed, reload MSRs before the * next VM-Enter. 
@@ -2874,7 +2872,7 @@ int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) msr->data = efer & ~EFER_LME; } - setup_msrs(vmx); + vmx_setup_uret_msrs(vmx); return 0; } @@ -4470,7 +4468,10 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (kvm_mpx_supported()) vmcs_write64(GUEST_BNDCFGS, 0); - setup_msrs(vmx); + vmx_setup_uret_msrs(vmx); + + if (cpu_has_vmx_msr_bitmap()) + vmx_update_msr_bitmap(&vmx->vcpu); vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ -- cgit From 432979b5034208890e323e86724417f02825abb7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:12 -0700 Subject: KVM: VMX: Refresh list of user return MSRs after setting guest CPUID After a CPUID update, refresh the list of user return MSRs that are loaded into hardware when running the vCPU. This is necessary to handle the oddball case where userspace exposes X86_FEATURE_RDTSCP to the guest after the vCPU is running. Fixes: 0023ef39dc35 ("kvm: vmx: Set IA32_TSC_AUX for legacy mode guests") Fixes: 4e47c7a6d714 ("KVM: VMX: Add instruction rdtscp support for guest") Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-35-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d5d9fc5df259..09c5a021e49b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7170,6 +7170,8 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */ vcpu->arch.xsaves_enabled = false; + vmx_setup_uret_msrs(vmx); + if (cpu_has_secondary_exec_ctrls()) { vmx_compute_secondary_exec_control(vmx); vmcs_set_secondary_exec_control(vmx); -- cgit From c5c9f920f7a50ea205c9efec7e589556ebaf85dc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:13 -0700 Subject: KVM: VMX: Don't _explicitly_ reconfigure user return MSRs on vCPU INIT When emulating vCPU INIT, do not unconditionally refresh the list of user return MSRs that need to be loaded into hardware when running the guest. Unconditionally refreshing the list is confusing, as the vast majority of MSRs are not modified on INIT. The real motivation is to handle the case where an INIT during long mode obviates the need to load the SYSCALL MSRs, and that is handled as needed by vmx_set_efer(). 
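For illustration, the INIT-in-long-mode flow that makes the explicit refresh redundant looks roughly like this (sketch only, per the vmx_set_efer() change earlier in this series):

	vmx_vcpu_reset(vcpu, init_event=true)
	  vmx_set_efer(vcpu, 0)
	    vmx_setup_uret_msrs(vmx)  // recomputes the list, drops the SYSCALL MSRs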
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-36-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 09c5a021e49b..2e2b469b8ced 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4410,6 +4410,8 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmx->pt_desc.guest.output_mask = 0x7F; vmcs_write64(GUEST_IA32_RTIT_CTL, 0); } + + vmx_setup_uret_msrs(vmx); } static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -4468,8 +4470,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (kvm_mpx_supported()) vmcs_write64(GUEST_BNDCFGS, 0); - vmx_setup_uret_msrs(vmx); - if (cpu_has_vmx_msr_bitmap()) vmx_update_msr_bitmap(&vmx->vcpu); -- cgit From f39e805ee115a67878c5475f1ef84466d424bb2e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:14 -0700 Subject: KVM: x86: Move setting of sregs during vCPU RESET/INIT to common x86 Move the setting of CR0, CR4, EFER, RFLAGS, and RIP from vendor code to common x86. VMX and SVM now have near-identical sequences, the only difference being that VMX updates the exception bitmap. Updating the bitmap on SVM is unnecessary, but benign. Unfortunately it can't be left behind in VMX due to the need to update exception intercepts after the control registers are set. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-37-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 6 ------ arch/x86/kvm/vmx/vmx.c | 9 --------- arch/x86/kvm/x86.c | 8 ++++++++ 3 files changed, 8 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 64563f876fdf..8ebdcd92007d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1249,12 +1249,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) init_sys_seg(&save->ldtr, SEG_TYPE_LDT); init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); - svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); - svm_set_cr4(vcpu, 0); - svm_set_efer(vcpu, 0); - kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); - vcpu->arch.regs[VCPU_REGS_RIP] = 0x0000fff0; - if (npt_enabled) { /* Setup VMCB for Nested Paging */ control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2e2b469b8ced..7518d4cf8156 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4455,9 +4455,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } - kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); - kvm_rip_write(vcpu, 0xfff0); - vmcs_writel(GUEST_GDTR_BASE, 0); vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); @@ -4485,12 +4482,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - vmx_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); - vmx_set_cr4(vcpu, 0); - vmx_set_efer(vcpu, 0); - - vmx_update_exception_bitmap(vcpu); - vpid_sync_context(vmx->vpid); if (init_event) vmx_clear_hlt(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c56788f8d180..6c55f2e83a7c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10875,6 +10875,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) static_call(kvm_x86_vcpu_reset)(vcpu, init_event); + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); + kvm_rip_write(vcpu, 
0xfff0); + + static_call(kvm_x86_set_cr0)(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); + static_call(kvm_x86_set_cr4)(vcpu, 0); + static_call(kvm_x86_set_efer)(vcpu, 0); + static_call(kvm_x86_update_exception_bitmap)(vcpu); + /* * Reset the MMU context if paging was enabled prior to INIT (which is * implied if CR0.PG=1 as CR0 will be '0' prior to RESET). Unlike the -- cgit From 9e4784e19daaa5cd637753d5300fc5a42d0c694a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:15 -0700 Subject: KVM: VMX: Remove obsolete MSR bitmap refresh at vCPU RESET/INIT Remove an unnecessary MSR bitmap refresh during vCPU RESET/INIT. In both cases, the MSR bitmap already has the desired values and state. At RESET, the vCPU is guaranteed to be running with x2APIC disabled, the x2APIC MSRs are guaranteed to be intercepted due to the MSR bitmap being initialized to all ones by alloc_loaded_vmcs(), and vmx->msr_bitmap_mode is guaranteed to be zero, i.e. reflecting x2APIC disabled. At INIT, the APIC_BASE MSR is not modified, thus there can't be any change in x2APIC state. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-38-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7518d4cf8156..af7aa9bb4bdb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4467,9 +4467,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (kvm_mpx_supported()) vmcs_write64(GUEST_BNDCFGS, 0); - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(&vmx->vcpu); - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ if (cpu_has_vmx_tpr_shadow() && !init_event) { -- cgit From 284036c644a1dc71890503c849b22c3126308067 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:16 -0700 Subject: KVM: nVMX: Remove obsolete MSR bitmap refresh at nested transitions Drop unnecessary MSR bitmap updates during nested transitions, as L1's APIC_BASE MSR is not modified by the standard VM-Enter/VM-Exit flows, and L2's MSR bitmap is managed separately. In the unlikely event that L1 is pathological and loads APIC_BASE via the VM-Exit load list, KVM will handle updating the bitmap in its normal WRMSR flows. 
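For the pathological load-list case, the flow that still refreshes the bitmap looks roughly like this (sketch only; the exact lapic plumbing is paraphrased):

	nested_vmx_load_msr
	  kvm_set_msr(vcpu, MSR_IA32_APICBASE, ...)
	    kvm_set_apic_base
	      kvm_lapic_set_base
	        static_call(kvm_x86_set_virtual_apic_mode) -> vmx_set_virtual_apic_mode
	          vmx_update_msr_bitmap(vcpu)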
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-39-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 6 ------ arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/vmx/vmx.h | 1 - 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index a77cfc8bcf11..0d0dd6580cfd 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4305,9 +4305,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, kvm_set_dr(vcpu, 7, 0x400); vmcs_write64(GUEST_IA32_DEBUGCTL, 0); - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(vcpu); - if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); @@ -4386,9 +4383,6 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(vcpu); - /* * This nasty bit of open coding is a compromise between blindly * loading L1's MSRs using the exit load lists (incorrect emulation diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index af7aa9bb4bdb..6e79f1b2653b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3865,7 +3865,7 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode) } } -void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u8 mode = vmx_msr_bitmap_mode(vcpu); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index fd076bd26676..a408a9070662 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -376,7 +376,6 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level); bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu); void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu); -void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); bool vmx_nmi_blocked(struct kvm_vcpu *vcpu); bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu); bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); -- cgit From 002f87a41e9a6ef795f7f493f9434f51fb9f7d35 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:17 -0700 Subject: KVM: VMX: Don't redo x2APIC MSR bitmaps when userspace filter is changed Drop an explicit call to update the x2APIC MSRs when the userspace MSR filter is modified. The x2APIC MSRs are deliberately exempt from userspace filtering. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-40-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6e79f1b2653b..7100e70ab718 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3936,7 +3936,6 @@ static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu) } pt_update_intercept_for_msr(vcpu); - vmx_update_msr_bitmap_x2apic(vcpu, vmx_msr_bitmap_mode(vcpu)); } static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, -- cgit From e7c701dd7a5019c8628007c91c0e8d804c194667 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:18 -0700 Subject: KVM: VMX: Remove unnecessary initialization of msr_bitmap_mode Don't bother initializing msr_bitmap_mode to 0, all of struct vcpu_vmx is zero initialized. No functional change intended. 
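For reference (a sketch of the allocation path, outside this diff): vCPU structs come from kvm_vm_ioctl_create_vcpu(), which allocates them zeroed, e.g.

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);

and for VMX the cache object is the embedding struct vcpu_vmx, so msr_bitmap_mode starts life as 0.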
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-41-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7100e70ab718..308820173505 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6841,7 +6841,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R); } - vmx->msr_bitmap_mode = 0; vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); -- cgit From 84ec8d2d539f7286d4504c2d377002f1ea7458d6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:19 -0700 Subject: KVM: VMX: Smush x2APIC MSR bitmap adjustments into single function Consolidate all of the dynamic MSR bitmap adjustments into vmx_update_msr_bitmap_x2apic(), and rename the mode tracker to reflect that it is x2APIC specific. If KVM gains more cases of dynamic MSR pass-through, odds are very good that those new cases will be better off with their own logic, e.g. see Intel PT MSRs and MSR_IA32_SPEC_CTRL. Attempting to handle all updates in a common helper did more harm than good, as KVM ended up collecting a large number of useless "updates". Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-42-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 55 +++++++++++++++++++------------------------------- arch/x86/kvm/vmx/vmx.h | 2 +- 2 files changed, 22 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 308820173505..96d9147620bf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3812,21 +3812,6 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type) vmx_set_msr_bitmap_write(msr_bitmap, msr); } -static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) -{ - u8 mode = 0; - - if (cpu_has_secondary_exec_ctrls() && - (secondary_exec_controls_get(to_vmx(vcpu)) & - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { - mode |= MSR_BITMAP_MODE_X2APIC; - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) - mode |= MSR_BITMAP_MODE_X2APIC_APICV; - } - - return mode; -} - static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode) { unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; @@ -3844,11 +3829,29 @@ static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode) } } -static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode) +static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + u8 mode; + if (!cpu_has_vmx_msr_bitmap()) return; + if (cpu_has_secondary_exec_ctrls() && + (secondary_exec_controls_get(vmx) & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { + mode = MSR_BITMAP_MODE_X2APIC; + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) + mode |= MSR_BITMAP_MODE_X2APIC_APICV; + } else { + mode = 0; + } + + if (mode == vmx->x2apic_msr_bitmap_mode) + return; + + vmx->x2apic_msr_bitmap_mode = mode; + vmx_reset_x2apic_msrs(vcpu, mode); /* @@ -3865,21 +3868,6 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode) } } -static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u8 mode = vmx_msr_bitmap_mode(vcpu); - u8 changed = mode ^ vmx->msr_bitmap_mode; - - if (!changed) - return; - - if (changed & (MSR_BITMAP_MODE_X2APIC | 
MSR_BITMAP_MODE_X2APIC_APICV)) - vmx_update_msr_bitmap_x2apic(vcpu, mode); - - vmx->msr_bitmap_mode = mode; -} - void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4139,8 +4127,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); } - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(vcpu); + vmx_update_msr_bitmap_x2apic(vcpu); } u32 vmx_exec_control(struct vcpu_vmx *vmx) @@ -6190,7 +6177,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) } secondary_exec_controls_set(vmx, sec_exec_control); - vmx_update_msr_bitmap(vcpu); + vmx_update_msr_bitmap_x2apic(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a408a9070662..0ecc41189292 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -227,7 +227,7 @@ struct nested_vmx { struct vcpu_vmx { struct kvm_vcpu vcpu; u8 fail; - u8 msr_bitmap_mode; + u8 x2apic_msr_bitmap_mode; /* * If true, host state has been stored in vmx->loaded_vmcs for -- cgit From 7aa13fc3d8260c4215b8aea971d120e675d8781f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:20 -0700 Subject: KVM: VMX: Remove redundant write to set vCPU as active at RESET/INIT Drop a call to vmx_clear_hlt() during vCPU INIT, the guest's activity state is unconditionally set to "active" a few lines earlier in vmx_vcpu_reset(). No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-43-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 96d9147620bf..b306ca2238ed 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4466,8 +4466,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); vpid_sync_context(vmx->vpid); - if (init_event) - vmx_clear_hlt(vcpu); } static void vmx_enable_irq_window(struct kvm_vcpu *vcpu) -- cgit From e54949408abfbe3905ae19ac3b42ecd2d29bc3c5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:21 -0700 Subject: KVM: VMX: Move RESET-only VMWRITE sequences to init_vmcs() Move VMWRITE sequences in vmx_vcpu_reset() guarded by !init_event into init_vmcs() to make it more obvious that they're, uh, initializing the VMCS. No meaningful functional change intended (though the order of VMWRITEs and whatnot is different). 
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-44-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b306ca2238ed..ae8e62df16dd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4397,6 +4397,19 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmcs_write64(GUEST_IA32_RTIT_CTL, 0); } + vmcs_write32(GUEST_SYSENTER_CS, 0); + vmcs_writel(GUEST_SYSENTER_ESP, 0); + vmcs_writel(GUEST_SYSENTER_EIP, 0); + vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + + if (cpu_has_vmx_tpr_shadow()) { + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); + if (cpu_need_tpr_shadow(&vmx->vcpu)) + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, + __pa(vmx->vcpu.arch.apic->regs)); + vmcs_write32(TPR_THRESHOLD, 0); + } + vmx_setup_uret_msrs(vmx); } @@ -4434,13 +4447,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); - if (!init_event) { - vmcs_write32(GUEST_SYSENTER_CS, 0); - vmcs_writel(GUEST_SYSENTER_ESP, 0); - vmcs_writel(GUEST_SYSENTER_EIP, 0); - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); - } - vmcs_writel(GUEST_GDTR_BASE, 0); vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); @@ -4455,14 +4461,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ - if (cpu_has_vmx_tpr_shadow() && !init_event) { - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); - if (cpu_need_tpr_shadow(vcpu)) - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, - __pa(vcpu->arch.apic->regs)); - vmcs_write32(TPR_THRESHOLD, 0); - } - kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); vpid_sync_context(vmx->vpid); -- cgit From 265e43530cb2eb14d5b78e89d310c79959e0eb26 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:22 -0700 Subject: KVM: SVM: Emulate #INIT in response to triple fault shutdown Emulate a full #INIT instead of simply initializing the VMCB if the guest hits a shutdown. Initializing the VMCB but not other vCPU state, much of which is mirrored by the VMCB, results in incoherent and broken vCPU state. Ideally, KVM would not automatically init anything on shutdown, and instead put the vCPU into e.g. KVM_MP_STATE_UNINITIALIZED and force userspace to explicitly INIT or RESET the vCPU. Even better would be to add KVM_MP_STATE_SHUTDOWN, since technically NMI can break shutdown (and SMI on Intel CPUs). But, that ship has sailed, and emulating #INIT is the next best thing as that has at least some connection with reality since there exist bare metal platforms that automatically INIT the CPU if it hits shutdown. Fixes: 46fe4ddd9dbb ("[PATCH] KVM: SVM: Propagate cpu shutdown events to userspace") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-45-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 10 +++++++--- arch/x86/kvm/x86.c | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8ebdcd92007d..4102e9e3bcdc 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2050,11 +2050,15 @@ static int shutdown_interception(struct kvm_vcpu *vcpu) return -EINVAL; /* - * VMCB is undefined after a SHUTDOWN intercept - * so reinitialize it. + * VMCB is undefined after a SHUTDOWN intercept. INIT the vCPU to put + * the VMCB in a known good state. 
Unfortunately, KVM doesn't have + * KVM_MP_STATE_SHUTDOWN and can't add it without potentially breaking + * userspace. From a platform view, INIT is acceptable behavior as + * there exist bare metal platforms that automatically INIT the CPU + * in response to shutdown. */ clear_page(svm->vmcb); - init_vmcb(vcpu); + kvm_vcpu_reset(vcpu, true); kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c55f2e83a7c..5b04c07c1ec5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10906,6 +10906,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (init_event) kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); } +EXPORT_SYMBOL_GPL(kvm_vcpu_reset); void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) { -- cgit From 46f4898b207ffeeeaebc0fdd4bf4082bf0f88107 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:23 -0700 Subject: KVM: SVM: Drop redundant clearing of vcpu->arch.hflags at INIT/RESET Drop redundant clears of vcpu->arch.hflags in init_vmcb() since kvm_vcpu_reset() always clears hflags, and it is also always zero at vCPU creation time. And of course, the second clearing in init_vmcb() was always redundant. Suggested-by: Reiji Watanabe Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-46-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4102e9e3bcdc..9d72b1df426e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1161,8 +1161,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; - vcpu->arch.hflags = 0; - svm_set_intercept(svm, INTERCEPT_CR0_READ); svm_set_intercept(svm, INTERCEPT_CR3_READ); svm_set_intercept(svm, INTERCEPT_CR4_READ); @@ -1264,7 +1262,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm->nested.vmcb12_gpa = INVALID_GPA; svm->nested.last_vmcb12_gpa = INVALID_GPA; - vcpu->arch.hflags = 0; if (!kvm_pause_in_guest(vcpu->kvm)) { control->pause_filter_count = pause_filter_count; -- cgit From 4c72ab5aa6e0ac2a5c11f9180e1fff89d7f2d38b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:24 -0700 Subject: KVM: x86: Preserve guest's CR0.CD/NW on INIT Preserve CR0.CD and CR0.NW on INIT instead of forcing them to '1', as defined by both Intel's SDM and AMD's APM. Note, current versions of Intel's SDM are very poorly written with respect to INIT behavior. Table 9-1. "IA-32 and Intel 64 Processor States Following Power-up, Reset, or INIT" quite clearly lists power-up, RESET, _and_ INIT as setting CR0=60000010H, i.e. CD/NW=1. But the SDM then attempts to qualify CD/NW behavior in a footnote: 2. The CD and NW flags are unchanged, bit 4 is set to 1, all other bits are cleared. Presumably that footnote is only meant for INIT, as the RESET case and especially the power-up case are rather non-sensical. Another footnote all but confirms that: 6. Internal caches are invalid after power-up and RESET, but left unchanged with an INIT. Bare metal testing shows that CD/NW are indeed preserved on INIT (someone else can hack their BIOS to check RESET and power-up :-D). 
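Concretely, using the architectural bit values (X86_CR0_CD = BIT(30), X86_CR0_NW = BIT(29), X86_CR0_ET = BIT(4); worked example only):

	RESET: CR0 = X86_CR0_CD | X86_CR0_NW | X86_CR0_ET = 0x60000010
	INIT:  CR0 = X86_CR0_ET | (old_cr0 & (X86_CR0_CD | X86_CR0_NW))

which matches the SDM's 60000010H for power-up/RESET while honoring the footnote for INIT.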
Reported-by: Reiji Watanabe Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-47-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5b04c07c1ec5..4d246b7f6ce1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10792,6 +10792,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { unsigned long old_cr0 = kvm_read_cr0(vcpu); + unsigned long new_cr0; u32 eax, dummy; kvm_lapic_reset(vcpu, init_event); @@ -10878,7 +10879,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_rip_write(vcpu, 0xfff0); - static_call(kvm_x86_set_cr0)(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); + /* + * CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions + * of Intel's SDM list CD/NW as being set on INIT, but they contradict + * (or qualify) that with a footnote stating that CD/NW are preserved. + */ + new_cr0 = X86_CR0_ET; + if (init_event) + new_cr0 |= (old_cr0 & (X86_CR0_NW | X86_CR0_CD)); + else + new_cr0 |= X86_CR0_NW | X86_CR0_CD; + + static_call(kvm_x86_set_cr0)(vcpu, new_cr0); static_call(kvm_x86_set_cr4)(vcpu, 0); static_call(kvm_x86_set_efer)(vcpu, 0); static_call(kvm_x86_update_exception_bitmap)(vcpu); -- cgit From db105fab8d141fc0d9179600c51eba0d168dad34 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 2 Aug 2021 08:52:50 -0400 Subject: KVM: nSVM: remove useless kvm_clear_*_queue For an event to be in injected state when nested_svm_vmrun executes, it must have come from exitintinfo when svm_complete_interrupts ran: vcpu_enter_guest static_call(kvm_x86_run) -> svm_vcpu_run svm_complete_interrupts // now the event went from "exitintinfo" to "injected" static_call(kvm_x86_handle_exit) -> handle_exit svm_invoke_exit_handler vmrun_interception nested_svm_vmrun However, no event could have been in exitintinfo before a VMRUN vmexit. The code in svm.c is a bit more permissive than the one in vmx.c: if (is_external_interrupt(svm->vmcb->control.exit_int_info) && exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) but in any case, a VMRUN instruction would not even start to execute during an attempted event delivery. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 61738ff8ef33..5e13357da21e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -659,11 +659,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) goto out; } - - /* Clear internal status */ - kvm_clear_exception_queue(vcpu); - kvm_clear_interrupt_queue(vcpu); - /* * Since vmcb01 is not in use, we can use it to store some of the L1 * state. -- cgit From 269e9552d208179bc14ea7f80a9e3e8ae97795a2 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Mon, 12 Jul 2021 22:33:38 -0400 Subject: KVM: const-ify all relevant uses of struct kvm_memory_slot As alluded to in commit f36f3f2846b5 ("KVM: add "new" argument to kvm_arch_commit_memory_region"), a bunch of other places where struct kvm_memory_slot is used, needs to be refactored to preserve the "const"ness of struct kvm_memory_slot across-the-board. 
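As an illustration of what the const-ness buys (hypothetical caller, not part of this patch): a stray write through one of the now-const pointers becomes a compile-time error,

	static void example(const struct kvm_memory_slot *slot)
	{
		/* error: assignment of member 'flags' in read-only object */
		slot->flags |= KVM_MEM_LOG_DIRTY_PAGES;
	}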
Signed-off-by: Hamza Mahfooz Message-Id: <20210713023338.57108-1-someguy@effective-light.com> [Do not touch body of slot_rmap_walk_init. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/mmu/mmu.c | 42 ++++++++++++++++++++--------------------- arch/x86/kvm/mmu/mmu_internal.h | 4 ++-- arch/x86/kvm/mmu/tdp_mmu.c | 7 ++++--- arch/x86/kvm/mmu/tdp_mmu.h | 6 +++--- arch/x86/kvm/x86.c | 7 ++----- 6 files changed, 34 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ec8e4aca69c8..99f37781a6fc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1537,12 +1537,12 @@ void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot, + const struct kvm_memory_slot *memslot, int start_level); void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *memslot); void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, - struct kvm_memory_slot *memslot); + const struct kvm_memory_slot *memslot); void kvm_mmu_zap_all(struct kvm *kvm); void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 29010abb659c..e702361b4409 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -794,7 +794,7 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, return &slot->arch.lpage_info[level - 2][idx]; } -static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot, +static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot, gfn_t gfn, int count) { struct kvm_lpage_info *linfo; @@ -807,12 +807,12 @@ static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot, } } -void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn) +void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn) { update_gfn_disallow_lpage_count(slot, gfn, 1); } -void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn) +void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn) { update_gfn_disallow_lpage_count(slot, gfn, -1); } @@ -999,7 +999,7 @@ static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep) } static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, - struct kvm_memory_slot *slot) + const struct kvm_memory_slot *slot) { unsigned long idx; @@ -1228,7 +1228,7 @@ static bool spte_wrprot_for_clear_dirty(u64 *sptep) * Returns true iff any D or W bits were cleared. */ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot) + const struct kvm_memory_slot *slot) { u64 *sptep; struct rmap_iterator iter; @@ -1387,7 +1387,7 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) } static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot) + const struct kvm_memory_slot *slot) { u64 *sptep; struct rmap_iterator iter; @@ -1452,7 +1452,7 @@ restart: struct slot_rmap_walk_iterator { /* input fields. 
*/ - struct kvm_memory_slot *slot; + const struct kvm_memory_slot *slot; gfn_t start_gfn; gfn_t end_gfn; int start_level; @@ -1479,7 +1479,7 @@ rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level) static void slot_rmap_walk_init(struct slot_rmap_walk_iterator *iterator, - struct kvm_memory_slot *slot, int start_level, + const struct kvm_memory_slot *slot, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn) { iterator->slot = slot; @@ -5313,12 +5313,13 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level, EXPORT_SYMBOL_GPL(kvm_configure_mmu); /* The return value indicates if tlb flush on all vcpus is needed. */ -typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot); +typedef bool (*slot_level_handler) (struct kvm *kvm, + struct kvm_rmap_head *rmap_head, + const struct kvm_memory_slot *slot); /* The caller should hold mmu-lock before calling this function. */ static __always_inline bool -slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, +slot_handle_level_range(struct kvm *kvm, const struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn, bool flush_on_yield, bool flush) @@ -5345,7 +5346,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, } static __always_inline bool -slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, +slot_handle_level(struct kvm *kvm, const struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, bool flush_on_yield) { @@ -5356,7 +5357,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, } static __always_inline bool -slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, +slot_handle_leaf(struct kvm *kvm, const struct kvm_memory_slot *memslot, slot_level_handler fn, bool flush_on_yield) { return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K, @@ -5615,7 +5616,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) if (start >= end) continue; - flush = slot_handle_level_range(kvm, memslot, + flush = slot_handle_level_range(kvm, + (const struct kvm_memory_slot *) memslot, kvm_zap_rmapp, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, start, end - 1, true, flush); @@ -5643,13 +5645,13 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) static bool slot_rmap_write_protect(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot) + const struct kvm_memory_slot *slot) { return __rmap_write_protect(kvm, rmap_head, false); } void kvm_mmu_slot_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot, + const struct kvm_memory_slot *memslot, int start_level) { bool flush = false; @@ -5685,7 +5687,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot) + const struct kvm_memory_slot *slot) { u64 *sptep; struct rmap_iterator iter; @@ -5724,10 +5726,8 @@ restart: } void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, - const struct kvm_memory_slot *memslot) + const struct kvm_memory_slot *slot) { - /* FIXME: const-ify all uses of struct kvm_memory_slot. 
*/ - struct kvm_memory_slot *slot = (struct kvm_memory_slot *)memslot; bool flush = false; if (kvm_memslots_have_rmaps(kvm)) { @@ -5763,7 +5763,7 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, } void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, - struct kvm_memory_slot *memslot) + const struct kvm_memory_slot *memslot) { bool flush = false; diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 626cb848dab4..ca7b7595bbfc 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -124,8 +124,8 @@ static inline bool is_nx_huge_page_enabled(void) int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync); -void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); -void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); +void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn); +void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn, int min_level); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 41cee1d22918..43f12f5d12c0 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1246,8 +1246,8 @@ retry: * only affect leaf SPTEs down to min_level. * Returns true if an SPTE has been changed and the TLBs need to be flushed. */ -bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot, - int min_level) +bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, + const struct kvm_memory_slot *slot, int min_level) { struct kvm_mmu_page *root; bool spte_set = false; @@ -1317,7 +1317,8 @@ retry: * each SPTE. Returns true if an SPTE has been changed and the TLBs need to * be flushed. 
*/ -bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot) +bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, + const struct kvm_memory_slot *slot) { struct kvm_mmu_page *root; bool spte_set = false; diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 361b47f98cc5..b224d126adf9 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -61,10 +61,10 @@ bool kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range); -bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot, - int min_level); +bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, + const struct kvm_memory_slot *slot, int min_level); bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, - struct kvm_memory_slot *slot); + const struct kvm_memory_slot *slot); void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, unsigned long mask, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4d246b7f6ce1..348452bb16bc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11520,7 +11520,7 @@ static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable) static void kvm_mmu_slot_apply_flags(struct kvm *kvm, struct kvm_memory_slot *old, - struct kvm_memory_slot *new, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES; @@ -11600,10 +11600,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvm_mmu_change_mmu_pages(kvm, kvm_mmu_calculate_default_mmu_pages(kvm)); - /* - * FIXME: const-ify all uses of struct kvm_memory_slot. - */ - kvm_mmu_slot_apply_flags(kvm, old, (struct kvm_memory_slot *) new, change); + kvm_mmu_slot_apply_flags(kvm, old, new, change); /* Free the arrays associated with the old memslot. */ if (change == KVM_MR_MOVE) -- cgit From a6cae77f1bc89368a4e2822afcddc45c3062d499 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Thu, 29 Jul 2021 20:01:03 +0200 Subject: powerpc/stacktrace: Include linux/delay.h commit 7c6986ade69e ("powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi()") introduced a udelay() call without including the linux/delay.h header. This may happen to work on master, but the header that declares the function should be included nonetheless. Fixes: 7c6986ade69e ("powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi()") Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210729180103.15578-1-msuchanek@suse.de --- arch/powerpc/kernel/stacktrace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 2b0d04a1b7d2..9e4a4a7af380 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -8,6 +8,7 @@ * Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp. */ +#include <linux/delay.h> #include <linux/export.h> #include <linux/kallsyms.h> #include <linux/module.h> -- cgit From 156ca4e650bfb9a4259b427069caa11b5a4df3d4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Jul 2021 23:19:35 +0900 Subject: powerpc: remove unused zInstall target from arch/powerpc/boot/Makefile Commit c913e5f95e54 ("powerpc/boot: Don't install zImage.* from make install") added the zInstall target to arch/powerpc/boot/Makefile, but you cannot use it since the corresponding hook is missing in arch/powerpc/Makefile.
It has never worked since its addition. Nobody has complained about it for 7 years, which means this code was unneeded. With this removal, the install.sh will be passed in with 4 parameters. Simplify the shell script. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210729141937.445051-1-masahiroy@kernel.org --- arch/powerpc/boot/Makefile | 6 +----- arch/powerpc/boot/install.sh | 13 ------------- 2 files changed, 1 insertion(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index e312ea802aa6..a702f9d1ec0d 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -448,11 +448,7 @@ $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" -# Install the vmlinux and other built boot targets. -zInstall: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) - sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^ - -PHONY += install zInstall +PHONY += install # anything not in $(targets) clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \ diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh index b6a256bc96ee..658c93ca7437 100644 --- a/arch/powerpc/boot/install.sh +++ b/arch/powerpc/boot/install.sh @@ -15,7 +15,6 @@ # $2 - kernel image file # $3 - kernel map file # $4 - default install path (blank if root directory) -# $5 and more - kernel boot files; zImage*, uImage, cuImage.*, etc. # # Bail with error code if anything goes wrong @@ -41,15 +40,3 @@ fi cat $2 > $4/$image_name cp $3 $4/System.map - -# Copy all the bootable image files -path=$4 -shift 4 -while [ $# -ne 0 ]; do - image_name=`basename $1` - if [ -f $path/$image_name ]; then - mv $path/$image_name $path/$image_name.old - fi - cat $1 > $path/$image_name - shift -done; -- cgit From 9bef456b20581e630ef9a13555ca04fed65a859d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Jul 2021 23:19:36 +0900 Subject: powerpc: make the install target not depend on any build artifact The install target should not depend on any build artifact. The reason is explained in commit 19514fc665ff ("arm, kbuild: make "make install" not depend on vmlinux"). Change the PowerPC installation code in a similar way. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210729141937.445051-2-masahiroy@kernel.org --- arch/powerpc/boot/Makefile | 2 +- arch/powerpc/boot/install.sh | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index a702f9d1ec0d..0d165bd98b61 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -445,7 +445,7 @@ $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) $(Q)rm -f $@; ln $< $@ # Only install the vmlinux -install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) +install: sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" PHONY += install diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh index 658c93ca7437..14473150ddb4 100644 --- a/arch/powerpc/boot/install.sh +++ b/arch/powerpc/boot/install.sh @@ -20,6 +20,20 @@ # Bail with error code if anything goes wrong set -e +verify () { + if [ ! 
-f "$1" ]; then + echo "" 1>&2 + echo " *** Missing file: $1" 1>&2 + echo ' *** You need to run "make" before "make install".' 1>&2 + echo "" 1>&2 + exit 1 + fi +} + +# Make sure the files actually exist +verify "$2" +verify "$3" + # User may have a custom install script if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi -- cgit From 86ff0bce2e9665c8b074930fe6caed615da070c1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Jul 2021 23:19:37 +0900 Subject: powerpc: move the install rule to arch/powerpc/Makefile Currently, the install target in arch/powerpc/Makefile descends into arch/powerpc/boot/Makefile to invoke the shell script, but there is no good reason to do so. arch/powerpc/Makefile can run the shell script directly. Signed-off-by: Masahiro Yamada Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210729141937.445051-3-masahiroy@kernel.org --- arch/powerpc/Makefile | 3 ++- arch/powerpc/boot/Makefile | 6 ------ 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 6505d66f1193..9aaf1abbc641 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -407,7 +407,8 @@ endef PHONY += install install: - $(Q)$(MAKE) $(build)=$(boot) install + sh -x $(srctree)/$(boot)/install.sh "$(KERNELRELEASE)" vmlinux \ + System.map "$(INSTALL_PATH)" archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 0d165bd98b61..10c0fb306f15 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -444,12 +444,6 @@ $(obj)/zImage: $(addprefix $(obj)/, $(image-y)) $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) $(Q)rm -f $@; ln $< $@ -# Only install the vmlinux -install: - sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" - -PHONY += install - # anything not in $(targets) clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \ zImage zImage.initrd zImage.chrp zImage.coff zImage.holly \ -- cgit From a4bec516b9c0823d7e2bb8c8928c98b535cf9adf Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 28 Jul 2021 23:26:05 +0530 Subject: powerpc/cacheinfo: Lookup cache by dt node and thread-group id Currently the cacheinfo code on powerpc indexes the "cache" objects (modelling the L1/L2/L3 caches) where the key is the device-tree node corresponding to that cache. On some of the POWER server platforms, thread-groups within the core share different sets of caches (Eg: On SMT8 POWER9 systems, threads 0,2,4,6 of a core share L1 cache and threads 1,3,5,7 of the same core share another L1 cache). On such platforms, there is a single device-tree node corresponding to that cache and the cache-configuration within the threads of the core is indicated via the "ibm,thread-groups" device-tree property. Since the current code is not aware of the "ibm,thread-groups" property, on the aforementioned systems the cacheinfo code still treats all the threads in the core as sharing the cache because of the single device-tree node (in the earlier example, the cacheinfo code would say CPUs 0-7 share the L1 cache). In this patch, we make the powerpc cacheinfo code aware of the "ibm,thread-groups" property. We index the "cache" objects by the key-pair (device-tree node, thread-group id). For any CPUX, for a given level of cache, the thread-group id is defined to be the first CPU in the "ibm,thread-groups" cache-group containing CPUX.
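A worked illustration (not part of the original commit message): on the SMT8 POWER9 example above, threads {0,2,4,6} of a core share one L1 and threads {1,3,5,7} share the other. The L1 thread-group id of CPUs 0, 2, 4 and 6 is therefore 0, and that of CPUs 1, 3, 5 and 7 is 1, so the key-pairs (L1 node, 0) and (L1 node, 1) select two distinct cache objects where the device-tree node alone used to yield a single object shared by all eight threads.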
For levels of cache which are not represented in "ibm,thread-groups" property, the thread-group id is -1. [parth: Remove "static" keyword for the definition of "thread_group_l1_cache_map" and "thread_group_l2_cache_map" to get rid of the compile error.] Signed-off-by: Gautham R. Shenoy Signed-off-by: Parth Shah Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210728175607.591679-2-parth@linux.ibm.com --- arch/powerpc/include/asm/smp.h | 3 ++ arch/powerpc/kernel/cacheinfo.c | 80 +++++++++++++++++++++++++++++------------ arch/powerpc/kernel/smp.c | 4 +-- 3 files changed, 63 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 03b3d010cbab..1259040cc3a4 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -33,6 +33,9 @@ extern bool coregroup_enabled; extern int cpu_to_chip_id(int cpu); extern int *chip_id_lookup_table; +DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); +DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); + #ifdef CONFIG_SMP struct smp_ops_t { diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 6f903e9aa20b..5a6925d87424 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -120,6 +120,7 @@ struct cache { struct cpumask shared_cpu_map; /* online CPUs using this cache */ int type; /* split cache disambiguation */ int level; /* level not explicit in device tree */ + int group_id; /* id of the group of threads that share this cache */ struct list_head list; /* global list of cache objects */ struct cache *next_local; /* next cache of >= level */ }; @@ -142,22 +143,24 @@ static const char *cache_type_string(const struct cache *cache) } static void cache_init(struct cache *cache, int type, int level, - struct device_node *ofnode) + struct device_node *ofnode, int group_id) { cache->type = type; cache->level = level; cache->ofnode = of_node_get(ofnode); + cache->group_id = group_id; INIT_LIST_HEAD(&cache->list); list_add(&cache->list, &cache_list); } -static struct cache *new_cache(int type, int level, struct device_node *ofnode) +static struct cache *new_cache(int type, int level, + struct device_node *ofnode, int group_id) { struct cache *cache; cache = kzalloc(sizeof(*cache), GFP_KERNEL); if (cache) - cache_init(cache, type, level, ofnode); + cache_init(cache, type, level, ofnode, group_id); return cache; } @@ -309,20 +312,24 @@ static struct cache *cache_find_first_sibling(struct cache *cache) return cache; list_for_each_entry(iter, &cache_list, list) - if (iter->ofnode == cache->ofnode && iter->next_local == cache) + if (iter->ofnode == cache->ofnode && + iter->group_id == cache->group_id && + iter->next_local == cache) return iter; return cache; } -/* return the first cache on a local list matching node */ -static struct cache *cache_lookup_by_node(const struct device_node *node) +/* return the first cache on a local list matching node and thread-group id */ +static struct cache *cache_lookup_by_node_group(const struct device_node *node, + int group_id) { struct cache *cache = NULL; struct cache *iter; list_for_each_entry(iter, &cache_list, list) { - if (iter->ofnode != node) + if (iter->ofnode != node || + iter->group_id != group_id) continue; cache = cache_find_first_sibling(iter); break; @@ -352,14 +359,15 @@ static int cache_is_unified_d(const struct device_node *np) CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED; } -static struct cache 
*cache_do_one_devnode_unified(struct device_node *node, int level) +static struct cache *cache_do_one_devnode_unified(struct device_node *node, int group_id, + int level) { pr_debug("creating L%d ucache for %pOFP\n", level, node); - return new_cache(cache_is_unified_d(node), level, node); + return new_cache(cache_is_unified_d(node), level, node, group_id); } -static struct cache *cache_do_one_devnode_split(struct device_node *node, +static struct cache *cache_do_one_devnode_split(struct device_node *node, int group_id, int level) { struct cache *dcache, *icache; @@ -367,8 +375,8 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node, pr_debug("creating L%d dcache and icache for %pOFP\n", level, node); - dcache = new_cache(CACHE_TYPE_DATA, level, node); - icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node); + dcache = new_cache(CACHE_TYPE_DATA, level, node, group_id); + icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node, group_id); if (!dcache || !icache) goto err; @@ -382,31 +390,32 @@ err: return NULL; } -static struct cache *cache_do_one_devnode(struct device_node *node, int level) +static struct cache *cache_do_one_devnode(struct device_node *node, int group_id, int level) { struct cache *cache; if (cache_node_is_unified(node)) - cache = cache_do_one_devnode_unified(node, level); + cache = cache_do_one_devnode_unified(node, group_id, level); else - cache = cache_do_one_devnode_split(node, level); + cache = cache_do_one_devnode_split(node, group_id, level); return cache; } static struct cache *cache_lookup_or_instantiate(struct device_node *node, + int group_id, int level) { struct cache *cache; - cache = cache_lookup_by_node(node); + cache = cache_lookup_by_node_group(node, group_id); WARN_ONCE(cache && cache->level != level, "cache level mismatch on lookup (got %d, expected %d)\n", cache->level, level); if (!cache) - cache = cache_do_one_devnode(node, level); + cache = cache_do_one_devnode(node, group_id, level); return cache; } @@ -443,7 +452,27 @@ static void do_subsidiary_caches_debugcheck(struct cache *cache) of_node_get_device_type(cache->ofnode)); } -static void do_subsidiary_caches(struct cache *cache) +/* + * If sub-groups of threads in a core containing @cpu_id share the + * L@level-cache (information obtained via "ibm,thread-groups" + * device-tree property), then we identify the group by the first + * thread-sibling in the group. We define this to be the group-id. + * + * In the absence of any thread-group information for L@level-cache, + * this function returns -1. 
+ */ +static int get_group_id(unsigned int cpu_id, int level) +{ + if (has_big_cores && level == 1) + return cpumask_first(per_cpu(thread_group_l1_cache_map, + cpu_id)); + else if (thread_group_shares_l2 && level == 2) + return cpumask_first(per_cpu(thread_group_l2_cache_map, + cpu_id)); + return -1; +} + +static void do_subsidiary_caches(struct cache *cache, unsigned int cpu_id) { struct device_node *subcache_node; int level = cache->level; @@ -452,9 +481,11 @@ static void do_subsidiary_caches(struct cache *cache) while ((subcache_node = of_find_next_cache_node(cache->ofnode))) { struct cache *subcache; + int group_id; level++; - subcache = cache_lookup_or_instantiate(subcache_node, level); + group_id = get_group_id(cpu_id, level); + subcache = cache_lookup_or_instantiate(subcache_node, group_id, level); of_node_put(subcache_node); if (!subcache) break; @@ -468,6 +499,7 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id) { struct device_node *cpu_node; struct cache *cpu_cache = NULL; + int group_id; pr_debug("creating cache object(s) for CPU %i\n", cpu_id); @@ -476,11 +508,13 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id) if (!cpu_node) goto out; - cpu_cache = cache_lookup_or_instantiate(cpu_node, 1); + group_id = get_group_id(cpu_id, 1); + + cpu_cache = cache_lookup_or_instantiate(cpu_node, group_id, 1); if (!cpu_cache) goto out; - do_subsidiary_caches(cpu_cache); + do_subsidiary_caches(cpu_cache, cpu_id); cache_cpu_set(cpu_cache, cpu_id); out: @@ -848,13 +882,15 @@ static struct cache *cache_lookup_by_cpu(unsigned int cpu_id) { struct device_node *cpu_node; struct cache *cache; + int group_id; cpu_node = of_get_cpu_node(cpu_id, NULL); WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id); if (!cpu_node) return NULL; - cache = cache_lookup_by_node(cpu_node); + group_id = get_group_id(cpu_id, 1); + cache = cache_lookup_by_node_group(cpu_node, group_id); of_node_put(cpu_node); return cache; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 447b78a87c8f..a7fcac44a8e2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -122,14 +122,14 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata; * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to * the set its siblings that share the L1-cache. */ -static DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); +DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); /* * On some big-cores system, thread_group_l2_cache_map for each CPU * corresponds to the set its siblings within the core that share the * L2-cache. */ -static DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); +DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; -- cgit From 69aa8e078545bc14d84a8b4b3cb914ac8f9f280e Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 28 Jul 2021 23:26:06 +0530 Subject: powerpc/cacheinfo: Remove the redundant get_shared_cpu_map() The helper function get_shared_cpu_map() was added in 'commit 500fe5f550ec ("powerpc/cacheinfo: Report the correct shared_cpu_map on big-cores")' and subsequently expanded upon in 'commit 0be47634db0b ("powerpc/cacheinfo: Print correct cache-sibling map/list for L2 cache")' in order to help report the correct groups of threads sharing these caches on big-core systems where groups of threads within a core can share different sets of caches. 
Now that powerpc/cacheinfo is aware of "ibm,thread-groups" property, cache->shared_cpu_map contains the correct set of thread-siblings sharing the cache. Hence we no longer need the functions get_shared_cpu_map(). This patch removes this function. We also remove the helper function index_dir_to_cpu() which was only called by get_shared_cpu_map(). With these functions removed, we can still see the correct cache-sibling map/list for L1 and L2 caches on systems with L1 and L2 caches distributed among groups of threads in a core. With this patch, on a SMT8 POWER10 system where the L1 and L2 caches are split between the two groups of threads in a core, for CPUs 8,9, the L1-Data, L1-Instruction, L2, L3 cache CPU sibling list is as follows: $ grep . /sys/devices/system/cpu/cpu[89]/cache/index[0123]/shared_cpu_list /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list:8,10,12,14 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list:8,10,12,14 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list:8,10,12,14 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list:8-15 /sys/devices/system/cpu/cpu9/cache/index0/shared_cpu_list:9,11,13,15 /sys/devices/system/cpu/cpu9/cache/index1/shared_cpu_list:9,11,13,15 /sys/devices/system/cpu/cpu9/cache/index2/shared_cpu_list:9,11,13,15 /sys/devices/system/cpu/cpu9/cache/index3/shared_cpu_list:8-15 $ ppc64_cpu --smt=4 $ grep . /sys/devices/system/cpu/cpu[89]/cache/index[0123]/shared_cpu_list /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list:8,10 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list:8,10 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list:8,10 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list:8-11 /sys/devices/system/cpu/cpu9/cache/index0/shared_cpu_list:9,11 /sys/devices/system/cpu/cpu9/cache/index1/shared_cpu_list:9,11 /sys/devices/system/cpu/cpu9/cache/index2/shared_cpu_list:9,11 /sys/devices/system/cpu/cpu9/cache/index3/shared_cpu_list:8-11 $ ppc64_cpu --smt=2 $ grep . /sys/devices/system/cpu/cpu[89]/cache/index[0123]/shared_cpu_list /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list:8 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list:8 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list:8 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list:8-9 /sys/devices/system/cpu/cpu9/cache/index0/shared_cpu_list:9 /sys/devices/system/cpu/cpu9/cache/index1/shared_cpu_list:9 /sys/devices/system/cpu/cpu9/cache/index2/shared_cpu_list:9 /sys/devices/system/cpu/cpu9/cache/index3/shared_cpu_list:8-9 $ ppc64_cpu --smt=1 $ grep . /sys/devices/system/cpu/cpu[89]/cache/index[0123]/shared_cpu_list /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list:8 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list:8 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list:8 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list:8 Signed-off-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210728175607.591679-3-parth@linux.ibm.com --- arch/powerpc/kernel/cacheinfo.c | 41 +---------------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 5a6925d87424..20d91693eac1 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -675,45 +675,6 @@ static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char * static struct kobj_attribute cache_level_attr = __ATTR(level, 0444, level_show, NULL); -static unsigned int index_dir_to_cpu(struct cache_index_dir *index) -{ - struct kobject *index_dir_kobj = &index->kobj; - struct kobject *cache_dir_kobj = index_dir_kobj->parent; - struct kobject *cpu_dev_kobj = cache_dir_kobj->parent; - struct device *dev = kobj_to_dev(cpu_dev_kobj); - - return dev->id; -} - -/* - * On big-core systems, each core has two groups of CPUs each of which - * has its own L1-cache. The thread-siblings which share l1-cache with - * @cpu can be obtained via cpu_smallcore_mask(). - * - * On some big-core systems, the L2 cache is shared only between some - * groups of siblings. This is already parsed and encoded in - * cpu_l2_cache_mask(). - * - * TODO: cache_lookup_or_instantiate() needs to be made aware of the - * "ibm,thread-groups" property so that cache->shared_cpu_map - * reflects the correct siblings on platforms that have this - * device-tree property. This helper function is only a stop-gap - * solution so that we report the correct siblings to the - * userspace via sysfs. - */ -static const struct cpumask *get_shared_cpu_map(struct cache_index_dir *index, struct cache *cache) -{ - if (has_big_cores) { - int cpu = index_dir_to_cpu(index); - if (cache->level == 1) - return cpu_smallcore_mask(cpu); - if (cache->level == 2 && thread_group_shares_l2) - return cpu_l2_cache_mask(cpu); - } - - return &cache->shared_cpu_map; -} - static ssize_t show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list) { @@ -724,7 +685,7 @@ show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bo index = kobj_to_cache_index_dir(k); cache = index->cache; - mask = get_shared_cpu_map(index, cache); + mask = &cache->shared_cpu_map; return cpumap_print_to_pagebuf(list, buf, mask); } -- cgit From e9ef81e1079b0c4c374fba0f9affa7129c7c913b Mon Sep 17 00:00:00 2001 From: Parth Shah Date: Wed, 28 Jul 2021 23:26:07 +0530 Subject: powerpc/smp: Use existing L2 cache_map cpumask to find L3 cache siblings On POWER10 systems, the "ibm,thread-groups" property "2" indicates the cpus in thread-group share both L2 and L3 caches. Hence, use cache_property = 2 itself to find both the L2 and L3 cache siblings. Hence, create a new thread_group_l3_cache_map to keep list of L3 siblings, but fill the mask using same property "2" array. Signed-off-by: Parth Shah Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210728175607.591679-4-parth@linux.ibm.com --- arch/powerpc/include/asm/smp.h | 3 ++ arch/powerpc/kernel/cacheinfo.c | 3 ++ arch/powerpc/kernel/smp.c | 66 ++++++++++++++++++++++++++++------------- 3 files changed, 51 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 1259040cc3a4..7ef1cd8168a0 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -35,6 +35,7 @@ extern int *chip_id_lookup_table; DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); +DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map); #ifdef CONFIG_SMP @@ -144,6 +145,7 @@ extern int cpu_to_core_id(int cpu); extern bool has_big_cores; extern bool thread_group_shares_l2; +extern bool thread_group_shares_l3; #define cpu_smt_mask cpu_smt_mask #ifdef CONFIG_SCHED_SMT @@ -198,6 +200,7 @@ extern void __cpu_die(unsigned int cpu); #define hard_smp_processor_id() get_hard_smp_processor_id(0) #define smp_setup_cpu_maps() #define thread_group_shares_l2 0 +#define thread_group_shares_l3 0 static inline void inhibit_secondary_onlining(void) {} static inline void uninhibit_secondary_onlining(void) {} static inline const struct cpumask *cpu_sibling_mask(int cpu) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 20d91693eac1..cf1be75b7833 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level) else if (thread_group_shares_l2 && level == 2) return cpumask_first(per_cpu(thread_group_l2_cache_map, cpu_id)); + else if (thread_group_shares_l3 && level == 3) + return cpumask_first(per_cpu(thread_group_l3_cache_map, + cpu_id)); return -1; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a7fcac44a8e2..f2abd88e0c25 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -78,6 +78,7 @@ struct task_struct *secondary_current; bool has_big_cores; bool coregroup_enabled; bool thread_group_shares_l2; +bool thread_group_shares_l3; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -101,7 +102,7 @@ enum { #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 -#define THREAD_GROUP_SHARE_L2 2 +#define THREAD_GROUP_SHARE_L2_L3 2 struct thread_groups { unsigned int property; unsigned int nr_groups; @@ -131,6 +132,12 @@ DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); */ DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); +/* + * On P10, thread_group_l3_cache_map for each CPU is equal to the + * thread_group_l2_cache_map + */ +DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map); + /* SMP operations for this machine */ struct smp_ops_t *smp_ops; @@ -889,19 +896,41 @@ out: return tg; } +static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start) +{ + int first_thread = cpu_first_thread_sibling(cpu); + int i; + + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); + + for (i = first_thread; i < first_thread + threads_per_core; i++) { + int i_group_start = get_cpu_thread_group_start(i, tg); + + if (unlikely(i_group_start == -1)) { + WARN_ON_ONCE(1); + return -ENODATA; + } + + if (i_group_start == cpu_group_start) + cpumask_set_cpu(i, *mask); + } + + return 0; +} + static int __init 
init_thread_group_cache_map(int cpu, int cache_property) { - int first_thread = cpu_first_thread_sibling(cpu); - int i, cpu_group_start = -1, err = 0; + int cpu_group_start = -1, err = 0; struct thread_groups *tg = NULL; cpumask_var_t *mask = NULL; if (cache_property != THREAD_GROUP_SHARE_L1 && - cache_property != THREAD_GROUP_SHARE_L2) + cache_property != THREAD_GROUP_SHARE_L2_L3) return -EINVAL; tg = get_thread_groups(cpu, cache_property, &err); + if (!tg) return err; @@ -912,25 +941,18 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) return -ENODATA; } - if (cache_property == THREAD_GROUP_SHARE_L1) + if (cache_property == THREAD_GROUP_SHARE_L1) { mask = &per_cpu(thread_group_l1_cache_map, cpu); - else if (cache_property == THREAD_GROUP_SHARE_L2) + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); + } + else if (cache_property == THREAD_GROUP_SHARE_L2_L3) { mask = &per_cpu(thread_group_l2_cache_map, cpu); - - zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); - - for (i = first_thread; i < first_thread + threads_per_core; i++) { - int i_group_start = get_cpu_thread_group_start(i, tg); - - if (unlikely(i_group_start == -1)) { - WARN_ON_ONCE(1); - return -ENODATA; - } - - if (i_group_start == cpu_group_start) - cpumask_set_cpu(i, *mask); + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); + mask = &per_cpu(thread_group_l3_cache_map, cpu); + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); } + return 0; } @@ -1020,14 +1042,16 @@ static int __init init_big_cores(void) has_big_cores = true; for_each_possible_cpu(cpu) { - int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2); + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3); if (err) return err; } thread_group_shares_l2 = true; - pr_debug("L2 cache only shared by the threads in the small core\n"); + thread_group_shares_l3 = true; + pr_debug("L2/L3 cache only shared by the threads in the small core\n"); + return 0; } -- cgit From cf9c615cde49fb5d2480549c8c955a0a387798d3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Jul 2021 00:15:04 +1000 Subject: powerpc/64s/perf: Always use SIAR for kernel interrupts If an interrupt is taken in kernel mode, always use SIAR for it rather than looking at regs_sipr. This prevents samples piling up around interrupt enable (hard enable or interrupt replay via soft enable) in PMUs / modes where the PR sample indication is not in synch with SIAR. This results in better sampling of interrupt entry and exit in particular. Signed-off-by: Nicholas Piggin Tested-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210720141504.420110-1-npiggin@gmail.com --- arch/powerpc/perf/core-book3s.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index bb0ee716de91..91203ed9d0ff 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -340,6 +340,13 @@ static inline void perf_read_regs(struct pt_regs *regs) * If the PMU doesn't update the SIAR for non marked events use * pt_regs. * + * If regs is a kernel interrupt, always use SIAR. Some PMUs have an + * issue with regs_sipr not being in synch with SIAR in interrupt entry + * and return sequences, which can result in regs_sipr being true for + * kernel interrupts and SIAR, which has the effect of causing samples + * to pile up at mtmsrd MSR[EE] 0->1 or pending irq replay around + * interrupt entry/exit. 
+ * * If the PMU has HV/PR flags then check to see if they * place the exception in userspace. If so, use pt_regs. In * continuous sampling mode the SIAR and the PMU exception are @@ -356,6 +363,8 @@ static inline void perf_read_regs(struct pt_regs *regs) use_siar = 1; else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING)) use_siar = 0; + else if (!user_mode(regs)) + use_siar = 1; else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs)) use_siar = 0; else -- cgit From 13e47bebbe83f58ddc41d2987567e97c5068a1ec Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Fri, 2 Jul 2021 04:54:21 +0000 Subject: riscv: Implement thread_struct whitelist for hardened usercopy This whitelists the FPU register state portion of the thread_struct for copying to userspace, instead of the default entire struct. Signed-off-by: Tong Tiangen Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/processor.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8fcceb8eda07..1af859b9d5bf 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -72,6 +72,7 @@ config RISCV select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU + select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_VMAP_STACK if MMU && 64BIT select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 021ed64ee608..46b492c78cbb 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -37,6 +37,14 @@ struct thread_struct { unsigned long bad_cause; }; +/* Whitelist the fstate from the task_struct for hardened usercopy */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = offsetof(struct thread_struct, fstate); + *size = sizeof_field(struct thread_struct, fstate); +} + #define INIT_THREAD { \ .sp = sizeof(init_stack) + (long)&init_stack, \ } -- cgit From dc1cff969101afd08601e90463b44bd572e62dd4 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 30 Jul 2021 18:04:53 -0400 Subject: KVM: X86: MMU: Tune PTE_LIST_EXT to be bigger Currently each rmap array element only contains 3 entries. However for EPT=N there can be a lot of guest pages that have tens or even hundreds of rmap entries. A normal distribution for a 6G guest (even if idle) shows this in the rmap count statistics:

Rmap_Count:       0      1    2-3   4-7  8-15  16-31  32-63  64-127  128-255  256-511  512-1023
Level=4K:   3089171  49005  14016  1363   235    212     15       7        0        0         0
Level=2M:      5951    227      0     0     0      0      0       0        0        0         0
Level=1G:        32      0      0     0     0      0      0       0        0        0         0

With more forking, some pages will grow even larger rmap counts. This patch makes PTE_LIST_EXT bigger so it'll be more efficient for the general use case of EPT=N, as we dereference the list less and the loops over PTE_LIST_EXT will be slightly more efficient; but still not too large, so there is less waste when the array is not full. It should not affect EPT=Y, since EPT normally only has zero or one rmap entry for each page, so no array is even allocated. With a test case that forks 500 children and recycles them ("./rmap_fork 500" [1]), this patch speeds up fork time by about 29%.
Before: 473.90 (+-5.93%) After: 366.10 (+-4.94%) [1] https://github.com/xzpeter/clibs/commit/825436f825453de2ea5aaee4bdb1c92281efe5b3 Signed-off-by: Peter Xu Message-Id: <20210730220455.26054-6-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e702361b4409..69ad8189d7eb 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -137,8 +137,8 @@ module_param(dbg, bool, 0644); #include <trace/events/kvm.h> -/* make pte_list_desc fit well in cache line */ -#define PTE_LIST_EXT 3 +/* make pte_list_desc fit well in cache lines */ +#define PTE_LIST_EXT 15 struct pte_list_desc { u64 *sptes[PTE_LIST_EXT]; -- cgit From 13236e25ebab91b3e42ddedf5354b569ace1b555 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 30 Jul 2021 18:06:02 -0400 Subject: KVM: X86: Optimize pte_list_desc with per-array counter Add a counter field into pte_list_desc, so as to simplify the add/remove/loop logic. E.g., we don't need to loop over the array any more in most cases. This makes more sense now that the array size has been switched to be larger; otherwise the counter would be a waste. Initially I wanted to store a tail pointer at the head of the array list so we don't need to traverse the list, at least for pushing new ones (without the counter we traverse both the list and the array). However, that would need slightly more change without much benefit, e.g., after we grow the entry count per array the list traversal is not so expensive. So let's be simple but still try to get as much benefit as we can with just these extra few lines of changes (not to mention the code looks easier too without looping over arrays). I used the same test case that forks 500 children and recycles them ("./rmap_fork 500" [1]); this patch further speeds up the total fork time by about 4%, for a total of 33% over the vanilla kernel: Vanilla: 473.90 (+-5.93%) 3->15 slots: 366.10 (+-4.94%) Add counter: 351.00 (+-3.70%) [1] https://github.com/xzpeter/clibs/commit/825436f825453de2ea5aaee4bdb1c92281efe5b3 Signed-off-by: Peter Xu Message-Id: <20210730220602.26327-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 69ad8189d7eb..f1eb2fdfa473 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -138,11 +138,21 @@ module_param(dbg, bool, 0644); #include <trace/events/kvm.h> /* make pte_list_desc fit well in cache lines */ -#define PTE_LIST_EXT 15 +#define PTE_LIST_EXT 14 +/* + * Slight optimization of cacheline layout, by putting `more' and `spte_count' + * at the start; then accessing it will only use one single cacheline for + * either full (entries==PTE_LIST_EXT) case or entries<=6. + */ struct pte_list_desc { - u64 *sptes[PTE_LIST_EXT]; struct pte_list_desc *more; + /* + * Stores number of entries stored in the pte_list_desc. No need to be + * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
+ */ + u64 spte_count; + u64 *sptes[PTE_LIST_EXT]; }; struct kvm_shadow_walk_iterator { @@ -901,7 +911,7 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; - int i, count = 0; + int count = 0; if (!rmap_head->val) { rmap_printk("%p %llx 0->1\n", spte, *spte); @@ -911,24 +921,24 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, desc = mmu_alloc_pte_list_desc(vcpu); desc->sptes[0] = (u64 *)rmap_head->val; desc->sptes[1] = spte; + desc->spte_count = 2; rmap_head->val = (unsigned long)desc | 1; ++count; } else { rmap_printk("%p %llx many->many\n", spte, *spte); desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - while (desc->sptes[PTE_LIST_EXT-1]) { + while (desc->spte_count == PTE_LIST_EXT) { count += PTE_LIST_EXT; - if (!desc->more) { desc->more = mmu_alloc_pte_list_desc(vcpu); desc = desc->more; + desc->spte_count = 0; break; } desc = desc->more; } - for (i = 0; desc->sptes[i]; ++i) - ++count; - desc->sptes[i] = spte; + count += desc->spte_count; + desc->sptes[desc->spte_count++] = spte; } return count; } @@ -938,13 +948,12 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, struct pte_list_desc *desc, int i, struct pte_list_desc *prev_desc) { - int j; + int j = desc->spte_count - 1; - for (j = PTE_LIST_EXT - 1; !desc->sptes[j] && j > i; --j) - ; desc->sptes[i] = desc->sptes[j]; desc->sptes[j] = NULL; - if (j != 0) + desc->spte_count--; + if (desc->spte_count) return; if (!prev_desc && !desc->more) rmap_head->val = 0; @@ -977,7 +986,7 @@ static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); prev_desc = NULL; while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { + for (i = 0; i < desc->spte_count; ++i) { if (desc->sptes[i] == spte) { pte_list_desc_remove_entry(rmap_head, desc, i, prev_desc); -- cgit From a75b540451d20ef1aebaa09d183ddc5c44c6f86a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 30 Jul 2021 18:06:05 -0400 Subject: KVM: X86: Optimize zapping rmap Using rmap_get_first() and rmap_remove() for zapping a huge rmap list could be slow. The easy way is to traverse the rmap list, collecting the a/d bits and freeing the slots along the way. Provide a pte_list_destroy() and do exactly that.
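A back-of-envelope check of the cache-line layout introduced two patches above (a sketch under assumptions, not from the patches: 8-byte pointers, 64-byte cache lines, kernel u64 type):

struct pte_list_desc {
	struct pte_list_desc *more;	/*   8 bytes */
	u64 spte_count;			/*   8 bytes */
	u64 *sptes[14];			/* 112 bytes (14 * 8) */
};
/*
 * 8 + 8 + 112 = 128 bytes, i.e. exactly two 64-byte cache lines.
 * With spte_count <= 6 the live fields (8 + 8 + 6 * 8 = 64 bytes)
 * fit in the first cache line, which is the "entries<=6" case the
 * layout comment above refers to.
 */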
Signed-off-by: Peter Xu Message-Id: <20210730220605.26377-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index f1eb2fdfa473..232ced2e7bf8 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1007,6 +1007,34 @@ static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep) __pte_list_remove(sptep, rmap_head); } +/* Return true if rmap existed, false otherwise */ +static bool pte_list_destroy(struct kvm_rmap_head *rmap_head) +{ + struct pte_list_desc *desc, *next; + int i; + + if (!rmap_head->val) + return false; + + if (!(rmap_head->val & 1)) { + mmu_spte_clear_track_bits((u64 *)rmap_head->val); + goto out; + } + + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + + for (; desc; desc = next) { + for (i = 0; i < desc->spte_count; i++) + mmu_spte_clear_track_bits(desc->sptes[i]); + next = desc->more; + mmu_free_pte_list_desc(desc); + } +out: + /* rmap_head is meaningless now, remember to reset it */ + rmap_head->val = 0; + return true; +} + static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, const struct kvm_memory_slot *slot) { @@ -1398,18 +1426,7 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { - u64 *sptep; - struct rmap_iterator iter; - bool flush = false; - - while ((sptep = rmap_get_first(rmap_head, &iter))) { - rmap_printk("spte %p %llx.\n", sptep, *sptep); - - pte_list_remove(rmap_head, sptep); - flush = true; - } - - return flush; + return pte_list_destroy(rmap_head); } static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, -- cgit From e79f49c37ccf273c8aba733f803b3774ebfbe581 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Wed, 28 Jul 2021 20:07:05 +0800 Subject: KVM: x86/pmu: Introduce pmc->is_paused to reduce the call time of perf interfaces Based on our observations, after any vm-exit associated with vPMU, two or more perf interfaces have to be called for guest counter emulation, such as perf_event_{pause, read_value, period}(), and each one will {lock, unlock} the same perf_event_ctx. The frequency of calls becomes more severe when the guest uses counters in a multiplexed manner. Holding a lock once and completing the KVM request operations in the perf context would introduce a set of impractical new interfaces. So we can further optimize the vPMU implementation by avoiding repeated calls to these interfaces in the KVM context for at least one pattern: After we call perf_event_pause() once, the event will be disabled and its internal count will be reset to 0. So there is no need to pause it again or read its value. Once the event is paused, the event period will not be updated until the next time it's resumed or reprogrammed. And there is also no need to call perf_event_period() twice for a non-running counter, considering the perf_event for a running counter is never paused.
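Restating the invariant the patch relies on (a summary, not from the patch text): after pmc_pause_counter() runs once, the perf event is disabled, its accumulated count has been folded into pmc->counter, and its internal count is zero; the new pmc->is_paused flag then lets pmc_pause_counter(), pmc_read_counter() and the perf_event_period() call sites below skip the redundant perf calls, and the context locking they imply, until pmc_resume_counter() or a reprogram clears the flag.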
Based on this implementation, for the following common usage of sampling 4 events using perf on a 4u8g guest: echo 0 > /proc/sys/kernel/watchdog echo 25 > /proc/sys/kernel/perf_cpu_time_max_percent echo 10000 > /proc/sys/kernel/perf_event_max_sample_rate echo 0 > /proc/sys/kernel/perf_cpu_time_max_percent for i in `seq 1 1 10` do taskset -c 0 perf record \ -e cpu-cycles -e instructions -e branch-instructions -e cache-misses \ /root/br_instr a done the average latency of the guest NMI handler is reduced from 37646.7 ns to 32929.3 ns (~1.14x speed up) on the Intel ICX server. Also, in addition to collecting more samples, no loss of sampling accuracy was observed compared to before the optimization. Signed-off-by: Like Xu Message-Id: <20210728120705.6855-1-likexu@tencent.com> Signed-off-by: Paolo Bonzini Acked-by: Peter Zijlstra --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/pmu.c | 5 ++++- arch/x86/kvm/pmu.h | 2 +- arch/x86/kvm/vmx/pmu_intel.c | 4 ++-- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 99f37781a6fc..a079880d4cd5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -482,6 +482,7 @@ struct kvm_pmc { * ctrl value for fixed counters. */ u64 current_config; + bool is_paused; }; struct kvm_pmu { diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 827886c12c16..0772bad9165c 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -137,18 +137,20 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, pmc->perf_event = event; pmc_to_pmu(pmc)->event_count++; clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); + pmc->is_paused = false; } static void pmc_pause_counter(struct kvm_pmc *pmc) { u64 counter = pmc->counter; - if (!pmc->perf_event) + if (!pmc->perf_event || pmc->is_paused) return; /* update counter, reset event value to avoid redundant accumulation */ counter += perf_event_pause(pmc->perf_event, true); pmc->counter = counter & pmc_bitmask(pmc); + pmc->is_paused = true; } static bool pmc_resume_counter(struct kvm_pmc *pmc) @@ -163,6 +165,7 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc) /* reuse perf_event to serve as pmc_reprogram_counter() does*/ perf_event_enable(pmc->perf_event); + pmc->is_paused = false; clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi); return true; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 67e753edfa22..0e4f2b1fa9fb 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -55,7 +55,7 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc) u64 counter, enabled, running; counter = pmc->counter; - if (pmc->perf_event) + if (pmc->perf_event && !pmc->is_paused) counter += perf_event_read_value(pmc->perf_event, &enabled, &running); /* FIXME: Scaling needed? 
*/ diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 9efc1a6b8693..10cc4f65c4ef 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -437,13 +437,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) !(msr & MSR_PMC_FULL_WIDTH_BIT)) data = (s64)(s32)data; pmc->counter += data - pmc_read_counter(pmc); - if (pmc->perf_event) + if (pmc->perf_event && !pmc->is_paused) perf_event_period(pmc->perf_event, get_sample_period(pmc, data)); return 0; } else if ((pmc = get_fixed_pmc(pmu, msr))) { pmc->counter += data - pmc_read_counter(pmc); - if (pmc->perf_event) + if (pmc->perf_event && !pmc->is_paused) perf_event_period(pmc->perf_event, get_sample_period(pmc, data)); return 0; -- cgit From eb48d154cd0dade56a0e244f0cfa198ea2925ed3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Aug 2021 13:38:29 +0100 Subject: arm64: Move .hyp.rodata outside of the _sdata.._edata range The HYP rodata section is currently lumped together with the BSS, which isn't exactly what is expected (it gets registered with kmemleak, for example). Move it away so that it is actually marked RO. As an added benefit, it isn't registered with kmemleak anymore. Fixes: 380e18ade4a5 ("KVM: arm64: Introduce a BSS section for use at Hyp") Suggested-by: Catalin Marinas Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org #5.13 Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210802123830.2195174-2-maz@kernel.org --- arch/arm64/kernel/vmlinux.lds.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 709d2c433c5e..f6b1a88245db 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -181,6 +181,8 @@ SECTIONS /* everything from this point to __init_begin will be marked RO NX */ RO_DATA(PAGE_SIZE) + HYPERVISOR_DATA_SECTIONS + idmap_pg_dir = .; . += IDMAP_DIR_SIZE; idmap_pg_end = .; @@ -260,8 +262,6 @@ SECTIONS _sdata = .; RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) - HYPERVISOR_DATA_SECTIONS - /* * Data written with the MMU off but read with the MMU on requires * cache lines to be invalidated, discarding up to a Cache Writeback -- cgit From 47e6223c841e029bfc23c3ce594dac5525cebaf8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Aug 2021 13:38:30 +0100 Subject: KVM: arm64: Unregister HYP sections from kmemleak in protected mode Booting a KVM host in protected mode with kmemleak quickly results in a pretty bad crash, as kmemleak doesn't know that the HYP sections have been taken away. This is specially true for the BSS section, which is part of the kernel BSS section and registered at boot time by kmemleak itself. Unregister the HYP part of the BSS before making that section HYP-private. The rest of the HYP-specific data is obtained via the page allocator or lives in other sections, none of which is subjected to kmemleak. 
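For context (an assumption-flagged note, not from the commit message): kmemleak_free_part(ptr, size) is the kmemleak interface for retiring a sub-range of an object the scanner already tracks; the hunk below applies it to the HYP BSS, which sits inside the kernel BSS that kmemleak registers at boot, before the section becomes inaccessible to the host.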
Fixes: 90134ac9cabb ("KVM: arm64: Protect the .hyp sections from the host") Reviewed-by: Quentin Perret Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org # 5.13 Link: https://lore.kernel.org/r/20210802123830.2195174-3-maz@kernel.org --- arch/arm64/kvm/arm.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..52242f32c4be 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -15,6 +15,7 @@ #include <linux/fs.h> #include <linux/mman.h> #include <linux/sched.h> +#include <linux/kmemleak.h> #include <linux/kvm.h> #include <linux/kvm_irqfd.h> #include <linux/irqbypass.h> @@ -1982,6 +1983,12 @@ static int finalize_hyp_mode(void) if (ret) return ret; + /* + * Exclude HYP BSS from kmemleak so that it doesn't get peeked + * at, which would end badly once the section is inaccessible. + * None of other sections should ever be introspected. + */ + kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); ret = pkvm_mark_hyp_section(__hyp_bss); if (ret) return ret; -- cgit From 8165c6ae8e3a264601f02fc17e5545d027584a2f Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 8 Jul 2021 09:59:47 +0800 Subject: riscv: Allow forced irq threading The timer interrupt and the perf interrupt on riscv are requested with IRQF_PERCPU, so it's safe to allow forced interrupt threading. Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 1af859b9d5bf..1fc5a95df3f4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -98,6 +98,7 @@ config RISCV select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select IRQ_DOMAIN + select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA if MODULES select MODULE_SECTIONS if MODULES select OF -- cgit From bcf11b5e99b27472ea61231a64e29ad7dd31f0da Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 8 Jul 2021 09:59:48 +0800 Subject: riscv: Enable generic idle loop Enable the generic idle loop to support the hlt/nohlt command line options, which override the default idle loop behavior. Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 1fc5a95df3f4..95cb9efa9213 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -48,6 +48,7 @@ config RISCV select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_EARLY_IOREMAP select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO + select GENERIC_IDLE_POLL_SETUP select GENERIC_IOREMAP select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_SHOW -- cgit From ecd4916c7261edccb2382c9931535e238875a44d Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 8 Jul 2021 09:59:49 +0800 Subject: riscv: Enable GENERIC_IRQ_SHOW_LEVEL The interrupt controllers on riscv support both edge and level triggered interrupts; it's useful to provide that information in /proc/interrupts.
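A usage note on the three RISC-V Kconfig selections above (the specifics here are assumptions, not taken from the patches): IRQ_FORCED_THREADING is what makes the generic "threadirqs" boot parameter take effect; GENERIC_IDLE_POLL_SETUP wires up the "hlt"/"nohlt" boot parameters mentioned in the commit message; and with GENERIC_IRQ_SHOW_LEVEL each /proc/interrupts line gains an "Edge"/"Level" annotation next to the irqchip name, roughly of the shape "1: 5832 SiFive PLIC 1 Level ttyS0", where the count and spacing are illustrative only.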
Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 95cb9efa9213..387cf68a5d52 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -52,6 +52,7 @@ config RISCV select GENERIC_IOREMAP select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL select GENERIC_LIB_DEVMEM_IS_ALLOWED select GENERIC_PCI_IOMAP select GENERIC_PTDUMP if MMU -- cgit From 946e1052cdcc7e585ee5d1e72528ca49fb295243 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 18 Jul 2021 19:33:09 -0700 Subject: openrisc: don't printk() unconditionally Don't call printk() when CONFIG_PRINTK is not set. Fixes the following build errors: or1k-linux-ld: arch/openrisc/kernel/entry.o: in function `_external_irq_handler': (.text+0x804): undefined reference to `printk' (.text+0x804): relocation truncated to fit: R_OR1K_INSN_REL_26 against undefined symbol `printk' Fixes: 9d02a4283e9c ("OpenRISC: Boot code") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: openrisc@lists.librecores.org Signed-off-by: Stafford Horne --- arch/openrisc/kernel/entry.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index bc657e55c15f..98e4f97db515 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -547,6 +547,7 @@ EXCEPTION_ENTRY(_external_irq_handler) l.bnf 1f // ext irq enabled, all ok. l.nop +#ifdef CONFIG_PRINTK l.addi r1,r1,-0x8 l.movhi r3,hi(42f) l.ori r3,r3,lo(42f) @@ -560,6 +561,7 @@ EXCEPTION_ENTRY(_external_irq_handler) .string "\n\rESR interrupt bug: in _external_irq_handler (ESR %x)\n\r" .align 4 .previous +#endif l.ori r4,r4,SPR_SR_IEE // fix the bug // l.sw PT_SR(r1),r4 -- cgit From 11648cbb7b335b7eb54e1ff973fb938939616f46 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Jul 2021 19:23:38 -0700 Subject: openrisc: rename or32 code & comments to or1k From Documentation/openrisc/todo.rst, rename "or32" in the source code to "or1k" since this is the name that has been settled on. Signed-off-by: Randy Dunlap Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: openrisc@lists.librecores.org Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/pgtable.h | 6 +++--- arch/openrisc/include/asm/thread_info.h | 2 +- arch/openrisc/kernel/entry.S | 4 ++-- arch/openrisc/kernel/head.S | 6 +++--- arch/openrisc/kernel/setup.c | 4 ++-- arch/openrisc/lib/Makefile | 2 +- arch/openrisc/mm/fault.c | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 4ac591c9ca33..cdd657f80bfa 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -12,7 +12,7 @@ * et al. */ -/* or32 pgtable.h - macros and functions to manipulate page tables +/* or1k pgtable.h - macros and functions to manipulate page tables * * Based on: * include/asm-cris/pgtable.h @@ -29,14 +29,14 @@ /* * The Linux memory management assumes a three-level page table setup. On - * or32, we use that, but "fold" the mid level into the top-level page + * or1k, we use that, but "fold" the mid level into the top-level page * table. 
Since the MMU TLB is software loaded through an interrupt, it * supports any page table structure, so we could have used a three-level * setup, but for the amounts of memory we normally use, a two-level is * probably more efficient. * * This file contains the functions and defines necessary to modify and use - * the or32 page table tree. + * the or1k page table tree. */ extern void paging_init(void); diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h index 4f9d2a261455..659834ab87fa 100644 --- a/arch/openrisc/include/asm/thread_info.h +++ b/arch/openrisc/include/asm/thread_info.h @@ -25,7 +25,7 @@ /* THREAD_SIZE is the size of the task_struct/kernel_stack combo. * normally, the stack is found by doing something like p + THREAD_SIZE - * in or32, a page is 8192 bytes, which seems like a sane size + * in or1k, a page is 8192 bytes, which seems like a sane size */ #define THREAD_SIZE_ORDER 0 diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index 98e4f97db515..8c8ac3451425 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -326,7 +326,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler) 1: l.ori r6,r0,0x0 // !write access 2: - /* call fault.c handler in or32/mm/fault.c */ + /* call fault.c handler in openrisc/mm/fault.c */ l.jal do_page_fault l.nop l.j _ret_from_exception @@ -348,7 +348,7 @@ EXCEPTION_ENTRY(_insn_page_fault_handler) /* r4 set be EXCEPTION_HANDLE */ // effective address of fault l.ori r6,r0,0x0 // !write access - /* call fault.c handler in or32/mm/fault.c */ + /* call fault.c handler in openrisc/mm/fault.c */ l.jal do_page_fault l.nop l.j _ret_from_exception diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index af355e3f4619..15f1b38dfe03 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -599,7 +599,7 @@ flush_tlb: l.jal _flush_tlb l.nop -/* The MMU needs to be enabled before or32_early_setup is called */ +/* The MMU needs to be enabled before or1k_early_setup is called */ enable_mmu: /* @@ -641,9 +641,9 @@ enable_mmu: /* magic number mismatch, set fdt pointer to null */ l.or r25,r0,r0 _fdt_found: - /* pass fdt pointer to or32_early_setup in r3 */ + /* pass fdt pointer to or1k_early_setup in r3 */ l.or r3,r0,r25 - LOAD_SYMBOL_2_GPR(r24, or32_early_setup) + LOAD_SYMBOL_2_GPR(r24, or1k_early_setup) l.jalr r24 l.nop diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 8ae2da6ac097..7eddcac0ef2f 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -209,7 +209,7 @@ void __init setup_cpuinfo(void) } /** - * or32_early_setup + * or1k_early_setup * * Handles the pointer to the device tree that this kernel is to use * for establishing the available platform devices. @@ -217,7 +217,7 @@ void __init setup_cpuinfo(void) * Falls back on built-in device tree in case null pointer is passed. */ -void __init or32_early_setup(void *fdt) +void __init or1k_early_setup(void *fdt) { if (fdt) pr_info("FDT at %p\n", fdt); diff --git a/arch/openrisc/lib/Makefile b/arch/openrisc/lib/Makefile index 79775aaa6baa..53327406b483 100644 --- a/arch/openrisc/lib/Makefile +++ b/arch/openrisc/lib/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # -# Makefile for or32 specific library files.. +# Makefile for or1k specific library files.. 
# obj-y := delay.o string.o memset.o memcpy.o diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index ca97d9baab51..c730d1a51686 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -28,7 +28,7 @@ unsigned long pte_misses; /* updated by do_page_fault() */ unsigned long pte_errors; /* updated by do_page_fault() */ /* __PHX__ :: - check the vmalloc_fault in do_page_fault() - * - also look into include/asm-or32/mmu_context.h + * - also look into include/asm/mmu_context.h */ volatile pgd_t *current_pgd[NR_CPUS]; -- cgit From 319afe68567b923e25140e744e7f05e3e5d889c1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 4 Aug 2021 12:48:41 -0400 Subject: KVM: xen: do not use struct gfn_to_hva_cache gfn_to_hva_cache is not thread-safe, so it is usually used only within a vCPU (whose code is protected by vcpu->mutex). The Xen interface implementation has such a cache in kvm->arch, but it is not really used except to store the location of the shared info page. Replace shinfo_set and shinfo_cache with just the value that is passed via KVM_XEN_ATTR_TYPE_SHARED_INFO; the only complication is that the initialization value is not zero anymore and therefore kvm_xen_init_vm needs to be introduced. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +-- arch/x86/kvm/x86.c | 1 + arch/x86/kvm/xen.c | 23 ++++++++++++----------- arch/x86/kvm/xen.h | 5 +++++ 4 files changed, 19 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a079880d4cd5..6a73ff7db5f9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1003,9 +1003,8 @@ struct msr_bitmap_range { /* Xen emulation context */ struct kvm_xen { bool long_mode; - bool shinfo_set; u8 upcall_vector; - struct gfn_to_hva_cache shinfo_cache; + gfn_t shinfo_gfn; }; enum kvm_irqchip_mode { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 348452bb16bc..3cedc7cc132a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11162,6 +11162,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_hv_init_vm(kvm); kvm_page_track_init(kvm); kvm_mmu_init_vm(kvm); + kvm_xen_init_vm(kvm); return static_call(kvm_x86_vm_init)(kvm); } diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index ae17250e1efe..9ea9c3dabe37 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -25,15 +25,14 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn) { gpa_t gpa = gfn_to_gpa(gfn); int wc_ofs, sec_hi_ofs; - int ret; + int ret = 0; int idx = srcu_read_lock(&kvm->srcu); - ret = kvm_gfn_to_hva_cache_init(kvm, &kvm->arch.xen.shinfo_cache, - gpa, PAGE_SIZE); - if (ret) + if (kvm_is_error_hva(gfn_to_hva(kvm, gfn))) { + ret = -EFAULT; goto out; - - kvm->arch.xen.shinfo_set = true; + } + kvm->arch.xen.shinfo_gfn = gfn; /* Paranoia checks on the 32-bit struct layout */ BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900); @@ -245,7 +244,7 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) case KVM_XEN_ATTR_TYPE_SHARED_INFO: if (data->u.shared_info.gfn == GPA_INVALID) { - kvm->arch.xen.shinfo_set = false; + kvm->arch.xen.shinfo_gfn = GPA_INVALID; r = 0; break; } @@ -283,10 +282,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) break; case KVM_XEN_ATTR_TYPE_SHARED_INFO: - if (kvm->arch.xen.shinfo_set) - data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa); - else - data->u.shared_info.gfn = GPA_INVALID; + 
data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn); r = 0; break; @@ -646,6 +642,11 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) return 0; } +void kvm_xen_init_vm(struct kvm *kvm) +{ + kvm->arch.xen.shinfo_gfn = GPA_INVALID; +} + void kvm_xen_destroy_vm(struct kvm *kvm) { if (kvm->arch.xen_hvm_config.msr) diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index 463a7844a8ca..cc0cf5f37450 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -21,6 +21,7 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data); int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data); int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data); int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc); +void kvm_xen_init_vm(struct kvm *kvm); void kvm_xen_destroy_vm(struct kvm *kvm); static inline bool kvm_xen_msr_enabled(struct kvm *kvm) @@ -50,6 +51,10 @@ static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) return 1; } +static inline void kvm_xen_init_vm(struct kvm *kvm) +{ +} + static inline void kvm_xen_destroy_vm(struct kvm *kvm) { } -- cgit From 730d070ae9f12fff5d44fe8fb0547ae37d100da8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 3 Aug 2021 16:15:45 +0200 Subject: MIPS: Replace deprecated CPU-hotplug functions. The functions get_online_cpus() and put_online_cpus() have been deprecated during the CPU hotplug rework. They map directly to cpus_read_lock() and cpus_read_unlock(). Replace deprecated CPU-hotplug functions with the official version. The behavior remains unchanged. Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/mips-mt-fpaff.c | 10 +++++----- arch/mips/kernel/process.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 6c590ef27648..67e130d3f038 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -76,13 +76,13 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len, if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask))) return -EFAULT; - get_online_cpus(); + cpus_read_lock(); rcu_read_lock(); p = find_process_by_pid(pid); if (!p) { rcu_read_unlock(); - put_online_cpus(); + cpus_read_unlock(); return -ESRCH; } @@ -147,7 +147,7 @@ out_free_cpus_allowed: free_cpumask_var(cpus_allowed); out_put_task: put_task_struct(p); - put_online_cpus(); + cpus_read_unlock(); return retval; } @@ -166,7 +166,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, if (len < real_len) return -EINVAL; - get_online_cpus(); + cpus_read_lock(); rcu_read_lock(); retval = -ESRCH; @@ -182,7 +182,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, out_unlock: rcu_read_unlock(); - put_online_cpus(); + cpus_read_unlock(); if (retval) return retval; if (copy_to_user(user_mask_ptr, &mask, real_len)) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 73c8e7990a97..95aa86fa6077 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -859,10 +859,10 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) * scheduled in then it will already have picked up the new FP mode * whilst doing so. 
*/ - get_online_cpus(); + cpus_read_lock(); for_each_cpu_and(cpu, &process_cpus, cpu_online_mask) work_on_cpu(cpu, prepare_for_fp_mode_switch, NULL); - put_online_cpus(); + cpus_read_unlock(); return 0; } -- cgit From ad548993a66c498267695edd8b19a682be0e3a8b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Aug 2021 17:02:16 -0700 Subject: MIPS: loongson2ef: don't build serial.o unconditionally LOONGSON_UART_BASE depends on EARLY_PRINTK || SERIAL_8250, but when neither of these Kconfig symbols is set, the kernel build has errors: ../arch/mips/loongson2ef/common/serial.c: In function 'serial_init': ../arch/mips/loongson2ef/common/serial.c:66:25: error: 'loongson_uart_base' undeclared (first use in this function) 66 | loongson_uart_base; ../arch/mips/loongson2ef/common/serial.c:66:25: note: each undeclared identifier is reported only once for each function it appears in ../arch/mips/loongson2ef/common/serial.c:68:41: error: '_loongson_uart_base' undeclared (first use in this function) 68 | (void __iomem *)_loongson_uart_base; Fix this by building serial.o only when one (or both) of these Kconfig symbols is enabled. Tested with: (a) EARLY_PRINTK=y, SERIAL_8250 not set; (b) EARLY_PRINTK=y, SERIAL_8250=y; (c) EARLY_PRINTK=y, SERIAL_8250=m; (d) EARLY_PRINTK not set, SERIAL_8250=y; (e) EARLY_PRINTK not set, SERIAL_8250=m; (f) EARLY_PRINTK not set, SERIAL_8250 not set. Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Jiaxun Yang Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson2ef/common/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/loongson2ef/common/Makefile b/arch/mips/loongson2ef/common/Makefile index d5ab3e543ea3..30ea8b5ca685 100644 --- a/arch/mips/loongson2ef/common/Makefile +++ b/arch/mips/loongson2ef/common/Makefile @@ -4,12 +4,14 @@ # obj-y += setup.o init.o env.o time.o reset.o irq.o \ - bonito-irq.o mem.o machtype.o platform.o serial.o + bonito-irq.o mem.o machtype.o platform.o obj-$(CONFIG_PCI) += pci.o # # Serial port support # +obj-$(CONFIG_LOONGSON_UART_BASE) += serial.o +obj-$(CONFIG_EARLY_PRINTK) += serial.o obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o obj-$(CONFIG_LOONGSON_MC146818) += rtc.o -- cgit From cb95ea79b3fc772c5873a7a4532ab4c14a455da2 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Thu, 29 Jul 2021 17:31:52 +0800 Subject: MIPS: locking/atomic: Fix atomic{_64,}_sub_if_positive The return type was int rather than the atomic value type, truncating 64-bit results. This looks like a typo, and it caused the atomic64 test to fail. Signed-off-by: Rui Wang Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 95e1f7f3597f..a0b9e7c1e4fc 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -206,7 +206,7 @@ ATOMIC_OPS(atomic64, xor, s64, ^=, xor, lld, scd) * The function returns the old value of @v minus @i. */ #define ATOMIC_SIP_OP(pfx, type, op, ll, sc) \ -static __inline__ int arch_##pfx##_sub_if_positive(type i, pfx##_t * v) \ +static __inline__ type arch_##pfx##_sub_if_positive(type i, pfx##_t * v) \ { \ type temp, result; \ \ -- cgit From 87689270b10fa9e6fac7242233b355cb6792b845 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 4 Aug 2021 22:28:38 +0000 Subject: KVM: Rename lru_slot to last_used_slot lru_slot is used to keep track of the index of the most-recently used memslot.
The correct acronym would be "mru" but that is not a common acronym. So call it last_used_slot which is a bit more obvious. Suggested-by: Paolo Bonzini Signed-off-by: David Matlack Message-Id: <20210804222844.1419481-2-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/s390/kvm/kvm-s390.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4527ac7b5961..02574d7b3612 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1953,7 +1953,7 @@ out: static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) { int start = 0, end = slots->used_slots; - int slot = atomic_read(&slots->lru_slot); + int slot = atomic_read(&slots->last_used_slot); struct kvm_memory_slot *memslots = slots->memslots; if (gfn >= memslots[slot].base_gfn && @@ -1974,7 +1974,7 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) if (gfn >= memslots[start].base_gfn && gfn < memslots[start].base_gfn + memslots[start].npages) { - atomic_set(&slots->lru_slot, start); + atomic_set(&slots->last_used_slot, start); } return start; -- cgit From 0f22af940dc8ec4f437189096a5f8677995323b0 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 4 Aug 2021 22:28:39 +0000 Subject: KVM: Move last_used_slot logic out of search_memslots Make search_memslots unconditionally search all memslots and move the last_used_slot logic up one level to __gfn_to_memslot. This is in preparation for introducing a per-vCPU last_used_slot. As part of this change convert existing callers of search_memslots to __gfn_to_memslot to avoid making any functional changes. Signed-off-by: David Matlack Message-Id: <20210804222844.1419481-3-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_64_vio.c | 2 +- arch/powerpc/kvm/book3s_64_vio_hv.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 8da93fdfa59e..6365087f3160 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -346,7 +346,7 @@ static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce, unsigned long gfn = tce >> PAGE_SHIFT; struct kvm_memory_slot *memslot; - memslot = search_memslots(kvm_memslots(kvm), gfn); + memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); if (!memslot) return -EINVAL; diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index dc6591548f0c..f38dfe195ef2 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -80,7 +80,7 @@ static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long gfn = tce >> PAGE_SHIFT; struct kvm_memory_slot *memslot; - memslot = search_memslots(kvm_memslots_raw(kvm), gfn); + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); if (!memslot) return -EINVAL; -- cgit From 081de470f1e6e83f9f460ba5ae8f57ff07f37692 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 4 Aug 2021 22:28:41 +0000 Subject: KVM: x86/mmu: Leverage vcpu->last_used_slot in tdp_mmu_map_handle_target_level The existing TDP MMU methods to handle dirty logging are vcpu-agnostic since they can be driven by MMU notifiers and other non-vcpu-specific events in addition to page faults. However this means that the TDP MMU is not benefiting from the new vcpu->last_used_slot. 
Fix that by introducing a tdp_mmu_map_set_spte_atomic() which is only called during a TDP page fault and has access to the kvm_vcpu for fast slot lookups. This improves "Populate memory time" in dirty_log_perf_test by 5%: Command | Before | After ------------------------------- | ---------------- | ------------- ./dirty_log_perf_test -v64 -x64 | 5.472321072s | 5.169832886s Signed-off-by: David Matlack Message-Id: <20210804222844.1419481-5-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 43f12f5d12c0..dab6cb46cdb2 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -542,15 +542,40 @@ static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm, return true; } -static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, - struct tdp_iter *iter, - u64 new_spte) +/* + * tdp_mmu_map_set_spte_atomic - Set a leaf TDP MMU SPTE atomically to resolve a + * TDP page fault. + * + * @vcpu: The vcpu instance that took the TDP page fault. + * @iter: a tdp_iter instance currently on the SPTE that should be set + * @new_spte: The value the SPTE should be set to + * + * Returns: true if the SPTE was set, false if it was not. If false is returned, + * this function will have no side-effects. + */ +static inline bool tdp_mmu_map_set_spte_atomic(struct kvm_vcpu *vcpu, + struct tdp_iter *iter, + u64 new_spte) { + struct kvm *kvm = vcpu->kvm; + if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, new_spte)) return false; - handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn, - iter->old_spte, new_spte, iter->level); + /* + * Use kvm_vcpu_gfn_to_memslot() instead of going through + * handle_changed_spte_dirty_log() to leverage vcpu->last_used_slot. + */ + if (is_writable_pte(new_spte)) { + struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, iter->gfn); + + if (slot && kvm_slot_dirty_track_enabled(slot)) { + /* Enforced by kvm_mmu_hugepage_adjust. */ + WARN_ON_ONCE(iter->level > PG_LEVEL_4K); + mark_page_dirty_in_slot(kvm, slot, iter->gfn); + } + } + return true; } @@ -563,7 +588,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm, * immediately installing a present entry in its place * before the TLBs are flushed. 
*/ - if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE)) + if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, REMOVED_SPTE)) return false; kvm_flush_remote_tlbs_with_address(kvm, iter->gfn, @@ -931,7 +956,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write, if (new_spte == iter->old_spte) ret = RET_PF_SPURIOUS; - else if (!tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte)) + else if (!tdp_mmu_map_set_spte_atomic(vcpu, iter, new_spte)) return RET_PF_RETRY; /* @@ -1035,8 +1060,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, new_spte = make_nonleaf_spte(child_pt, !shadow_accessed_mask); - if (tdp_mmu_set_spte_atomic(vcpu->kvm, &iter, - new_spte)) { + if (tdp_mmu_set_spte_atomic_no_dirty_log(vcpu->kvm, &iter, new_spte)) { tdp_mmu_link_page(vcpu->kvm, sp, true, huge_page_disallowed && req_level >= iter.level); -- cgit From 601f8af01e5ae535b45cdb91234887c9fd861ad4 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 4 Aug 2021 22:28:42 +0000 Subject: KVM: x86/mmu: Leverage vcpu->last_used_slot for rmap_add and rmap_recycle rmap_add() and rmap_recycle() both run in the context of the vCPU and thus we can use kvm_vcpu_gfn_to_memslot() to look up the memslot. This enables rmap_add() and rmap_recycle() to take advantage of vcpu->last_used_slot and avoid expensive memslot searching. This change improves the performance of "Populate memory time" in dirty_log_perf_test with tdp_mmu=N. In addition to improving the performance, "Populate memory time" no longer scales with the number of memslots in the VM. Command | Before | After ------------------------------- | ---------------- | ------------- ./dirty_log_perf_test -v64 -x1 | 15.18001570s | 14.99469366s ./dirty_log_perf_test -v64 -x64 | 18.71336392s | 14.98675076s Reviewed-by: Paolo Bonzini Signed-off-by: David Matlack Message-Id: <20210804222844.1419481-6-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 232ced2e7bf8..3a0ae48a26e9 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1044,17 +1044,6 @@ static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, return &slot->arch.rmap[level - PG_LEVEL_4K][idx]; } -static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, - struct kvm_mmu_page *sp) -{ - struct kvm_memslots *slots; - struct kvm_memory_slot *slot; - - slots = kvm_memslots_for_spte_role(kvm, sp->role); - slot = __gfn_to_memslot(slots, gfn); - return __gfn_to_rmap(gfn, sp->role.level, slot); -} - static bool rmap_can_add(struct kvm_vcpu *vcpu) { struct kvm_mmu_memory_cache *mc; @@ -1065,24 +1054,39 @@ static bool rmap_can_add(struct kvm_vcpu *vcpu) static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) { + struct kvm_memory_slot *slot; struct kvm_mmu_page *sp; struct kvm_rmap_head *rmap_head; sp = sptep_to_sp(spte); kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); - rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + rmap_head = __gfn_to_rmap(gfn, sp->role.level, slot); return pte_list_add(vcpu, spte, rmap_head); } + static void rmap_remove(struct kvm *kvm, u64 *spte) { + struct kvm_memslots *slots; + struct kvm_memory_slot *slot; struct kvm_mmu_page *sp; gfn_t gfn; struct kvm_rmap_head *rmap_head; sp = sptep_to_sp(spte); gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); - rmap_head = 
gfn_to_rmap(kvm, gfn, sp); + + /* + * Unlike rmap_add and rmap_recycle, rmap_remove does not run in the + * context of a vCPU so have to determine which memslots to use based + * on context information in sp->role. + */ + slots = kvm_memslots_for_spte_role(kvm, sp->role); + + slot = __gfn_to_memslot(slots, gfn); + rmap_head = __gfn_to_rmap(gfn, sp->role.level, slot); + __pte_list_remove(spte, rmap_head); } @@ -1620,12 +1624,13 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) { + struct kvm_memory_slot *slot; struct kvm_rmap_head *rmap_head; struct kvm_mmu_page *sp; sp = sptep_to_sp(spte); - - rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + rmap_head = __gfn_to_rmap(gfn, sp->role.level, slot); kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, __pte(0)); kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn, -- cgit From 93e083d4f4bfe790eb1cdc87103bd6a84be9df75 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 4 Aug 2021 22:28:43 +0000 Subject: KVM: x86/mmu: Rename __gfn_to_rmap to gfn_to_rmap gfn_to_rmap was removed in the previous patch so there is no need to retain the double underscore on __gfn_to_rmap. Reviewed-by: Paolo Bonzini Signed-off-by: David Matlack Message-Id: <20210804222844.1419481-7-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 25 ++++++++++++------------- arch/x86/kvm/mmu/mmu_audit.c | 4 ++-- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 3a0ae48a26e9..964c797dcc46 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1035,8 +1035,8 @@ out: return true; } -static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, - const struct kvm_memory_slot *slot) +static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level, + const struct kvm_memory_slot *slot) { unsigned long idx; @@ -1061,7 +1061,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) sp = sptep_to_sp(spte); kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - rmap_head = __gfn_to_rmap(gfn, sp->role.level, slot); + rmap_head = gfn_to_rmap(gfn, sp->role.level, slot); return pte_list_add(vcpu, spte, rmap_head); } @@ -1085,7 +1085,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); - rmap_head = __gfn_to_rmap(gfn, sp->role.level, slot); + rmap_head = gfn_to_rmap(gfn, sp->role.level, slot); __pte_list_remove(spte, rmap_head); } @@ -1307,8 +1307,8 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, return; while (mask) { - rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PG_LEVEL_4K, slot); + rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PG_LEVEL_4K, slot); __rmap_write_protect(kvm, rmap_head, false); /* clear the first set bit */ @@ -1340,8 +1340,8 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, return; while (mask) { - rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PG_LEVEL_4K, slot); + rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PG_LEVEL_4K, slot); __rmap_clear_dirty(kvm, rmap_head, slot); /* clear the first set bit */ @@ -1407,7 +1407,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, if (kvm_memslots_have_rmaps(kvm)) { for (i = min_level; 
i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { - rmap_head = __gfn_to_rmap(gfn, i, slot); + rmap_head = gfn_to_rmap(gfn, i, slot); write_protected |= __rmap_write_protect(kvm, rmap_head, true); } } @@ -1502,9 +1502,8 @@ rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level) { iterator->level = level; iterator->gfn = iterator->start_gfn; - iterator->rmap = __gfn_to_rmap(iterator->gfn, level, iterator->slot); - iterator->end_rmap = __gfn_to_rmap(iterator->end_gfn, level, - iterator->slot); + iterator->rmap = gfn_to_rmap(iterator->gfn, level, iterator->slot); + iterator->end_rmap = gfn_to_rmap(iterator->end_gfn, level, iterator->slot); } static void @@ -1630,7 +1629,7 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) sp = sptep_to_sp(spte); slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - rmap_head = __gfn_to_rmap(gfn, sp->role.level, slot); + rmap_head = gfn_to_rmap(gfn, sp->role.level, slot); kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, __pte(0)); kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn, diff --git a/arch/x86/kvm/mmu/mmu_audit.c b/arch/x86/kvm/mmu/mmu_audit.c index cedc17b2f60e..9e7dcf999f08 100644 --- a/arch/x86/kvm/mmu/mmu_audit.c +++ b/arch/x86/kvm/mmu/mmu_audit.c @@ -147,7 +147,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) return; } - rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot); + rmap_head = gfn_to_rmap(gfn, rev_sp->role.level, slot); if (!rmap_head->val) { if (!__ratelimit(&ratelimit_state)) return; @@ -200,7 +200,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, sp->gfn); - rmap_head = __gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot); + rmap_head = gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot); for_each_rmap_spte(rmap_head, &iter, sptep) { if (is_writable_pte(*sptep)) -- cgit From ca33d26ac640bdf74e5fd0fcd711dbc5ab9f36c0 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:23 -0600 Subject: alpha: return error code from alpha_pci_map_sg() The .map_sg() op now expects an error code instead of zero on failure. pci_map_single_1() can fail for different reasons, but since the only supported type of error return is DMA_MAPPING_ERROR, we coalesce those errors into EIO. ENOMEM is returned when no page tables can be allocated. 
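As a point of reference, here is a minimal, hypothetical caller-side sketch (not part of this patch; the function and names are illustrative only) of how the errno now surfaces: dma_map_sgtable() passes the negative error code up to the driver, whereas the legacy dma_map_sg() wrapper still collapses failures to 0:

	/* Hypothetical driver-side use of the errno-based convention. */
	#include <linux/dma-mapping.h>

	static int example_map(struct device *dev, struct sg_table *sgt)
	{
		int ret = dma_map_sgtable(dev, sgt, DMA_TO_DEVICE, 0);

		if (ret)	/* e.g. -EIO or -ENOMEM from the arch .map_sg() op */
			return ret;
		/* ... program the device, then dma_unmap_sgtable(dev, sgt, DMA_TO_DEVICE, 0) ... */
		return 0;
	}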
Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Christoph Hellwig --- arch/alpha/kernel/pci_iommu.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 35d7b3096d6e..21f9ac101324 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -649,7 +649,9 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, sg->dma_address = pci_map_single_1(pdev, SG_ENT_VIRT_ADDRESS(sg), sg->length, dac_allowed); - return sg->dma_address != DMA_MAPPING_ERROR; + if (sg->dma_address == DMA_MAPPING_ERROR) + return -EIO; + return 1; } start = sg; @@ -685,8 +687,10 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, if (out < end) out->dma_length = 0; - if (out - start == 0) + if (out - start == 0) { printk(KERN_WARNING "pci_map_sg failed: no entries?\n"); + return -ENOMEM; + } DBGA("pci_map_sg: %ld entries\n", out - start); return out - start; @@ -699,7 +703,7 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, entries. Unmap them now. */ if (out > start) pci_unmap_sg(pdev, start, out - start, dir); - return 0; + return -ENOMEM; } /* Unmap a set of streaming mode DMA translations. Again, cpu read -- cgit From 6506932b3268001ff15c081aceb47c7f9aa9fb2d Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:24 -0600 Subject: ARM/dma-mapping: return error code from .map_sg() ops The .map_sg() op now expects an error code instead of zero on failure. In the case of a DMA_MAPPING_ERROR, -EIO is returned. Otherwise, -ENOMEM or -EINVAL is returned depending on the error from __map_sg_chunk(). 
Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: Russell King Cc: Thomas Bogendoerfer Signed-off-by: Christoph Hellwig --- arch/arm/mm/dma-mapping.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c4b8df2ad328..113b9cb3701b 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -980,7 +980,7 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, { const struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; - int i, j; + int i, j, ret; for_each_sg(sg, s, nents, i) { #ifdef CONFIG_NEED_SG_DMA_LENGTH @@ -988,15 +988,17 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, #endif s->dma_address = ops->map_page(dev, sg_page(s), s->offset, s->length, dir, attrs); - if (dma_mapping_error(dev, s->dma_address)) + if (dma_mapping_error(dev, s->dma_address)) { + ret = -EIO; goto bad_mapping; + } } return nents; bad_mapping: for_each_sg(sg, s, i, j) ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); - return 0; + return ret; } /** @@ -1622,7 +1624,7 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, bool is_coherent) { struct scatterlist *s = sg, *dma = sg, *start = sg; - int i, count = 0; + int i, count = 0, ret; unsigned int offset = s->offset; unsigned int size = s->offset + s->length; unsigned int max = dma_get_max_seg_size(dev); @@ -1634,8 +1636,10 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, s->dma_length = 0; if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { - if (__map_sg_chunk(dev, start, size, &dma->dma_address, - dir, attrs, is_coherent) < 0) + ret = __map_sg_chunk(dev, start, size, + &dma->dma_address, dir, attrs, + is_coherent); + if (ret < 0) goto bad_mapping; dma->dma_address += offset; @@ -1648,8 +1652,9 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, } size += s->length; } - if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, - is_coherent) < 0) + ret = __map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, + is_coherent); + if (ret < 0) goto bad_mapping; dma->dma_address += offset; @@ -1660,7 +1665,9 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, bad_mapping: for_each_sg(sg, s, count, i) __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); - return 0; + if (ret == -ENOMEM) + return ret; + return -EINVAL; } /** -- cgit From 9cf88ec5e0e876c959389b1e695438f98663b92d Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 29 Jul 2021 14:15:25 -0600 Subject: ARM/dma-mapping: don't set failed sg dma_address to DMA_MAPPING_ERROR Setting the ->dma_address to DMA_MAPPING_ERROR is not part of the ->map_sg calling convention, so remove it. 
Link: https://lore.kernel.org/linux-mips/20210716063241.GC13345@lst.de/ Suggested-by: Christoph Hellwig Signed-off-by: Logan Gunthorpe Cc: Russell King Cc: Thomas Bogendoerfer Signed-off-by: Christoph Hellwig --- arch/arm/mm/dma-mapping.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 113b9cb3701b..4b61541853ea 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1632,7 +1632,6 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, for (i = 1; i < nents; i++) { s = sg_next(s); - s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { -- cgit From 62af5ca50c29a9be266656dfb88db6d439d129a1 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:26 -0600 Subject: ia64/sba_iommu: return error code from sba_map_sg_attrs() The .map_sg() op now expects an error code instead of zero on failure. In the case of a dma_mapping_error(), -EIO is returned, as the actual cause is opaque here. sba_coalesce_chunks() may only presently fail if sba_alloc_range() fails, which in turn only fails if the iommu is out of mapping resources, hence -ENOMEM is used in that case. Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: Michael Ellerman Cc: Niklas Schnelle Cc: Thomas Bogendoerfer Signed-off-by: Christoph Hellwig --- arch/ia64/hp/common/sba_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 9148ddbf02e5..8ad6946521d8 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1459,7 +1459,7 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, sglist->dma_address = sba_map_page(dev, sg_page(sglist), sglist->offset, sglist->length, dir, attrs); if (dma_mapping_error(dev, sglist->dma_address)) - return 0; + return -EIO; return 1; } @@ -1486,7 +1486,7 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, coalesced = sba_coalesce_chunks(ioc, dev, sglist, nents); if (coalesced < 0) { sba_unmap_sg_attrs(dev, sglist, nents, dir, attrs); - return 0; + return -ENOMEM; } /* -- cgit From af82fe85665d49bdb72dba66cd57e3bf95b71895 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:27 -0600 Subject: MIPS/jazzdma: return error code from jazz_dma_map_sg() The .map_sg() op now expects an error code instead of zero on failure. vdma_alloc() may fail for different reasons, but since it only supports indicating an error via a return of DMA_MAPPING_ERROR, we coalesce the different reasons into -EIO, as is documented on dma_map_sgtable().
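For reference, the error codes dma_map_sgtable() documents for .map_sg() failures are roughly the following (a summary of the kernel-doc from this series; wording approximate):

	/*
	 * -EINVAL  invalid argument, unaligned access or other usage error;
	 *          retrying will not succeed
	 * -ENOMEM  insufficient mapping resources (e.g. memory or IOVA space);
	 *          may succeed if retried later
	 * -EIO     legacy error code with an opaque meaning, e.g. from a
	 *          helper that can only report DMA_MAPPING_ERROR
	 */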
Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: Thomas Bogendoerfer Signed-off-by: Christoph Hellwig --- arch/mips/jazz/jazzdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 461457b28982..eabddb89d221 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -552,7 +552,7 @@ static int jazz_dma_map_sg(struct device *dev, struct scatterlist *sglist, dir); sg->dma_address = vdma_alloc(sg_phys(sg), sg->length); if (sg->dma_address == DMA_MAPPING_ERROR) - return 0; + return -EIO; sg_dma_len(sg) = sg->length; } -- cgit From c4e0e892ab0579d4de3ca6d260d537fc16b39790 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:28 -0600 Subject: powerpc/iommu: return error code from .map_sg() ops The .map_sg() op now expects an error code instead of zero on failure. Propagate the error up if vio_dma_iommu_map_sg() fails. ppc_iommu_map_sg() may fail either because of iommu_range_alloc() or because of tbl->it_ops->set(). The former only supports returning an error with DMA_MAPPING_ERROR and an examination of the latter indicates that it may return arch-specific errors (for example, tce_buildmulti_pSeriesLP()). Hence, coalesce all of those errors into -EIO, per the documentation on dma_map_sgtable(). Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geoff Levand Signed-off-by: Christoph Hellwig --- arch/powerpc/kernel/iommu.c | 4 ++-- arch/powerpc/platforms/ps3/system-bus.c | 2 +- arch/powerpc/platforms/pseries/vio.c | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 2af89a5e379f..a8ec4fe42817 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -473,7 +473,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, BUG_ON(direction == DMA_NONE); if ((nelems == 0) || !tbl) - return 0; + return -EINVAL; outs = s = segstart = &sglist[0]; outcount = 1; @@ -599,7 +599,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, if (s == outs) break; } - return 0; + return -EIO; } diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 1a5665875165..c54eb46f0cfb 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -663,7 +663,7 @@ static int ps3_ioc0_map_sg(struct device *_dev, struct scatterlist *sg, unsigned long attrs) { BUG(); - return 0; + return -EINVAL; } static void ps3_sb_unmap_sg(struct device *_dev, struct scatterlist *sg, diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index e00f3725ec96..e31e59c54f30 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -560,7 +560,8 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, for_each_sg(sglist, sgl, nelems, count) alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl)); - if (vio_cmo_alloc(viodev, alloc_size)) + ret = vio_cmo_alloc(viodev, alloc_size); + if (ret) goto out_fail; ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, dma_get_mask(dev), direction, attrs); @@ -577,7 +578,7 @@ out_deallocate: vio_cmo_dealloc(viodev, alloc_size); out_fail: atomic_inc(&viodev->cmo.allocs_failed); - return 0; + return ret; } static void vio_dma_iommu_unmap_sg(struct device *dev, -- 
cgit From eb86ef3b2d7e06b1447d4979e282977a845b611b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 29 Jul 2021 14:15:29 -0600 Subject: powerpc/iommu: don't set failed sg dma_address to DMA_MAPPING_ERROR Setting the ->dma_address to DMA_MAPPING_ERROR is not part of the ->map_sg calling convention, so remove it. Link: https://lore.kernel.org/linux-mips/20210716063241.GC13345@lst.de/ Suggested-by: Christoph Hellwig Signed-off-by: Logan Gunthorpe Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geoff Levand Signed-off-by: Christoph Hellwig --- arch/powerpc/kernel/iommu.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index a8ec4fe42817..30b7736f0896 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -575,7 +575,6 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, */ if (outcount < incount) { outs = sg_next(outs); - outs->dma_address = DMA_MAPPING_ERROR; outs->dma_length = 0; } @@ -593,7 +592,6 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, npages = iommu_num_pages(s->dma_address, s->dma_length, IOMMU_PAGE_SIZE(tbl)); __iommu_free(tbl, vaddr, npages); - s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; } if (s == outs) -- cgit From 911ace0ba628424637e42826c8a8b63b89bd5611 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:30 -0600 Subject: s390/pci: return error code from s390_dma_map_sg() The .map_sg() op now expects an error code instead of zero on failure. So propagate the error from __s390_dma_map_sg() up. __s390_dma_map_sg() returns either -ENOMEM on allocation failure or -EINVAL which is the same as what's expected by dma_map_sgtable(). Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Acked-by: Niklas Schnelle Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Signed-off-by: Christoph Hellwig --- arch/s390/pci/pci_dma.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index ebc9a49523aa..c78b02012764 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -487,7 +487,7 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, unsigned int max = dma_get_max_seg_size(dev); unsigned int size = s->offset + s->length; unsigned int offset = s->offset; - int count = 0, i; + int count = 0, i, ret; for (i = 1; i < nr_elements; i++) { s = sg_next(s); @@ -497,8 +497,9 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { - if (__s390_dma_map_sg(dev, start, size, - &dma->dma_address, dir)) + ret = __s390_dma_map_sg(dev, start, size, + &dma->dma_address, dir); + if (ret) goto unmap; dma->dma_address += offset; @@ -511,7 +512,8 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, } size += s->length; } - if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir)) + ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir); + if (ret) goto unmap; dma->dma_address += offset; @@ -523,7 +525,7 @@ unmap: s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); - return 0; + return ret; } static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, -- cgit From 7e4e7d4c54ecc9986041aae84e04280413f2435b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 29 Jul 2021 14:15:31 
-0600 Subject: s390/pci: don't set failed sg dma_address to DMA_MAPPING_ERROR Setting the ->dma_address to DMA_MAPPING_ERROR is not part of the ->map_sg calling convention, so remove it. Link: https://lore.kernel.org/linux-mips/20210716063241.GC13345@lst.de/ Suggested-by: Christoph Hellwig Signed-off-by: Logan Gunthorpe Cc: Niklas Schnelle Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Signed-off-by: Christoph Hellwig --- arch/s390/pci/pci_dma.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index c78b02012764..be48e5b5bfcf 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -492,7 +492,6 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, for (i = 1; i < nr_elements; i++) { s = sg_next(s); - s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; if (s->offset || (size & ~PAGE_MASK) || -- cgit From e02373fddb0ea86ce3962bb5d38e20ea0b6c8a05 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:32 -0600 Subject: sparc/iommu: return error codes from .map_sg() ops The .map_sg() op now expects an error code instead of zero on failure. Returning an errno from __sbus_iommu_map_sg() results in sbus_iommu_map_sg_gflush() and sbus_iommu_map_sg_pflush() returning an errno, as those functions are wrappers around __sbus_iommu_map_sg(). Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: "David S. Miller" Cc: Niklas Schnelle Cc: Michael Ellerman Signed-off-by: Christoph Hellwig --- arch/sparc/kernel/iommu.c | 4 ++-- arch/sparc/kernel/pci_sun4v.c | 4 ++-- arch/sparc/mm/iommu.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index a034f571d869..0589acd34201 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -448,7 +448,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, iommu = dev->archdata.iommu; strbuf = dev->archdata.stc; if (nelems == 0 || !iommu) - return 0; + return -EINVAL; spin_lock_irqsave(&iommu->lock, flags); @@ -580,7 +580,7 @@ iommu_map_failed: } spin_unlock_irqrestore(&iommu->lock, flags); - return 0; + return -EINVAL; } /* If contexts are being used, they are the same in all of the mappings diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 9de57e88f7a1..d90e80fa5705 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -486,7 +486,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, iommu = dev->archdata.iommu; if (nelems == 0 || !iommu) - return 0; + return -EINVAL; atu = iommu->atu; prot = HV_PCI_MAP_ATTR_READ; @@ -619,7 +619,7 @@ iommu_map_failed: } local_irq_restore(flags); - return 0; + return -EINVAL; } static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 0c0342e5b10d..9e3f6933ca13 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -256,7 +256,7 @@ static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl, sg->dma_address =__sbus_iommu_map_page(dev, sg_page(sg), sg->offset, sg->length, per_page_flush); if (sg->dma_address == DMA_MAPPING_ERROR) - return 0; + return -EIO; sg->dma_length = sg->length; } -- cgit From ba3a0482db879f33863348b81ed9770888d727a9 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 29 Jul 2021 14:15:33 -0600 Subject: sparc/iommu: 
don't set failed sg dma_address to DMA_MAPPING_ERROR Setting the ->dma_address to DMA_MAPPING_ERROR is not part of the ->map_sg calling convention, so remove it. Link: https://lore.kernel.org/linux-mips/20210716063241.GC13345@lst.de/ Suggested-by: Christoph Hellwig Signed-off-by: Logan Gunthorpe Cc: "David S. Miller" Cc: Niklas Schnelle Cc: Michael Ellerman Signed-off-by: Christoph Hellwig --- arch/sparc/kernel/iommu.c | 2 -- arch/sparc/kernel/pci_sun4v.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 0589acd34201..da0363692528 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -546,7 +546,6 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, if (outcount < incount) { outs = sg_next(outs); - outs->dma_address = DMA_MAPPING_ERROR; outs->dma_length = 0; } @@ -572,7 +571,6 @@ iommu_map_failed: iommu_tbl_range_free(&iommu->tbl, vaddr, npages, IOMMU_ERROR_CODE); - s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; } if (s == outs) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index d90e80fa5705..384480971805 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -594,7 +594,6 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, if (outcount < incount) { outs = sg_next(outs); - outs->dma_address = DMA_MAPPING_ERROR; outs->dma_length = 0; } @@ -611,7 +610,6 @@ iommu_map_failed: iommu_tbl_range_free(tbl, vaddr, npages, IOMMU_ERROR_CODE); /* XXX demap? XXX */ - s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; } if (s == outs) -- cgit From fcacc8a614391d3bf009319509c846fef47d9352 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:36 -0600 Subject: x86/amd_gart: return error code from gart_map_sg() The .map_sg() op now expects an error code instead of zero on failure. So make __dma_map_cont() return a valid errno (which is then propagated to gart_map_sg() via dma_map_cont()) and return it in case of failure. Also, return -EINVAL in case of invalid nents. Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Niklas Schnelle Cc: Thomas Bogendoerfer Cc: Michael Ellerman Signed-off-by: Christoph Hellwig --- arch/x86/kernel/amd_gart_64.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 9ac696487b13..46aea9a4f26b 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -331,7 +331,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int i; if (iommu_start == -1) - return -1; + return -ENOMEM; for_each_sg(start, s, nelems, i) { unsigned long pages, addr; @@ -380,13 +380,13 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *s, *ps, *start_sg, *sgmap; - int need = 0, nextneed, i, out, start; + int need = 0, nextneed, i, out, start, ret; unsigned long pages = 0; unsigned int seg_size; unsigned int max_seg_size; if (nents == 0) - return 0; + return -EINVAL; out = 0; start = 0; @@ -414,8 +414,9 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (!iommu_merge || !nextneed || !need || s->offset || (s->length + seg_size > max_seg_size) || (ps->offset + ps->length) % PAGE_SIZE) { - if (dma_map_cont(dev, start_sg, i - start, - sgmap, pages, need) < 0) + ret = dma_map_cont(dev, start_sg, i - start, + sgmap, pages, need); + if (ret < 0) goto error; out++; @@ -432,7 +433,8 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE); ps = s; } - if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) + ret = dma_map_cont(dev, start_sg, i - start, sgmap, pages, need); + if (ret < 0) goto error; out++; flush_gart(); @@ -458,7 +460,7 @@ error: iommu_full(dev, pages << PAGE_SHIFT, dir); for_each_sg(sg, s, nents, i) s->dma_address = DMA_MAPPING_ERROR; - return 0; + return ret; } /* allocate and map a coherent mapping */ -- cgit From 183dc86335e69f516b94f169afd784cf97cb8421 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 29 Jul 2021 14:15:37 -0600 Subject: x86/amd_gart: don't set failed sg dma_address to DMA_MAPPING_ERROR Setting the ->dma_address to DMA_MAPPING_ERROR is not part of the ->map_sg calling convention, so remove it. Link: https://lore.kernel.org/linux-mips/20210716063241.GC13345@lst.de/ Suggested-by: Christoph Hellwig Signed-off-by: Logan Gunthorpe Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Niklas Schnelle Cc: Thomas Bogendoerfer Cc: Michael Ellerman Signed-off-by: Christoph Hellwig --- arch/x86/kernel/amd_gart_64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 46aea9a4f26b..ed837383de5c 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -458,8 +458,6 @@ error: panic("dma_map_sg: overflow on %lu pages\n", pages); iommu_full(dev, pages << PAGE_SHIFT, dir); - for_each_sg(sg, s, nents, i) - s->dma_address = DMA_MAPPING_ERROR; return ret; } -- cgit From d8285639550578a1bf2d102391d1a9e08e0586ca Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 25 Jul 2021 03:35:56 +0900 Subject: kbuild: do not require sub-make for separate output tree builds As explained in commit 3204a7fb98a3 ("kbuild: prefix $(srctree)/ to some included Makefiles"), I want to stop using --include-dir some day. 
I already fixed up the top Makefile, but some arch Makefiles (mips, um, x86) still include checked-in Makefiles without $(srctree)/. Fix them up so 'need-sub-make := 1' can go away for this case. Signed-off-by: Masahiro Yamada --- arch/mips/Makefile | 2 +- arch/um/Makefile | 6 +++--- arch/x86/Makefile | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 653befc1b176..5fd26d514851 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -254,7 +254,7 @@ endif # # Board-dependent options and extra files # -include arch/mips/Kbuild.platforms +include $(srctree)/arch/mips/Kbuild.platforms ifdef CONFIG_PHYSICAL_START load-y = $(CONFIG_PHYSICAL_START) diff --git a/arch/um/Makefile b/arch/um/Makefile index 12a7acef0357..f2fe63bfd819 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -41,8 +41,8 @@ endif HOST_DIR := arch/$(HEADER_ARCH) -include $(ARCH_DIR)/Makefile-skas -include $(HOST_DIR)/Makefile.um +include $(srctree)/$(ARCH_DIR)/Makefile-skas +include $(srctree)/$(HOST_DIR)/Makefile.um core-y += $(HOST_DIR)/um/ @@ -76,7 +76,7 @@ USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \ -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ #This will adjust *FLAGS accordingly to the platform. -include $(ARCH_DIR)/Makefile-os-$(OS) +include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS) KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include \ -I$(srctree)/$(HOST_DIR)/include/uapi \ diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 307fd0000a83..0fa7dc73b5d8 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -75,7 +75,7 @@ ifeq ($(CONFIG_X86_32),y) KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4)) # CPU-specific tuning. Anything which can be shared with UML should go here. - include arch/x86/Makefile_32.cpu + include $(srctree)/arch/x86/Makefile_32.cpu KBUILD_CFLAGS += $(cflags-y) # temporary until string.h is fixed -- cgit From 52cc02b910284d6bddba46cce402044ab775f314 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 6 Aug 2021 00:01:02 +0900 Subject: kbuild: check CONFIG_AS_IS_LLVM instead of LLVM_IAS LLVM_IAS is the user interface to set the -(no-)integrated-as flag, and it should be used only for that purpose. LLVM_IAS is checked in some places to determine the assembler type, but it is not precise. For example, $ make CC=gcc LLVM_IAS=1 ... will use the GNU assembler (i.e. binutils) since LLVM_IAS=1 is effective only when $(CC) is clang. Of course, 'CC=gcc LLVM_IAS=1' is an odd combination, but the build system can be more robust against such insane input. Commit ba64beb17493a ("kbuild: check the minimum assembler version in Kconfig") introduced CONFIG_AS_IS_GNU/LLVM, which is more precise because Kconfig checks the version string from the assembler in use.
Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor --- arch/riscv/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index bc74afdbf31e..dcfbd2a87d41 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -41,7 +41,7 @@ endif ifeq ($(CONFIG_LD_IS_LLD),y) KBUILD_CFLAGS += -mno-relax KBUILD_AFLAGS += -mno-relax -ifneq ($(LLVM_IAS),1) +ifndef CONFIG_AS_IS_LLVM KBUILD_CFLAGS += -Wa,-mno-relax KBUILD_AFLAGS += -Wa,-mno-relax endif -- cgit From c755238d2ce0960ced9ffccc2ce14de2cd01b647 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 11 Jul 2021 23:31:43 +0100 Subject: ARM: 9099/1: crypto: rename 'mod_init' & 'mod_exit' functions to be module-specific Rename module_init & module_exit functions that are named "mod_init" and "mod_exit" so that they are unique in both the System.map file and in initcall_debug output instead of showing up as almost anonymous "mod_init". This is helpful for debugging and in determining how long certain module_init calls take to execute. Signed-off-by: Randy Dunlap Cc: Jason A. Donenfeld Cc: linux-arm-kernel@lists.infradead.org Cc: patches@armlinux.org.uk Signed-off-by: Russell King (Oracle) --- arch/arm/crypto/curve25519-glue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c index 31eb75b6002f..9bdafd57888c 100644 --- a/arch/arm/crypto/curve25519-glue.c +++ b/arch/arm/crypto/curve25519-glue.c @@ -112,7 +112,7 @@ static struct kpp_alg curve25519_alg = { .max_size = curve25519_max_size, }; -static int __init mod_init(void) +static int __init arm_curve25519_init(void) { if (elf_hwcap & HWCAP_NEON) { static_branch_enable(&have_neon); @@ -122,14 +122,14 @@ static int __init mod_init(void) return 0; } -static void __exit mod_exit(void) +static void __exit arm_curve25519_exit(void) { if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON) crypto_unregister_kpp(&curve25519_alg); } -module_init(mod_init); -module_exit(mod_exit); +module_init(arm_curve25519_init); +module_exit(arm_curve25519_exit); MODULE_ALIAS_CRYPTO("curve25519"); MODULE_ALIAS_CRYPTO("curve25519-neon"); -- cgit From d7bcc5e22967c96685d03dbbd167e1a1ddf9b910 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Jul 2021 15:03:51 +0100 Subject: ARM: 9102/1: move the install rules to arch/arm/Makefile Currently, the (z/u)install targets in arch/arm/Makefile descend into arch/arm/boot/Makefile to invoke the shell script, but there is no good reason to do so. arch/arm/Makefile can run the shell script directly.
Signed-off-by: Masahiro Yamada Signed-off-by: Russell King (Oracle) --- arch/arm/Makefile | 3 ++- arch/arm/boot/Makefile | 14 +------------- 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 173da685a52e..847c31e7c368 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -308,7 +308,8 @@ $(BOOT_TARGETS): vmlinux @$(kecho) ' Kernel: $(boot)/$@ is ready' $(INSTALL_TARGETS): - $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ + $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh "$(KERNELRELEASE)" \ + $(boot)/$(patsubst %install,%Image,$@) System.map "$(INSTALL_PATH)" PHONY += vdso_install vdso_install: diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 0b3cd7a33a26..54a09f9464fb 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -96,23 +96,11 @@ $(obj)/bootp/bootp: $(obj)/zImage initrd FORCE $(obj)/bootpImage: $(obj)/bootp/bootp FORCE $(call if_changed,objcopy) -PHONY += initrd install zinstall uinstall +PHONY += initrd initrd: @test "$(INITRD_PHYS)" != "" || \ (echo This machine does not support INITRD; exit -1) @test "$(INITRD)" != "" || \ (echo You must specify INITRD; exit -1) -install: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ - $(obj)/Image System.map "$(INSTALL_PATH)" - -zinstall: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ - $(obj)/zImage System.map "$(INSTALL_PATH)" - -uinstall: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ - $(obj)/uImage System.map "$(INSTALL_PATH)" - subdir- := bootp compressed dts -- cgit From 6fec92d9b2bfd2fb1a2a4295dc859d9bab16c8fc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 30 Jul 2021 10:43:02 +0100 Subject: ARM: 9103/1: Drop ARCH_NR_GPIOS definition The generic header applies the same conditional, hence drop the unnecessary duplication. Link: https://lore.kernel.org/r/20210510114107.43006-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Linus Walleij Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/gpio.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/gpio.h b/arch/arm/include/asm/gpio.h index c50e383358c4..f3bb8a2bf788 100644 --- a/arch/arm/include/asm/gpio.h +++ b/arch/arm/include/asm/gpio.h @@ -2,10 +2,6 @@ #ifndef _ARCH_ARM_GPIO_H #define _ARCH_ARM_GPIO_H -#if CONFIG_ARCH_NR_GPIO > 0 -#define ARCH_NR_GPIOS CONFIG_ARCH_NR_GPIO -#endif - /* Note: this may rely upon the value of ARCH_NR_GPIOS set in mach/gpio.h */ #include -- cgit From b30d0289de72c62516df03fdad8d53f552c69839 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Mon, 9 Aug 2021 19:07:30 +0100 Subject: ARM: 9105/1: atags_to_fdt: don't warn about stack size The merge_fdt_bootargs() function by definition consumes more than 1024 bytes of stack because it has a 1024 byte command line on the stack, meaning that we always get a warning when building this file: arch/arm/boot/compressed/atags_to_fdt.c: In function 'merge_fdt_bootargs': arch/arm/boot/compressed/atags_to_fdt.c:98:1: warning: the frame size of 1032 bytes is larger than 1024 bytes [-Wframe-larger-than=] However, as this is the decompressor, we know that it has a very shallow call chain, and we do not actually risk overflowing the kernel stack at runtime here. This just shuts up the warning by raising the frame size warning limit for this file. Tested on Nexus 7 2012 builds.
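For context, kbuild's per-file flag overrides make this a two-line change; a minimal sketch of the mechanism (the object name foo.o is illustrative):

    # Strip a flag the object would otherwise inherit from KBUILD_CFLAGS:
    CFLAGS_REMOVE_foo.o += -Wframe-larger-than=${CONFIG_FRAME_WARN}
    # Then append a per-object replacement with a higher limit:
    CFLAGS_foo.o += -Wframe-larger-than=1280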
Acked-by: Nicolas Pitre Signed-off-by: David Heidelberg Signed-off-by: Arnd Bergmann Cc: Signed-off-by: Russell King (Oracle) --- arch/arm/boot/compressed/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 9d91ae1091b0..91265e7ff672 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -85,6 +85,8 @@ compress-$(CONFIG_KERNEL_LZ4) = lz4 libfdt_objs := fdt_rw.o fdt_ro.o fdt_wip.o fdt.o ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y) +CFLAGS_REMOVE_atags_to_fdt.o += -Wframe-larger-than=${CONFIG_FRAME_WARN} +CFLAGS_atags_to_fdt.o += -Wframe-larger-than=1280 OBJS += $(libfdt_objs) atags_to_fdt.o endif ifeq ($(CONFIG_USE_OF),y) -- cgit From 09ca497528dac12cbbceab8197011c875a96d053 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Jun 2021 17:09:18 +0000 Subject: powerpc: Remove in_kernel_text() Last user of in_kernel_text() stopped using it with commit 549e8152de80 ("powerpc: Make the 64-bit kernel as a position-independent executable"). Generic function is_kernel_text() does the same. So remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2a3a5b6f8cc0ef4e854d7b764f66aa8d2ee270d2.1624813698.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/sections.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 324d7b298ec3..6e4af4492a14 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -38,14 +38,6 @@ extern char start_virt_trampolines[]; extern char end_virt_trampolines[]; #endif -static inline int in_kernel_text(unsigned long addr) -{ - if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end) - return 1; - - return 0; -} - static inline unsigned long kernel_toc_addr(void) { /* Defined by the linker, see vmlinux.lds.S */ -- cgit From c8a6d91005343dea0d53be0ff0620c66934dcd44 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 5 Jul 2021 12:00:50 +0000 Subject: powerpc/non-smp: Unconditionally call smp_mb() on switch_mm Commit 3ccfebedd8cf ("powerpc, membarrier: Skip memory barrier in switch_mm()") added some logic to skip the smp_mb() in switch_mm_irqs_off() before the call to switch_mmu_context(). However, on non-SMP, smp_mb() is just a compiler barrier and doing it unconditionally is simpler than the logic used to check whether the barrier is needed or not. After the patch: 00000000 : ... c: 7c 04 18 40 cmplw r4,r3 10: 81 24 00 24 lwz r9,36(r4) 14: 91 25 04 c8 stw r9,1224(r5) 18: 4d 82 00 20 beqlr 1c: 48 00 00 00 b 1c 1c: R_PPC_REL24 switch_mmu_context Before the patch: 00000000 : ... c: 7c 04 18 40 cmplw r4,r3 10: 81 24 00 24 lwz r9,36(r4) 14: 91 25 04 c8 stw r9,1224(r5) 18: 4d 82 00 20 beqlr 1c: 81 24 00 28 lwz r9,40(r4) 20: 71 29 00 0a andi. r9,r9,10 24: 40 82 00 34 bne 58 28: 48 00 00 00 b 28 28: R_PPC_REL24 switch_mmu_context ...
58: 2c 03 00 00 cmpwi r3,0 5c: 41 82 ff cc beq 28 60: 48 00 00 00 b 60 60: R_PPC_REL24 switch_mmu_context Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e9d501da0c59f60ca767b1b3ea4603fce6d02b9e.1625486440.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/membarrier.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h index 6e20bb5c74ea..de7f79157918 100644 --- a/arch/powerpc/include/asm/membarrier.h +++ b/arch/powerpc/include/asm/membarrier.h @@ -12,7 +12,8 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev, * when switching from userspace to kernel is not needed after * store to rq->curr. */ - if (likely(!(atomic_read(&next->membarrier_state) & + if (IS_ENABLED(CONFIG_SMP) && + likely(!(atomic_read(&next->membarrier_state) & (MEMBARRIER_STATE_PRIVATE_EXPEDITED | MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev)) return; -- cgit From 9c7248bb8de31f51c693bfa6a6ea53b1c07e0fa8 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Tue, 11 May 2021 09:31:36 +0200 Subject: powerpc/numa: Consider the max NUMA node for migratable LPAR When an LPAR is migratable, we should consider the maximum possible NUMA node instead of the number of NUMA nodes from the actual system. The DT property 'ibm,current-associativity-domains' defines the maximum number of nodes the LPAR can see when running on that box. But if the LPAR is being migrated to another box, it may see up to the nodes defined by 'ibm,max-associativity-domains'. So if an LPAR is migratable, that value should be used. Unfortunately, there is no easy way to know if an LPAR is migratable or not. The hypervisor exports the property 'ibm,migratable-partition' in the case it is set up to allow partition migration, but that does not mean that the current partition is migratable. Without this patch, when an LPAR is started on a 2 node box and then migrated to a 3 node box, the hypervisor may spread the LPAR's CPUs on the 3rd node. In that case if a CPU from that 3rd node is added to the LPAR, it will be wrongly assigned to the node because the kernel has been set to use up to 2 nodes (the configuration of the departure node). With this patch applied, the CPU is correctly added to the 3rd node. Fixes: f9f130ff2ec9 ("powerpc/numa: Detect support for coregroup") Signed-off-by: Laurent Dufour Reviewed-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210511073136.17795-1-ldufour@linux.ibm.com --- arch/powerpc/mm/numa.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f2bf98bdcea2..094a1076fd1f 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -893,7 +893,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) static void __init find_possible_nodes(void) { struct device_node *rtas; - const __be32 *domains; + const __be32 *domains = NULL; int prop_length, max_nodes; u32 i; @@ -909,9 +909,14 @@ static void __init find_possible_nodes(void) * it doesn't exist, then fallback on ibm,max-associativity-domains. * Current denotes what the platform can support compared to max * which denotes what the Hypervisor can support. + * + * If the LPAR is migratable, new nodes might be activated after a LPM, + * so we should consider the max number in that case.
*/ - domains = of_get_property(rtas, "ibm,current-associativity-domains", - &prop_length); + if (!of_get_property(of_root, "ibm,migratable-partition", NULL)) + domains = of_get_property(rtas, + "ibm,current-associativity-domains", + &prop_length); if (!domains) { domains = of_get_property(rtas, "ibm,max-associativity-domains", &prop_length); @@ -920,6 +925,8 @@ static void __init find_possible_nodes(void) } max_nodes = of_read_number(&domains[min_common_depth], 1); + pr_info("Partition configured for %d NUMA nodes.\n", max_nodes); + for (i = 0; i < max_nodes; i++) { if (!node_possible(i)) node_set(i, node_possible_map); -- cgit From d144f4d5a8a804133d20ff311d7be70bcdbfaac2 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Mon, 17 May 2021 11:06:06 +0200 Subject: pseries/drmem: update LMBs after LPM After an LPM, the device tree node ibm,dynamic-reconfiguration-memory may be updated by the hypervisor in the case the NUMA topology of the LPAR's memory is updated. This is handled by the kernel, but the memory's node is not updated because there is no way to move a memory block between nodes from the Linux kernel point of view. If later a memory block is added or removed, drmem_update_dt() is called and it overwrites the DT node ibm,dynamic-reconfiguration-memory to match the added or removed LMB. But the LMB's associativity node has not been updated after the DT node update and thus the node is overwritten by Linux's topology instead of the hypervisor's. Introduce a hook called when the ibm,dynamic-reconfiguration-memory node is updated to force an update of the LMB's associativity. However, ignore the call to that hook when the update has been triggered by drmem_update_dt(). Because, in that case, the LMB tree has been used to set the DT property and thus it doesn't need to be updated back. Since drmem_update_dt() is called under the protection of the device_hotplug_lock and the hook is called in the same context, use a simple boolean variable to detect that call. Signed-off-by: Laurent Dufour Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210517090606.56930-1-ldufour@linux.ibm.com --- arch/powerpc/include/asm/drmem.h | 1 + arch/powerpc/mm/drmem.c | 46 +++++++++++++++++++++++++ arch/powerpc/platforms/pseries/hotplug-memory.c | 4 +++ 3 files changed, 51 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index bf2402fed3e0..4265d5e95c2c 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -111,6 +111,7 @@ int drmem_update_dt(void); int __init walk_drmem_lmbs_early(unsigned long node, void *data, int (*func)(struct drmem_lmb *, const __be32 **, void *)); +void drmem_update_lmbs(struct property *prop); #endif static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 9af3832c9d8d..22197b18d85e 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -18,6 +18,7 @@ static int n_root_addr_cells, n_root_size_cells; static struct drmem_lmb_info __drmem_info; struct drmem_lmb_info *drmem_info = &__drmem_info; +static bool in_drmem_update; u64 drmem_lmb_memory_max(void) { @@ -178,6 +179,11 @@ int drmem_update_dt(void) if (!memory) return -1; + /* + * Set in_drmem_update to prevent the notifier callback from processing + * the DT property back since the change is coming from the LMB tree.
+ */ + in_drmem_update = true; prop = of_find_property(memory, "ibm,dynamic-memory", NULL); if (prop) { rc = drmem_update_dt_v1(memory, prop); @@ -186,6 +192,7 @@ int drmem_update_dt(void) if (prop) rc = drmem_update_dt_v2(memory, prop); } + in_drmem_update = false; of_node_put(memory); return rc; @@ -307,6 +314,45 @@ int __init walk_drmem_lmbs_early(unsigned long node, void *data, return ret; } +/* + * Update the LMB associativity index. + */ +static int update_lmb(struct drmem_lmb *updated_lmb, + __maybe_unused const __be32 **usm, + __maybe_unused void *data) +{ + struct drmem_lmb *lmb; + + for_each_drmem_lmb(lmb) { + if (lmb->drc_index != updated_lmb->drc_index) + continue; + + lmb->aa_index = updated_lmb->aa_index; + break; + } + return 0; +} + +/* + * Update the LMB associativity index. + * + * This needs to be called when the hypervisor is updating the + * dynamic-reconfiguration-memory node property. + */ +void drmem_update_lmbs(struct property *prop) +{ + /* + * Don't update the LMBs if triggered by the update done in + * drmem_update_dt(), the LMB values have been used to update the DT + * property in that case. + */ + if (in_drmem_update) + return; + if (!strcmp(prop->name, "ibm,dynamic-memory")) + __walk_drmem_v1_lmbs(prop->value, NULL, NULL, update_lmb); + else if (!strcmp(prop->name, "ibm,dynamic-memory-v2")) + __walk_drmem_v2_lmbs(prop->value, NULL, NULL, update_lmb); +} #endif static int init_drmem_lmb_size(struct device_node *dn) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 377d852f5a9a..0beb3ca2b549 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -979,6 +979,10 @@ static int pseries_memory_notifier(struct notifier_block *nb, case OF_RECONFIG_DETACH_NODE: err = pseries_remove_mem_node(rd->dn); break; + case OF_RECONFIG_UPDATE_PROPERTY: + if (!strcmp(rd->dn->name, + "ibm,dynamic-reconfiguration-memory")) + drmem_update_lmbs(rd->prop); } return notifier_from_errno(err); } -- cgit From bd1dd4c5f5286df0148b5b316f37c583b8f55fa1 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 29 Apr 2021 19:49:08 +0200 Subject: powerpc/pseries: Prevent free CPU ids being reused on another node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CPU is hot added, the CPU ids are taken from the available mask, starting from the lowest possible set. If that set of values was previously used for a CPU attached to a different node, it appears to an application as if these CPUs have migrated from one node to another node, which is not expected. To prevent this, we need to record the CPU ids used for each node and not reuse them on another node. However, to prevent CPU hotplug from failing when the CPU ids are starved on a node, the capability to reuse other nodes’ free CPU ids is kept. A warning is displayed in such a case to warn the user. A new CPU bit mask (node_recorded_ids_map) is introduced for each possible node. It is populated with the CPUs onlined at boot time, and then when a CPU is hot plugged to a node. The bits in that mask remain set when the CPU is hot unplugged, to record that these CPU ids have been used for this node. If no id set was found, a retry is made without removing the ids used on the other nodes to try reusing them. This is the way ids have been allocated prior to this patch. The effect of this patch can be seen by removing and adding CPUs using the Qemu monitor.
In the following case, the first CPU from the node 2 is removed, then the first one from the node 1 is removed too. Later, the first CPU of the node 2 is added back. Without that patch, the kernel will number these CPUs using the first CPU ids available, which are the ones freed when removing the first CPU of the node 1. This leads the CPU ids 16-23 to move from the node 1 to the node 2. With the patch applied, the CPU ids 32-39 are used since they are the lowest free ones which have not been used on another node. At boot time: [root@vm40 ~]# numactl -H | grep cpus node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 node 1 cpus: 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 node 2 cpus: 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 Vanilla kernel, after the CPU hot unplug/plug operations: [root@vm40 ~]# numactl -H | grep cpus node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 node 1 cpus: 24 25 26 27 28 29 30 31 node 2 cpus: 16 17 18 19 20 21 22 23 40 41 42 43 44 45 46 47 Patched kernel, after the CPU hot unplug/plug operations: [root@vm40 ~]# numactl -H | grep cpus node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 node 1 cpus: 24 25 26 27 28 29 30 31 node 2 cpus: 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 Signed-off-by: Laurent Dufour Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210429174908.16613-1-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 171 +++++++++++++++++++++------ 1 file changed, 132 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 7e970f81d8ff..e1f224320102 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -39,6 +39,12 @@ /* This version can't take the spinlock, because it never returns */ static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE; +/* + * Record the CPU ids used on each node. + * Protected by cpu_add_remove_lock. + */ +static cpumask_var_t node_recorded_ids_map[MAX_NUMNODES]; + static void rtas_stop_self(void) { static struct rtas_args args; @@ -139,72 +145,148 @@ static void pseries_cpu_die(unsigned int cpu) paca_ptrs[cpu]->cpu_start = 0; } +/** + * find_cpu_id_range - find a linear range of @nthreads free CPU ids. + * @nthreads : the number of threads (cpu ids) + * @assigned_node : the node it belongs to or NUMA_NO_NODE if free ids from any + * node can be picked. + * @cpu_mask: the returned CPU mask. + * + * Returns 0 on success. */ +static int find_cpu_id_range(unsigned int nthreads, int assigned_node, + cpumask_var_t *cpu_mask) +{ + cpumask_var_t candidate_mask; + unsigned int cpu, node; + int rc = -ENOSPC; + + if (!zalloc_cpumask_var(&candidate_mask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_clear(*cpu_mask); + for (cpu = 0; cpu < nthreads; cpu++) + cpumask_set_cpu(cpu, *cpu_mask); + + BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask)); + + /* Get a bitmap of unoccupied slots. */ + cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask); + + if (assigned_node != NUMA_NO_NODE) { + /* + * Remove free ids previously assigned on the other nodes. We + * can walk only online nodes because once a node becomes online + * it is not turned offline again.
+ */ + for_each_online_node(node) { + if (node == assigned_node) + continue; + cpumask_andnot(candidate_mask, candidate_mask, + node_recorded_ids_map[node]); + } + } + + if (cpumask_empty(candidate_mask)) + goto out; + + while (!cpumask_empty(*cpu_mask)) { + if (cpumask_subset(*cpu_mask, candidate_mask)) + /* Found a range where we can insert the new cpu(s) */ + break; + cpumask_shift_left(*cpu_mask, *cpu_mask, nthreads); + } + + if (!cpumask_empty(*cpu_mask)) + rc = 0; + +out: + free_cpumask_var(candidate_mask); + return rc; +} + /* * Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle - * here is that a cpu device node may represent up to two logical cpus + * here is that a cpu device node may represent multiple logical cpus * in the SMT case. We must honor the assumption in other code that * the logical ids for sibling SMT threads x and y are adjacent, such * that x^1 == y and y^1 == x. */ static int pseries_add_processor(struct device_node *np) { - unsigned int cpu; - cpumask_var_t candidate_mask, tmp; - int err = -ENOSPC, len, nthreads, i; + int len, nthreads, node, cpu, assigned_node; + int rc = 0; + cpumask_var_t cpu_mask; const __be32 *intserv; intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); if (!intserv) return 0; - zalloc_cpumask_var(&candidate_mask, GFP_KERNEL); - zalloc_cpumask_var(&tmp, GFP_KERNEL); - nthreads = len / sizeof(u32); - for (i = 0; i < nthreads; i++) - cpumask_set_cpu(i, tmp); - cpu_maps_update_begin(); + if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; - BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask)); + /* + * Fetch from the DT nodes read by dlpar_configure_connector() the NUMA + * node id the added CPU belongs to. + */ + node = of_node_to_nid(np); + if (node < 0 || !node_possible(node)) + node = first_online_node; - /* Get a bitmap of unoccupied slots. */ - cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask); - if (cpumask_empty(candidate_mask)) { - /* If we get here, it most likely means that NR_CPUS is - * less than the partition's max processors setting. + BUG_ON(node == NUMA_NO_NODE); + assigned_node = node; + + cpu_maps_update_begin(); + + rc = find_cpu_id_range(nthreads, node, &cpu_mask); + if (rc && nr_node_ids > 1) { + /* + * Try again, considering the free CPU ids from the other node. */ - printk(KERN_ERR "Cannot add cpu %pOF; this system configuration" - " supports %d logical cpus.\n", np, - num_possible_cpus()); - goto out_unlock; + node = NUMA_NO_NODE; + rc = find_cpu_id_range(nthreads, NUMA_NO_NODE, &cpu_mask); } - while (!cpumask_empty(tmp)) - if (cpumask_subset(tmp, candidate_mask)) - /* Found a range where we can insert the new cpu(s) */ - break; - else - cpumask_shift_left(tmp, tmp, nthreads); - - if (cpumask_empty(tmp)) { - printk(KERN_ERR "Unable to find space in cpu_present_mask for" - " processor %pOFn with %d thread(s)\n", np, - nthreads); - goto out_unlock; + if (rc) { + pr_err("Cannot add cpu %pOF; this system configuration" + " supports %d logical cpus.\n", np, num_possible_cpus()); + goto out; } - for_each_cpu(cpu, tmp) { + for_each_cpu(cpu, cpu_mask) { BUG_ON(cpu_present(cpu)); set_cpu_present(cpu, true); set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++)); } - err = 0; -out_unlock: + + /* Record the newly used CPU ids for the associate node. 
*/ + cpumask_or(node_recorded_ids_map[assigned_node], + node_recorded_ids_map[assigned_node], cpu_mask); + + /* + * If node is set to NUMA_NO_NODE, CPU ids have be reused from + * another node, remove them from its mask. + */ + if (node == NUMA_NO_NODE) { + cpu = cpumask_first(cpu_mask); + pr_warn("Reusing free CPU ids %d-%d from another node\n", + cpu, cpu + nthreads - 1); + for_each_online_node(node) { + if (node == assigned_node) + continue; + cpumask_andnot(node_recorded_ids_map[node], + node_recorded_ids_map[node], + cpu_mask); + } + } + +out: cpu_maps_update_done(); - free_cpumask_var(candidate_mask); - free_cpumask_var(tmp); - return err; + free_cpumask_var(cpu_mask); + return rc; } /* @@ -908,6 +990,7 @@ static struct notifier_block pseries_smp_nb = { static int __init pseries_cpu_hotplug_init(void) { int qcss_tok; + unsigned int node; #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE ppc_md.cpu_probe = dlpar_cpu_probe; @@ -929,8 +1012,18 @@ static int __init pseries_cpu_hotplug_init(void) smp_ops->cpu_die = pseries_cpu_die; /* Processors can be added/removed only on LPAR */ - if (firmware_has_feature(FW_FEATURE_LPAR)) + if (firmware_has_feature(FW_FEATURE_LPAR)) { + for_each_node(node) { + alloc_bootmem_cpumask_var(&node_recorded_ids_map[node]); + + /* Record ids of CPU added at boot time */ + cpumask_or(node_recorded_ids_map[node], + node_recorded_ids_map[node], + node_to_cpumask_map[node]); + } + of_reconfig_notifier_register(&pseries_smp_nb); + } return 0; } -- cgit From c00103abf76fd3916596afd07dd3fdeee0dca15d Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Tue, 3 Aug 2021 16:59:55 +0200 Subject: powerpc/kexec: fix for_each_child.cocci warning for_each_node_by_type should have of_node_put() before return. Generated by: scripts/coccinelle/iterators/for_each_child.cocci Reported-by: kernel test robot Signed-off-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2108031654080.17639@hadrien --- arch/powerpc/kexec/core_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 84618d3c8013..89c069d664a5 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -64,8 +64,10 @@ int default_machine_kexec_prepare(struct kimage *image) begin = image->segment[i].mem; end = begin + image->segment[i].memsz; - if ((begin < high) && (end > low)) + if ((begin < high) && (end > low)) { + of_node_put(node); return -ETXTBSY; + } } } -- cgit From 5ae36401ca4ea2737d779ce7c267444b16530001 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 3 Aug 2021 16:15:46 +0200 Subject: powerpc: Replace deprecated CPU-hotplug functions. The functions get_online_cpus() and put_online_cpus() have been deprecated during the CPU hotplug rework. They map directly to cpus_read_lock() and cpus_read_unlock(). Replace deprecated CPU-hotplug functions with the official version. The behavior remains unchanged. 
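The conversion is mechanical; a minimal sketch of the before/after pattern (the function and its per-CPU work are illustrative only):

    static void scan_cpus(void)		/* hypothetical example */
    {
    	unsigned int cpu;

    	cpus_read_lock();		/* was: get_online_cpus() */
    	for_each_online_cpu(cpu)
    		pr_info("cpu %u is online\n", cpu);
    	cpus_read_unlock();		/* was: put_online_cpus() */
    }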
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210803141621.780504-4-bigeasy@linutronix.de --- arch/powerpc/kernel/rtasd.c | 4 ++-- arch/powerpc/kvm/book3s_hv_builtin.c | 10 +++++----- arch/powerpc/platforms/powernv/idle.c | 4 ++-- arch/powerpc/platforms/powernv/opal-imc.c | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 8561dfb33f24..32ee17753eb4 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -429,7 +429,7 @@ static void rtas_event_scan(struct work_struct *w) do_event_scan(); - get_online_cpus(); + cpus_read_lock(); /* raw_ OK because just using CPU as starting point. */ cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); @@ -451,7 +451,7 @@ static void rtas_event_scan(struct work_struct *w) schedule_delayed_work_on(cpu, &event_scan_work, __round_jiffies_relative(event_scan_delay, cpu)); - put_online_cpus(); + cpus_read_unlock(); } #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index be8ef1c5b1bf..fcf4760a3a0e 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -137,23 +137,23 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, * exist in the system. We use a counter of VMs to track this. * * One of the operations we need to block is onlining of secondaries, so we - * protect hv_vm_count with get/put_online_cpus(). + * protect hv_vm_count with cpus_read_lock/unlock(). */ static atomic_t hv_vm_count; void kvm_hv_vm_activated(void) { - get_online_cpus(); + cpus_read_lock(); atomic_inc(&hv_vm_count); - put_online_cpus(); + cpus_read_unlock(); } EXPORT_SYMBOL_GPL(kvm_hv_vm_activated); void kvm_hv_vm_deactivated(void) { - get_online_cpus(); + cpus_read_lock(); atomic_dec(&hv_vm_count); - put_online_cpus(); + cpus_read_unlock(); } EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated); diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 528a7e0cf83a..aa27689b832d 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -199,12 +199,12 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, */ power7_fastsleep_workaround_exit = false; - get_online_cpus(); + cpus_read_lock(); primary_thread_mask = cpu_online_cores_map(); on_each_cpu_mask(&primary_thread_mask, pnv_fastsleep_workaround_apply, &err, 1); - put_online_cpus(); + cpus_read_unlock(); if (err) { pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); goto fail; diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 7824cc364bc4..ba02a75c1410 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -186,7 +186,7 @@ static void disable_nest_pmu_counters(void) int nid, cpu; const struct cpumask *l_cpumask; - get_online_cpus(); + cpus_read_lock(); for_each_node_with_cpus(nid) { l_cpumask = cpumask_of_node(nid); cpu = cpumask_first_and(l_cpumask, cpu_online_mask); @@ -195,7 +195,7 @@ static void disable_nest_pmu_counters(void) opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(cpu)); } - put_online_cpus(); + cpus_read_unlock(); } static void disable_core_pmu_counters(void) @@ -203,7 +203,7 @@ static void disable_core_pmu_counters(void) cpumask_t cores_map; int cpu, 
rc; - get_online_cpus(); + cpus_read_lock(); /* Disable the IMC Core functions */ cores_map = cpu_online_cores_map(); for_each_cpu(cpu, &cores_map) { @@ -213,7 +213,7 @@ static void disable_core_pmu_counters(void) pr_err("%s: Failed to stop Core (cpu = %d)\n", __FUNCTION__, cpu); } - put_online_cpus(); + cpus_read_unlock(); } int get_max_nest_dev(void) -- cgit From 27fd1111051dc218e5b6cb2da5dbb3f342879ff1 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Wed, 4 Aug 2021 11:37:24 +1000 Subject: powerpc: Always inline radix_enabled() to fix build failure This is the same as commit acdad8fb4a15 ("powerpc: Force inlining of mmu_has_feature to fix build failure") but for radix_enabled(). The config in the linked bugzilla causes the following build failure: LD .tmp_vmlinux.kallsyms1 powerpc64-linux-ld: arch/powerpc/mm/pgtable.o: in function `.__ptep_set_access_flags': pgtable.c:(.text+0x17c): undefined reference to `.radix__ptep_set_access_flags' powerpc64-linux-ld: arch/powerpc/mm/pageattr.o: in function `.change_page_attr': pageattr.c:(.text+0xc0): undefined reference to `.radix__flush_tlb_kernel_range' etc. This is due to radix_enabled() not being inlined. See extract from building with -Winline: In file included from arch/powerpc/include/asm/lppaca.h:46, from arch/powerpc/include/asm/paca.h:17, from arch/powerpc/include/asm/current.h:13, from include/linux/thread_info.h:23, from include/asm-generic/preempt.h:5, from ./arch/powerpc/include/generated/asm/preempt.h:1, from include/linux/preempt.h:78, from include/linux/spinlock.h:51, from include/linux/mmzone.h:8, from include/linux/gfp.h:6, from arch/powerpc/mm/pgtable.c:21: arch/powerpc/include/asm/book3s/64/pgtable.h: In function '__ptep_set_access_flags': arch/powerpc/include/asm/mmu.h:327:20: error: inlining failed in call to 'radix_enabled': call is unlikely and code size would grow [-Werror=inline] The code relies on constant folding of MMU_FTRS_POSSIBLE at buildtime and elimination of non possible parts of code at compile time. For this to work radix_enabled() must be inlined so make it __always_inline. Reported-by: Erhard F. Suggested-by: Michael Ellerman Tested-by: Randy Dunlap Signed-off-by: Jordan Niethe [mpe: Trimmed error messages in change log] Signed-off-by: Michael Ellerman Link: https://bugzilla.kernel.org/show_bug.cgi?id=213803 Link: https://lore.kernel.org/r/20210804013724.514468-1-jniethe5@gmail.com --- arch/powerpc/include/asm/mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 27016b98ecb2..8abe8e42e045 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -324,7 +324,7 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) } #endif /* !CONFIG_DEBUG_VM */ -static inline bool radix_enabled(void) +static __always_inline bool radix_enabled(void) { return mmu_has_feature(MMU_FTR_TYPE_RADIX); } -- cgit From 9b49f979b3d560cb75ea9f1a596baf432d566798 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 5 Aug 2021 11:20:05 +1000 Subject: powerpc/configs: Disable legacy ptys on microwatt defconfig We shouldn't need legacy ptys, and disabling the option improves boot time by about 0.5 seconds. 
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210805112005.3cb1f412@kryten.localdomain --- arch/powerpc/configs/microwatt_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig index a08b739123da..ebc90aefbc0c 100644 --- a/arch/powerpc/configs/microwatt_defconfig +++ b/arch/powerpc/configs/microwatt_defconfig @@ -57,6 +57,7 @@ CONFIG_NETDEVICES=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y -- cgit From 2ac78e0c00184a9ba53d507be7549c69a3f566b6 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 5 Aug 2021 17:56:49 +1000 Subject: KVM: PPC: Use arch_get_random_seed_long instead of powernv variant The powernv_get_random_long() does not work in nested KVM (which is pseries) and produces a crash when accessing in_be64(rng->regs) in powernv_get_random_long(). This replaces powernv_get_random_long with the ppc_md machine hook wrapper. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210805075649.2086567-1-aik@ozlabs.ru --- arch/powerpc/kvm/book3s_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 085fb8ecbf68..9f957ceee58a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1165,7 +1165,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) break; #endif case H_RANDOM: - if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4])) + if (!arch_get_random_seed_long(&vcpu->arch.regs.gpr[4])) ret = H_HARDWARE; break; case H_RPT_INVALIDATE: -- cgit From 786e5b102a0007d81579822eac23cb5bfaa0b65f Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:19 +0200 Subject: powerpc/pseries/pci: Introduce __find_pe_total_msi() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will help to size the PCI MSI domain. 
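A rough usage sketch of the helper introduced below (the consumer shown is hypothetical; the series wires this up to the MSI domain code in a later patch):

    int count;

    /* Walk up from the PHB's device-tree node looking for the
     * "ibm,pe-total-#msi" quota; on success, use it as the number
     * of vectors the MSI domain can hand out. */
    if (__find_pe_total_msi(phb->dn, &count))
    	pseries_create_msi_domain(phb, count);	/* hypothetical consumer */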
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-2-clg@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 637300330507..d2d090e04745 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -164,12 +164,12 @@ static int check_req_msix(struct pci_dev *pdev, int nvec) /* Quota calculation */ -static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) +static struct device_node *__find_pe_total_msi(struct device_node *node, int *total) { struct device_node *dn; const __be32 *p; - dn = of_node_get(pci_device_to_OF_node(dev)); + dn = of_node_get(node); while (dn) { p = of_get_property(dn, "ibm,pe-total-#msi", NULL); if (p) { @@ -185,6 +185,11 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) return NULL; } +static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) +{ + return __find_pe_total_msi(pci_device_to_OF_node(dev), total); +} + static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) { struct device_node *dn; -- cgit From e81202007363bd694b711f307f02320b5f98edaa Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:20 +0200 Subject: powerpc/pseries/pci: Introduce rtas_prepare_msi_irqs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This splits the routine setting the MSIs into two parts: allocation of MSIs for the PCI device at the FW level (RTAS) and the actual mapping and activation of the IRQs. rtas_prepare_msi_irqs() will serve as a handler for the PCI MSI domain.
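In other words, the flow becomes two-phase; a condensed sketch (the glue between the phases is illustrative, not the exact code):

    /* Phase 1: firmware allocation, the future ->msi_prepare hook. */
    rc = rtas_prepare_msi_irqs(pdev, nvec, type, NULL);
    if (rc)
    	return rc;

    /* Phase 2: per-vector query and mapping, still done by
     * rtas_setup_msi_irqs() at this point in the series. */
    for_each_pci_msi_entry(entry, pdev) {
    	hwirq = rtas_query_irq_number(pdn, i++);
    	/* ... map hwirq to a virq and write the MSI message ... */
    }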
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-3-clg@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index d2d090e04745..4bf14f27e1aa 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -373,12 +373,11 @@ static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev) pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0); } -static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) +static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type, + msi_alloc_info_t *arg) { struct pci_dn *pdn; - int hwirq, virq, i, quota, rc; - struct msi_desc *entry; - struct msi_msg msg; + int quota, rc; int nvec = nvec_in; int use_32bit_msi_hack = 0; @@ -456,6 +455,22 @@ again: return rc; } + return 0; +} + +static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) +{ + struct pci_dn *pdn; + int hwirq, virq, i; + int rc; + struct msi_desc *entry; + struct msi_msg msg; + + rc = rtas_prepare_msi_irqs(pdev, nvec_in, type, NULL); + if (rc) + return rc; + + pdn = pci_get_pdn(pdev); i = 0; for_each_pci_msi_entry(entry, pdev) { hwirq = rtas_query_irq_number(pdn, i++); -- cgit From 14be098c5387eb93b794f299f3c3e2ddf6038ec7 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:21 +0200 Subject: powerpc/xive: Add support for IRQ domain hierarchy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds handlers to allocate/free IRQs in a domain hierarchy. We could try to use xive_irq_domain_map() in xive_irq_domain_alloc(), but since we rely on xive_irq_alloc_data() to set the IRQ handler data, duplicating the code is simpler. xive_irq_free_data() needs to be called when IRQs are freed to clear the MMIO mappings and free the XIVE handler data, the xive_irq_data structure. This is going to be a problem with MSI domains which we will address later.
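For orientation, the new ->translate/->alloc path consumes a two-cell XIVE interrupt specifier; a sketch of the irq_fwspec involved (the handle and values are made up for illustration):

    struct irq_fwspec fwspec = {
    	.fwnode      = xive_fwnode,	/* hypothetical fwnode handle */
    	.param_count = 2,
    	/* cell 0: global IRQ number, cell 1: trigger type */
    	.param       = { 0x1234, IRQ_TYPE_EDGE_RISING },
    };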
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-4-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 64 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index dbdbbc2f1dc5..420d96deb7b6 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1366,7 +1366,71 @@ static void xive_irq_domain_debug_show(struct seq_file *m, struct irq_domain *d, } #endif +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY +static int xive_irq_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *hwirq, + unsigned int *type) +{ + return xive_irq_domain_xlate(d, to_of_node(fwspec->fwnode), + fwspec->param, fwspec->param_count, + hwirq, type); +} + +static int xive_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_fwspec *fwspec = arg; + irq_hw_number_t hwirq; + unsigned int type = IRQ_TYPE_NONE; + int i, rc; + + rc = xive_irq_domain_translate(domain, fwspec, &hwirq, &type); + if (rc) + return rc; + + pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + /* TODO: call xive_irq_domain_map() */ + + /* + * Mark interrupts as edge sensitive by default so that resend + * actually works. Will fix that up below if needed. + */ + irq_clear_status_flags(virq, IRQ_LEVEL); + + /* allocates and sets handler data */ + rc = xive_irq_alloc_data(virq + i, hwirq + i); + if (rc) + return rc; + + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &xive_irq_chip, domain->host_data); + irq_set_handler(virq + i, handle_fasteoi_irq); + } + + return 0; +} + +static void xive_irq_domain_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + int i; + + pr_debug("%s %d #%d\n", __func__, virq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) + xive_irq_free_data(virq + i); +} +#endif + static const struct irq_domain_ops xive_irq_domain_ops = { +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + .alloc = xive_irq_domain_alloc, + .free = xive_irq_domain_free, + .translate = xive_irq_domain_translate, +#endif .match = xive_irq_domain_match, .map = xive_irq_domain_map, .unmap = xive_irq_domain_unmap, -- cgit From 6c2ab2a5d634d4e30445ee5d52d5d1469bf74aa2 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:22 +0200 Subject: powerpc/xive: Ease debugging of xive_irq_set_affinity() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pr_debug() is easier to activate and it helps to know how the kernel configures the HW when tweaking the IRQ subsystem. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-5-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 420d96deb7b6..823f9fe3542a 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -713,7 +713,7 @@ static int xive_irq_set_affinity(struct irq_data *d, u32 target, old_target; int rc = 0; - pr_devel("xive_irq_set_affinity: irq %d\n", d->irq); + pr_debug("%s: irq %d/%x\n", __func__, d->irq, hw_irq); /* Is this valid ? 
*/ if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) return -EINVAL; @@ -758,7 +758,7 @@ static int xive_irq_set_affinity(struct irq_data *d, return rc; } - pr_devel(" target: 0x%x\n", target); + pr_debug(" target: 0x%x\n", target); xd->target = target; /* Give up previous target */ -- cgit From a5f3d2c17b07e69166b93209f34a5fb8271a6810 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:23 +0200 Subject: powerpc/pseries/pci: Add MSI domains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two IRQ domains are added on top of default machine IRQ domain. First, the top level "pSeries-PCI-MSI" domain deals with the MSI specificities. In this domain, the HW IRQ numbers are generated by the PCI MSI layer; they compose a unique ID for an MSI source with the PCI device identifier and the MSI vector number. These numbers can be quite large on a pSeries machine running under the IBM Hypervisor and /sys/kernel/irq/ and /proc/interrupts will require small fixes to show them correctly. The second domain is the in-the-middle "pSeries-MSI" domain which acts as a proxy between the PCI MSI subsystem and the machine IRQ subsystem. It usually allocates the MSI vector numbers but, on pSeries machines, this is done by the RTAS FW and RTAS returns IRQ numbers in the IRQ number space of the machine. This is why the in-the-middle "pSeries-MSI" domain has the same HW IRQ numbers as its parent domain. Only the XIVE (P9/P10) parent domain is supported for now. We still need to add support for IRQ domain hierarchy under XICS. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-6-clg@kaod.org --- arch/powerpc/include/asm/pci-bridge.h | 5 + arch/powerpc/kernel/pci-common.c | 6 + arch/powerpc/platforms/pseries/msi.c | 185 +++++++++++++++++++++++++++++++ arch/powerpc/platforms/pseries/pseries.h | 1 + arch/powerpc/platforms/pseries/setup.c | 2 + 5 files changed, 199 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 74424c14515c..90f488fa4c17 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -126,6 +126,11 @@ struct pci_controller { #endif /* CONFIG_PPC64 */ void *private_data; + + /* IRQ domain hierarchy */ + struct irq_domain *dev_domain; + struct irq_domain *msi_domain; + struct fwnode_handle *fwnode; }; /* These are used for config access before all the PCI probing diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 001e90cd8948..c3573430919d 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -1060,11 +1061,16 @@ void pcibios_bus_add_device(struct pci_dev *dev) int pcibios_add_device(struct pci_dev *dev) { + struct irq_domain *d; + #ifdef CONFIG_PCI_IOV if (ppc_md.pcibios_fixup_sriov) ppc_md.pcibios_fixup_sriov(dev); #endif /* CONFIG_PCI_IOV */ + d = dev_get_msi_domain(&dev->bus->dev); + if (d) + dev_set_msi_domain(&dev->dev, d); return 0; } diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 4bf14f27e1aa..86c6809ebac2 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "pseries.h" @@ -518,6 +519,190 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) return
0; } +static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct msi_desc *desc = first_pci_msi_entry(pdev); + int type = desc->msi_attrib.is_msix ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI; + + return rtas_prepare_msi_irqs(pdev, nvec, type, arg); +} + +static struct msi_domain_ops pseries_pci_msi_domain_ops = { + .msi_prepare = pseries_msi_ops_prepare, +}; + +static void pseries_msi_shutdown(struct irq_data *d) +{ + d = d->parent_data; + if (d->chip->irq_shutdown) + d->chip->irq_shutdown(d); +} + +static void pseries_msi_mask(struct irq_data *d) +{ + pci_msi_mask_irq(d); + irq_chip_mask_parent(d); +} + +static void pseries_msi_unmask(struct irq_data *d) +{ + pci_msi_unmask_irq(d); + irq_chip_unmask_parent(d); +} + +static struct irq_chip pseries_pci_msi_irq_chip = { + .name = "pSeries-PCI-MSI", + .irq_shutdown = pseries_msi_shutdown, + .irq_mask = pseries_msi_mask, + .irq_unmask = pseries_msi_unmask, + .irq_eoi = irq_chip_eoi_parent, +}; + +static struct msi_domain_info pseries_msi_domain_info = { + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX), + .ops = &pseries_pci_msi_domain_ops, + .chip = &pseries_pci_msi_irq_chip, +}; + +static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ + __pci_read_msi_msg(irq_data_get_msi_desc(data), msg); +} + +static struct irq_chip pseries_msi_irq_chip = { + .name = "pSeries-MSI", + .irq_shutdown = pseries_msi_shutdown, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_compose_msi_msg = pseries_msi_compose_msg, +}; + +static int pseries_irq_parent_domain_alloc(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) +{ + struct irq_fwspec parent_fwspec; + int ret; + + parent_fwspec.fwnode = domain->parent->fwnode; + parent_fwspec.param_count = 2; + parent_fwspec.param[0] = hwirq; + parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING; + + ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec); + if (ret) + return ret; + + return 0; +} + +static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct pci_controller *phb = domain->host_data; + msi_alloc_info_t *info = arg; + struct msi_desc *desc = info->desc; + struct pci_dev *pdev = msi_desc_to_pci_dev(desc); + int hwirq; + int i, ret; + + hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_attrib.entry_nr); + if (hwirq < 0) { + dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq); + return hwirq; + } + + dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__, + phb->dn, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + ret = pseries_irq_parent_domain_alloc(domain, virq + i, hwirq + i); + if (ret) + goto out; + + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &pseries_msi_irq_chip, domain->host_data); + } + + return 0; + +out: + /* TODO: handle RTAS cleanup in ->msi_finish() ? 
*/ + irq_domain_free_irqs_parent(domain, virq, i - 1); + return ret; +} + +static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct pci_controller *phb = irq_data_get_irq_chip_data(d); + + pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs); + + irq_domain_free_irqs_parent(domain, virq, nr_irqs); +} + +static const struct irq_domain_ops pseries_irq_domain_ops = { + .alloc = pseries_irq_domain_alloc, + .free = pseries_irq_domain_free, +}; + +static int __pseries_msi_allocate_domains(struct pci_controller *phb, + unsigned int count) +{ + struct irq_domain *parent = irq_get_default_host(); + + phb->fwnode = irq_domain_alloc_named_id_fwnode("pSeries-MSI", + phb->global_number); + if (!phb->fwnode) + return -ENOMEM; + + phb->dev_domain = irq_domain_create_hierarchy(parent, 0, count, + phb->fwnode, + &pseries_irq_domain_ops, phb); + if (!phb->dev_domain) { + pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n", + phb->dn, phb->global_number); + irq_domain_free_fwnode(phb->fwnode); + return -ENOMEM; + } + + phb->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(phb->dn), + &pseries_msi_domain_info, + phb->dev_domain); + if (!phb->msi_domain) { + pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n", + phb->dn, phb->global_number); + irq_domain_free_fwnode(phb->fwnode); + irq_domain_remove(phb->dev_domain); + return -ENOMEM; + } + + return 0; +} + +int pseries_msi_allocate_domains(struct pci_controller *phb) +{ + int count; + + /* Only supported by the XIVE driver */ + if (!xive_enabled()) + return -ENODEV; + + if (!__find_pe_total_msi(phb->dn, &count)) { + pr_err("PCI: failed to find MSIs for bridge %pOF (domain %d)\n", + phb->dn, phb->global_number); + return -ENOSPC; + } + + return __pseries_msi_allocate_domains(phb, count); +} + static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) { /* No LSI -> leave MSIs (if any) configured */ diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 1f051a786fb3..d9280262588b 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -85,6 +85,7 @@ struct pci_host_bridge; int pseries_root_bridge_prepare(struct pci_host_bridge *bridge); extern struct pci_controller_ops pseries_pci_controller_ops; +int pseries_msi_allocate_domains(struct pci_controller *phb); unsigned long pseries_memory_block_size(void); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 631a0d57b6cd..35724caf8a83 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -486,6 +486,8 @@ static void __init pSeries_discover_phbs(void) /* create pci_dn's for DT nodes under this PHB */ pci_devs_phb_init_dynamic(phb); + + pseries_msi_allocate_domains(phb); } of_node_put(root); -- cgit From 5690bcae186084a8544b1819f0d89399268bd0cf Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:24 +0200 Subject: powerpc/xive: Drop unmask of MSIs at startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That was a workaround in the XIVE domain because of the lack of MSI domain. This is now handled. 
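Concretely, unmasking now flows through the MSI domain chip added earlier in the series, which pairs the PCI-level and XIVE-level operations (reproduced here for context):

    static void pseries_msi_unmask(struct irq_data *d)
    {
    	pci_msi_unmask_irq(d);		/* clear the MSI mask bit on the card */
    	irq_chip_unmask_parent(d);	/* then unmask at the XIVE level */
    }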
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-7-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 823f9fe3542a..356f584cc51b 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -616,16 +616,6 @@ static unsigned int xive_irq_startup(struct irq_data *d) pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n", d->irq, hw_irq, d); -#ifdef CONFIG_PCI_MSI - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - if (irq_data_get_msi_desc(d)) - pci_msi_unmask_irq(d); -#endif - /* Pick a target */ target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d)); if (target == XIVE_INVALID_TARGET) { -- cgit From 292145a6e598c1e6633b8f5f607706b46f552ab9 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:25 +0200 Subject: powerpc/xive: Remove irqd_is_started() check when setting the affinity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the early days of XIVE support, commit cffb717ceb8e ("powerpc/xive: Ensure active irqd when setting affinity") tried to fix an issue related to interrupt migration. If the root cause was related to CPU unplug, it should have been fixed and there is no reason to keep the irqd_is_started() check. This test is also breaking affinity setting of MSIs, which can be set before the associated IRQ is started. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-8-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 356f584cc51b..0631c97b3a14 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -709,10 +709,6 @@ static int xive_irq_set_affinity(struct irq_data *d, if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) return -EINVAL; - /* Don't do anything if the interrupt isn't started */ - if (!irqd_is_started(d)) - return IRQ_SET_MASK_OK; - /* * If existing target is already in the new mask, and is * online then do nothing. -- cgit From 07817a578a7a79638537480b8847dc7a12f293c5 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:26 +0200 Subject: powerpc/pseries/pci: Add a domain_free_irqs() handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RTAS firmware can not disable one MSI at a time. It's all or nothing. We need a custom free IRQ handler for that.
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-9-clg@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 86c6809ebac2..591cee9cbc9e 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -529,8 +529,24 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev return rtas_prepare_msi_irqs(pdev, nvec, type, arg); } +/* + * RTAS can not disable one MSI at a time. It's all or nothing. Do it + * at the end after all IRQs have been freed. + */ +static void pseries_msi_domain_free_irqs(struct irq_domain *domain, + struct device *dev) +{ + if (WARN_ON_ONCE(!dev_is_pci(dev))) + return; + + __msi_domain_free_irqs(domain, dev); + + rtas_disable_msi(to_pci_dev(dev)); +} + static struct msi_domain_ops pseries_pci_msi_domain_ops = { .msi_prepare = pseries_msi_ops_prepare, + .domain_free_irqs = pseries_msi_domain_free_irqs, }; static void pseries_msi_shutdown(struct irq_data *d) -- cgit From 9a014f456881e947bf8cdd8c984a207097e6c096 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:27 +0200 Subject: powerpc/pseries/pci: Add a msi_free() handler to clear XIVE data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MSI domain clears the IRQ with msi_domain_free(), which calls irq_domain_free_irqs_top(), which clears the handler data. This is a problem for the XIVE controller since we need to unmap MMIO pages and free a specific XIVE structure. The 'msi_free()' handler is called before irq_domain_free_irqs_top() when the handler data is still available. Use that to clear the XIVE controller data. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-10-clg@kaod.org --- arch/powerpc/include/asm/xive.h | 1 + arch/powerpc/platforms/pseries/msi.c | 16 +++++++++++++++- arch/powerpc/sysdev/xive/common.c | 5 ++++- 3 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index aa094a8655b0..20ae50ab083c 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -111,6 +111,7 @@ void xive_native_free_vp_block(u32 vp_base); int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data); void xive_cleanup_irq_data(struct xive_irq_data *xd); +void xive_irq_free_data(unsigned int virq); void xive_native_free_irq(u32 irq); int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 591cee9cbc9e..f9635b01b2bf 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -529,6 +529,19 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev return rtas_prepare_msi_irqs(pdev, nvec, type, arg); } +/* + * ->msi_free() is called before irq_domain_free_irqs_top() when the + * handler data is still available. Use that to clear the XIVE + * controller data. + */ +static void pseries_msi_ops_msi_free(struct irq_domain *domain, + struct msi_domain_info *info, + unsigned int irq) +{ + if (xive_enabled()) + xive_irq_free_data(irq); +} + /* * RTAS can not disable one MSI at a time. It's all or nothing. 
Do it * at the end after all IRQs have been freed. @@ -546,6 +559,7 @@ static void pseries_msi_domain_free_irqs(struct irq_domain *domain, static struct msi_domain_ops pseries_pci_msi_domain_ops = { .msi_prepare = pseries_msi_ops_prepare, + .msi_free = pseries_msi_ops_msi_free, .domain_free_irqs = pseries_msi_domain_free_irqs, }; @@ -660,7 +674,7 @@ static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs); - irq_domain_free_irqs_parent(domain, virq, nr_irqs); + /* XIVE domain data is cleared through ->msi_free() */ } static const struct irq_domain_ops pseries_irq_domain_ops = { diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 0631c97b3a14..107f442d3411 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -975,6 +975,8 @@ EXPORT_SYMBOL_GPL(is_xive_irq); void xive_cleanup_irq_data(struct xive_irq_data *xd) { + pr_debug("%s for HW %x\n", __func__, xd->hw_irq); + if (xd->eoi_mmio) { iounmap(xd->eoi_mmio); if (xd->eoi_mmio == xd->trig_mmio) @@ -1016,7 +1018,7 @@ static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw) return 0; } -static void xive_irq_free_data(unsigned int virq) +void xive_irq_free_data(unsigned int virq) { struct xive_irq_data *xd = irq_get_handler_data(virq); @@ -1026,6 +1028,7 @@ static void xive_irq_free_data(unsigned int virq) xive_cleanup_irq_data(xd); kfree(xd); } +EXPORT_SYMBOL_GPL(xive_irq_free_data); #ifdef CONFIG_SMP -- cgit From 174db9e7f775ce06fc6949c9abbe758b3eb8171c Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:28 +0200 Subject: powerpc/pseries/pci: Add support of MSI domains to PHB hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simply allocate or release the MSI domains when a PHB is inserted in or removed from the machine. 
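As a usage note, the pairing is symmetric across the PHB life cycle. A sketch of the hotplug round trip through the two pseries entry points patched below (they are declared in platforms/pseries/pseries.h; error handling elided):

#include <linux/of.h>
#include <asm/pci-bridge.h>

/* init_phb_dynamic()/remove_phb_dynamic() are the pseries PHB hotplug
 * entry points; after this patch they also allocate and release the
 * PHB's MSI domains. */
static int example_phb_roundtrip(struct device_node *dn)
{
	struct pci_controller *phb;

	phb = init_phb_dynamic(dn);	/* MSI domains allocated here */
	if (!phb)
		return -ENODEV;

	return remove_phb_dynamic(phb);	/* MSI domains freed here */
}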
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-11-clg@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 10 ++++++++++ arch/powerpc/platforms/pseries/pci_dlpar.c | 4 ++++ arch/powerpc/platforms/pseries/pseries.h | 1 + 3 files changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index f9635b01b2bf..e2127a3f7ebd 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -733,6 +733,16 @@ int pseries_msi_allocate_domains(struct pci_controller *phb) return __pseries_msi_allocate_domains(phb, count); } +void pseries_msi_free_domains(struct pci_controller *phb) +{ + if (phb->msi_domain) + irq_domain_remove(phb->msi_domain); + if (phb->dev_domain) + irq_domain_remove(phb->dev_domain); + if (phb->fwnode) + irq_domain_free_fwnode(phb->fwnode); +} + static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) { /* No LSI -> leave MSIs (if any) configured */ diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index a8f9140a24fa..90c9d3531694 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -33,6 +33,8 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) pci_devs_phb_init_dynamic(phb); + pseries_msi_allocate_domains(phb); + /* Create EEH devices for the PHB */ eeh_phb_pe_create(phb); @@ -74,6 +76,8 @@ int remove_phb_dynamic(struct pci_controller *phb) } } + pseries_msi_free_domains(phb); + /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index d9280262588b..3544778e06d0 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -86,6 +86,7 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge); extern struct pci_controller_ops pseries_pci_controller_ops; int pseries_msi_allocate_domains(struct pci_controller *phb); +void pseries_msi_free_domains(struct pci_controller *phb); unsigned long pseries_memory_block_size(void); -- cgit From 2c50d7e99e39eba92b93210e740f3f9e5a06ba54 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:29 +0200 Subject: powerpc/powernv/pci: Introduce __pnv_pci_ioda_msi_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be used as a 'compose_msg' handler of the MSI domain introduced later. 
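For orientation: an irq_compose_msi_msg-style handler produces the address/data pair a device writes to raise the interrupt. A minimal sketch of the shape (the doorbell address is a made-up value and the example_* name is illustrative):

#include <linux/irq.h>
#include <linux/msi.h>

static void example_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
{
	/* Doorbell address the device writes to (assumed value)... */
	msg->address_hi = 0;
	msg->address_lo = 0xffff0000;
	/* ...and the payload; here simply the vector (hwirq) number. */
	msg->data = d->hwirq;
}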
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-12-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7de464679292..2922674cc934 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2016,15 +2016,17 @@ bool is_pnv_opal_msi(struct irq_chip *chip) } EXPORT_SYMBOL_GPL(is_pnv_opal_msi); -static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, - unsigned int hwirq, unsigned int virq, - unsigned int is_64, struct msi_msg *msg) +static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, + unsigned int xive_num, + unsigned int is_64, struct msi_msg *msg) { struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); - unsigned int xive_num = hwirq - phb->msi_base; __be32 data; int rc; + dev_dbg(&dev->dev, "%s: setup %s-bit MSI for vector #%d\n", __func__, + is_64 ? "64" : "32", xive_num); + /* No PE assigned ? bail out ... no MSI for you ! */ if (pe == NULL) return -ENXIO; @@ -2072,12 +2074,28 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, } msg->data = be32_to_cpu(data); + return 0; +} + +static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, + unsigned int hwirq, unsigned int virq, + unsigned int is_64, struct msi_msg *msg) +{ + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); + unsigned int xive_num = hwirq - phb->msi_base; + int rc; + + rc = __pnv_pci_ioda_msi_setup(phb, dev, xive_num, is_64, msg); + if (rc) + return rc; + + /* P8 only */ pnv_set_msi_irq_chip(phb, virq); pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," " address=%x_%08x data=%x PE# %x\n", pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num, - msg->address_hi, msg->address_lo, data, pe->pe_number); + msg->address_hi, msg->address_lo, msg->data, pe->pe_number); return 0; } -- cgit From 0fcfe2247e75070361af2b6845030cada92cdbf8 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:30 +0200 Subject: powerpc/powernv/pci: Add MSI domains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is very similar to the MSI domains of the pSeries platform. The MSI allocator is directly handled under the Linux PHB in the in-the-middle "PNV-MSI" domain. Only the XIVE (P9/P10) parent domain is supported for now. Support for XICS will come later. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-13-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 188 ++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2922674cc934..d2a17fcb6002 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -2100,6 +2101,189 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, return 0; } +/* + * The msi_free() op is called before irq_domain_free_irqs_top() when + * the handler data is still available. Use that to clear the XIVE + * controller. 
+ */ +static void pnv_msi_ops_msi_free(struct irq_domain *domain, + struct msi_domain_info *info, + unsigned int irq) +{ + if (xive_enabled()) + xive_irq_free_data(irq); +} + +static struct msi_domain_ops pnv_pci_msi_domain_ops = { + .msi_free = pnv_msi_ops_msi_free, +}; + +static void pnv_msi_shutdown(struct irq_data *d) +{ + d = d->parent_data; + if (d->chip->irq_shutdown) + d->chip->irq_shutdown(d); +} + +static void pnv_msi_mask(struct irq_data *d) +{ + pci_msi_mask_irq(d); + irq_chip_mask_parent(d); +} + +static void pnv_msi_unmask(struct irq_data *d) +{ + pci_msi_unmask_irq(d); + irq_chip_unmask_parent(d); +} + +static struct irq_chip pnv_pci_msi_irq_chip = { + .name = "PNV-PCI-MSI", + .irq_shutdown = pnv_msi_shutdown, + .irq_mask = pnv_msi_mask, + .irq_unmask = pnv_msi_unmask, + .irq_eoi = irq_chip_eoi_parent, +}; + +static struct msi_domain_info pnv_msi_domain_info = { + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX), + .ops = &pnv_pci_msi_domain_ops, + .chip = &pnv_pci_msi_irq_chip, +}; + +static void pnv_msi_compose_msg(struct irq_data *d, struct msi_msg *msg) +{ + struct msi_desc *entry = irq_data_get_msi_desc(d); + struct pci_dev *pdev = msi_desc_to_pci_dev(entry); + struct pci_controller *hose = irq_data_get_irq_chip_data(d); + struct pnv_phb *phb = hose->private_data; + int rc; + + rc = __pnv_pci_ioda_msi_setup(phb, pdev, d->hwirq, + entry->msi_attrib.is_64, msg); + if (rc) + dev_err(&pdev->dev, "Failed to setup %s-bit MSI #%ld : %d\n", + entry->msi_attrib.is_64 ? "64" : "32", d->hwirq, rc); +} + +static struct irq_chip pnv_msi_irq_chip = { + .name = "PNV-MSI", + .irq_shutdown = pnv_msi_shutdown, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_compose_msi_msg = pnv_msi_compose_msg, +}; + +static int pnv_irq_parent_domain_alloc(struct irq_domain *domain, + unsigned int virq, int hwirq) +{ + struct irq_fwspec parent_fwspec; + int ret; + + parent_fwspec.fwnode = domain->parent->fwnode; + parent_fwspec.param_count = 2; + parent_fwspec.param[0] = hwirq; + parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING; + + ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec); + if (ret) + return ret; + + return 0; +} + +static int pnv_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct pci_controller *hose = domain->host_data; + struct pnv_phb *phb = hose->private_data; + msi_alloc_info_t *info = arg; + struct pci_dev *pdev = msi_desc_to_pci_dev(info->desc); + int hwirq; + int i, ret; + + hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, nr_irqs); + if (hwirq < 0) { + dev_warn(&pdev->dev, "failed to find a free MSI\n"); + return -ENOSPC; + } + + dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__, + hose->dn, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + ret = pnv_irq_parent_domain_alloc(domain, virq + i, + phb->msi_base + hwirq + i); + if (ret) + goto out; + + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &pnv_msi_irq_chip, hose); + } + + return 0; + +out: + irq_domain_free_irqs_parent(domain, virq, i - 1); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, nr_irqs); + return ret; +} + +static void pnv_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct pci_controller *hose = irq_data_get_irq_chip_data(d); + 
struct pnv_phb *phb = hose->private_data; + + pr_debug("%s bridge %pOF %d/%lx #%d\n", __func__, hose->dn, + virq, d->hwirq, nr_irqs); + + msi_bitmap_free_hwirqs(&phb->msi_bmp, d->hwirq, nr_irqs); + /* XIVE domain is cleared through ->msi_free() */ +} + +static const struct irq_domain_ops pnv_irq_domain_ops = { + .alloc = pnv_irq_domain_alloc, + .free = pnv_irq_domain_free, +}; + +static int pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count) +{ + struct pnv_phb *phb = hose->private_data; + struct irq_domain *parent = irq_get_default_host(); + + hose->fwnode = irq_domain_alloc_named_id_fwnode("PNV-MSI", phb->opal_id); + if (!hose->fwnode) + return -ENOMEM; + + hose->dev_domain = irq_domain_create_hierarchy(parent, 0, count, + hose->fwnode, + &pnv_irq_domain_ops, hose); + if (!hose->dev_domain) { + pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n", + hose->dn, hose->global_number); + irq_domain_free_fwnode(hose->fwnode); + return -ENOMEM; + } + + hose->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(hose->dn), + &pnv_msi_domain_info, + hose->dev_domain); + if (!hose->msi_domain) { + pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n", + hose->dn, hose->global_number); + irq_domain_free_fwnode(hose->fwnode); + irq_domain_remove(hose->dev_domain); + return -ENOMEM; + } + + return 0; +} + static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { unsigned int count; @@ -2124,6 +2308,10 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) phb->msi32_support = 1; pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", count, phb->msi_base); + + /* Only supported by the XIVE driver */ + if (xive_enabled()) + pnv_msi_allocate_domains(phb->hose, count); } static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, -- cgit From ba418a0278265ad65f2f9544e743b7dbff3b994b Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:31 +0200 Subject: KVM: PPC: Book3S HV: Use the new IRQ chip to detect passthrough interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passthrough PCI MSI interrupts are detected in KVM with a check on a specific EOI handler (P8) or on XIVE (P9). We can now check the PCI-MSI IRQ chip which is cleaner. 
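The check being introduced reduces to an irq_chip pointer comparison, which stays stable no matter which EOI handler the chip carries. The pattern in isolation (example_* names are hypothetical):

#include <linux/irq.h>
#include <linux/irqdesc.h>

static struct irq_chip example_msi_chip;

/* An interrupt "belongs to us" iff it carries our irq_chip. */
static bool example_is_our_msi(struct irq_desc *desc)
{
	struct irq_chip *chip = irq_data_get_irq_chip(&desc->irq_data);

	return chip == &example_msi_chip;
}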
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-14-clg@kaod.org --- arch/powerpc/kvm/book3s_hv.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9f957ceee58a..1a757c3d33f7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5355,7 +5355,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) * what our real-mode EOI code does, or a XIVE interrupt */ chip = irq_data_get_irq_chip(&desc->irq_data); - if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) { + if (!chip || !is_pnv_opal_msi(chip)) { pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n", host_irq, guest_gsi); mutex_unlock(&kvm->lock); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d2a17fcb6002..e77caa4dbbdf 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2007,13 +2007,15 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) irq_set_chip(virq, &phb->ioda.irq_chip); } +static struct irq_chip pnv_pci_msi_irq_chip; + /* * Returns true iff chip is something that we could call * pnv_opal_pci_msi_eoi for. */ bool is_pnv_opal_msi(struct irq_chip *chip) { - return chip->irq_eoi == pnv_ioda2_msi_eoi; + return chip->irq_eoi == pnv_ioda2_msi_eoi || chip == &pnv_pci_msi_irq_chip; } EXPORT_SYMBOL_GPL(is_pnv_opal_msi); -- cgit From e5e78b15113a73d0294141d9796969fa7b10fa3c Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:32 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Change interface of passthrough interrupt routines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The routine kvmppc_set_passthru_irq() calls kvmppc_xive_set_mapped() and kvmppc_xive_clr_mapped() with an IRQ descriptor. Use directly the host IRQ number to remove a useless conversion. Add some debug. 
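The "useless conversion" is irq_desc_get_irq(): callers held the descriptor only because the old prototype demanded it. A sketch of the two calling conventions, using a hypothetical callee:

#include <linux/irq.h>
#include <linux/irqdesc.h>

/* Old shape: the callee immediately converts the desc back to a number. */
static unsigned long example_old(struct irq_desc *host_desc)
{
	unsigned int host_irq = irq_desc_get_irq(host_desc);

	return irqd_to_hwirq(irq_get_irq_data(host_irq));
}

/* New shape: pass the number; look up irq_data only where it is used. */
static unsigned long example_new(unsigned long host_irq)
{
	return irqd_to_hwirq(irq_get_irq_data(host_irq));
}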
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-15-clg@kaod.org --- arch/powerpc/include/asm/kvm_ppc.h | 4 ++-- arch/powerpc/kvm/book3s_hv.c | 4 ++-- arch/powerpc/kvm/book3s_xive.c | 17 ++++++++--------- 3 files changed, 12 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2d88944f9f34..671fbd1a765e 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -664,9 +664,9 @@ extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu); extern void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu); extern int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, - struct irq_desc *host_desc); + unsigned long host_irq); extern int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, - struct irq_desc *host_desc); + unsigned long host_irq); extern u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu); extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1a757c3d33f7..05b3a3548c18 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5398,7 +5398,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) pimap->n_mapped++; if (xics_on_xive()) - rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc); + rc = kvmppc_xive_set_mapped(kvm, guest_gsi, host_irq); else kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); if (rc) @@ -5439,7 +5439,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) } if (xics_on_xive()) - rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc); + rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, host_irq); else kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 8cfab3547494..45a0434e9d85 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -922,13 +922,12 @@ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) } int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, - struct irq_desc *host_desc) + unsigned long host_irq) { struct kvmppc_xive *xive = kvm->arch.xive; struct kvmppc_xive_src_block *sb; struct kvmppc_xive_irq_state *state; - struct irq_data *host_data = irq_desc_get_irq_data(host_desc); - unsigned int host_irq = irq_desc_get_irq(host_desc); + struct irq_data *host_data = irq_get_irq_data(host_irq); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data); u16 idx; u8 prio; @@ -937,7 +936,8 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, if (!xive) return -ENODEV; - pr_devel("set_mapped girq 0x%lx host HW irq 0x%x...\n",guest_irq, hw_irq); + pr_debug("%s: GIRQ 0x%lx host IRQ %ld XIVE HW IRQ 0x%x\n", + __func__, guest_irq, host_irq, hw_irq); sb = kvmppc_xive_find_source(xive, guest_irq, &idx); if (!sb) @@ -959,7 +959,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, */ rc = irq_set_vcpu_affinity(host_irq, state); if (rc) { - pr_err("Failed to set VCPU affinity for irq %d\n", host_irq); + pr_err("Failed to set VCPU affinity for host IRQ %ld\n", host_irq); return rc; } @@ -1019,12 +1019,11 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped); int kvmppc_xive_clr_mapped(struct kvm 
*kvm, unsigned long guest_irq, - struct irq_desc *host_desc) + unsigned long host_irq) { struct kvmppc_xive *xive = kvm->arch.xive; struct kvmppc_xive_src_block *sb; struct kvmppc_xive_irq_state *state; - unsigned int host_irq = irq_desc_get_irq(host_desc); u16 idx; u8 prio; int rc; @@ -1032,7 +1031,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, if (!xive) return -ENODEV; - pr_devel("clr_mapped girq 0x%lx...\n", guest_irq); + pr_debug("%s: GIRQ 0x%lx host IRQ %ld\n", __func__, guest_irq, host_irq); sb = kvmppc_xive_find_source(xive, guest_irq, &idx); if (!sb) @@ -1059,7 +1058,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, /* Release the passed-through interrupt to the host */ rc = irq_set_vcpu_affinity(host_irq, NULL); if (rc) { - pr_err("Failed to clr VCPU affinity for irq %d\n", host_irq); + pr_err("Failed to clr VCPU affinity for host IRQ %ld\n", host_irq); return rc; } -- cgit From 51be9e51a8000ffc6a33083ceca9da9303ed4dc5 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:33 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Fix mapping of passthrough interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI MSI interrupt numbers are now mapped in a PCI-MSI domain but the underlying calls handling the passthrough of the interrupt in the guest need a number in the XIVE IRQ domain. Use the IRQ data mapped in the XIVE IRQ domain and not the one in the PCI-MSI domain. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-16-clg@kaod.org --- arch/powerpc/kvm/book3s_xive.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 45a0434e9d85..6878026ee8ec 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -927,7 +927,8 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, struct kvmppc_xive *xive = kvm->arch.xive; struct kvmppc_xive_src_block *sb; struct kvmppc_xive_irq_state *state; - struct irq_data *host_data = irq_get_irq_data(host_irq); + struct irq_data *host_data = + irq_domain_get_irq_data(irq_get_default_host(), host_irq); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data); u16 idx; u8 prio; -- cgit From 298f6f952885eeb1f25461f085c6c238bcd9fc5e Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:34 +0200 Subject: powerpc/xics: Remove ICS list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We always had only one ICS per machine. Simplify the XICS driver by removing the ICS list. The ICS stored in the chip data of the XICS domain becomes useless and we don't need it anymore to migrate away IRQs from a CPU. This will be removed in a subsequent patch. 
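The list-to-singleton conversion is a common simplification: replace iteration with one guarded pointer and refuse double registration. A self-contained sketch (the example_* type and names are illustrative, not the real struct ics):

#include <linux/bug.h>

struct example_ics {
	void (*mask_unknown)(struct example_ics *ics, unsigned long vec);
};

static struct example_ics *example_the_ics;	/* at most one per machine */

static void example_register_ics(struct example_ics *ics)
{
	if (WARN_ONCE(example_the_ics, "source controller already set"))
		return;
	example_the_ics = ics;
}

static void example_mask_unknown(unsigned long vec)
{
	if (WARN_ON(!example_the_ics))	/* must be registered by now */
		return;
	example_the_ics->mask_unknown(example_the_ics, vec);
}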
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-17-clg@kaod.org --- arch/powerpc/sysdev/xics/xics-common.c | 45 ++++++++++++++-------------------- 1 file changed, 19 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index b14c502e56a8..08c25748efb9 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -38,7 +38,7 @@ DEFINE_PER_CPU(struct xics_cppr, xics_cppr); struct irq_domain *xics_host; -static LIST_HEAD(ics_list); +static struct ics *xics_ics; void xics_update_irq_servers(void) { @@ -111,12 +111,11 @@ void xics_setup_cpu(void) void xics_mask_unknown_vec(unsigned int vec) { - struct ics *ics; - pr_err("Interrupt 0x%x (real) is invalid, disabling it.\n", vec); - list_for_each_entry(ics, &ics_list, link) - ics->mask_unknown(ics, vec); + if (WARN_ON(!xics_ics)) + return; + xics_ics->mask_unknown(xics_ics, vec); } @@ -198,7 +197,6 @@ void xics_migrate_irqs_away(void) struct irq_chip *chip; long server; unsigned long flags; - struct ics *ics; /* We can't set affinity on ISA interrupts */ if (virq < NR_IRQS_LEGACY) @@ -219,13 +217,10 @@ void xics_migrate_irqs_away(void) raw_spin_lock_irqsave(&desc->lock, flags); /* Locate interrupt server */ - server = -1; - ics = irq_desc_get_chip_data(desc); - if (ics) - server = ics->get_server(ics, irq); + server = xics_ics->get_server(xics_ics, irq); if (server < 0) { - printk(KERN_ERR "%s: Can't find server for irq %d\n", - __func__, irq); + pr_err("%s: Can't find server for irq %d/%x\n", + __func__, virq, irq); goto unlock; } @@ -307,13 +302,9 @@ int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask, static int xics_host_match(struct irq_domain *h, struct device_node *node, enum irq_domain_bus_token bus_token) { - struct ics *ics; - - list_for_each_entry(ics, &ics_list, link) - if (ics->host_match(ics, node)) - return 1; - - return 0; + if (WARN_ON(!xics_ics)) + return 0; + return xics_ics->host_match(xics_ics, node) ? 1 : 0; } /* Dummies */ @@ -330,8 +321,6 @@ static struct irq_chip xics_ipi_chip = { static int xics_host_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { - struct ics *ics; - pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw); /* @@ -348,12 +337,14 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq, return 0; } + if (WARN_ON(!xics_ics)) + return -EINVAL; + /* Let the ICS setup the chip data */ - list_for_each_entry(ics, &ics_list, link) - if (ics->map(ics, virq) == 0) - return 0; + if (xics_ics->map(xics_ics, virq)) + return -EINVAL; - return -EINVAL; + return 0; } static int xics_host_xlate(struct irq_domain *h, struct device_node *ct, @@ -427,7 +418,9 @@ static void __init xics_init_host(void) void __init xics_register_ics(struct ics *ics) { - list_add(&ics->link, &ics_list); + if (WARN_ONCE(xics_ics, "XICS: Source Controller is already defined !")) + return; + xics_ics = ics; } static void __init xics_get_server_size(void) -- cgit From 248af248a8f45461662fb633eca4adf24ae704ad Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:35 +0200 Subject: powerpc/xics: Rename the map handler in a check handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This moves the IRQ initialization done under the different ICS backends in the common part of XICS. 
The 'map' handler becomes a simple 'check' on the HW IRQ at the FW level. As we don't need an ICS anymore in xics_migrate_irqs_away(), the XICS domain does not set a chip data for the IRQ. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-18-clg@kaod.org --- arch/powerpc/include/asm/xics.h | 3 ++- arch/powerpc/sysdev/xics/ics-native.c | 13 +++++-------- arch/powerpc/sysdev/xics/ics-opal.c | 27 ++++++++++----------------- arch/powerpc/sysdev/xics/ics-rtas.c | 28 ++++++++++------------------ arch/powerpc/sysdev/xics/xics-common.c | 15 +++++++++------ 5 files changed, 36 insertions(+), 50 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index d9cf192368ad..0ac9bfddf704 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -89,10 +89,11 @@ static inline int ics_opal_init(void) { return -ENODEV; } /* ICS instance, hooked up to chip_data of an irq */ struct ics { struct list_head link; - int (*map)(struct ics *ics, unsigned int virq); + int (*check)(struct ics *ics, unsigned int hwirq); void (*mask_unknown)(struct ics *ics, unsigned long vec); long (*get_server)(struct ics *ics, unsigned long vec); int (*host_match)(struct ics *ics, struct device_node *node); + struct irq_chip *chip; char data[]; }; diff --git a/arch/powerpc/sysdev/xics/ics-native.c b/arch/powerpc/sysdev/xics/ics-native.c index d450502f4053..dec7d93a8ba1 100644 --- a/arch/powerpc/sysdev/xics/ics-native.c +++ b/arch/powerpc/sysdev/xics/ics-native.c @@ -131,19 +131,15 @@ static struct irq_chip ics_native_irq_chip = { .irq_retrigger = xics_retrigger, }; -static int ics_native_map(struct ics *ics, unsigned int virq) +static int ics_native_check(struct ics *ics, unsigned int hw_irq) { - unsigned int vec = (unsigned int)virq_to_hw(virq); struct ics_native *in = to_ics_native(ics); - pr_devel("%s: vec=0x%x\n", __func__, vec); + pr_devel("%s: hw_irq=0x%x\n", __func__, hw_irq); - if (vec < in->ibase || vec >= (in->ibase + in->icount)) + if (hw_irq < in->ibase || hw_irq >= (in->ibase + in->icount)) return -EINVAL; - irq_set_chip_and_handler(virq, &ics_native_irq_chip, handle_fasteoi_irq); - irq_set_chip_data(virq, ics); - return 0; } @@ -177,10 +173,11 @@ static int ics_native_host_match(struct ics *ics, struct device_node *node) } static struct ics ics_native_template = { - .map = ics_native_map, + .check = ics_native_check, .mask_unknown = ics_native_mask_unknown, .get_server = ics_native_get_server, .host_match = ics_native_host_match, + .chip = &ics_native_irq_chip, }; static int __init ics_native_add_one(struct device_node *np) diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 823f6c9664cd..8c7ddcc718b6 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -157,26 +157,13 @@ static struct irq_chip ics_opal_irq_chip = { .irq_retrigger = xics_retrigger, }; -static int ics_opal_map(struct ics *ics, unsigned int virq); -static void ics_opal_mask_unknown(struct ics *ics, unsigned long vec); -static long ics_opal_get_server(struct ics *ics, unsigned long vec); - static int ics_opal_host_match(struct ics *ics, struct device_node *node) { return 1; } -/* Only one global & state struct ics */ -static struct ics ics_hal = { - .map = ics_opal_map, - .mask_unknown = ics_opal_mask_unknown, - .get_server = ics_opal_get_server, - .host_match = ics_opal_host_match, -}; - -static int ics_opal_map(struct ics 
*ics, unsigned int virq) +static int ics_opal_check(struct ics *ics, unsigned int hw_irq) { - unsigned int hw_irq = (unsigned int)virq_to_hw(virq); int64_t rc; __be16 server; int8_t priority; @@ -189,9 +176,6 @@ static int ics_opal_map(struct ics *ics, unsigned int virq) if (rc != OPAL_SUCCESS) return -ENXIO; - irq_set_chip_and_handler(virq, &ics_opal_irq_chip, handle_fasteoi_irq); - irq_set_chip_data(virq, &ics_hal); - return 0; } @@ -222,6 +206,15 @@ static long ics_opal_get_server(struct ics *ics, unsigned long vec) return ics_opal_unmangle_server(be16_to_cpu(server)); } +/* Only one global & state struct ics */ +static struct ics ics_hal = { + .check = ics_opal_check, + .mask_unknown = ics_opal_mask_unknown, + .get_server = ics_opal_get_server, + .host_match = ics_opal_host_match, + .chip = &ics_opal_irq_chip, +}; + int __init ics_opal_init(void) { if (!firmware_has_feature(FW_FEATURE_OPAL)) diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 4cf18000f07c..6d19d711ed35 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -24,19 +24,6 @@ static int ibm_set_xive; static int ibm_int_on; static int ibm_int_off; -static int ics_rtas_map(struct ics *ics, unsigned int virq); -static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec); -static long ics_rtas_get_server(struct ics *ics, unsigned long vec); -static int ics_rtas_host_match(struct ics *ics, struct device_node *node); - -/* Only one global & state struct ics */ -static struct ics ics_rtas = { - .map = ics_rtas_map, - .mask_unknown = ics_rtas_mask_unknown, - .get_server = ics_rtas_get_server, - .host_match = ics_rtas_host_match, -}; - static void ics_rtas_unmask_irq(struct irq_data *d) { unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); @@ -169,9 +156,8 @@ static struct irq_chip ics_rtas_irq_chip = { .irq_retrigger = xics_retrigger, }; -static int ics_rtas_map(struct ics *ics, unsigned int virq) +static int ics_rtas_check(struct ics *ics, unsigned int hw_irq) { - unsigned int hw_irq = (unsigned int)virq_to_hw(virq); int status[2]; int rc; @@ -183,9 +169,6 @@ static int ics_rtas_map(struct ics *ics, unsigned int virq) if (rc) return -ENXIO; - irq_set_chip_and_handler(virq, &ics_rtas_irq_chip, handle_fasteoi_irq); - irq_set_chip_data(virq, &ics_rtas); - return 0; } @@ -213,6 +196,15 @@ static int ics_rtas_host_match(struct ics *ics, struct device_node *node) return !of_device_is_compatible(node, "chrp,iic"); } +/* Only one global & state struct ics */ +static struct ics ics_rtas = { + .check = ics_rtas_check, + .mask_unknown = ics_rtas_mask_unknown, + .get_server = ics_rtas_get_server, + .host_match = ics_rtas_host_match, + .chip = &ics_rtas_irq_chip, +}; + __init int ics_rtas_init(void) { ibm_get_xive = rtas_token("ibm,get-xive"); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 08c25748efb9..ed2bc14f56bd 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -318,10 +318,10 @@ static struct irq_chip xics_ipi_chip = { .irq_unmask = xics_ipi_unmask, }; -static int xics_host_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t hw) +static int xics_host_map(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) { - pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw); + pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hwirq); /* * Mark interrupts as edge sensitive by default so that resend @@ -331,7 +331,7 @@ static int 
xics_host_map(struct irq_domain *h, unsigned int virq, irq_clear_status_flags(virq, IRQ_LEVEL); /* Don't call into ICS for IPIs */ - if (hw == XICS_IPI) { + if (hwirq == XICS_IPI) { irq_set_chip_and_handler(virq, &xics_ipi_chip, handle_percpu_irq); return 0; @@ -340,10 +340,13 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq, if (WARN_ON(!xics_ics)) return -EINVAL; - /* Let the ICS setup the chip data */ - if (xics_ics->map(xics_ics, virq)) + if (xics_ics->check(xics_ics, hwirq)) return -EINVAL; + /* No chip data for the XICS domain */ + irq_domain_set_info(domain, virq, hwirq, xics_ics->chip, + NULL, handle_fasteoi_irq, NULL, NULL); + return 0; } -- cgit From 7d14f6c60b76fa7f3f89d81a95385576ca33b483 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:36 +0200 Subject: powerpc/xics: Give a name to the default XICS IRQ domain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit and clean up the error path. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-19-clg@kaod.org --- arch/powerpc/sysdev/xics/xics-common.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index ed2bc14f56bd..18d3de2f2249 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -412,11 +412,22 @@ static const struct irq_domain_ops xics_host_ops = { .xlate = xics_host_xlate, }; -static void __init xics_init_host(void) +static int __init xics_allocate_domain(void) { - xics_host = irq_domain_add_tree(NULL, &xics_host_ops, NULL); - BUG_ON(xics_host == NULL); + struct fwnode_handle *fn; + + fn = irq_domain_alloc_named_fwnode("XICS"); + if (!fn) + return -ENOMEM; + + xics_host = irq_domain_create_tree(fn, &xics_host_ops, NULL); + if (!xics_host) { + irq_domain_free_fwnode(fn); + return -ENOMEM; + } + irq_set_default_host(xics_host); + return 0; } void __init xics_register_ics(struct ics *ics) @@ -480,6 +491,8 @@ void __init xics_init(void) /* Initialize common bits */ xics_get_server_size(); xics_update_irq_servers(); - xics_init_host(); + rc = xics_allocate_domain(); + if (rc < 0) + pr_err("XICS: Failed to create IRQ domain"); xics_setup_cpu(); } -- cgit From 53b34e8db73af98fa652641bf490384dc665d0f2 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:37 +0200 Subject: powerpc/xics: Add debug logging to the set_irq_affinity handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It really helps to know how the HW is configured when tweaking the IRQ subsystem. 
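Picking up the xics_allocate_domain() change a few hunks above: the subtle part of the error path is that the fwnode must be freed by hand when domain creation fails, because no domain has taken ownership of it yet. The pattern in isolation (example_* names are illustrative):

#include <linux/irqdomain.h>

static const struct irq_domain_ops example_host_ops; /* .map etc. elided */

static struct irq_domain *example_create_named_tree(const char *name)
{
	struct fwnode_handle *fn;
	struct irq_domain *d;

	fn = irq_domain_alloc_named_fwnode(name);
	if (!fn)
		return NULL;

	d = irq_domain_create_tree(fn, &example_host_ops, NULL);
	if (!d) {
		/* nothing owns the fwnode yet: release it ourselves */
		irq_domain_free_fwnode(fn);
		return NULL;
	}
	return d;
}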
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-20-clg@kaod.org --- arch/powerpc/sysdev/xics/ics-opal.c | 2 +- arch/powerpc/sysdev/xics/ics-rtas.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 8c7ddcc718b6..bf26cae1b982 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -133,7 +133,7 @@ static int ics_opal_set_affinity(struct irq_data *d, } server = ics_opal_mangle_server(wanted_server); - pr_devel("ics-hal: set-affinity irq %d [hw 0x%x] server: 0x%x/0x%x\n", + pr_debug("ics-hal: set-affinity irq %d [hw 0x%x] server: 0x%x/0x%x\n", d->irq, hw_irq, wanted_server, server); rc = opal_set_xive(hw_irq, server, priority); diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 6d19d711ed35..b50c6341682e 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -133,6 +133,9 @@ static int ics_rtas_set_affinity(struct irq_data *d, return -1; } + pr_debug("%s: irq %d [hw 0x%x] server: 0x%x\n", __func__, d->irq, + hw_irq, irq_server); + status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq, irq_server, xics_status[1]); -- cgit From e4f0aa3b4731430ad73fb4485e97f751c7500668 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:38 +0200 Subject: powerpc/xics: Add support for IRQ domain hierarchy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XICS doesn't have any state associated with the IRQ. The support is straightforward and simpler than for XIVE. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-21-clg@kaod.org --- arch/powerpc/sysdev/xics/xics-common.c | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 18d3de2f2249..febab57f060f 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -406,7 +406,48 @@ int xics_retrigger(struct irq_data *data) return 0; } +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY +static int xics_host_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *hwirq, unsigned int *type) +{ + return xics_host_xlate(d, to_of_node(fwspec->fwnode), fwspec->param, + fwspec->param_count, hwirq, type); +} + +static int xics_host_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_fwspec *fwspec = arg; + irq_hw_number_t hwirq; + unsigned int type = IRQ_TYPE_NONE; + int i, rc; + + rc = xics_host_domain_translate(domain, fwspec, &hwirq, &type); + if (rc) + return rc; + + pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) + irq_domain_set_info(domain, virq + i, hwirq + i, xics_ics->chip, + xics_ics, handle_fasteoi_irq, NULL, NULL); + + return 0; +} + +static void xics_host_domain_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + pr_debug("%s %d #%d\n", __func__, virq, nr_irqs); +} +#endif + static const struct irq_domain_ops xics_host_ops = { +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + .alloc = xics_host_domain_alloc, + .free = xics_host_domain_free, + .translate = xics_host_domain_translate, +#endif .match = xics_host_match, 
.map = xics_host_map, .xlate = xics_host_xlate, -- cgit From bbb25af8fbdba4acaf955e412a84eb2eea48697c Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:39 +0200 Subject: powerpc/powernv/pci: Customize the MSI EOI handler to support PHB3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PHB3s need an extra OPAL call to EOI the interrupt. The call takes an OPAL HW IRQ number but it is translated into a vector number in OPAL. Here, we directly use the vector number of the in-the-middle "PNV-MSI" domain instead of grabbing the OPAL HW IRQ number in the XICS parent domain. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-22-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e77caa4dbbdf..b498876a976f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2169,12 +2169,33 @@ static void pnv_msi_compose_msg(struct irq_data *d, struct msi_msg *msg) entry->msi_attrib.is_64 ? "64" : "32", d->hwirq, rc); } +/* + * The IRQ data is mapped in the MSI domain in which HW IRQ numbers + * correspond to vector numbers. + */ +static void pnv_msi_eoi(struct irq_data *d) +{ + struct pci_controller *hose = irq_data_get_irq_chip_data(d); + struct pnv_phb *phb = hose->private_data; + + if (phb->model == PNV_PHB_MODEL_PHB3) { + /* + * The EOI OPAL call takes an OPAL HW IRQ number but + * since it is translated into a vector number in + * OPAL, use that directly. + */ + WARN_ON_ONCE(opal_pci_msi_eoi(phb->opal_id, d->hwirq)); + } + + irq_chip_eoi_parent(d); +} + static struct irq_chip pnv_msi_irq_chip = { .name = "PNV-MSI", .irq_shutdown = pnv_msi_shutdown, .irq_mask = irq_chip_mask_parent, .irq_unmask = irq_chip_unmask_parent, - .irq_eoi = irq_chip_eoi_parent, + .irq_eoi = pnv_msi_eoi, .irq_set_affinity = irq_chip_set_affinity_parent, .irq_compose_msi_msg = pnv_msi_compose_msg, }; -- cgit From 679e30b9536eeb93bc8c9a39c0ddc77dec536f6b Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:40 +0200 Subject: powerpc/pci: Drop XIVE restriction on MSI domains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PowerNV and pSeries platforms now have support for both the XICS and XIVE IRQ domains. 
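Both backends now install themselves as the default host, so platform MSI code can fetch its parent domain generically instead of guarding on xive_enabled(). Sketch:

#include <linux/irqdomain.h>

static struct irq_domain *example_msi_parent(void)
{
	/* Whichever controller initialized at boot (XICS or XIVE)
	 * registered itself with irq_set_default_host(). */
	return irq_get_default_host();
}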
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-23-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 4 +--- arch/powerpc/platforms/pseries/msi.c | 4 ---- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b498876a976f..e2454439e574 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2332,9 +2332,7 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", count, phb->msi_base); - /* Only supported by the XIVE driver */ - if (xive_enabled()) - pnv_msi_allocate_domains(phb->hose, count); + pnv_msi_allocate_domains(phb->hose, count); } static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index e2127a3f7ebd..e196cc1b8540 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -720,10 +720,6 @@ int pseries_msi_allocate_domains(struct pci_controller *phb) { int count; - /* Only supported by the XIVE driver */ - if (!xive_enabled()) - return -ENODEV; - if (!__find_pe_total_msi(phb->dn, &count)) { pr_err("PCI: failed to find MSIs for bridge %pOF (domain %d)\n", phb->dn, phb->global_number); -- cgit From 1e661f81a522eadfe4bc5bb1ec9fbae27c13f163 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:41 +0200 Subject: powerpc/xics: Drop unmask of MSIs at startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That was a workaround in the XICS domain because of the lack of MSI domain. This is now handled. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-24-clg@kaod.org --- arch/powerpc/sysdev/xics/ics-opal.c | 11 ----------- arch/powerpc/sysdev/xics/ics-rtas.c | 9 --------- 2 files changed, 20 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index bf26cae1b982..c4d95d8beb6f 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -62,17 +62,6 @@ static void ics_opal_unmask_irq(struct irq_data *d) static unsigned int ics_opal_startup(struct irq_data *d) { -#ifdef CONFIG_PCI_MSI - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - if (irq_data_get_msi_desc(d)) - pci_msi_unmask_irq(d); -#endif - - /* unmask it */ ics_opal_unmask_irq(d); return 0; } diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index b50c6341682e..b9da317b7a2d 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -57,15 +57,6 @@ static void ics_rtas_unmask_irq(struct irq_data *d) static unsigned int ics_rtas_startup(struct irq_data *d) { -#ifdef CONFIG_PCI_MSI - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. 
- */ - if (irq_data_get_msi_desc(d)) - pci_msi_unmask_irq(d); -#endif /* unmask it */ ics_rtas_unmask_irq(d); return 0; -- cgit From 3005123eea0daa18d98602ab64b2ce3ad087d849 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:42 +0200 Subject: powerpc/pseries/pci: Drop unused MSI code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MSIs should be fully managed by the PCI and IRQ subsystems now. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-25-clg@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 87 ------------------------------------ 1 file changed, 87 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index e196cc1b8540..1b305e411862 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -111,21 +111,6 @@ static int rtas_query_irq_number(struct pci_dn *pdn, int offset) return rtas_ret[0]; } -static void rtas_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct msi_desc *entry; - - for_each_pci_msi_entry(entry, pdev) { - if (!entry->irq) - continue; - - irq_set_msi_desc(entry->irq, NULL); - irq_dispose_mapping(entry->irq); - } - - rtas_disable_msi(pdev); -} - static int check_req(struct pci_dev *pdev, int nvec, char *prop_name) { struct device_node *dn; @@ -459,66 +444,6 @@ again: return 0; } -static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) -{ - struct pci_dn *pdn; - int hwirq, virq, i; - int rc; - struct msi_desc *entry; - struct msi_msg msg; - - rc = rtas_prepare_msi_irqs(pdev, nvec_in, type, NULL); - if (rc) - return rc; - - pdn = pci_get_pdn(pdev); - i = 0; - for_each_pci_msi_entry(entry, pdev) { - hwirq = rtas_query_irq_number(pdn, i++); - if (hwirq < 0) { - pr_debug("rtas_msi: error (%d) getting hwirq\n", rc); - return hwirq; - } - - /* - * Depending on the number of online CPUs in the original - * kernel, it is likely for CPU #0 to be offline in a kdump - * kernel. The associated IRQs in the affinity mappings - * provided by irq_create_affinity_masks() are thus not - * started by irq_startup(), as per-design for managed IRQs. - * This can be a problem with multi-queue block devices driven - * by blk-mq : such a non-started IRQ is very likely paired - * with the single queue enforced by blk-mq during kdump (see - * blk_mq_alloc_tag_set()). This causes the device to remain - * silent and likely hangs the guest at some point. - * - * We don't really care for fine-grained affinity when doing - * kdump actually : simply ignore the pre-computed affinity - * masks in this case and let the default mask with all CPUs - * be used when creating the IRQ mappings. 
- */ - if (is_kdump_kernel()) - virq = irq_create_mapping(NULL, hwirq); - else - virq = irq_create_mapping_affinity(NULL, hwirq, - entry->affinity); - - if (!virq) { - pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); - return -ENOSPC; - } - - dev_dbg(&pdev->dev, "rtas_msi: allocated virq %d\n", virq); - irq_set_msi_desc(virq, entry); - - /* Read config space back so we can restore after reset */ - __pci_read_msi_msg(entry, &msg); - entry->msg = msg; - } - - return 0; -} - static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg) { @@ -759,8 +684,6 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) static int rtas_msi_init(void) { - struct pci_controller *phb; - query_token = rtas_token("ibm,query-interrupt-source-number"); change_token = rtas_token("ibm,change-msi"); @@ -772,16 +695,6 @@ static int rtas_msi_init(void) pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n"); - WARN_ON(pseries_pci_controller_ops.setup_msi_irqs); - pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; - pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; - - list_for_each_entry(phb, &hose_list, list_node) { - WARN_ON(phb->controller_ops.setup_msi_irqs); - phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; - phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; - } - WARN_ON(ppc_md.pci_irq_fixup); ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup; -- cgit From 6d9ba6121b1cf453985d08c141970a1b44cd9cf1 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:43 +0200 Subject: powerpc/powernv/pci: Drop unused MSI code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MSIs should be fully managed by the PCI and IRQ subsystems now. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-26-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 27 ------------- arch/powerpc/platforms/powernv/pci.c | 67 ------------------------------- arch/powerpc/platforms/powernv/pci.h | 6 --- 3 files changed, 100 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e2454439e574..eb38ce1fd434 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2080,29 +2080,6 @@ static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, return 0; } -static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, - unsigned int hwirq, unsigned int virq, - unsigned int is_64, struct msi_msg *msg) -{ - struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); - unsigned int xive_num = hwirq - phb->msi_base; - int rc; - - rc = __pnv_pci_ioda_msi_setup(phb, dev, xive_num, is_64, msg); - if (rc) - return rc; - - /* P8 only */ - pnv_set_msi_irq_chip(phb, virq); - - pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," - " address=%x_%08x data=%x PE# %x\n", - pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num, - msg->address_hi, msg->address_lo, msg->data, pe->pe_number); - - return 0; -} - /* * The msi_free() op is called before irq_domain_free_irqs_top() when * the handler data is still available. 
Use that to clear the XIVE @@ -2327,8 +2304,6 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) return; } - phb->msi_setup = pnv_pci_ioda_msi_setup; - phb->msi32_support = 1; pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", count, phb->msi_base); @@ -2936,8 +2911,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .dma_dev_setup = pnv_pci_ioda_dma_dev_setup, .dma_bus_setup = pnv_pci_ioda_dma_bus_setup, .iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported, - .setup_msi_irqs = pnv_setup_msi_irqs, - .teardown_msi_irqs = pnv_teardown_msi_irqs, .enable_device_hook = pnv_pci_enable_device_hook, .release_device = pnv_pci_release_device, .window_alignment = pnv_pci_window_alignment, diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 6bb3c52633fb..9a8391b983d1 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -160,73 +160,6 @@ exit: } EXPORT_SYMBOL_GPL(pnv_pci_set_power_state); -int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); - struct msi_desc *entry; - struct msi_msg msg; - int hwirq; - unsigned int virq; - int rc; - - if (WARN_ON(!phb) || !phb->msi_bmp.bitmap) - return -ENODEV; - - if (pdev->no_64bit_msi && !phb->msi32_support) - return -ENODEV; - - for_each_pci_msi_entry(entry, pdev) { - if (!entry->msi_attrib.is_64 && !phb->msi32_support) { - pr_warn("%s: Supports only 64-bit MSIs\n", - pci_name(pdev)); - return -ENXIO; - } - hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, 1); - if (hwirq < 0) { - pr_warn("%s: Failed to find a free MSI\n", - pci_name(pdev)); - return -ENOSPC; - } - virq = irq_create_mapping(NULL, phb->msi_base + hwirq); - if (!virq) { - pr_warn("%s: Failed to map MSI to linux irq\n", - pci_name(pdev)); - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1); - return -ENOMEM; - } - rc = phb->msi_setup(phb, pdev, phb->msi_base + hwirq, - virq, entry->msi_attrib.is_64, &msg); - if (rc) { - pr_warn("%s: Failed to setup MSI\n", pci_name(pdev)); - irq_dispose_mapping(virq); - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1); - return rc; - } - irq_set_msi_desc(virq, entry); - pci_write_msi_msg(virq, &msg); - } - return 0; -} - -void pnv_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); - struct msi_desc *entry; - irq_hw_number_t hwirq; - - if (WARN_ON(!phb)) - return; - - for_each_pci_msi_entry(entry, pdev) { - if (!entry->irq) - continue; - hwirq = virq_to_hw(entry->irq); - irq_set_msi_desc(entry->irq, NULL); - irq_dispose_mapping(entry->irq); - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); - } -} - /* Nicely print the contents of the PE State Tables (PEST). 
*/ static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size) { diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index c8d4f222a86f..966a9eb64339 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -123,11 +123,7 @@ struct pnv_phb { #endif unsigned int msi_base; - unsigned int msi32_support; struct msi_bitmap msi_bmp; - int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev, - unsigned int hwirq, unsigned int virq, - unsigned int is_64, struct msi_msg *msg); int (*init_m64)(struct pnv_phb *phb); int (*get_pe_state)(struct pnv_phb *phb, int pe_no); void (*freeze_pe)(struct pnv_phb *phb, int pe_no); @@ -289,8 +285,6 @@ extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np); extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); -extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); -extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); -- cgit From f1a377f86f51b381cfc30bf2270f8a5f81e35ee9 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:44 +0200 Subject: powerpc/powernv/pci: Adapt is_pnv_opal_msi() to detect passthrough interrupt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pnv_ioda2_msi_eoi() chip handler is not used anymore for MSIs. Simply use the check on the PSI-MSI chip. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-27-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index eb38ce1fd434..6c4b37598bcc 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2015,7 +2015,7 @@ static struct irq_chip pnv_pci_msi_irq_chip; */ bool is_pnv_opal_msi(struct irq_chip *chip) { - return chip->irq_eoi == pnv_ioda2_msi_eoi || chip == &pnv_pci_msi_irq_chip; + return chip == &pnv_pci_msi_irq_chip; } EXPORT_SYMBOL_GPL(is_pnv_opal_msi); -- cgit From c80198a21792ac59412871e4e6fad5041c9be8e4 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:45 +0200 Subject: powerpc/xics: Fix IRQ migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit desc->irq_data points to the top level IRQ data descriptor which is not necessarily in the XICS IRQ domain. MSIs are in another domain for instance. Fix that by looking for a mapping on the low level XICS IRQ domain. 
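The distinction the fix relies on: desc->irq_data is always the top-level domain's view of the interrupt (the MSI domain for MSIs), while irq_domain_get_irq_data() returns one particular domain's view within the hierarchy. Sketch (example_* name is illustrative):

#include <linux/irq.h>
#include <linux/irqdomain.h>

/* Return the hwirq as seen by 'backend', or -1 if this virq has no
 * mapping at that level (e.g. it lives in an unrelated domain). */
static long example_backend_hwirq(struct irq_domain *backend,
				  unsigned int virq)
{
	struct irq_data *irqd = irq_domain_get_irq_data(backend, virq);

	return irqd ? (long)irqd_to_hwirq(irqd) : -1;
}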
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-28-clg@kaod.org --- arch/powerpc/sysdev/xics/xics-common.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index febab57f060f..4a7687caec75 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -183,6 +183,8 @@ void xics_migrate_irqs_away(void) unsigned int irq, virq; struct irq_desc *desc; + pr_debug("%s: CPU %u\n", __func__, cpu); + /* If we used to be the default server, move to the new "boot_cpuid" */ if (hw_cpu == xics_default_server) xics_update_irq_servers(); @@ -197,6 +199,7 @@ void xics_migrate_irqs_away(void) struct irq_chip *chip; long server; unsigned long flags; + struct irq_data *irqd; /* We can't set affinity on ISA interrupts */ if (virq < NR_IRQS_LEGACY) @@ -204,9 +207,11 @@ void xics_migrate_irqs_away(void) /* We only need to migrate enabled IRQS */ if (!desc->action) continue; - if (desc->irq_data.domain != xics_host) + /* We need a mapping in the XICS IRQ domain */ + irqd = irq_domain_get_irq_data(xics_host, virq); + if (!irqd) continue; - irq = desc->irq_data.hwirq; + irq = irqd_to_hwirq(irqd); /* We need to get IPIs still. */ if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) continue; -- cgit From 5cd69651ceeed15e021cf7d19f1b1be0a80c0c7a Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:46 +0200 Subject: powerpc/powernv/pci: Set the IRQ chip data for P8/CXL devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before MSI domains, the default IRQ chip of PHB3 MSIs was patched by pnv_set_msi_irq_chip() with the custom EOI handler pnv_ioda2_msi_eoi() and the owning PHB was deduced from the 'ioda.irq_chip' field. This path has been deprecated by the MSI domains but it is still in use by the P8 CAPI 'cxl' driver. Rewriting this driver to support MSI would be a waste of time. Nevertheless, we can still remove the IRQ chip patch and set the IRQ chip data instead. This is cleaner. 
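The replacement for chip patching is the stock irqchip plumbing: store the owning PHB's controller as chip data at setup time and read it back in the handler. A minimal sketch, reusing the pnv_phb/pci_controller types from this series:

#include <linux/irq.h>

static void example_set_owner(struct pnv_phb *phb, unsigned int virq)
{
	irq_set_chip(virq, &phb->ioda.irq_chip);
	/* Store the owner; no need to patch the chip's callbacks. */
	irq_set_chip_data(virq, phb->hose);
}

static struct pnv_phb *example_get_owner(struct irq_data *d)
{
	struct pci_controller *hose = irq_data_get_irq_chip_data(d);

	return hose->private_data;
}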
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-29-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6c4b37598bcc..aa97245eedbf 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1971,19 +1971,23 @@ int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) return opal_pci_msi_eoi(phb->opal_id, hw_irq); } +/* + * The IRQ data is mapped in the XICS domain, with OPAL HW IRQ numbers + */ static void pnv_ioda2_msi_eoi(struct irq_data *d) { int64_t rc; unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct irq_chip *chip = irq_data_get_irq_chip(d); + struct pci_controller *hose = irq_data_get_irq_chip_data(d); + struct pnv_phb *phb = hose->private_data; - rc = pnv_opal_pci_msi_eoi(chip, hw_irq); + rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); WARN_ON_ONCE(rc); icp_native_eoi(d); } - +/* P8/CXL only */ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) { struct irq_data *idata; @@ -2005,6 +2009,7 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; } irq_set_chip(virq, &phb->ioda.irq_chip); + irq_set_chip_data(virq, phb->hose); } static struct irq_chip pnv_pci_msi_irq_chip; -- cgit From c325712b5f85e561ea89bae2ba5d0104e797e42c Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:47 +0200 Subject: powerpc/powernv/pci: Rework pnv_opal_pci_msi_eoi() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pnv_opal_pci_msi_eoi() is called from KVM to EOI passthrough interrupts when in real mode. Adding MSI domain broke the hack using the 'ioda.irq_chip' field to deduce the owning PHB. Fix that by using the IRQ chip data in the MSI domain. The 'ioda.irq_chip' field is now unused and could be removed from the pnv_phb struct. 
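With hierarchical IRQ domains each level carries its own irq_data, so the PHB can be recovered from the parent level's chip data rather than from a patched chip. A rough sketch of the walk, assuming the chip data was set as in the previous patch:

static struct pnv_phb *example_phb_from_msi(struct irq_data *d)
{
	/* 'd' sits in the PCI-MSI domain; 'parent_data' (available with
	 * CONFIG_IRQ_DOMAIN_HIERARCHY) holds the pci_controller stored
	 * with irq_set_chip_data() at the level below. */
	struct pci_controller *hose =
		irq_data_get_irq_chip_data(d->parent_data);

	return hose->private_data;
}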
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-30-clg@kaod.org --- arch/powerpc/include/asm/pnv-pci.h | 2 +- arch/powerpc/kvm/book3s_hv_rm_xics.c | 8 ++++---- arch/powerpc/platforms/powernv/pci-ioda.c | 17 +++++++++++++---- 3 files changed, 18 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index d0ee0ede5767..b3f480799352 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -33,7 +33,7 @@ int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num); void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num); int pnv_cxl_get_irq_count(struct pci_dev *dev); struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev); -int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq); +int64_t pnv_opal_pci_msi_eoi(struct irq_data *d); bool is_pnv_opal_msi(struct irq_chip *chip); #ifdef CONFIG_CXL_BASE diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 0a11ec88a0ae..587c33fc4564 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -706,6 +706,7 @@ static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq) icp->rm_eoied_irq = irq; } + /* Handle passthrough interrupts */ if (state->host_irq) { ++vcpu->stat.pthru_all; if (state->intr_cpu != -1) { @@ -759,12 +760,12 @@ int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) static unsigned long eoi_rc; -static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) +static void icp_eoi(struct irq_data *d, u32 hwirq, __be32 xirr, bool *again) { void __iomem *xics_phys; int64_t rc; - rc = pnv_opal_pci_msi_eoi(c, hwirq); + rc = pnv_opal_pci_msi_eoi(d); if (rc) eoi_rc = rc; @@ -872,8 +873,7 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, icp_rm_deliver_irq(xics, icp, irq, false); /* EOI the interrupt */ - icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr, - again); + icp_eoi(irq_desc_get_irq_data(irq_map->desc), irq_map->r_hwirq, xirr, again); if (check_too_hard(xics, icp) == H_TOO_HARD) return 2; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index aa97245eedbf..2389cd79c3c8 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1963,12 +1963,21 @@ void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, pe->dma_setup_done = true; } -int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) +/* + * Called from KVM in real mode to EOI passthru interrupts. The ICP + * EOI is handled directly in KVM in kvmppc_deliver_irq_passthru(). + * + * The IRQ data is mapped in the PCI-MSI domain and the EOI OPAL call + * needs an HW IRQ number mapped in the XICS IRQ domain. The HW IRQ + * numbers of the in-the-middle MSI domain are vector numbers and it's + * good enough for OPAL. Use that. 
+ */ +int64_t pnv_opal_pci_msi_eoi(struct irq_data *d) { - struct pnv_phb *phb = container_of(chip, struct pnv_phb, - ioda.irq_chip); + struct pci_controller *hose = irq_data_get_irq_chip_data(d->parent_data); + struct pnv_phb *phb = hose->private_data; - return opal_pci_msi_eoi(phb->opal_id, hw_irq); + return opal_pci_msi_eoi(phb->opal_id, d->parent_data->hwirq); } /* -- cgit From 1753081f2d445f9157550692fcc4221cd3ff0958 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:48 +0200 Subject: KVM: PPC: Book3S HV: XICS: Fix mapping of passthrough interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI MSIs now live in an MSI domain but the underlying calls, which will EOI the interrupt in real mode, need an HW IRQ number mapped in the XICS IRQ domain. Grab it there. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-31-clg@kaod.org --- arch/powerpc/kvm/book3s_hv.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 05b3a3548c18..f28f99805c4c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5328,6 +5328,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) struct kvmppc_passthru_irqmap *pimap; struct irq_chip *chip; int i, rc = 0; + struct irq_data *host_data; if (!kvm_irq_bypass) return 1; @@ -5392,7 +5393,14 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) * the KVM real mode handler. */ smp_wmb(); - irq_map->r_hwirq = desc->irq_data.hwirq; + + /* + * The 'host_irq' number is mapped in the PCI-MSI domain but + * the underlying calls, which will EOI the interrupt in real + * mode, need an HW IRQ number mapped in the XICS IRQ domain. + */ + host_data = irq_domain_get_irq_data(irq_get_default_host(), host_irq); + irq_map->r_hwirq = (unsigned int)irqd_to_hwirq(host_data); if (i == pimap->n_mapped) pimap->n_mapped++; @@ -5400,7 +5408,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) if (xics_on_xive()) rc = kvmppc_xive_set_mapped(kvm, guest_gsi, host_irq); else - kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); + kvmppc_xics_set_mapped(kvm, guest_gsi, irq_map->r_hwirq); if (rc) irq_map->r_hwirq = 0; -- cgit From 59b2bc18b1492b46d45b6b6828ba098f09b9ba67 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:49 +0200 Subject: powerpc/xive: Use XIVE domain under xmon and debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default domain of the PCI/MSIs is not the XIVE domain anymore. To list the IRQ mappings under XMON and debugfs, query the IRQ data from the low level XIVE domain. 
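The same query works for any walk over all descriptors: ask the target domain for the irq_data and skip the virq when the answer is NULL. A minimal sketch of dumping one domain's mappings:

#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/irqnr.h>
#include <linux/printk.h>

static void example_dump_domain(struct irq_domain *domain)
{
	unsigned int i;
	struct irq_desc *desc;

	for_each_irq_desc(i, desc) {
		/* NULL when virq 'i' is not mapped in 'domain' */
		struct irq_data *d = irq_domain_get_irq_data(domain, i);

		if (d)
			pr_info("virq %u -> hwirq %lu\n", i,
				(unsigned long)irqd_to_hwirq(d));
	}
}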
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-32-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 107f442d3411..d0deaebbfeeb 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -312,11 +312,10 @@ void xmon_xive_get_irq_all(void) struct irq_desc *desc; for_each_irq_desc(i, desc) { - struct irq_data *d = irq_desc_get_irq_data(desc); - unsigned int hwirq = (unsigned int)irqd_to_hwirq(d); + struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); - if (d->domain == xive_irq_domain) - xmon_xive_get_irq_config(hwirq, d); + if (d) + xmon_xive_get_irq_config(irqd_to_hwirq(d), d); } } @@ -1757,9 +1756,9 @@ static int xive_core_debug_show(struct seq_file *m, void *private) xive_debug_show_cpu(m, cpu); for_each_irq_desc(i, desc) { - struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); - if (d->domain == xive_irq_domain) + if (d) xive_debug_show_irq(m, d); } return 0; -- cgit From 17df41fec5b80b16ea4774495f1eb730e2225619 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 19 Jul 2021 15:06:14 +0200 Subject: powerpc: use IRQF_NO_DEBUG for IPIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to use the lockup detector ("noirqdebug") for IPIs. The ipistorm benchmark measures a ~10% improvement on large systems when this flag is set. Signed-off-by: Cédric Le Goater Reviewed-by: Thomas Gleixner Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210719130614.195886-1-clg@kaod.org --- arch/powerpc/sysdev/xics/xics-common.c | 2 +- arch/powerpc/sysdev/xive/common.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 4a7687caec75..5c1a157a83b8 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -132,7 +132,7 @@ static void xics_request_ipi(void) * IPIs are marked IRQF_PERCPU. The handler was set in map. */ BUG_ON(request_irq(ipi, icp_ops->ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL)); + IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL)); } void __init xics_smp_probe(void) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index d0deaebbfeeb..4018964bbd69 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1149,7 +1149,8 @@ static int __init xive_request_ipi(void) snprintf(xid->name, sizeof(xid->name), "IPI-%d", node); ret = request_irq(xid->irq, xive_muxed_ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL); + IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD, + xid->name, NULL); WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); } -- cgit From b68c6646cce5ee8caefa6333ee743f960222dcea Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Tue, 20 Jul 2021 15:42:08 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add a 'flags' field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use it to hold platform-specific features. P9 DD2 introduced single-escalation support. P10 will add others.
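The conversion swaps a dedicated boolean-like field for one bit in a small flags word plus an accessor, which keeps adding further P10 features cheap. In isolation the pattern looks like this (names are illustrative, not the series' own):

#include <linux/types.h>

#define EXAMPLE_FLAG_SINGLE_ESCALATION	0x1
#define EXAMPLE_FLAG_SAVE_RESTORE	0x2	/* added by a later patch */

struct example_xive {
	u8 flags;	/* replaces 'u8 single_escalation' */
};

static inline bool example_has_single_escalation(struct example_xive *x)
{
	return x->flags & EXAMPLE_FLAG_SINGLE_ESCALATION;
}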
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210720134209.256133-2-clg@kaod.org --- arch/powerpc/kvm/book3s_xive.c | 19 ++++++++++--------- arch/powerpc/kvm/book3s_xive.h | 9 ++++++++- arch/powerpc/kvm/book3s_xive_native.c | 12 +++++++----- 3 files changed, 25 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 6878026ee8ec..555cc610e7ab 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -363,9 +363,9 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) if (!vcpu->arch.xive_vcpu) continue; rc = xive_provision_queue(vcpu, prio); - if (rc == 0 && !xive->single_escalation) + if (rc == 0 && !kvmppc_xive_has_single_escalation(xive)) kvmppc_xive_attach_escalation(vcpu, prio, - xive->single_escalation); + kvmppc_xive_has_single_escalation(xive)); if (rc) return rc; } @@ -1199,7 +1199,7 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) /* Free escalations */ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { if (xc->esc_virq[i]) { - if (xc->xive->single_escalation) + if (kvmppc_xive_has_single_escalation(xc->xive)) xive_cleanup_single_escalation(vcpu, xc, xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); @@ -1340,7 +1340,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, * Enable the VP first as the single escalation mode will * affect escalation interrupts numbering */ - r = xive_native_enable_vp(xc->vp_id, xive->single_escalation); + r = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive)); if (r) { pr_err("Failed to enable VP in OPAL, err %d\n", r); goto bail; @@ -1357,15 +1357,15 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, struct xive_q *q = &xc->queues[i]; /* Single escalation, no queue 7 */ - if (i == 7 && xive->single_escalation) + if (i == 7 && kvmppc_xive_has_single_escalation(xive)) break; /* Is queue already enabled ? 
Provision it */ if (xive->qmap & (1 << i)) { r = xive_provision_queue(vcpu, i); - if (r == 0 && !xive->single_escalation) + if (r == 0 && !kvmppc_xive_has_single_escalation(xive)) kvmppc_xive_attach_escalation( - vcpu, i, xive->single_escalation); + vcpu, i, kvmppc_xive_has_single_escalation(xive)); if (r) goto bail; } else { @@ -1380,7 +1380,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, } /* If not done above, attach priority 0 escalation */ - r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation); + r = kvmppc_xive_attach_escalation(vcpu, 0, kvmppc_xive_has_single_escalation(xive)); if (r) goto bail; @@ -2135,7 +2135,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) */ xive->nr_servers = KVM_MAX_VCPUS; - xive->single_escalation = xive_native_has_single_escalation(); + if (xive_native_has_single_escalation()) + xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; kvm->arch.xive = xive; return 0; diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index afe9eeac6d56..73c3cd25093c 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -97,6 +97,8 @@ struct kvmppc_xive_ops { int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq); }; +#define KVMPPC_XIVE_FLAG_SINGLE_ESCALATION 0x1 + struct kvmppc_xive { struct kvm *kvm; struct kvm_device *dev; @@ -133,7 +135,7 @@ struct kvmppc_xive { u32 q_page_order; /* Flags */ - u8 single_escalation; + u8 flags; /* Number of entries in the VP block */ u32 nr_servers; @@ -308,5 +310,10 @@ void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); +static inline bool kvmppc_xive_has_single_escalation(struct kvmppc_xive *xive) +{ + return xive->flags & KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; +} + #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 573ecaab3597..2abb1358a268 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -93,7 +93,7 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { /* Free the escalation irq */ if (xc->esc_virq[i]) { - if (xc->xive->single_escalation) + if (kvmppc_xive_has_single_escalation(xc->xive)) xive_cleanup_single_escalation(vcpu, xc, xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); @@ -172,7 +172,7 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, * Enable the VP first as the single escalation mode will * affect escalation interrupts numbering */ - rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation); + rc = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive)); if (rc) { pr_err("Failed to enable VP in OPAL: %d\n", rc); goto bail; @@ -693,7 +693,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, } rc = kvmppc_xive_attach_escalation(vcpu, priority, - xive->single_escalation); + kvmppc_xive_has_single_escalation(xive)); error: if (rc) kvmppc_xive_native_cleanup_queue(vcpu, priority); @@ -820,7 +820,7 @@ static int kvmppc_xive_reset(struct kvmppc_xive *xive) for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { /* Single escalation, no queue 7 */ - if (prio == 7 && xive->single_escalation) + if (prio == 7 && kvmppc_xive_has_single_escalation(xive)) break; if (xc->esc_virq[prio]) { @@ -1111,7 +1111,9 @@ static int 
kvmppc_xive_native_create(struct kvm_device *dev, u32 type) */ xive->nr_servers = KVM_MAX_VCPUS; - xive->single_escalation = xive_native_has_single_escalation(); + if (xive_native_has_single_escalation()) + xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; + xive->ops = &kvmppc_xive_native_ops; kvm->arch.xive = xive; -- cgit From f5af0a978776b710f16dc99a85496b1e760bf9e0 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Tue, 20 Jul 2021 15:42:09 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add support for automatic save-restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On P10, the feature doing an automatic "save & restore" of a VCPU interrupt context is set by default in OPAL. When a VP context is pulled out, the state of the interrupt registers is saved by the XIVE interrupt controller under the internal NVP structure representing the VP. This saves a costly store/load in guest entries and exits. If OPAL advertises the "save & restore" feature in the device tree, it should also have set the 'H' bit in the CAM line. Check that when vCPUs are connected to their ICP in KVM before going any further. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210720134209.256133-3-clg@kaod.org --- arch/powerpc/include/asm/xive-regs.h | 3 +++ arch/powerpc/include/asm/xive.h | 1 + arch/powerpc/kvm/book3s_xive.c | 34 ++++++++++++++++++++++++++++++++-- arch/powerpc/kvm/book3s_xive.h | 2 ++ arch/powerpc/kvm/book3s_xive_native.c | 9 +++++++++ arch/powerpc/sysdev/xive/native.c | 10 ++++++++++ 6 files changed, 57 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h index 8b211faa0e42..cf8bb6ac4463 100644 --- a/arch/powerpc/include/asm/xive-regs.h +++ b/arch/powerpc/include/asm/xive-regs.h @@ -80,10 +80,13 @@ #define TM_QW0W2_VU PPC_BIT32(0) #define TM_QW0W2_LOGIC_SERV PPC_BITMASK32(1,31) // XX 2,31 ?
#define TM_QW1W2_VO PPC_BIT32(0) +#define TM_QW1W2_HO PPC_BIT32(1) /* P10 XIVE2 */ #define TM_QW1W2_OS_CAM PPC_BITMASK32(8,31) #define TM_QW2W2_VP PPC_BIT32(0) +#define TM_QW2W2_HP PPC_BIT32(1) /* P10 XIVE2 */ #define TM_QW2W2_POOL_CAM PPC_BITMASK32(8,31) #define TM_QW3W2_VT PPC_BIT32(0) +#define TM_QW3W2_HT PPC_BIT32(1) /* P10 XIVE2 */ #define TM_QW3W2_LP PPC_BIT32(6) #define TM_QW3W2_LE PPC_BIT32(7) #define TM_QW3W2_T PPC_BIT32(31) diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 20ae50ab083c..92930b0b5d0e 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -126,6 +126,7 @@ int xive_native_enable_vp(u32 vp_id, bool single_escalation); int xive_native_disable_vp(u32 vp_id); int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); bool xive_native_has_single_escalation(void); +bool xive_native_has_save_restore(void); int xive_native_get_queue_info(u32 vp_id, uint32_t prio, u64 *out_qpage, diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 555cc610e7ab..912c1e9eef6b 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -59,6 +59,25 @@ */ #define XIVE_Q_GAP 2 +static bool kvmppc_xive_vcpu_has_save_restore(struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + + /* Check enablement at VP level */ + return xc->vp_cam & TM_QW1W2_HO; +} + +bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct kvmppc_xive *xive = xc->xive; + + if (xive->flags & KVMPPC_XIVE_FLAG_SAVE_RESTORE) + return kvmppc_xive_vcpu_has_save_restore(vcpu); + + return true; +} + /* * Push a vcpu's context to the XIVE on guest entry. * This assumes we are in virtual mode (MMU on) @@ -77,7 +96,8 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) return; eieio(); - __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS); + if (!kvmppc_xive_vcpu_has_save_restore(vcpu)) + __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS); __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2); vcpu->arch.xive_pushed = 1; eieio(); @@ -149,7 +169,8 @@ void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) /* First load to pull the context, we ignore the value */ __raw_readl(tima + TM_SPC_PULL_OS_CTX); /* Second load to recover the context state (Words 0 and 1) */ - vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS); + if (!kvmppc_xive_vcpu_has_save_restore(vcpu)) + vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS); /* Fixup some of the state for the next load */ vcpu->arch.xive_saved_state.lsmfb = 0; @@ -1319,6 +1340,12 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, if (r) goto bail; + if (!kvmppc_xive_check_save_restore(vcpu)) { + pr_err("inconsistent save-restore setup for VCPU %d\n", cpu); + r = -EIO; + goto bail; + } + /* Configure VCPU fields for use by assembly push/pull */ vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000); vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO); @@ -2138,6 +2165,9 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) if (xive_native_has_single_escalation()) xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; + if (xive_native_has_save_restore()) + xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE; + kvm->arch.xive = xive; return 0; } diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 73c3cd25093c..e6a9651c6f1e 100644 --- 
a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -98,6 +98,7 @@ struct kvmppc_xive_ops { }; #define KVMPPC_XIVE_FLAG_SINGLE_ESCALATION 0x1 +#define KVMPPC_XIVE_FLAG_SAVE_RESTORE 0x2 struct kvmppc_xive { struct kvm *kvm; @@ -309,6 +310,7 @@ void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, struct kvmppc_xive_vcpu *xc, int irq); int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); +bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu); static inline bool kvmppc_xive_has_single_escalation(struct kvmppc_xive *xive) { diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 2abb1358a268..af65ea21bde7 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -168,6 +168,12 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, goto bail; } + if (!kvmppc_xive_check_save_restore(vcpu)) { + pr_err("inconsistent save-restore setup for VCPU %d\n", server_num); + rc = -EIO; + goto bail; + } + /* * Enable the VP first as the single escalation mode will * affect escalation interrupts numbering @@ -1114,6 +1120,9 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) if (xive_native_has_single_escalation()) xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; + if (xive_native_has_save_restore()) + xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE; + xive->ops = &kvmppc_xive_native_ops; kvm->arch.xive = xive; diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 57e3f1540435..1aec282cd650 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -41,6 +41,7 @@ static u32 xive_queue_shift; static u32 xive_pool_vps = XIVE_INVALID_VP; static struct kmem_cache *xive_provision_cache; static bool xive_has_single_esc; +static bool xive_has_save_restore; int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) { @@ -588,6 +589,9 @@ bool __init xive_native_init(void) if (of_get_property(np, "single-escalation-support", NULL) != NULL) xive_has_single_esc = true; + if (of_get_property(np, "vp-save-restore", NULL)) + xive_has_save_restore = true; + /* Configure Thread Management areas for KVM */ for_each_possible_cpu(cpu) kvmppc_set_xive_tima(cpu, r.start, tima); @@ -752,6 +756,12 @@ bool xive_native_has_single_escalation(void) } EXPORT_SYMBOL_GPL(xive_native_has_single_escalation); +bool xive_native_has_save_restore(void) +{ + return xive_has_save_restore; +} +EXPORT_SYMBOL_GPL(xive_native_has_save_restore); + int xive_native_get_queue_info(u32 vp_id, u32 prio, u64 *out_qpage, u64 *out_qsize, -- cgit From 1bce54250045443d55659b0b23be51e87ed2b919 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Aug 2021 17:26:28 +0100 Subject: powerpc: Bulk conversion to generic_handle_domain_irq() Wherever possible, replace constructs that match either generic_handle_irq(irq_find_mapping()) or generic_handle_irq(irq_linear_revmap()) to a single call to generic_handle_domain_irq(). 
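Each conversion is mechanical: the hwirq-to-virq lookup and the dispatch collapse into a single call that also reports failure. Schematically, for any cascade handler (the two functions are alternatives, shown side by side as a sketch):

#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/irqdomain.h>

static void example_cascade_old(struct irq_domain *host, unsigned int hwirq)
{
	/* Before: map the hwirq, then dispatch the virq if it exists. */
	unsigned int virq = irq_find_mapping(host, hwirq);

	if (virq)
		generic_handle_irq(virq);
}

static void example_cascade_new(struct irq_domain *host, unsigned int hwirq)
{
	/* After: one call; a negative return means no mapping/handler,
	 * which some call sites (e.g. fsl_mpic_err) now check. */
	generic_handle_domain_irq(host, hwirq);
}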
Signed-off-by: Marc Zyngier Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210802162630.2219813-13-maz@kernel.org --- arch/powerpc/platforms/4xx/uic.c | 4 +--- arch/powerpc/platforms/512x/mpc5121_ads_cpld.c | 23 ++++++++++------------- arch/powerpc/platforms/52xx/media5200.c | 9 ++++----- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 7 ++----- arch/powerpc/platforms/82xx/pq2ads-pci-pic.c | 6 ++---- arch/powerpc/platforms/cell/interrupt.c | 8 ++------ arch/powerpc/platforms/cell/spider-pic.c | 11 +++-------- arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 15 +++++++-------- arch/powerpc/platforms/powernv/opal-irqchip.c | 11 ++++------- arch/powerpc/sysdev/fsl_mpic_err.c | 11 ++++------- arch/powerpc/sysdev/fsl_msi.c | 12 ++++-------- 11 files changed, 43 insertions(+), 74 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/4xx/uic.c index 36fb66ce54cf..89e2587b1a59 100644 --- a/arch/powerpc/platforms/4xx/uic.c +++ b/arch/powerpc/platforms/4xx/uic.c @@ -198,7 +198,6 @@ static void uic_irq_cascade(struct irq_desc *desc) struct uic *uic = irq_desc_get_handler_data(desc); u32 msr; int src; - int subvirq; raw_spin_lock(&desc->lock); if (irqd_is_level_type(idata)) @@ -213,8 +212,7 @@ static void uic_irq_cascade(struct irq_desc *desc) src = 32 - ffs(msr); - subvirq = irq_linear_revmap(uic->irqhost, src); - generic_handle_irq(subvirq); + generic_handle_domain_irq(uic->irqhost, src); uic_irq_ret: raw_spin_lock(&desc->lock); diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index b2981634f1f8..ea46870e5d6e 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -81,11 +81,10 @@ static struct irq_chip cpld_pic = { .irq_unmask = cpld_unmask_irq, }; -static int +static unsigned int cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, u8 __iomem *maskp) { - int cpld_irq; u8 status = in_8(statusp); u8 mask = in_8(maskp); @@ -93,28 +92,26 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, status |= (ignore | mask); if (status == 0xff) - return 0; - - cpld_irq = ffz(status) + offset; + return ~0; - return irq_linear_revmap(cpld_pic_host, cpld_irq); + return ffz(status) + offset; } static void cpld_pic_cascade(struct irq_desc *desc) { - unsigned int irq; + unsigned int hwirq; - irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, + hwirq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, &cpld_regs->pci_mask); - if (irq) { - generic_handle_irq(irq); + if (hwirq != ~0) { + generic_handle_domain_irq(cpld_pic_host, hwirq); return; } - irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status, + hwirq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status, &cpld_regs->misc_mask); - if (irq) { - generic_handle_irq(irq); + if (hwirq != ~0) { + generic_handle_domain_irq(cpld_pic_host, hwirq); return; } } diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index efb8bdecbcc7..110c444f4bc7 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -78,7 +78,7 @@ static struct irq_chip media5200_irq_chip = { static void media5200_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - int sub_virq, val; + int val; u32 status, enable; /* Mask off the cascaded IRQ */ @@ -92,11 +92,10 @@ static void media5200_irq_cascade(struct irq_desc *desc) enable = 
in_be32(media5200_irq.regs + MEDIA5200_IRQ_STATUS); val = ffs((status & enable) >> MEDIA5200_IRQ_SHIFT); if (val) { - sub_virq = irq_linear_revmap(media5200_irq.irqhost, val - 1); - /* pr_debug("%s: virq=%i s=%.8x e=%.8x hwirq=%i subvirq=%i\n", - * __func__, virq, status, enable, val - 1, sub_virq); + generic_handle_domain_irq(media5200_irq.irqhost, val - 1); + /* pr_debug("%s: virq=%i s=%.8x e=%.8x hwirq=%i\n", + * __func__, virq, status, enable, val - 1); */ - generic_handle_irq(sub_virq); } /* Processing done; can reenable the cascade now */ diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 3823df235f25..f862b48b4824 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -190,14 +190,11 @@ static struct irq_chip mpc52xx_gpt_irq_chip = { static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc) { struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc); - int sub_virq; u32 status; status = in_be32(&gpt->regs->status) & MPC52xx_GPT_STATUS_IRQMASK; - if (status) { - sub_virq = irq_linear_revmap(gpt->irqhost, 0); - generic_handle_irq(sub_virq); - } + if (status) + generic_handle_domain_irq(gpt->irqhost, 0); } static int mpc52xx_gpt_irq_map(struct irq_domain *h, unsigned int virq, diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index f82f75a6085c..285bfe19b798 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -91,10 +91,8 @@ static void pq2ads_pci_irq_demux(struct irq_desc *desc) break; for (bit = 0; pend != 0; ++bit, pend <<= 1) { - if (pend & 0x80000000) { - int virq = irq_linear_revmap(priv->host, bit); - generic_handle_irq(virq); - } + if (pend & 0x80000000) + generic_handle_domain_irq(priv->host, bit); } } } diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index c0ab62ba6f16..0873a7a20271 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -106,13 +106,9 @@ static void iic_ioexc_cascade(struct irq_desc *desc) out_be64(&node_iic->iic_is, ack); /* handle them */ for (cascade = 63; cascade >= 0; cascade--) - if (bits & (0x8000000000000000UL >> cascade)) { - unsigned int cirq = - irq_linear_revmap(iic_host, + if (bits & (0x8000000000000000UL >> cascade)) + generic_handle_domain_irq(iic_host, base | cascade); - if (cirq) - generic_handle_irq(cirq); - } /* post-ack level interrupts */ ack = bits & ~IIC_ISR_EDGE_MASK; if (ack) diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 210785f59271..8af75867cb42 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -190,16 +190,11 @@ static void spider_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct spider_pic *pic = irq_desc_get_handler_data(desc); - unsigned int cs, virq; + unsigned int cs; cs = in_be32(pic->regs + TIR_CS) >> 24; - if (cs == SPIDER_IRQ_INVALID) - virq = 0; - else - virq = irq_linear_revmap(pic->host, cs); - - if (virq) - generic_handle_irq(virq); + if (cs != SPIDER_IRQ_INVALID) + generic_handle_domain_irq(pic->host, cs); chip->irq_eoi(&desc->irq_data); } diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index a1b7f79a8a15..15396333a90b 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ 
b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -108,7 +108,6 @@ static const struct irq_domain_ops hlwd_irq_domain_ops = { static unsigned int __hlwd_pic_get_irq(struct irq_domain *h) { void __iomem *io_base = h->host_data; - int irq; u32 irq_status; irq_status = in_be32(io_base + HW_BROADWAY_ICR) & @@ -116,23 +115,22 @@ static unsigned int __hlwd_pic_get_irq(struct irq_domain *h) if (irq_status == 0) return 0; /* no more IRQs pending */ - irq = __ffs(irq_status); - return irq_linear_revmap(h, irq); + return __ffs(irq_status); } static void hlwd_pic_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_domain *irq_domain = irq_desc_get_handler_data(desc); - unsigned int virq; + unsigned int hwirq; raw_spin_lock(&desc->lock); chip->irq_mask(&desc->irq_data); /* IRQ_LEVEL */ raw_spin_unlock(&desc->lock); - virq = __hlwd_pic_get_irq(irq_domain); - if (virq) - generic_handle_irq(virq); + hwirq = __hlwd_pic_get_irq(irq_domain); + if (hwirq) + generic_handle_domain_irq(irq_domain, hwirq); else pr_err("spurious interrupt!\n"); @@ -190,7 +188,8 @@ static struct irq_domain *hlwd_pic_init(struct device_node *np) unsigned int hlwd_pic_get_irq(void) { - return __hlwd_pic_get_irq(hlwd_irq_host); + unsigned int hwirq = __hlwd_pic_get_irq(hlwd_irq_host); + return hwirq ? irq_linear_revmap(hlwd_irq_host, hwirq) : 0; } /* diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index c164419e254d..d55652b5f6fa 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -46,18 +46,15 @@ void opal_handle_events(void) e = READ_ONCE(last_outstanding_events) & opal_event_irqchip.mask; again: while (e) { - int virq, hwirq; + int hwirq; hwirq = fls64(e) - 1; e &= ~BIT_ULL(hwirq); local_irq_disable(); - virq = irq_find_mapping(opal_event_irqchip.domain, hwirq); - if (virq) { - irq_enter(); - generic_handle_irq(virq); - irq_exit(); - } + irq_enter(); + generic_handle_domain_irq(opal_event_irqchip.domain, hwirq); + irq_exit(); local_irq_enable(); cond_resched(); diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c index 5fa5fa215541..9a98bb212922 100644 --- a/arch/powerpc/sysdev/fsl_mpic_err.c +++ b/arch/powerpc/sysdev/fsl_mpic_err.c @@ -99,7 +99,6 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data) struct mpic *mpic = (struct mpic *) data; u32 eisr, eimr; int errint; - unsigned int cascade_irq; eisr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EISR); eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR); @@ -108,13 +107,11 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data) return IRQ_NONE; while (eisr) { + int ret; errint = __builtin_clz(eisr); - cascade_irq = irq_linear_revmap(mpic->irqhost, - mpic->err_int_vecs[errint]); - WARN_ON(!cascade_irq); - if (cascade_irq) { - generic_handle_irq(cascade_irq); - } else { + ret = generic_handle_domain_irq(mpic->irqhost, + mpic->err_int_vecs[errint]); + if (WARN_ON(ret)) { eimr |= 1 << (31 - errint); mpic_fsl_err_write(mpic->err_regs, eimr); } diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 808e7118abfc..e6b06c3f8197 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -266,7 +266,6 @@ out_free: static irqreturn_t fsl_msi_cascade(int irq, void *data) { - unsigned int cascade_irq; struct fsl_msi *msi_data; int msir_index = -1; u32 msir_value = 0; @@ -279,9 +278,6 @@ static irqreturn_t 
fsl_msi_cascade(int irq, void *data) msir_index = cascade_data->index; - if (msir_index >= NR_MSI_REG_MAX) - cascade_irq = 0; - switch (msi_data->feature & FSL_PIC_IP_MASK) { case FSL_PIC_IP_MPIC: msir_value = fsl_msi_read(msi_data->msi_regs, @@ -305,15 +301,15 @@ static irqreturn_t fsl_msi_cascade(int irq, void *data) } while (msir_value) { + int err; intr_index = ffs(msir_value) - 1; - cascade_irq = irq_linear_revmap(msi_data->irqhost, + err = generic_handle_domain_irq(msi_data->irqhost, msi_hwirq(msi_data, msir_index, intr_index + have_shift)); - if (cascade_irq) { - generic_handle_irq(cascade_irq); + if (!err) ret = IRQ_HANDLED; - } + have_shift += intr_index + 1; msir_value = msir_value >> (intr_index + 1); } -- cgit From 94effcedaa543825ad9c80831450d4fbfa284880 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Thu, 5 Aug 2021 11:49:36 +0900 Subject: openrisc: Fix compiler warnings in setup This was pointed out with the recent name change of or32_early_setup to or1k_early_setup. Investigating the file I found a few other warnings, so clean them up here. arch/openrisc/kernel/setup.c:220:13: warning: no previous prototype for 'or1k_early_setup' [-Wmissing-prototypes] 220 | void __init or1k_early_setup(void *fdt) | ^~~~~~~~~~~~~~~~ Fix the missing or1k_early_setup prototype warning by adding an asm/setup.h file to define the prototype. arch/openrisc/kernel/setup.c:246:13: warning: no previous prototype for 'detect_unit_config' [-Wmissing-prototypes] 246 | void __init detect_unit_config(unsigned long upr, unsigned long mask, | ^~~~~~~~~~~~~~~~~~ The function detect_unit_config is not used, just remove it. arch/openrisc/kernel/setup.c:221: warning: Function parameter or member 'fdt' not described in 'or1k_early_setup' Add @fdt docs to the function comment to suppress this warning. Reported-by: kernel test robot Signed-off-by: Stafford Horne Reviewed-by: Randy Dunlap --- arch/openrisc/include/asm/setup.h | 15 +++++++++++++++ arch/openrisc/kernel/setup.c | 16 +--------------- 2 files changed, 16 insertions(+), 15 deletions(-) create mode 100644 arch/openrisc/include/asm/setup.h (limited to 'arch') diff --git a/arch/openrisc/include/asm/setup.h b/arch/openrisc/include/asm/setup.h new file mode 100644 index 000000000000..9acbc5deda69 --- /dev/null +++ b/arch/openrisc/include/asm/setup.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021 Stafford Horne + */ +#ifndef _ASM_OR1K_SETUP_H +#define _ASM_OR1K_SETUP_H + +#include +#include + +#ifndef __ASSEMBLY__ +void __init or1k_early_setup(void *fdt); +#endif + +#endif /* _ASM_OR1K_SETUP_H */ diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 7eddcac0ef2f..0cd04d936a7a 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -210,6 +210,7 @@ void __init setup_cpuinfo(void) /** * or1k_early_setup + * @fdt: pointer to the start of the device tree in memory or NULL * * Handles the pointer to the device tree that this kernel is to use * for establishing the available platform devices.
@@ -243,21 +244,6 @@ static inline unsigned long extract_value(unsigned long reg, unsigned long mask) return mask & reg; } -void __init detect_unit_config(unsigned long upr, unsigned long mask, - char *text, void (*func) (void)) -{ - if (text != NULL) - printk("%s", text); - - if (upr & mask) { - if (func != NULL) - func(); - else - printk("present\n"); - } else - printk("not present\n"); -} - /* * calibrate_delay * -- cgit From 3acd5d8b7cf614d8724986b0dbfee52b0944d027 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Wed, 4 Aug 2021 12:40:31 +0800 Subject: arm: dts: mt8135: Move pinfunc to include/dt-bindings/pinctrl Move mt8135-pinfunc.h into include/dt-bindings/pinctrl so that we can include it in yaml examples. Signed-off-by: Hsin-Yi Wang Link: https://lore.kernel.org/r/20210804044033.3047296-1-hsinyi@chromium.org Signed-off-by: Linus Walleij --- arch/arm/boot/dts/mt8135-pinfunc.h | 1294 ------------------------------------ arch/arm/boot/dts/mt8135.dtsi | 2 +- 2 files changed, 1 insertion(+), 1295 deletions(-) delete mode 100644 arch/arm/boot/dts/mt8135-pinfunc.h (limited to 'arch') diff --git a/arch/arm/boot/dts/mt8135-pinfunc.h b/arch/arm/boot/dts/mt8135-pinfunc.h deleted file mode 100644 index ce0cb5a440eb..000000000000 --- a/arch/arm/boot/dts/mt8135-pinfunc.h +++ /dev/null @@ -1,1294 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2014 MediaTek Inc. - * Author: Hongzhou.Yang - */ - -#ifndef __DTS_MT8135_PINFUNC_H -#define __DTS_MT8135_PINFUNC_H - -#include - -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_GPIO0 (MTK_PIN_NO(0) | 0) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_MSDC0_DAT7 (MTK_PIN_NO(0) | 1) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_EINT49 (MTK_PIN_NO(0) | 2) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_I2SOUT_DAT (MTK_PIN_NO(0) | 3) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_DAC_DAT_OUT (MTK_PIN_NO(0) | 4) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_PCM1_DO (MTK_PIN_NO(0) | 5) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_SPI1_MO (MTK_PIN_NO(0) | 6) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_NALE (MTK_PIN_NO(0) | 7) - -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_GPIO1 (MTK_PIN_NO(1) | 0) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_MSDC0_DAT6 (MTK_PIN_NO(1) | 1) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_EINT48 (MTK_PIN_NO(1) | 2) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_I2SIN_WS (MTK_PIN_NO(1) | 3) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_DAC_WS (MTK_PIN_NO(1) | 4) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_PCM1_WS (MTK_PIN_NO(1) | 5) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_SPI1_CSN (MTK_PIN_NO(1) | 6) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_NCLE (MTK_PIN_NO(1) | 7) - -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_GPIO2 (MTK_PIN_NO(2) | 0) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_MSDC0_DAT5 (MTK_PIN_NO(2) | 1) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_EINT47 (MTK_PIN_NO(2) | 2) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_I2SIN_CK (MTK_PIN_NO(2) | 3) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_DAC_CK (MTK_PIN_NO(2) | 4) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_PCM1_CK (MTK_PIN_NO(2) | 5) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_SPI1_CLK (MTK_PIN_NO(2) | 6) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_NLD4 (MTK_PIN_NO(2) | 7) - -#define MT8135_PIN_3_MSDC0_DAT4__FUNC_GPIO3 (MTK_PIN_NO(3) | 0) -#define MT8135_PIN_3_MSDC0_DAT4__FUNC_MSDC0_DAT4 (MTK_PIN_NO(3) | 1) -#define MT8135_PIN_3_MSDC0_DAT4__FUNC_EINT46 (MTK_PIN_NO(3) | 2) -#define MT8135_PIN_3_MSDC0_DAT4__FUNC_A_FUNC_CK (MTK_PIN_NO(3) | 3) -#define MT8135_PIN_3_MSDC0_DAT4__FUNC_LSCE1B_2X (MTK_PIN_NO(3) | 6) -#define MT8135_PIN_3_MSDC0_DAT4__FUNC_NLD5 (MTK_PIN_NO(3) | 7) - -#define 
MT8135_PIN_4_MSDC0_CMD__FUNC_GPIO4 (MTK_PIN_NO(4) | 0) -#define MT8135_PIN_4_MSDC0_CMD__FUNC_MSDC0_CMD (MTK_PIN_NO(4) | 1) -#define MT8135_PIN_4_MSDC0_CMD__FUNC_EINT41 (MTK_PIN_NO(4) | 2) -#define MT8135_PIN_4_MSDC0_CMD__FUNC_A_FUNC_DOUT_0 (MTK_PIN_NO(4) | 3) -#define MT8135_PIN_4_MSDC0_CMD__FUNC_USB_TEST_IO_0 (MTK_PIN_NO(4) | 5) -#define MT8135_PIN_4_MSDC0_CMD__FUNC_LRSTB_2X (MTK_PIN_NO(4) | 6) -#define MT8135_PIN_4_MSDC0_CMD__FUNC_NRNB (MTK_PIN_NO(4) | 7) - -#define MT8135_PIN_5_MSDC0_CLK__FUNC_GPIO5 (MTK_PIN_NO(5) | 0) -#define MT8135_PIN_5_MSDC0_CLK__FUNC_MSDC0_CLK (MTK_PIN_NO(5) | 1) -#define MT8135_PIN_5_MSDC0_CLK__FUNC_EINT40 (MTK_PIN_NO(5) | 2) -#define MT8135_PIN_5_MSDC0_CLK__FUNC_A_FUNC_DOUT_1 (MTK_PIN_NO(5) | 3) -#define MT8135_PIN_5_MSDC0_CLK__FUNC_USB_TEST_IO_1 (MTK_PIN_NO(5) | 5) -#define MT8135_PIN_5_MSDC0_CLK__FUNC_LPTE (MTK_PIN_NO(5) | 6) -#define MT8135_PIN_5_MSDC0_CLK__FUNC_NREB (MTK_PIN_NO(5) | 7) - -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_GPIO6 (MTK_PIN_NO(6) | 0) -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_MSDC0_DAT3 (MTK_PIN_NO(6) | 1) -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_EINT45 (MTK_PIN_NO(6) | 2) -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_A_FUNC_DOUT_2 (MTK_PIN_NO(6) | 3) -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_USB_TEST_IO_2 (MTK_PIN_NO(6) | 5) -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_LSCE0B_2X (MTK_PIN_NO(6) | 6) -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_NLD7 (MTK_PIN_NO(6) | 7) - -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_GPIO7 (MTK_PIN_NO(7) | 0) -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_MSDC0_DAT2 (MTK_PIN_NO(7) | 1) -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_EINT44 (MTK_PIN_NO(7) | 2) -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_A_FUNC_DOUT_3 (MTK_PIN_NO(7) | 3) -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_USB_TEST_IO_3 (MTK_PIN_NO(7) | 5) -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_LSA0_2X (MTK_PIN_NO(7) | 6) -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_NLD14 (MTK_PIN_NO(7) | 7) - -#define MT8135_PIN_8_MSDC0_DAT1__FUNC_GPIO8 (MTK_PIN_NO(8) | 0) -#define MT8135_PIN_8_MSDC0_DAT1__FUNC_MSDC0_DAT1 (MTK_PIN_NO(8) | 1) -#define MT8135_PIN_8_MSDC0_DAT1__FUNC_EINT43 (MTK_PIN_NO(8) | 2) -#define MT8135_PIN_8_MSDC0_DAT1__FUNC_USB_TEST_IO_4 (MTK_PIN_NO(8) | 5) -#define MT8135_PIN_8_MSDC0_DAT1__FUNC_LSCK_2X (MTK_PIN_NO(8) | 6) -#define MT8135_PIN_8_MSDC0_DAT1__FUNC_NLD11 (MTK_PIN_NO(8) | 7) - -#define MT8135_PIN_9_MSDC0_DAT0__FUNC_GPIO9 (MTK_PIN_NO(9) | 0) -#define MT8135_PIN_9_MSDC0_DAT0__FUNC_MSDC0_DAT0 (MTK_PIN_NO(9) | 1) -#define MT8135_PIN_9_MSDC0_DAT0__FUNC_EINT42 (MTK_PIN_NO(9) | 2) -#define MT8135_PIN_9_MSDC0_DAT0__FUNC_USB_TEST_IO_5 (MTK_PIN_NO(9) | 5) -#define MT8135_PIN_9_MSDC0_DAT0__FUNC_LSDA_2X (MTK_PIN_NO(9) | 6) - -#define MT8135_PIN_10_NCEB0__FUNC_GPIO10 (MTK_PIN_NO(10) | 0) -#define MT8135_PIN_10_NCEB0__FUNC_NCEB0 (MTK_PIN_NO(10) | 1) -#define MT8135_PIN_10_NCEB0__FUNC_EINT139 (MTK_PIN_NO(10) | 2) -#define MT8135_PIN_10_NCEB0__FUNC_TESTA_OUT4 (MTK_PIN_NO(10) | 7) - -#define MT8135_PIN_11_NCEB1__FUNC_GPIO11 (MTK_PIN_NO(11) | 0) -#define MT8135_PIN_11_NCEB1__FUNC_NCEB1 (MTK_PIN_NO(11) | 1) -#define MT8135_PIN_11_NCEB1__FUNC_EINT140 (MTK_PIN_NO(11) | 2) -#define MT8135_PIN_11_NCEB1__FUNC_USB_DRVVBUS (MTK_PIN_NO(11) | 6) -#define MT8135_PIN_11_NCEB1__FUNC_TESTA_OUT5 (MTK_PIN_NO(11) | 7) - -#define MT8135_PIN_12_NRNB__FUNC_GPIO12 (MTK_PIN_NO(12) | 0) -#define MT8135_PIN_12_NRNB__FUNC_NRNB (MTK_PIN_NO(12) | 1) -#define MT8135_PIN_12_NRNB__FUNC_EINT141 (MTK_PIN_NO(12) | 2) -#define MT8135_PIN_12_NRNB__FUNC_A_FUNC_DOUT_4 (MTK_PIN_NO(12) | 3) -#define MT8135_PIN_12_NRNB__FUNC_TESTA_OUT6 (MTK_PIN_NO(12) | 7) - 
-#define MT8135_PIN_13_NCLE__FUNC_GPIO13 (MTK_PIN_NO(13) | 0) -#define MT8135_PIN_13_NCLE__FUNC_NCLE (MTK_PIN_NO(13) | 1) -#define MT8135_PIN_13_NCLE__FUNC_EINT142 (MTK_PIN_NO(13) | 2) -#define MT8135_PIN_13_NCLE__FUNC_A_FUNC_DOUT_5 (MTK_PIN_NO(13) | 3) -#define MT8135_PIN_13_NCLE__FUNC_CM2PDN_1X (MTK_PIN_NO(13) | 4) -#define MT8135_PIN_13_NCLE__FUNC_NALE (MTK_PIN_NO(13) | 6) -#define MT8135_PIN_13_NCLE__FUNC_TESTA_OUT7 (MTK_PIN_NO(13) | 7) - -#define MT8135_PIN_14_NALE__FUNC_GPIO14 (MTK_PIN_NO(14) | 0) -#define MT8135_PIN_14_NALE__FUNC_NALE (MTK_PIN_NO(14) | 1) -#define MT8135_PIN_14_NALE__FUNC_EINT143 (MTK_PIN_NO(14) | 2) -#define MT8135_PIN_14_NALE__FUNC_A_FUNC_DOUT_6 (MTK_PIN_NO(14) | 3) -#define MT8135_PIN_14_NALE__FUNC_CM2MCLK_1X (MTK_PIN_NO(14) | 4) -#define MT8135_PIN_14_NALE__FUNC_IRDA_RXD (MTK_PIN_NO(14) | 5) -#define MT8135_PIN_14_NALE__FUNC_NCLE (MTK_PIN_NO(14) | 6) -#define MT8135_PIN_14_NALE__FUNC_TESTA_OUT8 (MTK_PIN_NO(14) | 7) - -#define MT8135_PIN_15_NREB__FUNC_GPIO15 (MTK_PIN_NO(15) | 0) -#define MT8135_PIN_15_NREB__FUNC_NREB (MTK_PIN_NO(15) | 1) -#define MT8135_PIN_15_NREB__FUNC_EINT144 (MTK_PIN_NO(15) | 2) -#define MT8135_PIN_15_NREB__FUNC_A_FUNC_DOUT_7 (MTK_PIN_NO(15) | 3) -#define MT8135_PIN_15_NREB__FUNC_CM2RST_1X (MTK_PIN_NO(15) | 4) -#define MT8135_PIN_15_NREB__FUNC_IRDA_TXD (MTK_PIN_NO(15) | 5) -#define MT8135_PIN_15_NREB__FUNC_TESTA_OUT9 (MTK_PIN_NO(15) | 7) - -#define MT8135_PIN_16_NWEB__FUNC_GPIO16 (MTK_PIN_NO(16) | 0) -#define MT8135_PIN_16_NWEB__FUNC_NWEB (MTK_PIN_NO(16) | 1) -#define MT8135_PIN_16_NWEB__FUNC_EINT145 (MTK_PIN_NO(16) | 2) -#define MT8135_PIN_16_NWEB__FUNC_A_FUNC_DIN_0 (MTK_PIN_NO(16) | 3) -#define MT8135_PIN_16_NWEB__FUNC_CM2PCLK_1X (MTK_PIN_NO(16) | 4) -#define MT8135_PIN_16_NWEB__FUNC_IRDA_PDN (MTK_PIN_NO(16) | 5) -#define MT8135_PIN_16_NWEB__FUNC_TESTA_OUT10 (MTK_PIN_NO(16) | 7) - -#define MT8135_PIN_17_NLD0__FUNC_GPIO17 (MTK_PIN_NO(17) | 0) -#define MT8135_PIN_17_NLD0__FUNC_NLD0 (MTK_PIN_NO(17) | 1) -#define MT8135_PIN_17_NLD0__FUNC_EINT146 (MTK_PIN_NO(17) | 2) -#define MT8135_PIN_17_NLD0__FUNC_A_FUNC_DIN_1 (MTK_PIN_NO(17) | 3) -#define MT8135_PIN_17_NLD0__FUNC_CM2DAT_1X_0 (MTK_PIN_NO(17) | 4) -#define MT8135_PIN_17_NLD0__FUNC_I2SIN_CK (MTK_PIN_NO(17) | 5) -#define MT8135_PIN_17_NLD0__FUNC_DAC_CK (MTK_PIN_NO(17) | 6) -#define MT8135_PIN_17_NLD0__FUNC_TESTA_OUT11 (MTK_PIN_NO(17) | 7) - -#define MT8135_PIN_18_NLD1__FUNC_GPIO18 (MTK_PIN_NO(18) | 0) -#define MT8135_PIN_18_NLD1__FUNC_NLD1 (MTK_PIN_NO(18) | 1) -#define MT8135_PIN_18_NLD1__FUNC_EINT147 (MTK_PIN_NO(18) | 2) -#define MT8135_PIN_18_NLD1__FUNC_A_FUNC_DIN_2 (MTK_PIN_NO(18) | 3) -#define MT8135_PIN_18_NLD1__FUNC_CM2DAT_1X_1 (MTK_PIN_NO(18) | 4) -#define MT8135_PIN_18_NLD1__FUNC_I2SIN_WS (MTK_PIN_NO(18) | 5) -#define MT8135_PIN_18_NLD1__FUNC_DAC_WS (MTK_PIN_NO(18) | 6) -#define MT8135_PIN_18_NLD1__FUNC_TESTA_OUT12 (MTK_PIN_NO(18) | 7) - -#define MT8135_PIN_19_NLD2__FUNC_GPIO19 (MTK_PIN_NO(19) | 0) -#define MT8135_PIN_19_NLD2__FUNC_NLD2 (MTK_PIN_NO(19) | 1) -#define MT8135_PIN_19_NLD2__FUNC_EINT148 (MTK_PIN_NO(19) | 2) -#define MT8135_PIN_19_NLD2__FUNC_A_FUNC_DIN_3 (MTK_PIN_NO(19) | 3) -#define MT8135_PIN_19_NLD2__FUNC_CM2DAT_1X_2 (MTK_PIN_NO(19) | 4) -#define MT8135_PIN_19_NLD2__FUNC_I2SOUT_DAT (MTK_PIN_NO(19) | 5) -#define MT8135_PIN_19_NLD2__FUNC_DAC_DAT_OUT (MTK_PIN_NO(19) | 6) -#define MT8135_PIN_19_NLD2__FUNC_TESTA_OUT13 (MTK_PIN_NO(19) | 7) - -#define MT8135_PIN_20_NLD3__FUNC_GPIO20 (MTK_PIN_NO(20) | 0) -#define MT8135_PIN_20_NLD3__FUNC_NLD3 (MTK_PIN_NO(20) | 1) -#define 
MT8135_PIN_20_NLD3__FUNC_EINT149 (MTK_PIN_NO(20) | 2)
[roughly 1,000 further deleted lines elided: "-#define MT8135_PIN_<n>_<pad>__FUNC_<func> (MTK_PIN_NO(<n>) | <mode>)" entries covering every alternate function (modes 0 through 7) of pins 20 through 202 of the deleted mt8135-pinfunc.h]
-#endif /* __DTS_MT8135_PINFUNC_H */
diff --git a/arch/arm/boot/dts/mt8135.dtsi b/arch/arm/boot/dts/mt8135.dtsi
index 0e4e835026db..a031b3636318 100644
--- a/arch/arm/boot/dts/mt8135.dtsi
+++ b/arch/arm/boot/dts/mt8135.dtsi
@@ -9,7 +9,7 @@
 #include
 #include
 #include
-#include "mt8135-pinfunc.h"
+#include <dt-bindings/pinctrl/mt8135-pinfunc.h>

 / {
 	#address-cells = <2>;
-- cgit

From 4e233326e50bf2787a632f7625b9ef89819478ff Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang
Date: Wed, 4 Aug 2021 12:40:32 +0800
Subject: arm: dts: mt8183: Move pinfunc to include/dt-bindings/pinctrl

Move mt8183-pinfunc.h into include/dt-bindings/pinctrl so that we can
include it in yaml examples.

Signed-off-by: Hsin-Yi Wang
Link: https://lore.kernel.org/r/20210804044033.3047296-2-hsinyi@chromium.org
Signed-off-by: Linus Walleij
---
 arch/arm64/boot/dts/mediatek/mt8183-pinfunc.h | 1120 -------------------------
 arch/arm64/boot/dts/mediatek/mt8183.dtsi      |    2 +-
 2 files changed, 1 insertion(+), 1121 deletions(-)
 delete mode 100644 arch/arm64/boot/dts/mediatek/mt8183-pinfunc.h
(limited to 'arch')

diff --git a/arch/arm64/boot/dts/mediatek/mt8183-pinfunc.h b/arch/arm64/boot/dts/mediatek/mt8183-pinfunc.h
deleted file mode 100644
index 6221cd712718..000000000000
--- a/arch/arm64/boot/dts/mediatek/mt8183-pinfunc.h
+++ /dev/null
@@ -1,1120 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2018 MediaTek Inc.
- * Author: Zhiyong Tao
- *
- */
-
-#ifndef __MT8183_PINFUNC_H
-#define __MT8183_PINFUNC_H
-
-#include
-
-#define PINMUX_GPIO0__FUNC_GPIO0 (MTK_PIN_NO(0) | 0)
-#define PINMUX_GPIO0__FUNC_MRG_SYNC (MTK_PIN_NO(0) | 1)
-#define PINMUX_GPIO0__FUNC_PCM0_SYNC (MTK_PIN_NO(0) | 2)
-#define PINMUX_GPIO0__FUNC_TP_GPIO0_AO (MTK_PIN_NO(0) | 3)
-#define PINMUX_GPIO0__FUNC_SRCLKENAI0 (MTK_PIN_NO(0) | 4)
-#define PINMUX_GPIO0__FUNC_SCP_SPI2_CS (MTK_PIN_NO(0) | 5)
-#define PINMUX_GPIO0__FUNC_I2S3_MCK (MTK_PIN_NO(0) | 6)
-#define PINMUX_GPIO0__FUNC_SPI2_CSB (MTK_PIN_NO(0) | 7)
-
-#define PINMUX_GPIO1__FUNC_GPIO1 (MTK_PIN_NO(1) | 0)
-#define PINMUX_GPIO1__FUNC_MRG_CLK (MTK_PIN_NO(1) | 1)
-#define PINMUX_GPIO1__FUNC_PCM0_CLK (MTK_PIN_NO(1) | 2)
-#define PINMUX_GPIO1__FUNC_TP_GPIO1_AO (MTK_PIN_NO(1) | 3)
-#define PINMUX_GPIO1__FUNC_CLKM3 (MTK_PIN_NO(1) | 4)
-#define PINMUX_GPIO1__FUNC_SCP_SPI2_MO (MTK_PIN_NO(1) | 5)
-#define PINMUX_GPIO1__FUNC_I2S3_BCK (MTK_PIN_NO(1) | 6)
-#define PINMUX_GPIO1__FUNC_SPI2_MO (MTK_PIN_NO(1) | 7)
-
-#define PINMUX_GPIO2__FUNC_GPIO2 (MTK_PIN_NO(2) | 0)
-#define PINMUX_GPIO2__FUNC_MRG_DO (MTK_PIN_NO(2) | 1)
-#define PINMUX_GPIO2__FUNC_PCM0_DO (MTK_PIN_NO(2) | 2)
-#define PINMUX_GPIO2__FUNC_TP_GPIO2_AO (MTK_PIN_NO(2) | 3)
-#define PINMUX_GPIO2__FUNC_SCL6 (MTK_PIN_NO(2) | 4)
-#define PINMUX_GPIO2__FUNC_SCP_SPI2_CK (MTK_PIN_NO(2) | 5)
-#define PINMUX_GPIO2__FUNC_I2S3_LRCK (MTK_PIN_NO(2) | 6)
-#define PINMUX_GPIO2__FUNC_SPI2_CLK (MTK_PIN_NO(2) | 7)
-
-#define PINMUX_GPIO3__FUNC_GPIO3 (MTK_PIN_NO(3) | 0)
-#define PINMUX_GPIO3__FUNC_MRG_DI (MTK_PIN_NO(3) | 1)
-#define PINMUX_GPIO3__FUNC_PCM0_DI (MTK_PIN_NO(3) | 2)
-#define PINMUX_GPIO3__FUNC_TP_GPIO3_AO (MTK_PIN_NO(3) | 3)
-#define PINMUX_GPIO3__FUNC_SDA6 (MTK_PIN_NO(3) | 4)
-#define PINMUX_GPIO3__FUNC_TDM_MCK (MTK_PIN_NO(3) | 5)
-#define PINMUX_GPIO3__FUNC_I2S3_DO (MTK_PIN_NO(3) | 6)
-#define PINMUX_GPIO3__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(3) | 7)
-
PINMUX_GPIO4__FUNC_GPIO4 (MTK_PIN_NO(4) | 0) -#define PINMUX_GPIO4__FUNC_PWM_B (MTK_PIN_NO(4) | 1) -#define PINMUX_GPIO4__FUNC_I2S0_MCK (MTK_PIN_NO(4) | 2) -#define PINMUX_GPIO4__FUNC_SSPM_UTXD_AO (MTK_PIN_NO(4) | 3) -#define PINMUX_GPIO4__FUNC_MD_URXD1 (MTK_PIN_NO(4) | 4) -#define PINMUX_GPIO4__FUNC_TDM_BCK (MTK_PIN_NO(4) | 5) -#define PINMUX_GPIO4__FUNC_TP_GPIO4_AO (MTK_PIN_NO(4) | 6) -#define PINMUX_GPIO4__FUNC_DAP_MD32_SWD (MTK_PIN_NO(4) | 7) - -#define PINMUX_GPIO5__FUNC_GPIO5 (MTK_PIN_NO(5) | 0) -#define PINMUX_GPIO5__FUNC_PWM_C (MTK_PIN_NO(5) | 1) -#define PINMUX_GPIO5__FUNC_I2S0_BCK (MTK_PIN_NO(5) | 2) -#define PINMUX_GPIO5__FUNC_SSPM_URXD_AO (MTK_PIN_NO(5) | 3) -#define PINMUX_GPIO5__FUNC_MD_UTXD1 (MTK_PIN_NO(5) | 4) -#define PINMUX_GPIO5__FUNC_TDM_LRCK (MTK_PIN_NO(5) | 5) -#define PINMUX_GPIO5__FUNC_TP_GPIO5_AO (MTK_PIN_NO(5) | 6) -#define PINMUX_GPIO5__FUNC_DAP_MD32_SWCK (MTK_PIN_NO(5) | 7) - -#define PINMUX_GPIO6__FUNC_GPIO6 (MTK_PIN_NO(6) | 0) -#define PINMUX_GPIO6__FUNC_PWM_A (MTK_PIN_NO(6) | 1) -#define PINMUX_GPIO6__FUNC_I2S0_LRCK (MTK_PIN_NO(6) | 2) -#define PINMUX_GPIO6__FUNC_IDDIG (MTK_PIN_NO(6) | 3) -#define PINMUX_GPIO6__FUNC_MD_URXD0 (MTK_PIN_NO(6) | 4) -#define PINMUX_GPIO6__FUNC_TDM_DATA0 (MTK_PIN_NO(6) | 5) -#define PINMUX_GPIO6__FUNC_TP_GPIO6_AO (MTK_PIN_NO(6) | 6) -#define PINMUX_GPIO6__FUNC_CMFLASH (MTK_PIN_NO(6) | 7) - -#define PINMUX_GPIO7__FUNC_GPIO7 (MTK_PIN_NO(7) | 0) -#define PINMUX_GPIO7__FUNC_SPI1_B_MI (MTK_PIN_NO(7) | 1) -#define PINMUX_GPIO7__FUNC_I2S0_DI (MTK_PIN_NO(7) | 2) -#define PINMUX_GPIO7__FUNC_USB_DRVVBUS (MTK_PIN_NO(7) | 3) -#define PINMUX_GPIO7__FUNC_MD_UTXD0 (MTK_PIN_NO(7) | 4) -#define PINMUX_GPIO7__FUNC_TDM_DATA1 (MTK_PIN_NO(7) | 5) -#define PINMUX_GPIO7__FUNC_TP_GPIO7_AO (MTK_PIN_NO(7) | 6) -#define PINMUX_GPIO7__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(7) | 7) - -#define PINMUX_GPIO8__FUNC_GPIO8 (MTK_PIN_NO(8) | 0) -#define PINMUX_GPIO8__FUNC_SPI1_B_CSB (MTK_PIN_NO(8) | 1) -#define PINMUX_GPIO8__FUNC_ANT_SEL3 (MTK_PIN_NO(8) | 2) -#define PINMUX_GPIO8__FUNC_SCL7 (MTK_PIN_NO(8) | 3) -#define PINMUX_GPIO8__FUNC_CONN_MCU_TRST_B (MTK_PIN_NO(8) | 4) -#define PINMUX_GPIO8__FUNC_TDM_DATA2 (MTK_PIN_NO(8) | 5) -#define PINMUX_GPIO8__FUNC_MD_INT0 (MTK_PIN_NO(8) | 6) -#define PINMUX_GPIO8__FUNC_JTRSTN_SEL1 (MTK_PIN_NO(8) | 7) - -#define PINMUX_GPIO9__FUNC_GPIO9 (MTK_PIN_NO(9) | 0) -#define PINMUX_GPIO9__FUNC_SPI1_B_MO (MTK_PIN_NO(9) | 1) -#define PINMUX_GPIO9__FUNC_ANT_SEL4 (MTK_PIN_NO(9) | 2) -#define PINMUX_GPIO9__FUNC_CMMCLK2 (MTK_PIN_NO(9) | 3) -#define PINMUX_GPIO9__FUNC_CONN_MCU_DBGACK_N (MTK_PIN_NO(9) | 4) -#define PINMUX_GPIO9__FUNC_SSPM_JTAG_TRSTN (MTK_PIN_NO(9) | 5) -#define PINMUX_GPIO9__FUNC_IO_JTAG_TRSTN (MTK_PIN_NO(9) | 6) -#define PINMUX_GPIO9__FUNC_DBG_MON_B10 (MTK_PIN_NO(9) | 7) - -#define PINMUX_GPIO10__FUNC_GPIO10 (MTK_PIN_NO(10) | 0) -#define PINMUX_GPIO10__FUNC_SPI1_B_CLK (MTK_PIN_NO(10) | 1) -#define PINMUX_GPIO10__FUNC_ANT_SEL5 (MTK_PIN_NO(10) | 2) -#define PINMUX_GPIO10__FUNC_CMMCLK3 (MTK_PIN_NO(10) | 3) -#define PINMUX_GPIO10__FUNC_CONN_MCU_DBGI_N (MTK_PIN_NO(10) | 4) -#define PINMUX_GPIO10__FUNC_TDM_DATA3 (MTK_PIN_NO(10) | 5) -#define PINMUX_GPIO10__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(10) | 6) -#define PINMUX_GPIO10__FUNC_DBG_MON_B11 (MTK_PIN_NO(10) | 7) - -#define PINMUX_GPIO11__FUNC_GPIO11 (MTK_PIN_NO(11) | 0) -#define PINMUX_GPIO11__FUNC_TP_URXD1_AO (MTK_PIN_NO(11) | 1) -#define PINMUX_GPIO11__FUNC_IDDIG (MTK_PIN_NO(11) | 2) -#define PINMUX_GPIO11__FUNC_SCL6 (MTK_PIN_NO(11) | 3) -#define PINMUX_GPIO11__FUNC_UCTS1 (MTK_PIN_NO(11) | 
4) -#define PINMUX_GPIO11__FUNC_UCTS0 (MTK_PIN_NO(11) | 5) -#define PINMUX_GPIO11__FUNC_SRCLKENAI1 (MTK_PIN_NO(11) | 6) -#define PINMUX_GPIO11__FUNC_I2S5_MCK (MTK_PIN_NO(11) | 7) - -#define PINMUX_GPIO12__FUNC_GPIO12 (MTK_PIN_NO(12) | 0) -#define PINMUX_GPIO12__FUNC_TP_UTXD1_AO (MTK_PIN_NO(12) | 1) -#define PINMUX_GPIO12__FUNC_USB_DRVVBUS (MTK_PIN_NO(12) | 2) -#define PINMUX_GPIO12__FUNC_SDA6 (MTK_PIN_NO(12) | 3) -#define PINMUX_GPIO12__FUNC_URTS1 (MTK_PIN_NO(12) | 4) -#define PINMUX_GPIO12__FUNC_URTS0 (MTK_PIN_NO(12) | 5) -#define PINMUX_GPIO12__FUNC_I2S2_DI2 (MTK_PIN_NO(12) | 6) -#define PINMUX_GPIO12__FUNC_I2S5_BCK (MTK_PIN_NO(12) | 7) - -#define PINMUX_GPIO13__FUNC_GPIO13 (MTK_PIN_NO(13) | 0) -#define PINMUX_GPIO13__FUNC_DBPI_D0 (MTK_PIN_NO(13) | 1) -#define PINMUX_GPIO13__FUNC_SPI5_MI (MTK_PIN_NO(13) | 2) -#define PINMUX_GPIO13__FUNC_PCM0_SYNC (MTK_PIN_NO(13) | 3) -#define PINMUX_GPIO13__FUNC_MD_URXD0 (MTK_PIN_NO(13) | 4) -#define PINMUX_GPIO13__FUNC_ANT_SEL3 (MTK_PIN_NO(13) | 5) -#define PINMUX_GPIO13__FUNC_I2S0_MCK (MTK_PIN_NO(13) | 6) -#define PINMUX_GPIO13__FUNC_DBG_MON_B15 (MTK_PIN_NO(13) | 7) - -#define PINMUX_GPIO14__FUNC_GPIO14 (MTK_PIN_NO(14) | 0) -#define PINMUX_GPIO14__FUNC_DBPI_D1 (MTK_PIN_NO(14) | 1) -#define PINMUX_GPIO14__FUNC_SPI5_CSB (MTK_PIN_NO(14) | 2) -#define PINMUX_GPIO14__FUNC_PCM0_CLK (MTK_PIN_NO(14) | 3) -#define PINMUX_GPIO14__FUNC_MD_UTXD0 (MTK_PIN_NO(14) | 4) -#define PINMUX_GPIO14__FUNC_ANT_SEL4 (MTK_PIN_NO(14) | 5) -#define PINMUX_GPIO14__FUNC_I2S0_BCK (MTK_PIN_NO(14) | 6) -#define PINMUX_GPIO14__FUNC_DBG_MON_B16 (MTK_PIN_NO(14) | 7) - -#define PINMUX_GPIO15__FUNC_GPIO15 (MTK_PIN_NO(15) | 0) -#define PINMUX_GPIO15__FUNC_DBPI_D2 (MTK_PIN_NO(15) | 1) -#define PINMUX_GPIO15__FUNC_SPI5_MO (MTK_PIN_NO(15) | 2) -#define PINMUX_GPIO15__FUNC_PCM0_DO (MTK_PIN_NO(15) | 3) -#define PINMUX_GPIO15__FUNC_MD_URXD1 (MTK_PIN_NO(15) | 4) -#define PINMUX_GPIO15__FUNC_ANT_SEL5 (MTK_PIN_NO(15) | 5) -#define PINMUX_GPIO15__FUNC_I2S0_LRCK (MTK_PIN_NO(15) | 6) -#define PINMUX_GPIO15__FUNC_DBG_MON_B17 (MTK_PIN_NO(15) | 7) - -#define PINMUX_GPIO16__FUNC_GPIO16 (MTK_PIN_NO(16) | 0) -#define PINMUX_GPIO16__FUNC_DBPI_D3 (MTK_PIN_NO(16) | 1) -#define PINMUX_GPIO16__FUNC_SPI5_CLK (MTK_PIN_NO(16) | 2) -#define PINMUX_GPIO16__FUNC_PCM0_DI (MTK_PIN_NO(16) | 3) -#define PINMUX_GPIO16__FUNC_MD_UTXD1 (MTK_PIN_NO(16) | 4) -#define PINMUX_GPIO16__FUNC_ANT_SEL6 (MTK_PIN_NO(16) | 5) -#define PINMUX_GPIO16__FUNC_I2S0_DI (MTK_PIN_NO(16) | 6) -#define PINMUX_GPIO16__FUNC_DBG_MON_B23 (MTK_PIN_NO(16) | 7) - -#define PINMUX_GPIO17__FUNC_GPIO17 (MTK_PIN_NO(17) | 0) -#define PINMUX_GPIO17__FUNC_DBPI_D4 (MTK_PIN_NO(17) | 1) -#define PINMUX_GPIO17__FUNC_SPI4_MI (MTK_PIN_NO(17) | 2) -#define PINMUX_GPIO17__FUNC_CONN_MCU_TRST_B (MTK_PIN_NO(17) | 3) -#define PINMUX_GPIO17__FUNC_MD_INT0 (MTK_PIN_NO(17) | 4) -#define PINMUX_GPIO17__FUNC_ANT_SEL7 (MTK_PIN_NO(17) | 5) -#define PINMUX_GPIO17__FUNC_I2S3_MCK (MTK_PIN_NO(17) | 6) -#define PINMUX_GPIO17__FUNC_DBG_MON_A1 (MTK_PIN_NO(17) | 7) - -#define PINMUX_GPIO18__FUNC_GPIO18 (MTK_PIN_NO(18) | 0) -#define PINMUX_GPIO18__FUNC_DBPI_D5 (MTK_PIN_NO(18) | 1) -#define PINMUX_GPIO18__FUNC_SPI4_CSB (MTK_PIN_NO(18) | 2) -#define PINMUX_GPIO18__FUNC_CONN_MCU_DBGI_N (MTK_PIN_NO(18) | 3) -#define PINMUX_GPIO18__FUNC_MD_INT0 (MTK_PIN_NO(18) | 4) -#define PINMUX_GPIO18__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(18) | 5) -#define PINMUX_GPIO18__FUNC_I2S3_BCK (MTK_PIN_NO(18) | 6) -#define PINMUX_GPIO18__FUNC_DBG_MON_A2 (MTK_PIN_NO(18) | 7) - -#define PINMUX_GPIO19__FUNC_GPIO19 (MTK_PIN_NO(19) 
| 0) -#define PINMUX_GPIO19__FUNC_DBPI_D6 (MTK_PIN_NO(19) | 1) -#define PINMUX_GPIO19__FUNC_SPI4_MO (MTK_PIN_NO(19) | 2) -#define PINMUX_GPIO19__FUNC_CONN_MCU_TDO (MTK_PIN_NO(19) | 3) -#define PINMUX_GPIO19__FUNC_MD_INT2_C2K_UIM1_HOT_PLUG (MTK_PIN_NO(19) | 4) -#define PINMUX_GPIO19__FUNC_URXD1 (MTK_PIN_NO(19) | 5) -#define PINMUX_GPIO19__FUNC_I2S3_LRCK (MTK_PIN_NO(19) | 6) -#define PINMUX_GPIO19__FUNC_DBG_MON_A3 (MTK_PIN_NO(19) | 7) - -#define PINMUX_GPIO20__FUNC_GPIO20 (MTK_PIN_NO(20) | 0) -#define PINMUX_GPIO20__FUNC_DBPI_D7 (MTK_PIN_NO(20) | 1) -#define PINMUX_GPIO20__FUNC_SPI4_CLK (MTK_PIN_NO(20) | 2) -#define PINMUX_GPIO20__FUNC_CONN_MCU_DBGACK_N (MTK_PIN_NO(20) | 3) -#define PINMUX_GPIO20__FUNC_MD_INT1_C2K_UIM0_HOT_PLUG (MTK_PIN_NO(20) | 4) -#define PINMUX_GPIO20__FUNC_UTXD1 (MTK_PIN_NO(20) | 5) -#define PINMUX_GPIO20__FUNC_I2S3_DO (MTK_PIN_NO(20) | 6) -#define PINMUX_GPIO20__FUNC_DBG_MON_A19 (MTK_PIN_NO(20) | 7) - -#define PINMUX_GPIO21__FUNC_GPIO21 (MTK_PIN_NO(21) | 0) -#define PINMUX_GPIO21__FUNC_DBPI_D8 (MTK_PIN_NO(21) | 1) -#define PINMUX_GPIO21__FUNC_SPI3_MI (MTK_PIN_NO(21) | 2) -#define PINMUX_GPIO21__FUNC_CONN_MCU_TMS (MTK_PIN_NO(21) | 3) -#define PINMUX_GPIO21__FUNC_DAP_MD32_SWD (MTK_PIN_NO(21) | 4) -#define PINMUX_GPIO21__FUNC_CONN_MCU_AICE_TMSC (MTK_PIN_NO(21) | 5) -#define PINMUX_GPIO21__FUNC_I2S2_MCK (MTK_PIN_NO(21) | 6) -#define PINMUX_GPIO21__FUNC_DBG_MON_B5 (MTK_PIN_NO(21) | 7) - -#define PINMUX_GPIO22__FUNC_GPIO22 (MTK_PIN_NO(22) | 0) -#define PINMUX_GPIO22__FUNC_DBPI_D9 (MTK_PIN_NO(22) | 1) -#define PINMUX_GPIO22__FUNC_SPI3_CSB (MTK_PIN_NO(22) | 2) -#define PINMUX_GPIO22__FUNC_CONN_MCU_TCK (MTK_PIN_NO(22) | 3) -#define PINMUX_GPIO22__FUNC_DAP_MD32_SWCK (MTK_PIN_NO(22) | 4) -#define PINMUX_GPIO22__FUNC_CONN_MCU_AICE_TCKC (MTK_PIN_NO(22) | 5) -#define PINMUX_GPIO22__FUNC_I2S2_BCK (MTK_PIN_NO(22) | 6) -#define PINMUX_GPIO22__FUNC_DBG_MON_B6 (MTK_PIN_NO(22) | 7) - -#define PINMUX_GPIO23__FUNC_GPIO23 (MTK_PIN_NO(23) | 0) -#define PINMUX_GPIO23__FUNC_DBPI_D10 (MTK_PIN_NO(23) | 1) -#define PINMUX_GPIO23__FUNC_SPI3_MO (MTK_PIN_NO(23) | 2) -#define PINMUX_GPIO23__FUNC_CONN_MCU_TDI (MTK_PIN_NO(23) | 3) -#define PINMUX_GPIO23__FUNC_UCTS1 (MTK_PIN_NO(23) | 4) -#define PINMUX_GPIO23__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(23) | 5) -#define PINMUX_GPIO23__FUNC_I2S2_LRCK (MTK_PIN_NO(23) | 6) -#define PINMUX_GPIO23__FUNC_DBG_MON_B7 (MTK_PIN_NO(23) | 7) - -#define PINMUX_GPIO24__FUNC_GPIO24 (MTK_PIN_NO(24) | 0) -#define PINMUX_GPIO24__FUNC_DBPI_D11 (MTK_PIN_NO(24) | 1) -#define PINMUX_GPIO24__FUNC_SPI3_CLK (MTK_PIN_NO(24) | 2) -#define PINMUX_GPIO24__FUNC_SRCLKENAI0 (MTK_PIN_NO(24) | 3) -#define PINMUX_GPIO24__FUNC_URTS1 (MTK_PIN_NO(24) | 4) -#define PINMUX_GPIO24__FUNC_IO_JTAG_TCK (MTK_PIN_NO(24) | 5) -#define PINMUX_GPIO24__FUNC_I2S2_DI (MTK_PIN_NO(24) | 6) -#define PINMUX_GPIO24__FUNC_DBG_MON_B31 (MTK_PIN_NO(24) | 7) - -#define PINMUX_GPIO25__FUNC_GPIO25 (MTK_PIN_NO(25) | 0) -#define PINMUX_GPIO25__FUNC_DBPI_HSYNC (MTK_PIN_NO(25) | 1) -#define PINMUX_GPIO25__FUNC_ANT_SEL0 (MTK_PIN_NO(25) | 2) -#define PINMUX_GPIO25__FUNC_SCL6 (MTK_PIN_NO(25) | 3) -#define PINMUX_GPIO25__FUNC_KPCOL2 (MTK_PIN_NO(25) | 4) -#define PINMUX_GPIO25__FUNC_IO_JTAG_TMS (MTK_PIN_NO(25) | 5) -#define PINMUX_GPIO25__FUNC_I2S1_MCK (MTK_PIN_NO(25) | 6) -#define PINMUX_GPIO25__FUNC_DBG_MON_B0 (MTK_PIN_NO(25) | 7) - -#define PINMUX_GPIO26__FUNC_GPIO26 (MTK_PIN_NO(26) | 0) -#define PINMUX_GPIO26__FUNC_DBPI_VSYNC (MTK_PIN_NO(26) | 1) -#define PINMUX_GPIO26__FUNC_ANT_SEL1 (MTK_PIN_NO(26) | 2) -#define 
PINMUX_GPIO26__FUNC_SDA6 (MTK_PIN_NO(26) | 3) -#define PINMUX_GPIO26__FUNC_KPROW2 (MTK_PIN_NO(26) | 4) -#define PINMUX_GPIO26__FUNC_IO_JTAG_TDI (MTK_PIN_NO(26) | 5) -#define PINMUX_GPIO26__FUNC_I2S1_BCK (MTK_PIN_NO(26) | 6) -#define PINMUX_GPIO26__FUNC_DBG_MON_B1 (MTK_PIN_NO(26) | 7) - -#define PINMUX_GPIO27__FUNC_GPIO27 (MTK_PIN_NO(27) | 0) -#define PINMUX_GPIO27__FUNC_DBPI_DE (MTK_PIN_NO(27) | 1) -#define PINMUX_GPIO27__FUNC_ANT_SEL2 (MTK_PIN_NO(27) | 2) -#define PINMUX_GPIO27__FUNC_SCL7 (MTK_PIN_NO(27) | 3) -#define PINMUX_GPIO27__FUNC_DMIC_CLK (MTK_PIN_NO(27) | 4) -#define PINMUX_GPIO27__FUNC_IO_JTAG_TDO (MTK_PIN_NO(27) | 5) -#define PINMUX_GPIO27__FUNC_I2S1_LRCK (MTK_PIN_NO(27) | 6) -#define PINMUX_GPIO27__FUNC_DBG_MON_B9 (MTK_PIN_NO(27) | 7) - -#define PINMUX_GPIO28__FUNC_GPIO28 (MTK_PIN_NO(28) | 0) -#define PINMUX_GPIO28__FUNC_DBPI_CK (MTK_PIN_NO(28) | 1) -#define PINMUX_GPIO28__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(28) | 2) -#define PINMUX_GPIO28__FUNC_SDA7 (MTK_PIN_NO(28) | 3) -#define PINMUX_GPIO28__FUNC_DMIC_DAT (MTK_PIN_NO(28) | 4) -#define PINMUX_GPIO28__FUNC_IO_JTAG_TRSTN (MTK_PIN_NO(28) | 5) -#define PINMUX_GPIO28__FUNC_I2S1_DO (MTK_PIN_NO(28) | 6) -#define PINMUX_GPIO28__FUNC_DBG_MON_B32 (MTK_PIN_NO(28) | 7) - -#define PINMUX_GPIO29__FUNC_GPIO29 (MTK_PIN_NO(29) | 0) -#define PINMUX_GPIO29__FUNC_MSDC1_CLK (MTK_PIN_NO(29) | 1) -#define PINMUX_GPIO29__FUNC_IO_JTAG_TCK (MTK_PIN_NO(29) | 2) -#define PINMUX_GPIO29__FUNC_UDI_TCK (MTK_PIN_NO(29) | 3) -#define PINMUX_GPIO29__FUNC_CONN_DSP_JCK (MTK_PIN_NO(29) | 4) -#define PINMUX_GPIO29__FUNC_SSPM_JTAG_TCK (MTK_PIN_NO(29) | 5) -#define PINMUX_GPIO29__FUNC_PCM1_CLK (MTK_PIN_NO(29) | 6) -#define PINMUX_GPIO29__FUNC_DBG_MON_A6 (MTK_PIN_NO(29) | 7) - -#define PINMUX_GPIO30__FUNC_GPIO30 (MTK_PIN_NO(30) | 0) -#define PINMUX_GPIO30__FUNC_MSDC1_DAT3 (MTK_PIN_NO(30) | 1) -#define PINMUX_GPIO30__FUNC_DAP_MD32_SWD (MTK_PIN_NO(30) | 2) -#define PINMUX_GPIO30__FUNC_CONN_MCU_AICE_TMSC (MTK_PIN_NO(30) | 3) -#define PINMUX_GPIO30__FUNC_CONN_DSP_JINTP (MTK_PIN_NO(30) | 4) -#define PINMUX_GPIO30__FUNC_SSPM_JTAG_TRSTN (MTK_PIN_NO(30) | 5) -#define PINMUX_GPIO30__FUNC_PCM1_DI (MTK_PIN_NO(30) | 6) -#define PINMUX_GPIO30__FUNC_DBG_MON_A7 (MTK_PIN_NO(30) | 7) - -#define PINMUX_GPIO31__FUNC_GPIO31 (MTK_PIN_NO(31) | 0) -#define PINMUX_GPIO31__FUNC_MSDC1_CMD (MTK_PIN_NO(31) | 1) -#define PINMUX_GPIO31__FUNC_IO_JTAG_TMS (MTK_PIN_NO(31) | 2) -#define PINMUX_GPIO31__FUNC_UDI_TMS (MTK_PIN_NO(31) | 3) -#define PINMUX_GPIO31__FUNC_CONN_DSP_JMS (MTK_PIN_NO(31) | 4) -#define PINMUX_GPIO31__FUNC_SSPM_JTAG_TMS (MTK_PIN_NO(31) | 5) -#define PINMUX_GPIO31__FUNC_PCM1_SYNC (MTK_PIN_NO(31) | 6) -#define PINMUX_GPIO31__FUNC_DBG_MON_A8 (MTK_PIN_NO(31) | 7) - -#define PINMUX_GPIO32__FUNC_GPIO32 (MTK_PIN_NO(32) | 0) -#define PINMUX_GPIO32__FUNC_MSDC1_DAT0 (MTK_PIN_NO(32) | 1) -#define PINMUX_GPIO32__FUNC_IO_JTAG_TDI (MTK_PIN_NO(32) | 2) -#define PINMUX_GPIO32__FUNC_UDI_TDI (MTK_PIN_NO(32) | 3) -#define PINMUX_GPIO32__FUNC_CONN_DSP_JDI (MTK_PIN_NO(32) | 4) -#define PINMUX_GPIO32__FUNC_SSPM_JTAG_TDI (MTK_PIN_NO(32) | 5) -#define PINMUX_GPIO32__FUNC_PCM1_DO0 (MTK_PIN_NO(32) | 6) -#define PINMUX_GPIO32__FUNC_DBG_MON_A9 (MTK_PIN_NO(32) | 7) - -#define PINMUX_GPIO33__FUNC_GPIO33 (MTK_PIN_NO(33) | 0) -#define PINMUX_GPIO33__FUNC_MSDC1_DAT2 (MTK_PIN_NO(33) | 1) -#define PINMUX_GPIO33__FUNC_IO_JTAG_TRSTN (MTK_PIN_NO(33) | 2) -#define PINMUX_GPIO33__FUNC_UDI_NTRST (MTK_PIN_NO(33) | 3) -#define PINMUX_GPIO33__FUNC_DAP_MD32_SWCK (MTK_PIN_NO(33) | 4) -#define 
PINMUX_GPIO33__FUNC_CONN_MCU_AICE_TCKC (MTK_PIN_NO(33) | 5) -#define PINMUX_GPIO33__FUNC_PCM1_DO2 (MTK_PIN_NO(33) | 6) -#define PINMUX_GPIO33__FUNC_DBG_MON_A10 (MTK_PIN_NO(33) | 7) - -#define PINMUX_GPIO34__FUNC_GPIO34 (MTK_PIN_NO(34) | 0) -#define PINMUX_GPIO34__FUNC_MSDC1_DAT1 (MTK_PIN_NO(34) | 1) -#define PINMUX_GPIO34__FUNC_IO_JTAG_TDO (MTK_PIN_NO(34) | 2) -#define PINMUX_GPIO34__FUNC_UDI_TDO (MTK_PIN_NO(34) | 3) -#define PINMUX_GPIO34__FUNC_CONN_DSP_JDO (MTK_PIN_NO(34) | 4) -#define PINMUX_GPIO34__FUNC_SSPM_JTAG_TDO (MTK_PIN_NO(34) | 5) -#define PINMUX_GPIO34__FUNC_PCM1_DO1 (MTK_PIN_NO(34) | 6) -#define PINMUX_GPIO34__FUNC_DBG_MON_A11 (MTK_PIN_NO(34) | 7) - -#define PINMUX_GPIO35__FUNC_GPIO35 (MTK_PIN_NO(35) | 0) -#define PINMUX_GPIO35__FUNC_MD1_SIM2_SIO (MTK_PIN_NO(35) | 1) -#define PINMUX_GPIO35__FUNC_CCU_JTAG_TDO (MTK_PIN_NO(35) | 2) -#define PINMUX_GPIO35__FUNC_MD1_SIM1_SIO (MTK_PIN_NO(35) | 3) -#define PINMUX_GPIO35__FUNC_SCP_JTAG_TDO (MTK_PIN_NO(35) | 5) -#define PINMUX_GPIO35__FUNC_CONN_DSP_JMS (MTK_PIN_NO(35) | 6) -#define PINMUX_GPIO35__FUNC_DBG_MON_A28 (MTK_PIN_NO(35) | 7) - -#define PINMUX_GPIO36__FUNC_GPIO36 (MTK_PIN_NO(36) | 0) -#define PINMUX_GPIO36__FUNC_MD1_SIM2_SRST (MTK_PIN_NO(36) | 1) -#define PINMUX_GPIO36__FUNC_CCU_JTAG_TMS (MTK_PIN_NO(36) | 2) -#define PINMUX_GPIO36__FUNC_MD1_SIM1_SRST (MTK_PIN_NO(36) | 3) -#define PINMUX_GPIO36__FUNC_CONN_MCU_AICE_TMSC (MTK_PIN_NO(36) | 4) -#define PINMUX_GPIO36__FUNC_SCP_JTAG_TMS (MTK_PIN_NO(36) | 5) -#define PINMUX_GPIO36__FUNC_CONN_DSP_JINTP (MTK_PIN_NO(36) | 6) -#define PINMUX_GPIO36__FUNC_DBG_MON_A29 (MTK_PIN_NO(36) | 7) - -#define PINMUX_GPIO37__FUNC_GPIO37 (MTK_PIN_NO(37) | 0) -#define PINMUX_GPIO37__FUNC_MD1_SIM2_SCLK (MTK_PIN_NO(37) | 1) -#define PINMUX_GPIO37__FUNC_CCU_JTAG_TDI (MTK_PIN_NO(37) | 2) -#define PINMUX_GPIO37__FUNC_MD1_SIM1_SCLK (MTK_PIN_NO(37) | 3) -#define PINMUX_GPIO37__FUNC_SCP_JTAG_TDI (MTK_PIN_NO(37) | 5) -#define PINMUX_GPIO37__FUNC_CONN_DSP_JDO (MTK_PIN_NO(37) | 6) -#define PINMUX_GPIO37__FUNC_DBG_MON_A30 (MTK_PIN_NO(37) | 7) - -#define PINMUX_GPIO38__FUNC_GPIO38 (MTK_PIN_NO(38) | 0) -#define PINMUX_GPIO38__FUNC_MD1_SIM1_SCLK (MTK_PIN_NO(38) | 1) -#define PINMUX_GPIO38__FUNC_MD1_SIM2_SCLK (MTK_PIN_NO(38) | 3) -#define PINMUX_GPIO38__FUNC_CONN_MCU_AICE_TCKC (MTK_PIN_NO(38) | 4) -#define PINMUX_GPIO38__FUNC_DBG_MON_A20 (MTK_PIN_NO(38) | 7) - -#define PINMUX_GPIO39__FUNC_GPIO39 (MTK_PIN_NO(39) | 0) -#define PINMUX_GPIO39__FUNC_MD1_SIM1_SRST (MTK_PIN_NO(39) | 1) -#define PINMUX_GPIO39__FUNC_CCU_JTAG_TCK (MTK_PIN_NO(39) | 2) -#define PINMUX_GPIO39__FUNC_MD1_SIM2_SRST (MTK_PIN_NO(39) | 3) -#define PINMUX_GPIO39__FUNC_SCP_JTAG_TCK (MTK_PIN_NO(39) | 5) -#define PINMUX_GPIO39__FUNC_CONN_DSP_JCK (MTK_PIN_NO(39) | 6) -#define PINMUX_GPIO39__FUNC_DBG_MON_A31 (MTK_PIN_NO(39) | 7) - -#define PINMUX_GPIO40__FUNC_GPIO40 (MTK_PIN_NO(40) | 0) -#define PINMUX_GPIO40__FUNC_MD1_SIM1_SIO (MTK_PIN_NO(40) | 1) -#define PINMUX_GPIO40__FUNC_CCU_JTAG_TRST (MTK_PIN_NO(40) | 2) -#define PINMUX_GPIO40__FUNC_MD1_SIM2_SIO (MTK_PIN_NO(40) | 3) -#define PINMUX_GPIO40__FUNC_SCP_JTAG_TRSTN (MTK_PIN_NO(40) | 5) -#define PINMUX_GPIO40__FUNC_CONN_DSP_JDI (MTK_PIN_NO(40) | 6) -#define PINMUX_GPIO40__FUNC_DBG_MON_A32 (MTK_PIN_NO(40) | 7) - -#define PINMUX_GPIO41__FUNC_GPIO41 (MTK_PIN_NO(41) | 0) -#define PINMUX_GPIO41__FUNC_IDDIG (MTK_PIN_NO(41) | 1) -#define PINMUX_GPIO41__FUNC_URXD1 (MTK_PIN_NO(41) | 2) -#define PINMUX_GPIO41__FUNC_UCTS0 (MTK_PIN_NO(41) | 3) -#define PINMUX_GPIO41__FUNC_SSPM_UTXD_AO (MTK_PIN_NO(41) | 4) -#define 
PINMUX_GPIO41__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(41) | 5) -#define PINMUX_GPIO41__FUNC_DMIC_CLK (MTK_PIN_NO(41) | 6) - -#define PINMUX_GPIO42__FUNC_GPIO42 (MTK_PIN_NO(42) | 0) -#define PINMUX_GPIO42__FUNC_USB_DRVVBUS (MTK_PIN_NO(42) | 1) -#define PINMUX_GPIO42__FUNC_UTXD1 (MTK_PIN_NO(42) | 2) -#define PINMUX_GPIO42__FUNC_URTS0 (MTK_PIN_NO(42) | 3) -#define PINMUX_GPIO42__FUNC_SSPM_URXD_AO (MTK_PIN_NO(42) | 4) -#define PINMUX_GPIO42__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(42) | 5) -#define PINMUX_GPIO42__FUNC_DMIC_DAT (MTK_PIN_NO(42) | 6) - -#define PINMUX_GPIO43__FUNC_GPIO43 (MTK_PIN_NO(43) | 0) -#define PINMUX_GPIO43__FUNC_DISP_PWM (MTK_PIN_NO(43) | 1) - -#define PINMUX_GPIO44__FUNC_GPIO44 (MTK_PIN_NO(44) | 0) -#define PINMUX_GPIO44__FUNC_DSI_TE (MTK_PIN_NO(44) | 1) - -#define PINMUX_GPIO45__FUNC_GPIO45 (MTK_PIN_NO(45) | 0) -#define PINMUX_GPIO45__FUNC_LCM_RST (MTK_PIN_NO(45) | 1) - -#define PINMUX_GPIO46__FUNC_GPIO46 (MTK_PIN_NO(46) | 0) -#define PINMUX_GPIO46__FUNC_MD_INT2_C2K_UIM1_HOT_PLUG (MTK_PIN_NO(46) | 1) -#define PINMUX_GPIO46__FUNC_URXD1 (MTK_PIN_NO(46) | 2) -#define PINMUX_GPIO46__FUNC_UCTS1 (MTK_PIN_NO(46) | 3) -#define PINMUX_GPIO46__FUNC_CCU_UTXD_AO (MTK_PIN_NO(46) | 4) -#define PINMUX_GPIO46__FUNC_TP_UCTS1_AO (MTK_PIN_NO(46) | 5) -#define PINMUX_GPIO46__FUNC_IDDIG (MTK_PIN_NO(46) | 6) -#define PINMUX_GPIO46__FUNC_I2S5_LRCK (MTK_PIN_NO(46) | 7) - -#define PINMUX_GPIO47__FUNC_GPIO47 (MTK_PIN_NO(47) | 0) -#define PINMUX_GPIO47__FUNC_MD_INT1_C2K_UIM0_HOT_PLUG (MTK_PIN_NO(47) | 1) -#define PINMUX_GPIO47__FUNC_UTXD1 (MTK_PIN_NO(47) | 2) -#define PINMUX_GPIO47__FUNC_URTS1 (MTK_PIN_NO(47) | 3) -#define PINMUX_GPIO47__FUNC_CCU_URXD_AO (MTK_PIN_NO(47) | 4) -#define PINMUX_GPIO47__FUNC_TP_URTS1_AO (MTK_PIN_NO(47) | 5) -#define PINMUX_GPIO47__FUNC_USB_DRVVBUS (MTK_PIN_NO(47) | 6) -#define PINMUX_GPIO47__FUNC_I2S5_DO (MTK_PIN_NO(47) | 7) - -#define PINMUX_GPIO48__FUNC_GPIO48 (MTK_PIN_NO(48) | 0) -#define PINMUX_GPIO48__FUNC_SCL5 (MTK_PIN_NO(48) | 1) - -#define PINMUX_GPIO49__FUNC_GPIO49 (MTK_PIN_NO(49) | 0) -#define PINMUX_GPIO49__FUNC_SDA5 (MTK_PIN_NO(49) | 1) - -#define PINMUX_GPIO50__FUNC_GPIO50 (MTK_PIN_NO(50) | 0) -#define PINMUX_GPIO50__FUNC_SCL3 (MTK_PIN_NO(50) | 1) - -#define PINMUX_GPIO51__FUNC_GPIO51 (MTK_PIN_NO(51) | 0) -#define PINMUX_GPIO51__FUNC_SDA3 (MTK_PIN_NO(51) | 1) - -#define PINMUX_GPIO52__FUNC_GPIO52 (MTK_PIN_NO(52) | 0) -#define PINMUX_GPIO52__FUNC_BPI_ANT2 (MTK_PIN_NO(52) | 1) - -#define PINMUX_GPIO53__FUNC_GPIO53 (MTK_PIN_NO(53) | 0) -#define PINMUX_GPIO53__FUNC_BPI_ANT0 (MTK_PIN_NO(53) | 1) - -#define PINMUX_GPIO54__FUNC_GPIO54 (MTK_PIN_NO(54) | 0) -#define PINMUX_GPIO54__FUNC_BPI_OLAT1 (MTK_PIN_NO(54) | 1) - -#define PINMUX_GPIO55__FUNC_GPIO55 (MTK_PIN_NO(55) | 0) -#define PINMUX_GPIO55__FUNC_BPI_BUS8 (MTK_PIN_NO(55) | 1) - -#define PINMUX_GPIO56__FUNC_GPIO56 (MTK_PIN_NO(56) | 0) -#define PINMUX_GPIO56__FUNC_BPI_BUS9 (MTK_PIN_NO(56) | 1) -#define PINMUX_GPIO56__FUNC_SCL_6306 (MTK_PIN_NO(56) | 2) - -#define PINMUX_GPIO57__FUNC_GPIO57 (MTK_PIN_NO(57) | 0) -#define PINMUX_GPIO57__FUNC_BPI_BUS10 (MTK_PIN_NO(57) | 1) -#define PINMUX_GPIO57__FUNC_SDA_6306 (MTK_PIN_NO(57) | 2) - -#define PINMUX_GPIO58__FUNC_GPIO58 (MTK_PIN_NO(58) | 0) -#define PINMUX_GPIO58__FUNC_RFIC0_BSI_D2 (MTK_PIN_NO(58) | 1) -#define PINMUX_GPIO58__FUNC_SPM_BSI_D2 (MTK_PIN_NO(58) | 2) -#define PINMUX_GPIO58__FUNC_PWM_B (MTK_PIN_NO(58) | 3) - -#define PINMUX_GPIO59__FUNC_GPIO59 (MTK_PIN_NO(59) | 0) -#define PINMUX_GPIO59__FUNC_RFIC0_BSI_D1 (MTK_PIN_NO(59) | 1) -#define PINMUX_GPIO59__FUNC_SPM_BSI_D1 
(MTK_PIN_NO(59) | 2) - -#define PINMUX_GPIO60__FUNC_GPIO60 (MTK_PIN_NO(60) | 0) -#define PINMUX_GPIO60__FUNC_RFIC0_BSI_D0 (MTK_PIN_NO(60) | 1) -#define PINMUX_GPIO60__FUNC_SPM_BSI_D0 (MTK_PIN_NO(60) | 2) - -#define PINMUX_GPIO61__FUNC_GPIO61 (MTK_PIN_NO(61) | 0) -#define PINMUX_GPIO61__FUNC_MIPI1_SDATA (MTK_PIN_NO(61) | 1) - -#define PINMUX_GPIO62__FUNC_GPIO62 (MTK_PIN_NO(62) | 0) -#define PINMUX_GPIO62__FUNC_MIPI1_SCLK (MTK_PIN_NO(62) | 1) - -#define PINMUX_GPIO63__FUNC_GPIO63 (MTK_PIN_NO(63) | 0) -#define PINMUX_GPIO63__FUNC_MIPI0_SDATA (MTK_PIN_NO(63) | 1) - -#define PINMUX_GPIO64__FUNC_GPIO64 (MTK_PIN_NO(64) | 0) -#define PINMUX_GPIO64__FUNC_MIPI0_SCLK (MTK_PIN_NO(64) | 1) - -#define PINMUX_GPIO65__FUNC_GPIO65 (MTK_PIN_NO(65) | 0) -#define PINMUX_GPIO65__FUNC_MIPI3_SDATA (MTK_PIN_NO(65) | 1) -#define PINMUX_GPIO65__FUNC_BPI_OLAT2 (MTK_PIN_NO(65) | 2) - -#define PINMUX_GPIO66__FUNC_GPIO66 (MTK_PIN_NO(66) | 0) -#define PINMUX_GPIO66__FUNC_MIPI3_SCLK (MTK_PIN_NO(66) | 1) -#define PINMUX_GPIO66__FUNC_BPI_OLAT3 (MTK_PIN_NO(66) | 2) - -#define PINMUX_GPIO67__FUNC_GPIO67 (MTK_PIN_NO(67) | 0) -#define PINMUX_GPIO67__FUNC_MIPI2_SDATA (MTK_PIN_NO(67) | 1) - -#define PINMUX_GPIO68__FUNC_GPIO68 (MTK_PIN_NO(68) | 0) -#define PINMUX_GPIO68__FUNC_MIPI2_SCLK (MTK_PIN_NO(68) | 1) - -#define PINMUX_GPIO69__FUNC_GPIO69 (MTK_PIN_NO(69) | 0) -#define PINMUX_GPIO69__FUNC_BPI_BUS7 (MTK_PIN_NO(69) | 1) - -#define PINMUX_GPIO70__FUNC_GPIO70 (MTK_PIN_NO(70) | 0) -#define PINMUX_GPIO70__FUNC_BPI_BUS6 (MTK_PIN_NO(70) | 1) - -#define PINMUX_GPIO71__FUNC_GPIO71 (MTK_PIN_NO(71) | 0) -#define PINMUX_GPIO71__FUNC_BPI_BUS5 (MTK_PIN_NO(71) | 1) - -#define PINMUX_GPIO72__FUNC_GPIO72 (MTK_PIN_NO(72) | 0) -#define PINMUX_GPIO72__FUNC_BPI_BUS4 (MTK_PIN_NO(72) | 1) - -#define PINMUX_GPIO73__FUNC_GPIO73 (MTK_PIN_NO(73) | 0) -#define PINMUX_GPIO73__FUNC_BPI_BUS3 (MTK_PIN_NO(73) | 1) - -#define PINMUX_GPIO74__FUNC_GPIO74 (MTK_PIN_NO(74) | 0) -#define PINMUX_GPIO74__FUNC_BPI_BUS2 (MTK_PIN_NO(74) | 1) - -#define PINMUX_GPIO75__FUNC_GPIO75 (MTK_PIN_NO(75) | 0) -#define PINMUX_GPIO75__FUNC_BPI_BUS1 (MTK_PIN_NO(75) | 1) - -#define PINMUX_GPIO76__FUNC_GPIO76 (MTK_PIN_NO(76) | 0) -#define PINMUX_GPIO76__FUNC_BPI_BUS0 (MTK_PIN_NO(76) | 1) - -#define PINMUX_GPIO77__FUNC_GPIO77 (MTK_PIN_NO(77) | 0) -#define PINMUX_GPIO77__FUNC_BPI_ANT1 (MTK_PIN_NO(77) | 1) - -#define PINMUX_GPIO78__FUNC_GPIO78 (MTK_PIN_NO(78) | 0) -#define PINMUX_GPIO78__FUNC_BPI_OLAT0 (MTK_PIN_NO(78) | 1) - -#define PINMUX_GPIO79__FUNC_GPIO79 (MTK_PIN_NO(79) | 0) -#define PINMUX_GPIO79__FUNC_BPI_PA_VM1 (MTK_PIN_NO(79) | 1) -#define PINMUX_GPIO79__FUNC_MIPI4_SDATA (MTK_PIN_NO(79) | 2) - -#define PINMUX_GPIO80__FUNC_GPIO80 (MTK_PIN_NO(80) | 0) -#define PINMUX_GPIO80__FUNC_BPI_PA_VM0 (MTK_PIN_NO(80) | 1) -#define PINMUX_GPIO80__FUNC_MIPI4_SCLK (MTK_PIN_NO(80) | 2) - -#define PINMUX_GPIO81__FUNC_GPIO81 (MTK_PIN_NO(81) | 0) -#define PINMUX_GPIO81__FUNC_SDA1 (MTK_PIN_NO(81) | 1) - -#define PINMUX_GPIO82__FUNC_GPIO82 (MTK_PIN_NO(82) | 0) -#define PINMUX_GPIO82__FUNC_SDA0 (MTK_PIN_NO(82) | 1) - -#define PINMUX_GPIO83__FUNC_GPIO83 (MTK_PIN_NO(83) | 0) -#define PINMUX_GPIO83__FUNC_SCL0 (MTK_PIN_NO(83) | 1) - -#define PINMUX_GPIO84__FUNC_GPIO84 (MTK_PIN_NO(84) | 0) -#define PINMUX_GPIO84__FUNC_SCL1 (MTK_PIN_NO(84) | 1) - -#define PINMUX_GPIO85__FUNC_GPIO85 (MTK_PIN_NO(85) | 0) -#define PINMUX_GPIO85__FUNC_SPI0_MI (MTK_PIN_NO(85) | 1) -#define PINMUX_GPIO85__FUNC_SCP_SPI0_MI (MTK_PIN_NO(85) | 2) -#define PINMUX_GPIO85__FUNC_CLKM3 (MTK_PIN_NO(85) | 3) -#define 
PINMUX_GPIO85__FUNC_I2S1_BCK (MTK_PIN_NO(85) | 4) -#define PINMUX_GPIO85__FUNC_MFG_DFD_JTAG_TDO (MTK_PIN_NO(85) | 5) -#define PINMUX_GPIO85__FUNC_DFD_TDO (MTK_PIN_NO(85) | 6) -#define PINMUX_GPIO85__FUNC_JTDO_SEL1 (MTK_PIN_NO(85) | 7) - -#define PINMUX_GPIO86__FUNC_GPIO86 (MTK_PIN_NO(86) | 0) -#define PINMUX_GPIO86__FUNC_SPI0_CSB (MTK_PIN_NO(86) | 1) -#define PINMUX_GPIO86__FUNC_SCP_SPI0_CS (MTK_PIN_NO(86) | 2) -#define PINMUX_GPIO86__FUNC_CLKM0 (MTK_PIN_NO(86) | 3) -#define PINMUX_GPIO86__FUNC_I2S1_LRCK (MTK_PIN_NO(86) | 4) -#define PINMUX_GPIO86__FUNC_MFG_DFD_JTAG_TMS (MTK_PIN_NO(86) | 5) -#define PINMUX_GPIO86__FUNC_DFD_TMS (MTK_PIN_NO(86) | 6) -#define PINMUX_GPIO86__FUNC_JTMS_SEL1 (MTK_PIN_NO(86) | 7) - -#define PINMUX_GPIO87__FUNC_GPIO87 (MTK_PIN_NO(87) | 0) -#define PINMUX_GPIO87__FUNC_SPI0_MO (MTK_PIN_NO(87) | 1) -#define PINMUX_GPIO87__FUNC_SCP_SPI0_MO (MTK_PIN_NO(87) | 2) -#define PINMUX_GPIO87__FUNC_SDA1 (MTK_PIN_NO(87) | 3) -#define PINMUX_GPIO87__FUNC_I2S1_DO (MTK_PIN_NO(87) | 4) -#define PINMUX_GPIO87__FUNC_MFG_DFD_JTAG_TDI (MTK_PIN_NO(87) | 5) -#define PINMUX_GPIO87__FUNC_DFD_TDI (MTK_PIN_NO(87) | 6) -#define PINMUX_GPIO87__FUNC_JTDI_SEL1 (MTK_PIN_NO(87) | 7) - -#define PINMUX_GPIO88__FUNC_GPIO88 (MTK_PIN_NO(88) | 0) -#define PINMUX_GPIO88__FUNC_SPI0_CLK (MTK_PIN_NO(88) | 1) -#define PINMUX_GPIO88__FUNC_SCP_SPI0_CK (MTK_PIN_NO(88) | 2) -#define PINMUX_GPIO88__FUNC_SCL1 (MTK_PIN_NO(88) | 3) -#define PINMUX_GPIO88__FUNC_I2S1_MCK (MTK_PIN_NO(88) | 4) -#define PINMUX_GPIO88__FUNC_MFG_DFD_JTAG_TCK (MTK_PIN_NO(88) | 5) -#define PINMUX_GPIO88__FUNC_DFD_TCK_XI (MTK_PIN_NO(88) | 6) -#define PINMUX_GPIO88__FUNC_JTCK_SEL1 (MTK_PIN_NO(88) | 7) - -#define PINMUX_GPIO89__FUNC_GPIO89 (MTK_PIN_NO(89) | 0) -#define PINMUX_GPIO89__FUNC_SRCLKENAI0 (MTK_PIN_NO(89) | 1) -#define PINMUX_GPIO89__FUNC_PWM_C (MTK_PIN_NO(89) | 2) -#define PINMUX_GPIO89__FUNC_I2S5_BCK (MTK_PIN_NO(89) | 3) -#define PINMUX_GPIO89__FUNC_ANT_SEL6 (MTK_PIN_NO(89) | 4) -#define PINMUX_GPIO89__FUNC_SDA8 (MTK_PIN_NO(89) | 5) -#define PINMUX_GPIO89__FUNC_CMVREF0 (MTK_PIN_NO(89) | 6) -#define PINMUX_GPIO89__FUNC_DBG_MON_A21 (MTK_PIN_NO(89) | 7) - -#define PINMUX_GPIO90__FUNC_GPIO90 (MTK_PIN_NO(90) | 0) -#define PINMUX_GPIO90__FUNC_PWM_A (MTK_PIN_NO(90) | 1) -#define PINMUX_GPIO90__FUNC_CMMCLK2 (MTK_PIN_NO(90) | 2) -#define PINMUX_GPIO90__FUNC_I2S5_LRCK (MTK_PIN_NO(90) | 3) -#define PINMUX_GPIO90__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(90) | 4) -#define PINMUX_GPIO90__FUNC_SCL8 (MTK_PIN_NO(90) | 5) -#define PINMUX_GPIO90__FUNC_PTA_RXD (MTK_PIN_NO(90) | 6) -#define PINMUX_GPIO90__FUNC_DBG_MON_A22 (MTK_PIN_NO(90) | 7) - -#define PINMUX_GPIO91__FUNC_GPIO91 (MTK_PIN_NO(91) | 0) -#define PINMUX_GPIO91__FUNC_KPROW1 (MTK_PIN_NO(91) | 1) -#define PINMUX_GPIO91__FUNC_PWM_B (MTK_PIN_NO(91) | 2) -#define PINMUX_GPIO91__FUNC_I2S5_DO (MTK_PIN_NO(91) | 3) -#define PINMUX_GPIO91__FUNC_ANT_SEL7 (MTK_PIN_NO(91) | 4) -#define PINMUX_GPIO91__FUNC_CMMCLK3 (MTK_PIN_NO(91) | 5) -#define PINMUX_GPIO91__FUNC_PTA_TXD (MTK_PIN_NO(91) | 6) - -#define PINMUX_GPIO92__FUNC_GPIO92 (MTK_PIN_NO(92) | 0) -#define PINMUX_GPIO92__FUNC_KPROW0 (MTK_PIN_NO(92) | 1) - -#define PINMUX_GPIO93__FUNC_GPIO93 (MTK_PIN_NO(93) | 0) -#define PINMUX_GPIO93__FUNC_KPCOL0 (MTK_PIN_NO(93) | 1) -#define PINMUX_GPIO93__FUNC_DBG_MON_B27 (MTK_PIN_NO(93) | 7) - -#define PINMUX_GPIO94__FUNC_GPIO94 (MTK_PIN_NO(94) | 0) -#define PINMUX_GPIO94__FUNC_KPCOL1 (MTK_PIN_NO(94) | 1) -#define PINMUX_GPIO94__FUNC_I2S2_DI2 (MTK_PIN_NO(94) | 2) -#define PINMUX_GPIO94__FUNC_I2S5_MCK (MTK_PIN_NO(94) | 3) 
-#define PINMUX_GPIO94__FUNC_CMMCLK2 (MTK_PIN_NO(94) | 4) -#define PINMUX_GPIO94__FUNC_SCP_SPI2_MI (MTK_PIN_NO(94) | 5) -#define PINMUX_GPIO94__FUNC_SRCLKENAI1 (MTK_PIN_NO(94) | 6) -#define PINMUX_GPIO94__FUNC_SPI2_MI (MTK_PIN_NO(94) | 7) - -#define PINMUX_GPIO95__FUNC_GPIO95 (MTK_PIN_NO(95) | 0) -#define PINMUX_GPIO95__FUNC_URXD0 (MTK_PIN_NO(95) | 1) -#define PINMUX_GPIO95__FUNC_UTXD0 (MTK_PIN_NO(95) | 2) -#define PINMUX_GPIO95__FUNC_MD_URXD0 (MTK_PIN_NO(95) | 3) -#define PINMUX_GPIO95__FUNC_MD_URXD1 (MTK_PIN_NO(95) | 4) -#define PINMUX_GPIO95__FUNC_SSPM_URXD_AO (MTK_PIN_NO(95) | 5) -#define PINMUX_GPIO95__FUNC_CCU_URXD_AO (MTK_PIN_NO(95) | 6) - -#define PINMUX_GPIO96__FUNC_GPIO96 (MTK_PIN_NO(96) | 0) -#define PINMUX_GPIO96__FUNC_UTXD0 (MTK_PIN_NO(96) | 1) -#define PINMUX_GPIO96__FUNC_URXD0 (MTK_PIN_NO(96) | 2) -#define PINMUX_GPIO96__FUNC_MD_UTXD0 (MTK_PIN_NO(96) | 3) -#define PINMUX_GPIO96__FUNC_MD_UTXD1 (MTK_PIN_NO(96) | 4) -#define PINMUX_GPIO96__FUNC_SSPM_UTXD_AO (MTK_PIN_NO(96) | 5) -#define PINMUX_GPIO96__FUNC_CCU_UTXD_AO (MTK_PIN_NO(96) | 6) -#define PINMUX_GPIO96__FUNC_DBG_MON_B2 (MTK_PIN_NO(96) | 7) - -#define PINMUX_GPIO97__FUNC_GPIO97 (MTK_PIN_NO(97) | 0) -#define PINMUX_GPIO97__FUNC_UCTS0 (MTK_PIN_NO(97) | 1) -#define PINMUX_GPIO97__FUNC_I2S2_MCK (MTK_PIN_NO(97) | 2) -#define PINMUX_GPIO97__FUNC_IDDIG (MTK_PIN_NO(97) | 3) -#define PINMUX_GPIO97__FUNC_CONN_MCU_TDO (MTK_PIN_NO(97) | 4) -#define PINMUX_GPIO97__FUNC_SSPM_JTAG_TDO (MTK_PIN_NO(97) | 5) -#define PINMUX_GPIO97__FUNC_IO_JTAG_TDO (MTK_PIN_NO(97) | 6) -#define PINMUX_GPIO97__FUNC_DBG_MON_B3 (MTK_PIN_NO(97) | 7) - -#define PINMUX_GPIO98__FUNC_GPIO98 (MTK_PIN_NO(98) | 0) -#define PINMUX_GPIO98__FUNC_URTS0 (MTK_PIN_NO(98) | 1) -#define PINMUX_GPIO98__FUNC_I2S2_BCK (MTK_PIN_NO(98) | 2) -#define PINMUX_GPIO98__FUNC_USB_DRVVBUS (MTK_PIN_NO(98) | 3) -#define PINMUX_GPIO98__FUNC_CONN_MCU_TMS (MTK_PIN_NO(98) | 4) -#define PINMUX_GPIO98__FUNC_SSPM_JTAG_TMS (MTK_PIN_NO(98) | 5) -#define PINMUX_GPIO98__FUNC_IO_JTAG_TMS (MTK_PIN_NO(98) | 6) -#define PINMUX_GPIO98__FUNC_DBG_MON_B4 (MTK_PIN_NO(98) | 7) - -#define PINMUX_GPIO99__FUNC_GPIO99 (MTK_PIN_NO(99) | 0) -#define PINMUX_GPIO99__FUNC_CMMCLK0 (MTK_PIN_NO(99) | 1) -#define PINMUX_GPIO99__FUNC_CONN_MCU_AICE_TMSC (MTK_PIN_NO(99) | 4) -#define PINMUX_GPIO99__FUNC_DBG_MON_B28 (MTK_PIN_NO(99) | 7) - -#define PINMUX_GPIO100__FUNC_GPIO100 (MTK_PIN_NO(100) | 0) -#define PINMUX_GPIO100__FUNC_CMMCLK1 (MTK_PIN_NO(100) | 1) -#define PINMUX_GPIO100__FUNC_PWM_C (MTK_PIN_NO(100) | 2) -#define PINMUX_GPIO100__FUNC_MD_INT1_C2K_UIM0_HOT_PLUG (MTK_PIN_NO(100) | 3) -#define PINMUX_GPIO100__FUNC_CONN_MCU_AICE_TCKC (MTK_PIN_NO(100) | 4) -#define PINMUX_GPIO100__FUNC_DBG_MON_B29 (MTK_PIN_NO(100) | 7) - -#define PINMUX_GPIO101__FUNC_GPIO101 (MTK_PIN_NO(101) | 0) -#define PINMUX_GPIO101__FUNC_CLKM2 (MTK_PIN_NO(101) | 1) -#define PINMUX_GPIO101__FUNC_I2S2_LRCK (MTK_PIN_NO(101) | 2) -#define PINMUX_GPIO101__FUNC_CMVREF1 (MTK_PIN_NO(101) | 3) -#define PINMUX_GPIO101__FUNC_CONN_MCU_TCK (MTK_PIN_NO(101) | 4) -#define PINMUX_GPIO101__FUNC_SSPM_JTAG_TCK (MTK_PIN_NO(101) | 5) -#define PINMUX_GPIO101__FUNC_IO_JTAG_TCK (MTK_PIN_NO(101) | 6) - -#define PINMUX_GPIO102__FUNC_GPIO102 (MTK_PIN_NO(102) | 0) -#define PINMUX_GPIO102__FUNC_CLKM1 (MTK_PIN_NO(102) | 1) -#define PINMUX_GPIO102__FUNC_I2S2_DI (MTK_PIN_NO(102) | 2) -#define PINMUX_GPIO102__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(102) | 3) -#define PINMUX_GPIO102__FUNC_CONN_MCU_TDI (MTK_PIN_NO(102) | 4) -#define PINMUX_GPIO102__FUNC_SSPM_JTAG_TDI (MTK_PIN_NO(102) | 5) 
-#define PINMUX_GPIO102__FUNC_IO_JTAG_TDI (MTK_PIN_NO(102) | 6) -#define PINMUX_GPIO102__FUNC_DBG_MON_B8 (MTK_PIN_NO(102) | 7) - -#define PINMUX_GPIO103__FUNC_GPIO103 (MTK_PIN_NO(103) | 0) -#define PINMUX_GPIO103__FUNC_SCL2 (MTK_PIN_NO(103) | 1) - -#define PINMUX_GPIO104__FUNC_GPIO104 (MTK_PIN_NO(104) | 0) -#define PINMUX_GPIO104__FUNC_SDA2 (MTK_PIN_NO(104) | 1) - -#define PINMUX_GPIO105__FUNC_GPIO105 (MTK_PIN_NO(105) | 0) -#define PINMUX_GPIO105__FUNC_SCL4 (MTK_PIN_NO(105) | 1) - -#define PINMUX_GPIO106__FUNC_GPIO106 (MTK_PIN_NO(106) | 0) -#define PINMUX_GPIO106__FUNC_SDA4 (MTK_PIN_NO(106) | 1) - -#define PINMUX_GPIO107__FUNC_GPIO107 (MTK_PIN_NO(107) | 0) -#define PINMUX_GPIO107__FUNC_DMIC_CLK (MTK_PIN_NO(107) | 1) -#define PINMUX_GPIO107__FUNC_ANT_SEL0 (MTK_PIN_NO(107) | 2) -#define PINMUX_GPIO107__FUNC_CLKM0 (MTK_PIN_NO(107) | 3) -#define PINMUX_GPIO107__FUNC_SDA7 (MTK_PIN_NO(107) | 4) -#define PINMUX_GPIO107__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(107) | 5) -#define PINMUX_GPIO107__FUNC_PWM_A (MTK_PIN_NO(107) | 6) -#define PINMUX_GPIO107__FUNC_DBG_MON_B12 (MTK_PIN_NO(107) | 7) - -#define PINMUX_GPIO108__FUNC_GPIO108 (MTK_PIN_NO(108) | 0) -#define PINMUX_GPIO108__FUNC_CMMCLK2 (MTK_PIN_NO(108) | 1) -#define PINMUX_GPIO108__FUNC_ANT_SEL1 (MTK_PIN_NO(108) | 2) -#define PINMUX_GPIO108__FUNC_CLKM1 (MTK_PIN_NO(108) | 3) -#define PINMUX_GPIO108__FUNC_SCL8 (MTK_PIN_NO(108) | 4) -#define PINMUX_GPIO108__FUNC_DAP_MD32_SWD (MTK_PIN_NO(108) | 5) -#define PINMUX_GPIO108__FUNC_PWM_B (MTK_PIN_NO(108) | 6) -#define PINMUX_GPIO108__FUNC_DBG_MON_B13 (MTK_PIN_NO(108) | 7) - -#define PINMUX_GPIO109__FUNC_GPIO109 (MTK_PIN_NO(109) | 0) -#define PINMUX_GPIO109__FUNC_DMIC_DAT (MTK_PIN_NO(109) | 1) -#define PINMUX_GPIO109__FUNC_ANT_SEL2 (MTK_PIN_NO(109) | 2) -#define PINMUX_GPIO109__FUNC_CLKM2 (MTK_PIN_NO(109) | 3) -#define PINMUX_GPIO109__FUNC_SDA8 (MTK_PIN_NO(109) | 4) -#define PINMUX_GPIO109__FUNC_DAP_MD32_SWCK (MTK_PIN_NO(109) | 5) -#define PINMUX_GPIO109__FUNC_PWM_C (MTK_PIN_NO(109) | 6) -#define PINMUX_GPIO109__FUNC_DBG_MON_B14 (MTK_PIN_NO(109) | 7) - -#define PINMUX_GPIO110__FUNC_GPIO110 (MTK_PIN_NO(110) | 0) -#define PINMUX_GPIO110__FUNC_SCL7 (MTK_PIN_NO(110) | 1) -#define PINMUX_GPIO110__FUNC_ANT_SEL0 (MTK_PIN_NO(110) | 2) -#define PINMUX_GPIO110__FUNC_TP_URXD1_AO (MTK_PIN_NO(110) | 3) -#define PINMUX_GPIO110__FUNC_USB_DRVVBUS (MTK_PIN_NO(110) | 4) -#define PINMUX_GPIO110__FUNC_SRCLKENAI1 (MTK_PIN_NO(110) | 5) -#define PINMUX_GPIO110__FUNC_KPCOL2 (MTK_PIN_NO(110) | 6) -#define PINMUX_GPIO110__FUNC_URXD1 (MTK_PIN_NO(110) | 7) - -#define PINMUX_GPIO111__FUNC_GPIO111 (MTK_PIN_NO(111) | 0) -#define PINMUX_GPIO111__FUNC_CMMCLK3 (MTK_PIN_NO(111) | 1) -#define PINMUX_GPIO111__FUNC_ANT_SEL1 (MTK_PIN_NO(111) | 2) -#define PINMUX_GPIO111__FUNC_SRCLKENAI0 (MTK_PIN_NO(111) | 3) -#define PINMUX_GPIO111__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(111) | 4) -#define PINMUX_GPIO111__FUNC_MD_INT2_C2K_UIM1_HOT_PLUG (MTK_PIN_NO(111) | 5) -#define PINMUX_GPIO111__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(111) | 7) - -#define PINMUX_GPIO112__FUNC_GPIO112 (MTK_PIN_NO(112) | 0) -#define PINMUX_GPIO112__FUNC_SDA7 (MTK_PIN_NO(112) | 1) -#define PINMUX_GPIO112__FUNC_ANT_SEL2 (MTK_PIN_NO(112) | 2) -#define PINMUX_GPIO112__FUNC_TP_UTXD1_AO (MTK_PIN_NO(112) | 3) -#define PINMUX_GPIO112__FUNC_IDDIG (MTK_PIN_NO(112) | 4) -#define PINMUX_GPIO112__FUNC_AGPS_SYNC (MTK_PIN_NO(112) | 5) -#define PINMUX_GPIO112__FUNC_KPROW2 (MTK_PIN_NO(112) | 6) -#define PINMUX_GPIO112__FUNC_UTXD1 (MTK_PIN_NO(112) | 7) - -#define PINMUX_GPIO113__FUNC_GPIO113 (MTK_PIN_NO(113) | 0) 
-#define PINMUX_GPIO113__FUNC_CONN_TOP_CLK (MTK_PIN_NO(113) | 1) -#define PINMUX_GPIO113__FUNC_SCL6 (MTK_PIN_NO(113) | 3) -#define PINMUX_GPIO113__FUNC_AUXIF_CLK0 (MTK_PIN_NO(113) | 4) -#define PINMUX_GPIO113__FUNC_TP_UCTS1_AO (MTK_PIN_NO(113) | 6) - -#define PINMUX_GPIO114__FUNC_GPIO114 (MTK_PIN_NO(114) | 0) -#define PINMUX_GPIO114__FUNC_CONN_TOP_DATA (MTK_PIN_NO(114) | 1) -#define PINMUX_GPIO114__FUNC_SDA6 (MTK_PIN_NO(114) | 3) -#define PINMUX_GPIO114__FUNC_AUXIF_ST0 (MTK_PIN_NO(114) | 4) -#define PINMUX_GPIO114__FUNC_TP_URTS1_AO (MTK_PIN_NO(114) | 6) - -#define PINMUX_GPIO115__FUNC_GPIO115 (MTK_PIN_NO(115) | 0) -#define PINMUX_GPIO115__FUNC_CONN_BT_CLK (MTK_PIN_NO(115) | 1) -#define PINMUX_GPIO115__FUNC_UTXD1 (MTK_PIN_NO(115) | 2) -#define PINMUX_GPIO115__FUNC_PTA_TXD (MTK_PIN_NO(115) | 3) -#define PINMUX_GPIO115__FUNC_AUXIF_CLK1 (MTK_PIN_NO(115) | 4) -#define PINMUX_GPIO115__FUNC_DAP_MD32_SWD (MTK_PIN_NO(115) | 5) -#define PINMUX_GPIO115__FUNC_TP_UTXD1_AO (MTK_PIN_NO(115) | 6) - -#define PINMUX_GPIO116__FUNC_GPIO116 (MTK_PIN_NO(116) | 0) -#define PINMUX_GPIO116__FUNC_CONN_BT_DATA (MTK_PIN_NO(116) | 1) -#define PINMUX_GPIO116__FUNC_IPU_JTAG_TRST (MTK_PIN_NO(116) | 2) -#define PINMUX_GPIO116__FUNC_AUXIF_ST1 (MTK_PIN_NO(116) | 4) -#define PINMUX_GPIO116__FUNC_DAP_MD32_SWCK (MTK_PIN_NO(116) | 5) -#define PINMUX_GPIO116__FUNC_TP_URXD2_AO (MTK_PIN_NO(116) | 6) -#define PINMUX_GPIO116__FUNC_DBG_MON_A0 (MTK_PIN_NO(116) | 7) - -#define PINMUX_GPIO117__FUNC_GPIO117 (MTK_PIN_NO(117) | 0) -#define PINMUX_GPIO117__FUNC_CONN_WF_HB0 (MTK_PIN_NO(117) | 1) -#define PINMUX_GPIO117__FUNC_IPU_JTAG_TDO (MTK_PIN_NO(117) | 2) -#define PINMUX_GPIO117__FUNC_TP_UTXD2_AO (MTK_PIN_NO(117) | 6) -#define PINMUX_GPIO117__FUNC_DBG_MON_A4 (MTK_PIN_NO(117) | 7) - -#define PINMUX_GPIO118__FUNC_GPIO118 (MTK_PIN_NO(118) | 0) -#define PINMUX_GPIO118__FUNC_CONN_WF_HB1 (MTK_PIN_NO(118) | 1) -#define PINMUX_GPIO118__FUNC_IPU_JTAG_TDI (MTK_PIN_NO(118) | 2) -#define PINMUX_GPIO118__FUNC_SSPM_URXD_AO (MTK_PIN_NO(118) | 5) -#define PINMUX_GPIO118__FUNC_TP_UCTS2_AO (MTK_PIN_NO(118) | 6) -#define PINMUX_GPIO118__FUNC_DBG_MON_A5 (MTK_PIN_NO(118) | 7) - -#define PINMUX_GPIO119__FUNC_GPIO119 (MTK_PIN_NO(119) | 0) -#define PINMUX_GPIO119__FUNC_CONN_WF_HB2 (MTK_PIN_NO(119) | 1) -#define PINMUX_GPIO119__FUNC_IPU_JTAG_TCK (MTK_PIN_NO(119) | 2) -#define PINMUX_GPIO119__FUNC_SSPM_UTXD_AO (MTK_PIN_NO(119) | 5) -#define PINMUX_GPIO119__FUNC_TP_URTS2_AO (MTK_PIN_NO(119) | 6) - -#define PINMUX_GPIO120__FUNC_GPIO120 (MTK_PIN_NO(120) | 0) -#define PINMUX_GPIO120__FUNC_CONN_WB_PTA (MTK_PIN_NO(120) | 1) -#define PINMUX_GPIO120__FUNC_IPU_JTAG_TMS (MTK_PIN_NO(120) | 2) -#define PINMUX_GPIO120__FUNC_CCU_URXD_AO (MTK_PIN_NO(120) | 5) - -#define PINMUX_GPIO121__FUNC_GPIO121 (MTK_PIN_NO(121) | 0) -#define PINMUX_GPIO121__FUNC_CONN_HRST_B (MTK_PIN_NO(121) | 1) -#define PINMUX_GPIO121__FUNC_URXD1 (MTK_PIN_NO(121) | 2) -#define PINMUX_GPIO121__FUNC_PTA_RXD (MTK_PIN_NO(121) | 3) -#define PINMUX_GPIO121__FUNC_CCU_UTXD_AO (MTK_PIN_NO(121) | 5) -#define PINMUX_GPIO121__FUNC_TP_URXD1_AO (MTK_PIN_NO(121) | 6) - -#define PINMUX_GPIO122__FUNC_GPIO122 (MTK_PIN_NO(122) | 0) -#define PINMUX_GPIO122__FUNC_MSDC0_CMD (MTK_PIN_NO(122) | 1) -#define PINMUX_GPIO122__FUNC_SSPM_URXD2_AO (MTK_PIN_NO(122) | 2) -#define PINMUX_GPIO122__FUNC_ANT_SEL1 (MTK_PIN_NO(122) | 3) -#define PINMUX_GPIO122__FUNC_DBG_MON_A12 (MTK_PIN_NO(122) | 7) - -#define PINMUX_GPIO123__FUNC_GPIO123 (MTK_PIN_NO(123) | 0) -#define PINMUX_GPIO123__FUNC_MSDC0_DAT0 (MTK_PIN_NO(123) | 1) -#define 
PINMUX_GPIO123__FUNC_ANT_SEL0 (MTK_PIN_NO(123) | 3) -#define PINMUX_GPIO123__FUNC_DBG_MON_A13 (MTK_PIN_NO(123) | 7) - -#define PINMUX_GPIO124__FUNC_GPIO124 (MTK_PIN_NO(124) | 0) -#define PINMUX_GPIO124__FUNC_MSDC0_CLK (MTK_PIN_NO(124) | 1) -#define PINMUX_GPIO124__FUNC_DBG_MON_A14 (MTK_PIN_NO(124) | 7) - -#define PINMUX_GPIO125__FUNC_GPIO125 (MTK_PIN_NO(125) | 0) -#define PINMUX_GPIO125__FUNC_MSDC0_DAT2 (MTK_PIN_NO(125) | 1) -#define PINMUX_GPIO125__FUNC_MRG_CLK (MTK_PIN_NO(125) | 3) -#define PINMUX_GPIO125__FUNC_DBG_MON_A15 (MTK_PIN_NO(125) | 7) - -#define PINMUX_GPIO126__FUNC_GPIO126 (MTK_PIN_NO(126) | 0) -#define PINMUX_GPIO126__FUNC_MSDC0_DAT4 (MTK_PIN_NO(126) | 1) -#define PINMUX_GPIO126__FUNC_ANT_SEL5 (MTK_PIN_NO(126) | 3) -#define PINMUX_GPIO126__FUNC_UFS_MPHY_SCL (MTK_PIN_NO(126) | 6) -#define PINMUX_GPIO126__FUNC_DBG_MON_A16 (MTK_PIN_NO(126) | 7) - -#define PINMUX_GPIO127__FUNC_GPIO127 (MTK_PIN_NO(127) | 0) -#define PINMUX_GPIO127__FUNC_MSDC0_DAT6 (MTK_PIN_NO(127) | 1) -#define PINMUX_GPIO127__FUNC_ANT_SEL4 (MTK_PIN_NO(127) | 3) -#define PINMUX_GPIO127__FUNC_UFS_MPHY_SDA (MTK_PIN_NO(127) | 6) -#define PINMUX_GPIO127__FUNC_DBG_MON_A17 (MTK_PIN_NO(127) | 7) - -#define PINMUX_GPIO128__FUNC_GPIO128 (MTK_PIN_NO(128) | 0) -#define PINMUX_GPIO128__FUNC_MSDC0_DAT1 (MTK_PIN_NO(128) | 1) -#define PINMUX_GPIO128__FUNC_ANT_SEL2 (MTK_PIN_NO(128) | 3) -#define PINMUX_GPIO128__FUNC_UFS_UNIPRO_SDA (MTK_PIN_NO(128) | 6) -#define PINMUX_GPIO128__FUNC_DBG_MON_A18 (MTK_PIN_NO(128) | 7) - -#define PINMUX_GPIO129__FUNC_GPIO129 (MTK_PIN_NO(129) | 0) -#define PINMUX_GPIO129__FUNC_MSDC0_DAT5 (MTK_PIN_NO(129) | 1) -#define PINMUX_GPIO129__FUNC_ANT_SEL3 (MTK_PIN_NO(129) | 3) -#define PINMUX_GPIO129__FUNC_UFS_UNIPRO_SCL (MTK_PIN_NO(129) | 6) -#define PINMUX_GPIO129__FUNC_DBG_MON_A23 (MTK_PIN_NO(129) | 7) - -#define PINMUX_GPIO130__FUNC_GPIO130 (MTK_PIN_NO(130) | 0) -#define PINMUX_GPIO130__FUNC_MSDC0_DAT7 (MTK_PIN_NO(130) | 1) -#define PINMUX_GPIO130__FUNC_MRG_DO (MTK_PIN_NO(130) | 3) -#define PINMUX_GPIO130__FUNC_DBG_MON_A24 (MTK_PIN_NO(130) | 7) - -#define PINMUX_GPIO131__FUNC_GPIO131 (MTK_PIN_NO(131) | 0) -#define PINMUX_GPIO131__FUNC_MSDC0_DSL (MTK_PIN_NO(131) | 1) -#define PINMUX_GPIO131__FUNC_MRG_SYNC (MTK_PIN_NO(131) | 3) -#define PINMUX_GPIO131__FUNC_DBG_MON_A25 (MTK_PIN_NO(131) | 7) - -#define PINMUX_GPIO132__FUNC_GPIO132 (MTK_PIN_NO(132) | 0) -#define PINMUX_GPIO132__FUNC_MSDC0_DAT3 (MTK_PIN_NO(132) | 1) -#define PINMUX_GPIO132__FUNC_MRG_DI (MTK_PIN_NO(132) | 3) -#define PINMUX_GPIO132__FUNC_DBG_MON_A26 (MTK_PIN_NO(132) | 7) - -#define PINMUX_GPIO133__FUNC_GPIO133 (MTK_PIN_NO(133) | 0) -#define PINMUX_GPIO133__FUNC_MSDC0_RSTB (MTK_PIN_NO(133) | 1) -#define PINMUX_GPIO133__FUNC_AGPS_SYNC (MTK_PIN_NO(133) | 3) -#define PINMUX_GPIO133__FUNC_DBG_MON_A27 (MTK_PIN_NO(133) | 7) - -#define PINMUX_GPIO134__FUNC_GPIO134 (MTK_PIN_NO(134) | 0) -#define PINMUX_GPIO134__FUNC_RTC32K_CK (MTK_PIN_NO(134) | 1) - -#define PINMUX_GPIO135__FUNC_GPIO135 (MTK_PIN_NO(135) | 0) -#define PINMUX_GPIO135__FUNC_WATCHDOG (MTK_PIN_NO(135) | 1) - -#define PINMUX_GPIO136__FUNC_GPIO136 (MTK_PIN_NO(136) | 0) -#define PINMUX_GPIO136__FUNC_AUD_CLK_MOSI (MTK_PIN_NO(136) | 1) -#define PINMUX_GPIO136__FUNC_AUD_CLK_MISO (MTK_PIN_NO(136) | 2) -#define PINMUX_GPIO136__FUNC_I2S1_MCK (MTK_PIN_NO(136) | 3) -#define PINMUX_GPIO136__FUNC_UFS_UNIPRO_SCL (MTK_PIN_NO(136) | 6) - -#define PINMUX_GPIO137__FUNC_GPIO137 (MTK_PIN_NO(137) | 0) -#define PINMUX_GPIO137__FUNC_AUD_SYNC_MOSI (MTK_PIN_NO(137) | 1) -#define PINMUX_GPIO137__FUNC_AUD_SYNC_MISO 
(MTK_PIN_NO(137) | 2) -#define PINMUX_GPIO137__FUNC_I2S1_BCK (MTK_PIN_NO(137) | 3) - -#define PINMUX_GPIO138__FUNC_GPIO138 (MTK_PIN_NO(138) | 0) -#define PINMUX_GPIO138__FUNC_AUD_DAT_MOSI0 (MTK_PIN_NO(138) | 1) -#define PINMUX_GPIO138__FUNC_AUD_DAT_MISO0 (MTK_PIN_NO(138) | 2) -#define PINMUX_GPIO138__FUNC_I2S1_LRCK (MTK_PIN_NO(138) | 3) -#define PINMUX_GPIO138__FUNC_DBG_MON_B24 (MTK_PIN_NO(138) | 7) - -#define PINMUX_GPIO139__FUNC_GPIO139 (MTK_PIN_NO(139) | 0) -#define PINMUX_GPIO139__FUNC_AUD_DAT_MOSI1 (MTK_PIN_NO(139) | 1) -#define PINMUX_GPIO139__FUNC_AUD_DAT_MISO1 (MTK_PIN_NO(139) | 2) -#define PINMUX_GPIO139__FUNC_I2S1_DO (MTK_PIN_NO(139) | 3) -#define PINMUX_GPIO139__FUNC_UFS_MPHY_SDA (MTK_PIN_NO(139) | 6) - -#define PINMUX_GPIO140__FUNC_GPIO140 (MTK_PIN_NO(140) | 0) -#define PINMUX_GPIO140__FUNC_AUD_CLK_MISO (MTK_PIN_NO(140) | 1) -#define PINMUX_GPIO140__FUNC_AUD_CLK_MOSI (MTK_PIN_NO(140) | 2) -#define PINMUX_GPIO140__FUNC_I2S0_MCK (MTK_PIN_NO(140) | 3) -#define PINMUX_GPIO140__FUNC_UFS_UNIPRO_SDA (MTK_PIN_NO(140) | 6) - -#define PINMUX_GPIO141__FUNC_GPIO141 (MTK_PIN_NO(141) | 0) -#define PINMUX_GPIO141__FUNC_AUD_SYNC_MISO (MTK_PIN_NO(141) | 1) -#define PINMUX_GPIO141__FUNC_AUD_SYNC_MOSI (MTK_PIN_NO(141) | 2) -#define PINMUX_GPIO141__FUNC_I2S0_BCK (MTK_PIN_NO(141) | 3) - -#define PINMUX_GPIO142__FUNC_GPIO142 (MTK_PIN_NO(142) | 0) -#define PINMUX_GPIO142__FUNC_AUD_DAT_MISO0 (MTK_PIN_NO(142) | 1) -#define PINMUX_GPIO142__FUNC_AUD_DAT_MOSI0 (MTK_PIN_NO(142) | 2) -#define PINMUX_GPIO142__FUNC_I2S0_LRCK (MTK_PIN_NO(142) | 3) -#define PINMUX_GPIO142__FUNC_VOW_DAT_MISO (MTK_PIN_NO(142) | 4) -#define PINMUX_GPIO142__FUNC_DBG_MON_B25 (MTK_PIN_NO(142) | 7) - -#define PINMUX_GPIO143__FUNC_GPIO143 (MTK_PIN_NO(143) | 0) -#define PINMUX_GPIO143__FUNC_AUD_DAT_MISO1 (MTK_PIN_NO(143) | 1) -#define PINMUX_GPIO143__FUNC_AUD_DAT_MOSI1 (MTK_PIN_NO(143) | 2) -#define PINMUX_GPIO143__FUNC_I2S0_DI (MTK_PIN_NO(143) | 3) -#define PINMUX_GPIO143__FUNC_VOW_CLK_MISO (MTK_PIN_NO(143) | 4) -#define PINMUX_GPIO143__FUNC_UFS_MPHY_SCL (MTK_PIN_NO(143) | 6) -#define PINMUX_GPIO143__FUNC_DBG_MON_B26 (MTK_PIN_NO(143) | 7) - -#define PINMUX_GPIO144__FUNC_GPIO144 (MTK_PIN_NO(144) | 0) -#define PINMUX_GPIO144__FUNC_PWRAP_SPI0_MI (MTK_PIN_NO(144) | 1) -#define PINMUX_GPIO144__FUNC_PWRAP_SPI0_MO (MTK_PIN_NO(144) | 2) - -#define PINMUX_GPIO145__FUNC_GPIO145 (MTK_PIN_NO(145) | 0) -#define PINMUX_GPIO145__FUNC_PWRAP_SPI0_CSN (MTK_PIN_NO(145) | 1) - -#define PINMUX_GPIO146__FUNC_GPIO146 (MTK_PIN_NO(146) | 0) -#define PINMUX_GPIO146__FUNC_PWRAP_SPI0_MO (MTK_PIN_NO(146) | 1) -#define PINMUX_GPIO146__FUNC_PWRAP_SPI0_MI (MTK_PIN_NO(146) | 2) - -#define PINMUX_GPIO147__FUNC_GPIO147 (MTK_PIN_NO(147) | 0) -#define PINMUX_GPIO147__FUNC_PWRAP_SPI0_CK (MTK_PIN_NO(147) | 1) - -#define PINMUX_GPIO148__FUNC_GPIO148 (MTK_PIN_NO(148) | 0) -#define PINMUX_GPIO148__FUNC_SRCLKENA0 (MTK_PIN_NO(148) | 1) - -#define PINMUX_GPIO149__FUNC_GPIO149 (MTK_PIN_NO(149) | 0) -#define PINMUX_GPIO149__FUNC_SRCLKENA1 (MTK_PIN_NO(149) | 1) - -#define PINMUX_GPIO150__FUNC_GPIO150 (MTK_PIN_NO(150) | 0) -#define PINMUX_GPIO150__FUNC_PWM_A (MTK_PIN_NO(150) | 1) -#define PINMUX_GPIO150__FUNC_CMFLASH (MTK_PIN_NO(150) | 2) -#define PINMUX_GPIO150__FUNC_CLKM0 (MTK_PIN_NO(150) | 3) -#define PINMUX_GPIO150__FUNC_DBG_MON_B30 (MTK_PIN_NO(150) | 7) - -#define PINMUX_GPIO151__FUNC_GPIO151 (MTK_PIN_NO(151) | 0) -#define PINMUX_GPIO151__FUNC_PWM_B (MTK_PIN_NO(151) | 1) -#define PINMUX_GPIO151__FUNC_CMVREF0 (MTK_PIN_NO(151) | 2) -#define PINMUX_GPIO151__FUNC_CLKM1 
(MTK_PIN_NO(151) | 3) -#define PINMUX_GPIO151__FUNC_DBG_MON_B20 (MTK_PIN_NO(151) | 7) - -#define PINMUX_GPIO152__FUNC_GPIO152 (MTK_PIN_NO(152) | 0) -#define PINMUX_GPIO152__FUNC_PWM_C (MTK_PIN_NO(152) | 1) -#define PINMUX_GPIO152__FUNC_CMFLASH (MTK_PIN_NO(152) | 2) -#define PINMUX_GPIO152__FUNC_CLKM2 (MTK_PIN_NO(152) | 3) -#define PINMUX_GPIO152__FUNC_DBG_MON_B21 (MTK_PIN_NO(152) | 7) - -#define PINMUX_GPIO153__FUNC_GPIO153 (MTK_PIN_NO(153) | 0) -#define PINMUX_GPIO153__FUNC_PWM_A (MTK_PIN_NO(153) | 1) -#define PINMUX_GPIO153__FUNC_CMVREF0 (MTK_PIN_NO(153) | 2) -#define PINMUX_GPIO153__FUNC_CLKM3 (MTK_PIN_NO(153) | 3) -#define PINMUX_GPIO153__FUNC_DBG_MON_B22 (MTK_PIN_NO(153) | 7) - -#define PINMUX_GPIO154__FUNC_GPIO154 (MTK_PIN_NO(154) | 0) -#define PINMUX_GPIO154__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(154) | 1) -#define PINMUX_GPIO154__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(154) | 2) -#define PINMUX_GPIO154__FUNC_DBG_MON_B18 (MTK_PIN_NO(154) | 7) - -#define PINMUX_GPIO155__FUNC_GPIO155 (MTK_PIN_NO(155) | 0) -#define PINMUX_GPIO155__FUNC_ANT_SEL0 (MTK_PIN_NO(155) | 1) -#define PINMUX_GPIO155__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(155) | 2) -#define PINMUX_GPIO155__FUNC_CMVREF1 (MTK_PIN_NO(155) | 3) -#define PINMUX_GPIO155__FUNC_SCP_JTAG_TDI (MTK_PIN_NO(155) | 7) - -#define PINMUX_GPIO156__FUNC_GPIO156 (MTK_PIN_NO(156) | 0) -#define PINMUX_GPIO156__FUNC_ANT_SEL1 (MTK_PIN_NO(156) | 1) -#define PINMUX_GPIO156__FUNC_SRCLKENAI0 (MTK_PIN_NO(156) | 2) -#define PINMUX_GPIO156__FUNC_SCL6 (MTK_PIN_NO(156) | 3) -#define PINMUX_GPIO156__FUNC_KPCOL2 (MTK_PIN_NO(156) | 4) -#define PINMUX_GPIO156__FUNC_IDDIG (MTK_PIN_NO(156) | 5) -#define PINMUX_GPIO156__FUNC_SCP_JTAG_TCK (MTK_PIN_NO(156) | 7) - -#define PINMUX_GPIO157__FUNC_GPIO157 (MTK_PIN_NO(157) | 0) -#define PINMUX_GPIO157__FUNC_ANT_SEL2 (MTK_PIN_NO(157) | 1) -#define PINMUX_GPIO157__FUNC_SRCLKENAI1 (MTK_PIN_NO(157) | 2) -#define PINMUX_GPIO157__FUNC_SDA6 (MTK_PIN_NO(157) | 3) -#define PINMUX_GPIO157__FUNC_KPROW2 (MTK_PIN_NO(157) | 4) -#define PINMUX_GPIO157__FUNC_USB_DRVVBUS (MTK_PIN_NO(157) | 5) -#define PINMUX_GPIO157__FUNC_SCP_JTAG_TRSTN (MTK_PIN_NO(157) | 7) - -#define PINMUX_GPIO158__FUNC_GPIO158 (MTK_PIN_NO(158) | 0) -#define PINMUX_GPIO158__FUNC_ANT_SEL3 (MTK_PIN_NO(158) | 1) - -#define PINMUX_GPIO159__FUNC_GPIO159 (MTK_PIN_NO(159) | 0) -#define PINMUX_GPIO159__FUNC_ANT_SEL4 (MTK_PIN_NO(159) | 1) - -#define PINMUX_GPIO160__FUNC_GPIO160 (MTK_PIN_NO(160) | 0) -#define PINMUX_GPIO160__FUNC_ANT_SEL5 (MTK_PIN_NO(160) | 1) - -#define PINMUX_GPIO161__FUNC_GPIO161 (MTK_PIN_NO(161) | 0) -#define PINMUX_GPIO161__FUNC_SPI1_A_MI (MTK_PIN_NO(161) | 1) -#define PINMUX_GPIO161__FUNC_SCP_SPI1_MI (MTK_PIN_NO(161) | 2) -#define PINMUX_GPIO161__FUNC_IDDIG (MTK_PIN_NO(161) | 3) -#define PINMUX_GPIO161__FUNC_ANT_SEL6 (MTK_PIN_NO(161) | 4) -#define PINMUX_GPIO161__FUNC_KPCOL2 (MTK_PIN_NO(161) | 5) -#define PINMUX_GPIO161__FUNC_PTA_RXD (MTK_PIN_NO(161) | 6) -#define PINMUX_GPIO161__FUNC_DBG_MON_B19 (MTK_PIN_NO(161) | 7) - -#define PINMUX_GPIO162__FUNC_GPIO162 (MTK_PIN_NO(162) | 0) -#define PINMUX_GPIO162__FUNC_SPI1_A_CSB (MTK_PIN_NO(162) | 1) -#define PINMUX_GPIO162__FUNC_SCP_SPI1_CS (MTK_PIN_NO(162) | 2) -#define PINMUX_GPIO162__FUNC_USB_DRVVBUS (MTK_PIN_NO(162) | 3) -#define PINMUX_GPIO162__FUNC_ANT_SEL5 (MTK_PIN_NO(162) | 4) -#define PINMUX_GPIO162__FUNC_KPROW2 (MTK_PIN_NO(162) | 5) -#define PINMUX_GPIO162__FUNC_PTA_TXD (MTK_PIN_NO(162) | 6) - -#define PINMUX_GPIO163__FUNC_GPIO163 (MTK_PIN_NO(163) | 0) -#define PINMUX_GPIO163__FUNC_SPI1_A_MO (MTK_PIN_NO(163) | 1) -#define 
PINMUX_GPIO163__FUNC_SCP_SPI1_MO (MTK_PIN_NO(163) | 2) -#define PINMUX_GPIO163__FUNC_SDA1 (MTK_PIN_NO(163) | 3) -#define PINMUX_GPIO163__FUNC_ANT_SEL4 (MTK_PIN_NO(163) | 4) -#define PINMUX_GPIO163__FUNC_CMMCLK2 (MTK_PIN_NO(163) | 5) -#define PINMUX_GPIO163__FUNC_DMIC_CLK (MTK_PIN_NO(163) | 6) - -#define PINMUX_GPIO164__FUNC_GPIO164 (MTK_PIN_NO(164) | 0) -#define PINMUX_GPIO164__FUNC_SPI1_A_CLK (MTK_PIN_NO(164) | 1) -#define PINMUX_GPIO164__FUNC_SCP_SPI1_CK (MTK_PIN_NO(164) | 2) -#define PINMUX_GPIO164__FUNC_SCL1 (MTK_PIN_NO(164) | 3) -#define PINMUX_GPIO164__FUNC_ANT_SEL3 (MTK_PIN_NO(164) | 4) -#define PINMUX_GPIO164__FUNC_CMMCLK3 (MTK_PIN_NO(164) | 5) -#define PINMUX_GPIO164__FUNC_DMIC_DAT (MTK_PIN_NO(164) | 6) - -#define PINMUX_GPIO165__FUNC_GPIO165 (MTK_PIN_NO(165) | 0) -#define PINMUX_GPIO165__FUNC_PWM_B (MTK_PIN_NO(165) | 1) -#define PINMUX_GPIO165__FUNC_CMMCLK2 (MTK_PIN_NO(165) | 2) -#define PINMUX_GPIO165__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(165) | 3) -#define PINMUX_GPIO165__FUNC_TDM_MCK_2ND (MTK_PIN_NO(165) | 6) -#define PINMUX_GPIO165__FUNC_SCP_JTAG_TDO (MTK_PIN_NO(165) | 7) - -#define PINMUX_GPIO166__FUNC_GPIO166 (MTK_PIN_NO(166) | 0) -#define PINMUX_GPIO166__FUNC_ANT_SEL6 (MTK_PIN_NO(166) | 1) - -#define PINMUX_GPIO167__FUNC_GPIO167 (MTK_PIN_NO(167) | 0) -#define PINMUX_GPIO167__FUNC_RFIC0_BSI_EN (MTK_PIN_NO(167) | 1) -#define PINMUX_GPIO167__FUNC_SPM_BSI_EN (MTK_PIN_NO(167) | 2) - -#define PINMUX_GPIO168__FUNC_GPIO168 (MTK_PIN_NO(168) | 0) -#define PINMUX_GPIO168__FUNC_RFIC0_BSI_CK (MTK_PIN_NO(168) | 1) -#define PINMUX_GPIO168__FUNC_SPM_BSI_CK (MTK_PIN_NO(168) | 2) - -#define PINMUX_GPIO169__FUNC_GPIO169 (MTK_PIN_NO(169) | 0) -#define PINMUX_GPIO169__FUNC_PWM_C (MTK_PIN_NO(169) | 1) -#define PINMUX_GPIO169__FUNC_CMMCLK3 (MTK_PIN_NO(169) | 2) -#define PINMUX_GPIO169__FUNC_CMVREF1 (MTK_PIN_NO(169) | 3) -#define PINMUX_GPIO169__FUNC_ANT_SEL7 (MTK_PIN_NO(169) | 4) -#define PINMUX_GPIO169__FUNC_AGPS_SYNC (MTK_PIN_NO(169) | 5) -#define PINMUX_GPIO169__FUNC_TDM_BCK_2ND (MTK_PIN_NO(169) | 6) -#define PINMUX_GPIO169__FUNC_SCP_JTAG_TMS (MTK_PIN_NO(169) | 7) - -#define PINMUX_GPIO170__FUNC_GPIO170 (MTK_PIN_NO(170) | 0) -#define PINMUX_GPIO170__FUNC_I2S1_BCK (MTK_PIN_NO(170) | 1) -#define PINMUX_GPIO170__FUNC_I2S3_BCK (MTK_PIN_NO(170) | 2) -#define PINMUX_GPIO170__FUNC_SCL7 (MTK_PIN_NO(170) | 3) -#define PINMUX_GPIO170__FUNC_I2S5_BCK (MTK_PIN_NO(170) | 4) -#define PINMUX_GPIO170__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(170) | 5) -#define PINMUX_GPIO170__FUNC_TDM_LRCK_2ND (MTK_PIN_NO(170) | 6) -#define PINMUX_GPIO170__FUNC_ANT_SEL3 (MTK_PIN_NO(170) | 7) - -#define PINMUX_GPIO171__FUNC_GPIO171 (MTK_PIN_NO(171) | 0) -#define PINMUX_GPIO171__FUNC_I2S1_LRCK (MTK_PIN_NO(171) | 1) -#define PINMUX_GPIO171__FUNC_I2S3_LRCK (MTK_PIN_NO(171) | 2) -#define PINMUX_GPIO171__FUNC_SDA7 (MTK_PIN_NO(171) | 3) -#define PINMUX_GPIO171__FUNC_I2S5_LRCK (MTK_PIN_NO(171) | 4) -#define PINMUX_GPIO171__FUNC_URXD1 (MTK_PIN_NO(171) | 5) -#define PINMUX_GPIO171__FUNC_TDM_DATA0_2ND (MTK_PIN_NO(171) | 6) -#define PINMUX_GPIO171__FUNC_ANT_SEL4 (MTK_PIN_NO(171) | 7) - -#define PINMUX_GPIO172__FUNC_GPIO172 (MTK_PIN_NO(172) | 0) -#define PINMUX_GPIO172__FUNC_I2S1_DO (MTK_PIN_NO(172) | 1) -#define PINMUX_GPIO172__FUNC_I2S3_DO (MTK_PIN_NO(172) | 2) -#define PINMUX_GPIO172__FUNC_SCL8 (MTK_PIN_NO(172) | 3) -#define PINMUX_GPIO172__FUNC_I2S5_DO (MTK_PIN_NO(172) | 4) -#define PINMUX_GPIO172__FUNC_UTXD1 (MTK_PIN_NO(172) | 5) -#define PINMUX_GPIO172__FUNC_TDM_DATA1_2ND (MTK_PIN_NO(172) | 6) -#define PINMUX_GPIO172__FUNC_ANT_SEL5 (MTK_PIN_NO(172) 
-
-#endif /* __MT8183-PINFUNC_H */
diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
index f90df6439c08..1933045da95d 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
@@ -14,7 +14,7 @@
 #include
 #include
 #include
-#include "mt8183-pinfunc.h"
+#include <dt-bindings/pinctrl/mt8183-pinfunc.h>

 / {
 	compatible = "mediatek,mt8183";
-- cgit

From d21292f13f1f0721d60e8122e2db46bea8cf6950 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Mon, 9 Aug 2021 16:24:28 +0100
Subject: KVM: arm64: Add hyp_spin_is_locked() for basic locking assertions at EL2

Introduce hyp_spin_is_locked() so that functions can easily assert that
a given lock is held (albeit possibly by another CPU!) without having
to drag full lockdep support up to EL2.

Signed-off-by: Will Deacon
Signed-off-by: Quentin Perret
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210809152448.1810400-2-qperret@google.com
---
 arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
index 76b537f8d1c6..04f65b655fcf 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
@@ -15,6 +15,7 @@

 #include
 #include
+#include

 typedef union hyp_spinlock {
 	u32 __val;
@@ -89,4 +90,11 @@ static inline void hyp_spin_unlock(hyp_spinlock_t *lock)
 	: "memory");
 }

+static inline bool hyp_spin_is_locked(hyp_spinlock_t *lock)
+{
+	hyp_spinlock_t lockval = READ_ONCE(*lock);
+
+	return lockval.owner != lockval.next;
+}
+
 #endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */
-- cgit
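For context, a minimal sketch of how hyp_spin_is_locked() is meant to be
consumed; this is illustrative only and not part of the series, and
update_protected_state() is a hypothetical function name. The ticket
encoding means the lock is held exactly when owner != next:

/*
 * Sketch: assert that a hyp spinlock is held (possibly by another CPU)
 * before touching the data it protects.
 */
static void update_protected_state(hyp_spinlock_t *lock, u64 *state)
{
	BUG_ON(!hyp_spin_is_locked(lock));	/* caller must hold the lock */
	*state |= BIT(0);			/* protected update */
}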
From 8e049e0daf23aa380c264e5e15e4c64ea5497ed7 Mon Sep 17 00:00:00 2001
From: Quentin Perret
Date: Mon, 9 Aug 2021 16:24:29 +0100
Subject: KVM: arm64: Introduce hyp_assert_lock_held()

Introduce a poor man's lockdep implementation at EL2 which allows us to
BUG() whenever a hyp spinlock is not held when it should be. Hide this
feature behind a new Kconfig option that targets the EL2 object
specifically, instead of piggybacking on the existing CONFIG_LOCKDEP.

EL2 cannot WARN() cleanly to report locking issues, hence BUG() is the
only option, and it is not clear whether we want this widely enabled.
This is most likely going to be useful for local testing until the EL2
WARN() situation has improved.

Signed-off-by: Quentin Perret
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210809152448.1810400-3-qperret@google.com
---
 arch/arm64/kvm/Kconfig                     |  9 +++++++++
 arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 17 +++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a4eba0908bfa..9b9721895e5c 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -46,6 +46,15 @@ if KVM

 source "virt/kvm/Kconfig"

+config NVHE_EL2_DEBUG
+	bool "Debug mode for non-VHE EL2 object"
+	help
+	  Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
+	  Failure reports will BUG() in the hypervisor. This is intended for
+	  local EL2 hypervisor development.
+
+	  If unsure, say N.
+
 endif # KVM

 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
index 04f65b655fcf..4652fd04bdbe 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
@@ -97,4 +97,21 @@ static inline bool hyp_spin_is_locked(hyp_spinlock_t *lock)
 	return lockval.owner != lockval.next;
 }

+#ifdef CONFIG_NVHE_EL2_DEBUG
+static inline void hyp_assert_lock_held(hyp_spinlock_t *lock)
+{
+	/*
+	 * The __pkvm_init() path accesses protected data-structures without
+	 * holding locks as the other CPUs are guaranteed to not enter EL2
+	 * concurrently at this point in time. The point by which EL2 is
+	 * initialized on all CPUs is reflected in the pkvm static key, so
+	 * wait until it is set before checking the lock state.
+	 */
+	if (static_branch_likely(&kvm_protected_mode_initialized))
+		BUG_ON(!hyp_spin_is_locked(lock));
+}
+#else
+static inline void hyp_assert_lock_held(hyp_spinlock_t *lock) { }
+#endif
+
 #endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */
-- cgit
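As a usage sketch (hypothetical names, not from the series), a function
whose contract requires a lock can document that contract with
hyp_assert_lock_held(); with CONFIG_NVHE_EL2_DEBUG=n the assertion
compiles away to an empty inline:

/*
 * Sketch only: __recompute_ownership() and "host_lock" are hypothetical.
 * Under CONFIG_NVHE_EL2_DEBUG this BUG()s if the lock is not held.
 */
static void __recompute_ownership(hyp_spinlock_t *host_lock)
{
	hyp_assert_lock_held(host_lock);
	/* ... manipulate state protected by host_lock ... */
}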
Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-4-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 40 +++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d938ce95d3bd..74280a753efb 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -208,6 +208,25 @@ static inline int __host_stage2_idmap(u64 start, u64 end, prot, &host_s2_pool); } +/* + * The pool has been provided with enough pages to cover all of memory with + * page granularity, but it is difficult to know how much of the MMIO range + * we will need to cover upfront, so we may need to 'recycle' the pages if we + * run out. + */ +#define host_stage2_try(fn, ...) \ + ({ \ + int __ret; \ + hyp_assert_lock_held(&host_kvm.lock); \ + __ret = fn(__VA_ARGS__); \ + if (__ret == -ENOMEM) { \ + __ret = host_stage2_unmap_dev_all(); \ + if (!__ret) \ + __ret = fn(__VA_ARGS__); \ + } \ + __ret; \ + }) + static int host_stage2_idmap(u64 addr) { enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; @@ -223,22 +242,7 @@ static int host_stage2_idmap(u64 addr) if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot); - if (ret != -ENOMEM) - goto unlock; - - /* - * The pool has been provided with enough pages to cover all of memory - * with page granularity, but it is difficult to know how much of the - * MMIO range we will need to cover upfront, so we may need to 'recycle' - * the pages if we run out. - */ - ret = host_stage2_unmap_dev_all(); - if (ret) - goto unlock; - - ret = __host_stage2_idmap(range.start, range.end, prot); - + ret = host_stage2_try(__host_stage2_idmap, range.start, range.end, prot); unlock: hyp_spin_unlock(&host_kvm.lock); @@ -257,8 +261,8 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) return -EINVAL; hyp_spin_lock(&host_kvm.lock); - ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start, - &host_s2_pool, pkvm_hyp_id); + ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, + start, end - start, &host_s2_pool, pkvm_hyp_id); hyp_spin_unlock(&host_kvm.lock); return ret != -EAGAIN ? ret : 0; -- cgit From 51add457733bbc4a442fc280d73d14bfe262e4a0 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:32 +0100 Subject: KVM: arm64: Expose page-table helpers The KVM pgtable API exposes the kvm_pgtable_walk() function to allow the definition of walkers outside of pgtable.c. However, it is not easy to implement any of those walkers without some of the low-level helpers. Move some of them to the header file to allow re-use from other places. 
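(A sketch of the intended kind of re-use, assuming the helpers exposed below and the existing kvm_pgtable_walk() API; the walker itself is hypothetical and merely counts valid leaf entries.)

static int count_valid_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag, void * const arg)
{
	if (kvm_pte_valid(*ptep))
		(*(u64 *)arg)++;

	return 0;
}

static u64 count_valid_leaves(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	u64 nr = 0;
	struct kvm_pgtable_walker walker = {
		.cb	= count_valid_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &nr,
	};

	kvm_pgtable_walk(pgt, addr, size, &walker);

	return nr;
}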
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-6-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 40 ++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 39 ----------------------------------- 2 files changed, 40 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index e42b55bd50a2..83c21d35be10 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -25,6 +25,46 @@ static inline u64 kvm_get_parange(u64 mmfr0) typedef u64 kvm_pte_t; +#define KVM_PTE_VALID BIT(0) + +#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) +#define KVM_PTE_ADDR_51_48 GENMASK(15, 12) + +static inline bool kvm_pte_valid(kvm_pte_t pte) +{ + return pte & KVM_PTE_VALID; +} + +static inline u64 kvm_pte_to_phys(kvm_pte_t pte) +{ + u64 pa = pte & KVM_PTE_ADDR_MASK; + + if (PAGE_SHIFT == 16) + pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; + + return pa; +} + +static inline u64 kvm_granule_shift(u32 level) +{ + /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ + return ARM64_HW_PGTABLE_LEVEL_SHIFT(level); +} + +static inline u64 kvm_granule_size(u32 level) +{ + return BIT(kvm_granule_shift(level)); +} + +static inline bool kvm_level_supports_block_mapping(u32 level) +{ + /* + * Reject invalid block mappings and don't bother with 4TB mappings for + * 52-bit PAs. + */ + return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1)); +} + /** * struct kvm_pgtable_mm_ops - Memory management callbacks. * @zalloc_page: Allocate a single zeroed memory page. diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 78f36bd5df6c..49d768b92997 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -11,16 +11,12 @@ #include #include -#define KVM_PTE_VALID BIT(0) #define KVM_PTE_TYPE BIT(1) #define KVM_PTE_TYPE_BLOCK 0 #define KVM_PTE_TYPE_PAGE 1 #define KVM_PTE_TYPE_TABLE 1 -#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) -#define KVM_PTE_ADDR_51_48 GENMASK(15, 12) - #define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) @@ -61,17 +57,6 @@ struct kvm_pgtable_walk_data { u64 end; }; -static u64 kvm_granule_shift(u32 level) -{ - /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ - return ARM64_HW_PGTABLE_LEVEL_SHIFT(level); -} - -static u64 kvm_granule_size(u32 level) -{ - return BIT(kvm_granule_shift(level)); -} - #define KVM_PHYS_INVALID (-1ULL) static bool kvm_phys_is_valid(u64 phys) @@ -79,15 +64,6 @@ static bool kvm_phys_is_valid(u64 phys) return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX)); } -static bool kvm_level_supports_block_mapping(u32 level) -{ - /* - * Reject invalid block mappings and don't bother with 4TB mappings for - * 52-bit PAs. 
- */ - return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1)); -} - static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) { u64 granule = kvm_granule_size(level); @@ -135,11 +111,6 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) return __kvm_pgd_page_idx(&pgt, -1ULL) + 1; } -static bool kvm_pte_valid(kvm_pte_t pte) -{ - return pte & KVM_PTE_VALID; -} - static bool kvm_pte_table(kvm_pte_t pte, u32 level) { if (level == KVM_PGTABLE_MAX_LEVELS - 1) @@ -151,16 +122,6 @@ static bool kvm_pte_table(kvm_pte_t pte, u32 level) return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE; } -static u64 kvm_pte_to_phys(kvm_pte_t pte) -{ - u64 pa = pte & KVM_PTE_ADDR_MASK; - - if (PAGE_SHIFT == 16) - pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; - - return pa; -} - static kvm_pte_t kvm_phys_to_pte(u64 pa) { kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; -- cgit From c4f0935e4d957bfcea25ad76860445660a60f3fd Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:33 +0100 Subject: KVM: arm64: Optimize host memory aborts The kvm_pgtable_stage2_find_range() function is used in the host memory abort path to try and look for the largest block mapping that can be used to map the faulting address. In order to do so, the function currently walks the stage-2 page-table and looks for existing incompatible mappings within the range of the largest possible block. If incompatible mappings are found, it tries the same procedure again, but using a smaller block range, and repeats until a matching range is found (potentially up to page granularity). While this approach has benefits (mostly in the fact that it proactively coalesces host stage-2 mappings), it can be slow if the ranges are fragmented, and it isn't optimized to deal with CPUs faulting on the same IPA as all of them will do all the work every time. To avoid these issues, remove kvm_pgtable_stage2_find_range(), and walk the page-table only once in the host_mem_abort() path to find the closest leaf to the input address. With this, use the corresponding range if it is invalid and not owned by another entity. If a valid leaf is found, return -EAGAIN similar to what is done in the kvm_pgtable_stage2_map() path to optimize concurrent faults. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-7-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 30 -------------- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 45 ++++++++++++++++++++- arch/arm64/kvm/hyp/pgtable.c | 74 ----------------------------------- 3 files changed, 44 insertions(+), 105 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 83c21d35be10..8d6c710c1996 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -154,16 +154,6 @@ enum kvm_pgtable_prot { #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) -/** - * struct kvm_mem_range - Range of Intermediate Physical Addresses - * @start: Start of the range. - * @end: End of the range. - */ -struct kvm_mem_range { - u64 start; - u64 end; -}; - /** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. 
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid @@ -491,24 +481,4 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, */ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, kvm_pte_t *ptep, u32 *level); - -/** - * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical - * Addresses with compatible permission - * attributes. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). - * @addr: Address that must be covered by the range. - * @prot: Protection attributes that the range must be compatible with. - * @range: Range structure used to limit the search space at call time and - * that will hold the result. - * - * The offset of @addr within a page is ignored. An IPA is compatible with @prot - * iff its corresponding stage-2 page-table entry has default ownership and, if - * valid, is mapped with protection attributes identical to @prot. - * - * Return: 0 on success, negative error code on failure. - */ -int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr, - enum kvm_pgtable_prot prot, - struct kvm_mem_range *range); #endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 74280a753efb..2148d3968aa5 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -159,6 +159,11 @@ static int host_stage2_unmap_dev_all(void) return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr); } +struct kvm_mem_range { + u64 start; + u64 end; +}; + static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) { int cur, left = 0, right = hyp_memblock_nr; @@ -227,6 +232,44 @@ static inline int __host_stage2_idmap(u64 start, u64 end, __ret; \ }) +static inline bool range_included(struct kvm_mem_range *child, + struct kvm_mem_range *parent) +{ + return parent->start <= child->start && child->end <= parent->end; +} + +static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) +{ + struct kvm_mem_range cur; + kvm_pte_t pte; + u32 level; + int ret; + + hyp_assert_lock_held(&host_kvm.lock); + ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level); + if (ret) + return ret; + + if (kvm_pte_valid(pte)) + return -EAGAIN; + + if (pte) + return -EPERM; + + do { + u64 granule = kvm_granule_size(level); + cur.start = ALIGN_DOWN(addr, granule); + cur.end = cur.start + granule; + level++; + } while ((level < KVM_PGTABLE_MAX_LEVELS) && + !(kvm_level_supports_block_mapping(level) && + range_included(&cur, range))); + + *range = cur; + + return 0; +} + static int host_stage2_idmap(u64 addr) { enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; @@ -238,7 +281,7 @@ static int host_stage2_idmap(u64 addr) prot |= KVM_PGTABLE_PROT_X; hyp_spin_lock(&host_kvm.lock); - ret = kvm_pgtable_stage2_find_range(&host_kvm.pgt, addr, prot, &range); + ret = host_stage2_adjust_range(addr, &range); if (ret) goto unlock; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 49d768b92997..4dff2ad39ee4 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1102,77 +1102,3 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz); pgt->pgd = NULL; } - -#define KVM_PTE_LEAF_S2_COMPAT_MASK (KVM_PTE_LEAF_ATTR_S2_PERMS | \ - KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | \ - KVM_PTE_LEAF_ATTR_S2_IGNORED) - -static int stage2_check_permission_walker(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - 
enum kvm_pgtable_walk_flags flag, - void * const arg) -{ - kvm_pte_t old_attr, pte = *ptep, *new_attr = arg; - - /* - * Compatible mappings are either invalid and owned by the page-table - * owner (whose id is 0), or valid with matching permission attributes. - */ - if (kvm_pte_valid(pte)) { - old_attr = pte & KVM_PTE_LEAF_S2_COMPAT_MASK; - if (old_attr != *new_attr) - return -EEXIST; - } else if (pte) { - return -EEXIST; - } - - return 0; -} - -int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr, - enum kvm_pgtable_prot prot, - struct kvm_mem_range *range) -{ - kvm_pte_t attr; - struct kvm_pgtable_walker check_perm_walker = { - .cb = stage2_check_permission_walker, - .flags = KVM_PGTABLE_WALK_LEAF, - .arg = &attr, - }; - u64 granule, start, end; - u32 level; - int ret; - - ret = stage2_set_prot_attr(pgt, prot, &attr); - if (ret) - return ret; - attr &= KVM_PTE_LEAF_S2_COMPAT_MASK; - - for (level = pgt->start_level; level < KVM_PGTABLE_MAX_LEVELS; level++) { - granule = kvm_granule_size(level); - start = ALIGN_DOWN(addr, granule); - end = start + granule; - - if (!kvm_level_supports_block_mapping(level)) - continue; - - if (start < range->start || range->end < end) - continue; - - /* - * Check the presence of existing mappings with incompatible - * permissions within the current block range, and try one level - * deeper if one is found. - */ - ret = kvm_pgtable_walk(pgt, start, granule, &check_perm_walker); - if (ret != -EEXIST) - break; - } - - if (!ret) { - range->start = start; - range->end = end; - } - - return ret; -} -- cgit From 178cac08d588e7406a09351a992f57892d8d9cc9 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:34 +0100 Subject: KVM: arm64: Rename KVM_PTE_LEAF_ATTR_S2_IGNORED The ignored bits for both stage-1 and stage-2 page and block descriptors are in [55:58], so rename KVM_PTE_LEAF_ATTR_S2_IGNORED to make it applicable to both. And while at it, since these bits are more commonly known as 'software' bits, rename accordingly. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-8-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 4dff2ad39ee4..59a394d82de3 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -36,6 +36,8 @@ #define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51) +#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) + #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) #define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) @@ -44,8 +46,6 @@ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ KVM_PTE_LEAF_ATTR_HI_S2_XN) -#define KVM_PTE_LEAF_ATTR_S2_IGNORED GENMASK(58, 55) - #define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56) #define KVM_MAX_OWNER_ID 1 -- cgit From 8a0282c68121e53ab17413283cfed408a47e1a2a Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:35 +0100 Subject: KVM: arm64: Don't overwrite software bits with owner id We will soon start annotating page-tables with new flags to track shared pages and such, and we will do so in valid mappings using software bits in the PTEs, as provided by the architecture. However, it is possible that we will need to use those flags to annotate invalid mappings as well in the future, similar to what we do to track page ownership in the host stage-2. 
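(For context, a sketch of how the owner id is encoded into an invalid PTE, with the masks as they stand before this patch; note the overlap with the software bits of valid mappings in bits [58:56].)

#define KVM_PTE_LEAF_ATTR_HI_SW		GENMASK(58, 55)	/* valid PTEs */
#define KVM_INVALID_PTE_OWNER_MASK	GENMASK(63, 56)	/* invalid PTEs */

static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
{
	return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
}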
In order to facilitate the annotation of invalid mappings with such flags, it would be preferable to re-use the same bits as for valid mappings (bits [58-55]), but these are currently used for ownership encoding. Since we have plenty of bits left to use in invalid mappings, move the ownership bits further down the PTE to avoid the conflict. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-9-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 59a394d82de3..1ee1168ac32d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -46,7 +46,7 @@ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ KVM_PTE_LEAF_ATTR_HI_S2_XN) -#define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56) +#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) #define KVM_MAX_OWNER_ID 1 struct kvm_pgtable_walk_data { -- cgit From b53846c5f279cb5329b82f19a7d313f02cb9d21c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:36 +0100 Subject: KVM: arm64: Tolerate re-creating hyp mappings to set software bits The current hypervisor stage-1 mapping code doesn't allow changing an existing valid mapping. Relax this condition by allowing changes that only target software bits, as that will soon be needed to annotate shared pages. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-10-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 1ee1168ac32d..2689fcb7901d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -362,6 +362,21 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) return 0; } +static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new) +{ + /* + * Tolerate KVM recreating the exact same mapping, or changing software + * bits if the existing mapping was valid. + */ + if (old == new) + return false; + + if (!kvm_pte_valid(old)) + return true; + + return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW); +} + static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct hyp_map_data *data) { @@ -371,9 +386,8 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!kvm_block_mapping_supported(addr, end, phys, level)) return false; - /* Tolerate KVM recreating the exact same mapping */ new = kvm_init_valid_leaf_pte(phys, data->attr, level); - if (old != new && !WARN_ON(kvm_pte_valid(old))) + if (hyp_pte_needs_update(old, new)) smp_store_release(ptep, new); data->phys += granule; -- cgit From 5651311941105ca077d3ab74dd4a92e646ecf7fb Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:37 +0100 Subject: KVM: arm64: Enable forcing page-level stage-2 mappings Much of the stage-2 manipulation logic relies on being able to destroy block mappings if e.g. installing a smaller mapping in the range. The rationale for this behaviour is that stage-2 mappings can always be re-created lazily. However, this gets more complicated when the stage-2 page-table is used to store metadata about the underlying pages. 
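(The callback introduced below has the following shape; the always-true policy shown here is purely hypothetical.)

/* Matches the new kvm_pgtable_force_pte_cb_t type. */
static bool always_force_pte(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	return true;	/* never install block mappings in this range */
}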
In such cases, destroying a block mapping may lead to losing part of the state, and confuse the user of those metadata (such as the hypervisor in nVHE protected mode). To avoid this, introduce a callback function in the pgtable struct which is called during all map operations to determine whether the mappings can use blocks, or should be forced to page granularity. This is used by the hypervisor when creating the host stage-2 to force page-level mappings when using non-default protection attributes. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-11-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 66 ++++++++++++++++++++++------------- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 34 +++++++++++++++--- arch/arm64/kvm/hyp/pgtable.c | 29 ++++++++++++--- 3 files changed, 94 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 8d6c710c1996..ca0c039547b5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -115,25 +115,6 @@ enum kvm_pgtable_stage2_flags { KVM_PGTABLE_S2_IDMAP = BIT(1), }; -/** - * struct kvm_pgtable - KVM page-table. - * @ia_bits: Maximum input address size, in bits. - * @start_level: Level at which the page-table walk starts. - * @pgd: Pointer to the first top-level entry of the page-table. - * @mm_ops: Memory management callbacks. - * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. - */ -struct kvm_pgtable { - u32 ia_bits; - u32 start_level; - kvm_pte_t *pgd; - struct kvm_pgtable_mm_ops *mm_ops; - - /* Stage-2 only */ - struct kvm_s2_mmu *mmu; - enum kvm_pgtable_stage2_flags flags; -}; - /** * enum kvm_pgtable_prot - Page-table permissions and attributes. * @KVM_PGTABLE_PROT_X: Execute permission. @@ -149,11 +130,43 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_DEVICE = BIT(3), }; -#define PAGE_HYP (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X) + +#define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX +#define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW + +#define PAGE_HYP KVM_PGTABLE_PROT_RW #define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X) #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) +typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, + enum kvm_pgtable_prot prot); + +/** + * struct kvm_pgtable - KVM page-table. + * @ia_bits: Maximum input address size, in bits. + * @start_level: Level at which the page-table walk starts. + * @pgd: Pointer to the first top-level entry of the page-table. + * @mm_ops: Memory management callbacks. + * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. + * @flags: Stage-2 page-table flags. + * @force_pte_cb: Function that returns true if page level mappings must + * be used instead of block mappings. + */ +struct kvm_pgtable { + u32 ia_bits; + u32 start_level; + kvm_pte_t *pgd; + struct kvm_pgtable_mm_ops *mm_ops; + + /* Stage-2 only */ + struct kvm_s2_mmu *mmu; + enum kvm_pgtable_stage2_flags flags; + kvm_pgtable_force_pte_cb_t force_pte_cb; +}; + /** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. 
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid @@ -246,21 +259,24 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); /** - * kvm_pgtable_stage2_init_flags() - Initialise a guest stage-2 page-table. + * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. * @arch: Arch-specific KVM structure representing the guest virtual * machine. * @mm_ops: Memory management callbacks. * @flags: Stage-2 configuration flags. + * @force_pte_cb: Function that returns true if page level mappings must + * be used instead of block mappings. * * Return: 0 on success, negative error code on failure. */ -int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch, - struct kvm_pgtable_mm_ops *mm_ops, - enum kvm_pgtable_stage2_flags flags); +int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, + struct kvm_pgtable_mm_ops *mm_ops, + enum kvm_pgtable_stage2_flags flags, + kvm_pgtable_force_pte_cb_t force_pte_cb); #define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \ - kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0) + __kvm_pgtable_stage2_init(pgt, arch, mm_ops, 0, NULL) /** * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 2148d3968aa5..6fed6772c673 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -89,6 +89,8 @@ static void prepare_host_vtcr(void) id_aa64mmfr1_el1_sys_val, phys_shift); } +static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot); + int kvm_host_prepare_stage2(void *pgt_pool_base) { struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; @@ -101,8 +103,9 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) if (ret) return ret; - ret = kvm_pgtable_stage2_init_flags(&host_kvm.pgt, &host_kvm.arch, - &host_kvm.mm_ops, KVM_HOST_S2_FLAGS); + ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch, + &host_kvm.mm_ops, KVM_HOST_S2_FLAGS, + host_stage2_force_pte_cb); if (ret) return ret; @@ -270,15 +273,36 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return 0; } +static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) +{ + /* + * Block mappings must be used with care in the host stage-2 as a + * kvm_pgtable_stage2_map() operation targeting a page in the range of + * an existing block will delete the block under the assumption that + * mappings in the rest of the block range can always be rebuilt lazily. + * That assumption is correct for the host stage-2 with RWX mappings + * targeting memory or RW mappings targeting MMIO ranges (see + * host_stage2_idmap() below which implements some of the host memory + * abort logic). However, this is not safe for any other mappings where + * the host stage-2 page-table is in fact the only place where this + * state is stored. In all those cases, it is safer to use page-level + * mappings, hence avoiding to lose the state because of side-effects in + * kvm_pgtable_stage2_map(). 
+ */ + if (range_is_memory(addr, end)) + return prot != PKVM_HOST_MEM_PROT; + else + return prot != PKVM_HOST_MMIO_PROT; +} + static int host_stage2_idmap(u64 addr) { - enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; struct kvm_mem_range range; bool is_memory = find_mem_range(addr, &range); + enum kvm_pgtable_prot prot; int ret; - if (is_memory) - prot |= KVM_PGTABLE_PROT_X; + prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT; hyp_spin_lock(&host_kvm.lock); ret = host_stage2_adjust_range(addr, &range); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 2689fcb7901d..e25d829587b9 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -452,6 +452,8 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels; pgt->mm_ops = mm_ops; pgt->mmu = NULL; + pgt->force_pte_cb = NULL; + return 0; } @@ -489,6 +491,9 @@ struct stage2_map_data { void *memcache; struct kvm_pgtable_mm_ops *mm_ops; + + /* Force mappings to page granularity */ + bool force_pte; }; u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) @@ -602,6 +607,15 @@ static bool stage2_pte_executable(kvm_pte_t pte) return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); } +static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level, + struct stage2_map_data *data) +{ + if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1))) + return false; + + return kvm_block_mapping_supported(addr, end, data->phys, level); +} + static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) @@ -611,7 +625,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - if (!kvm_block_mapping_supported(addr, end, phys, level)) + if (!stage2_leaf_mapping_allowed(addr, end, level, data)) return -E2BIG; if (kvm_phys_is_valid(phys)) @@ -655,7 +669,7 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, if (data->anchor) return 0; - if (!kvm_block_mapping_supported(addr, end, data->phys, level)) + if (!stage2_leaf_mapping_allowed(addr, end, level, data)) return 0; data->childp = kvm_pte_follow(*ptep, data->mm_ops); @@ -785,6 +799,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, .mmu = pgt->mmu, .memcache = mc, .mm_ops = pgt->mm_ops, + .force_pte = pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot), }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, @@ -816,6 +831,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, .memcache = mc, .mm_ops = pgt->mm_ops, .owner_id = owner_id, + .force_pte = true, }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, @@ -1057,9 +1073,11 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) return kvm_pgtable_walk(pgt, addr, size, &walker); } -int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch, - struct kvm_pgtable_mm_ops *mm_ops, - enum kvm_pgtable_stage2_flags flags) + +int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, + struct kvm_pgtable_mm_ops *mm_ops, + enum kvm_pgtable_stage2_flags flags, + kvm_pgtable_force_pte_cb_t force_pte_cb) { size_t pgd_sz; u64 vtcr = arch->vtcr; @@ -1077,6 +1095,7 @@ int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch pgt->mm_ops = mm_ops; pgt->mmu = &arch->mmu; pgt->flags = flags; + 
pgt->force_pte_cb = force_pte_cb; /* Ensure zeroed PGD pages are visible to the hardware walker */ dsb(ishst); -- cgit From 4505e9b624cefafa4b75d8a28e72f32076c33375 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:38 +0100 Subject: KVM: arm64: Allow populating software bits Introduce infrastructure allowing to manipulate software bits in stage-1 and stage-2 page-tables using additional entries in the kvm_pgtable_prot enum. This is heavily inspired by Marc's implementation of a similar feature in the NV patch series, but adapted to allow stage-1 changes as well: https://lore.kernel.org/kvmarm/20210510165920.1913477-56-maz@kernel.org/ Suggested-by: Marc Zyngier Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-12-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 12 +++++++++++- arch/arm64/kvm/hyp/pgtable.c | 5 +++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index ca0c039547b5..bfea573703d7 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -121,6 +121,10 @@ enum kvm_pgtable_stage2_flags { * @KVM_PGTABLE_PROT_W: Write permission. * @KVM_PGTABLE_PROT_R: Read permission. * @KVM_PGTABLE_PROT_DEVICE: Device attributes. + * @KVM_PGTABLE_PROT_SW0: Software bit 0. + * @KVM_PGTABLE_PROT_SW1: Software bit 1. + * @KVM_PGTABLE_PROT_SW2: Software bit 2. + * @KVM_PGTABLE_PROT_SW3: Software bit 3. */ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_X = BIT(0), @@ -128,6 +132,11 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_R = BIT(2), KVM_PGTABLE_PROT_DEVICE = BIT(3), + + KVM_PGTABLE_PROT_SW0 = BIT(55), + KVM_PGTABLE_PROT_SW1 = BIT(56), + KVM_PGTABLE_PROT_SW2 = BIT(57), + KVM_PGTABLE_PROT_SW3 = BIT(58), }; #define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) @@ -420,7 +429,8 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr); * If there is a valid, leaf page-table entry used to translate @addr, then * relax the permissions in that entry according to the read, write and * execute permissions specified by @prot. No permissions are removed, and - * TLB invalidation is performed after updating the entry. + * TLB invalidation is performed after updating the entry. Software bits cannot + * be set or cleared using kvm_pgtable_stage2_relax_perms(). * * Return: 0 on success, negative error code on failure. 
*/ diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index e25d829587b9..cff744136044 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -357,6 +357,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; + attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; *ptep = attr; return 0; @@ -558,6 +559,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; + attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; *ptep = attr; return 0; @@ -1025,6 +1027,9 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, u32 level; kvm_pte_t set = 0, clr = 0; + if (prot & KVM_PTE_LEAF_ATTR_HI_SW) + return -EINVAL; + if (prot & KVM_PGTABLE_PROT_R) set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; -- cgit From ec250a67ea8db6209918a389554cf3aec0395b1f Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:39 +0100 Subject: KVM: arm64: Add helpers to tag shared pages in SW bits We will soon start annotating shared pages in page-tables in nVHE protected mode. Define all the states in which a page can be (owned, shared and owned, shared and borrowed), and provide helpers allowing to convert this into SW bits annotations using the matching prot attributes. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-13-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 9c227d87c36d..87b1690c439f 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -12,6 +12,32 @@ #include #include +/* + * SW bits 0-1 are reserved to track the memory ownership state of each page: + * 00: The page is owned exclusively by the page-table owner. + * 01: The page is owned by the page-table owner, but is shared + * with another entity. + * 10: The page is shared with, but not owned by the page-table owner. + * 11: Reserved for future use (lending). + */ +enum pkvm_page_state { + PKVM_PAGE_OWNED = 0ULL, + PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0, + PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1, +}; + +#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) +static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot, + enum pkvm_page_state state) +{ + return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state; +} + +static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) +{ + return prot & PKVM_PAGE_STATE_PROT_MASK; +} + struct host_kvm { struct kvm_arch arch; struct kvm_pgtable pgt; -- cgit From 39257da0e04e5cdb1e4a3ca715dc3d949fe8b059 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:40 +0100 Subject: KVM: arm64: Expose host stage-2 manipulation helpers We will need to manipulate the host stage-2 page-table from outside mem_protect.c soon. Introduce two functions allowing this, and make them usable to users of mem_protect.h. 
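(A usage sketch: the wrapper is hypothetical, the two locked helpers are introduced below, and pkvm_hyp_id only becomes accessible outside mem_protect.c in the next patch.)

static int hyp_take_page_ownership(phys_addr_t phys)
{
	int ret;

	hyp_spin_lock(&host_kvm.lock);
	ret = host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id);
	hyp_spin_unlock(&host_kvm.lock);

	return ret;
}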
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-14-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 ++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 87b1690c439f..0849ee8fa260 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -49,6 +49,8 @@ extern struct host_kvm host_kvm; int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); +int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); +int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 6fed6772c673..f95a5a4aa09c 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -273,6 +273,22 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return 0; } +int host_stage2_idmap_locked(phys_addr_t addr, u64 size, + enum kvm_pgtable_prot prot) +{ + hyp_assert_lock_held(&host_kvm.lock); + + return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); +} + +int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) +{ + hyp_assert_lock_held(&host_kvm.lock); + + return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, + addr, size, &host_s2_pool, owner_id); +} + static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) { /* @@ -309,7 +325,7 @@ static int host_stage2_idmap(u64 addr) if (ret) goto unlock; - ret = host_stage2_try(__host_stage2_idmap, range.start, range.end, prot); + ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot); unlock: hyp_spin_unlock(&host_kvm.lock); -- cgit From 2d77e238badb022adb364332b7d6a1d627f77145 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:41 +0100 Subject: KVM: arm64: Expose pkvm_hyp_id Allow references to the hypervisor's owner id from outside mem_protect.c. 
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-15-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 ++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 0849ee8fa260..23316a021880 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -46,6 +46,8 @@ struct host_kvm { }; extern struct host_kvm host_kvm; +extern const u8 pkvm_hyp_id; + int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index f95a5a4aa09c..ee255171945c 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -31,7 +31,7 @@ static struct hyp_pool host_s2_pool; u64 id_aa64mmfr0_el1_sys_val; u64 id_aa64mmfr1_el1_sys_val; -static const u8 pkvm_hyp_id = 1; +const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) { -- cgit From e009dce1292c37cf8ee7c33e0887ad3c642f980f Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:42 +0100 Subject: KVM: arm64: Introduce addr_is_memory() Introduce a helper usable in nVHE protected mode to check whether a physical address is in a RAM region or not. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-16-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 + arch/arm64/kvm/hyp/nvhe/mem_protect.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 23316a021880..49db0ec5a606 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -51,6 +51,7 @@ extern const u8 pkvm_hyp_id; int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); +bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index ee255171945c..cb023d31666e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -197,6 +197,13 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) return false; } +bool addr_is_memory(phys_addr_t phys) +{ + struct kvm_mem_range range; + + return find_mem_range(phys, &range); +} + static bool range_is_memory(u64 start, u64 end) { struct kvm_mem_range r1, r2; -- cgit From 9024b3d0069ab4b8ef70cf55f0ee09e61f3a0747 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:43 +0100 Subject: KVM: arm64: Enable retrieving protections attributes of PTEs Introduce helper functions in the KVM stage-2 and stage-1 page-table manipulation library allowing to retrieve the enum kvm_pgtable_prot of a PTE. This will be useful to implement custom walkers outside of pgtable.c. 
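(A sketch of a prospective user, combining the existing kvm_pgtable_get_leaf() with the new stage-2 helper; the function itself is hypothetical.)

static bool stage2_addr_is_writable(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte;
	u32 level;

	if (kvm_pgtable_get_leaf(pgt, addr, &pte, &level))
		return false;

	return kvm_pgtable_stage2_pte_prot(pte) & KVM_PGTABLE_PROT_W;
}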
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-17-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 20 +++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 37 ++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index bfea573703d7..027783829584 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -507,4 +507,24 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, */ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, kvm_pte_t *ptep, u32 *level); + +/** + * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a + * stage-2 Page-Table Entry. + * @pte: Page-table entry + * + * Return: protection attributes of the page-table entry in the enum + * kvm_pgtable_prot format. + */ +enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte); + +/** + * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1 + * Page-Table Entry. + * @pte: Page-table entry + * + * Return: protection attributes of the page-table entry in the enum + * kvm_pgtable_prot format. + */ +enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte); #endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index cff744136044..f8ceebe4982e 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -363,6 +363,26 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) return 0; } +enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) +{ + enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW; + u32 ap; + + if (!kvm_pte_valid(pte)) + return prot; + + if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN)) + prot |= KVM_PGTABLE_PROT_X; + + ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte); + if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO) + prot |= KVM_PGTABLE_PROT_R; + else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW) + prot |= KVM_PGTABLE_PROT_RW; + + return prot; +} + static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new) { /* @@ -565,6 +585,23 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p return 0; } +enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte) +{ + enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW; + + if (!kvm_pte_valid(pte)) + return prot; + + if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R) + prot |= KVM_PGTABLE_PROT_R; + if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W) + prot |= KVM_PGTABLE_PROT_W; + if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN)) + prot |= KVM_PGTABLE_PROT_X; + + return prot; +} + static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new) { if (!kvm_pte_valid(old) || !kvm_pte_valid(new)) -- cgit From 2c50166c62ba7f3c23c1bbdbb9324db462ddc97b Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:44 +0100 Subject: KVM: arm64: Mark host bss and rodata section as shared As the hypervisor maps the host's .bss and .rodata sections in its stage-1, make sure to tag them as shared in hyp and host page-tables. But since the hypervisor relies on the presence of these mappings, we cannot let the host in complete control of the memory regions -- it must not unshare or donate them to another entity for example. 
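(For illustration, the page-state helpers from the earlier patch in this series compose with a prot as follows; this is the annotation the diff below applies to the .rodata and .bss mappings.)

/* RO in the hyp stage-1, tagged "owned by hyp, shared with the host". */
enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);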
To prevent this, let's transfer the ownership of those ranges to the hypervisor itself, and share the pages back with the host. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-18-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/setup.c | 82 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 0b574d106519..57c27846320f 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -58,6 +58,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, { void *start, *end, *virt = hyp_phys_to_virt(phys); unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT; + enum kvm_pgtable_prot prot; int ret, i; /* Recreate the hyp page-table using the early page allocator */ @@ -83,10 +84,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO); - if (ret) - return ret; - ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO); if (ret) return ret; @@ -95,10 +92,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO); - if (ret) - return ret; - ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP); if (ret) return ret; @@ -117,6 +110,24 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, return ret; } + /* + * Map the host's .bss and .rodata sections RO in the hypervisor, but + * transfer the ownership from the host to the hypervisor itself to + * make sure it can't be donated or shared with another entity. + * + * The ownership transition requires matching changes in the host + * stage-2. This will be done later (see finalize_host_mappings()) once + * the hyp_vmemmap is addressable. + */ + prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED); + ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot); + if (ret) + return ret; + + ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot); + if (ret) + return ret; + return 0; } @@ -148,6 +159,57 @@ static void hpool_put_page(void *addr) hyp_put_page(&hpool, addr); } +static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + enum kvm_pgtable_prot prot; + enum pkvm_page_state state; + kvm_pte_t pte = *ptep; + phys_addr_t phys; + + if (!kvm_pte_valid(pte)) + return 0; + + if (level != (KVM_PGTABLE_MAX_LEVELS - 1)) + return -EINVAL; + + phys = kvm_pte_to_phys(pte); + if (!addr_is_memory(phys)) + return 0; + + /* + * Adjust the host stage-2 mappings to match the ownership attributes + * configured in the hypervisor stage-1. 
+ */ + state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte)); + switch (state) { + case PKVM_PAGE_OWNED: + return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id); + case PKVM_PAGE_SHARED_OWNED: + prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED); + break; + case PKVM_PAGE_SHARED_BORROWED: + prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); + break; + default: + return -EINVAL; + } + + return host_stage2_idmap_locked(phys, PAGE_SIZE, prot); +} + +static int finalize_host_mappings(void) +{ + struct kvm_pgtable_walker walker = { + .cb = finalize_host_mappings_walker, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker); +} + void __noreturn __pkvm_init_finalise(void) { struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); @@ -167,6 +229,10 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; + ret = finalize_host_mappings(); + if (ret) + goto out; + pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_page = hyp_zalloc_hyp_page, .phys_to_virt = hyp_phys_to_virt, -- cgit From ad0e0139a8e163245d8f44ab4f6ec3bc9b08034d Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:45 +0100 Subject: KVM: arm64: Remove __pkvm_mark_hyp Now that we mark memory owned by the hypervisor in the host stage-2 during __pkvm_init(), we no longer need to rely on the host to explicitly mark the hyp sections later on. Remove the __pkvm_mark_hyp() hypercall altogether. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-19-qperret@google.com --- arch/arm64/include/asm/kvm_asm.h | 3 +- arch/arm64/kvm/arm.c | 46 --------------------------- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 - arch/arm64/kvm/hyp/nvhe/hyp-main.c | 9 ------ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 19 ----------- 5 files changed, 1 insertion(+), 77 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9f0bf2109be7..432a9ea1f02e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -63,8 +63,7 @@ #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 -#define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp 20 -#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 21 +#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 20 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..2f378482471b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1954,57 +1954,11 @@ static void _kvm_host_prot_finalize(void *discard) WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)); } -static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) -{ - return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end); -} - -#define pkvm_mark_hyp_section(__section) \ - pkvm_mark_hyp(__pa_symbol(__section##_start), \ - __pa_symbol(__section##_end)) - static int finalize_hyp_mode(void) { - int cpu, ret; - if (!is_protected_kvm_enabled()) return 0; - ret = pkvm_mark_hyp_section(__hyp_idmap_text); - if (ret) - return ret; - - ret = pkvm_mark_hyp_section(__hyp_text); - if (ret) - return ret; - - ret = pkvm_mark_hyp_section(__hyp_rodata); - if (ret) - return ret; - - ret = pkvm_mark_hyp_section(__hyp_bss); - if (ret) - return ret; - - ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size); - if 
(ret) - return ret; - - for_each_possible_cpu(cpu) { - phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]); - phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order()); - - ret = pkvm_mark_hyp(start, end); - if (ret) - return ret; - - start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu)); - end = start + PAGE_SIZE; - ret = pkvm_mark_hyp(start, end); - if (ret) - return ret; - } - /* * Flip the static key upfront as that may no longer be possible * once the host stage 2 is installed. diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 49db0ec5a606..0118527b07b0 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -49,7 +49,6 @@ extern struct host_kvm host_kvm; extern const u8 pkvm_hyp_id; int __pkvm_prot_finalize(void); -int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 1632f001f4ed..7900d5b66ba3 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -163,14 +163,6 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) { cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); } - -static void handle___pkvm_mark_hyp(struct kvm_cpu_context *host_ctxt) -{ - DECLARE_REG(phys_addr_t, start, host_ctxt, 1); - DECLARE_REG(phys_addr_t, end, host_ctxt, 2); - - cpu_reg(host_ctxt, 1) = __pkvm_mark_hyp(start, end); -} typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -196,7 +188,6 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_create_mappings), HANDLE_FUNC(__pkvm_create_private_mapping), HANDLE_FUNC(__pkvm_prot_finalize), - HANDLE_FUNC(__pkvm_mark_hyp), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index cb023d31666e..2991dc6996b9 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -339,25 +339,6 @@ unlock: return ret; } -int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) -{ - int ret; - - /* - * host_stage2_unmap_dev_all() currently relies on MMIO mappings being - * non-persistent, so don't allow changing page ownership in MMIO range. - */ - if (!range_is_memory(start, end)) - return -EINVAL; - - hyp_spin_lock(&host_kvm.lock); - ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, - start, end - start, &host_s2_pool, pkvm_hyp_id); - hyp_spin_unlock(&host_kvm.lock); - - return ret != -EAGAIN ? ret : 0; -} - void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) { struct kvm_vcpu_fault_info fault; -- cgit From f9370010e92638f66473baf342e19de940403362 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:46 +0100 Subject: KVM: arm64: Refactor protected nVHE stage-1 locking Refactor the hypervisor stage-1 locking in nVHE protected mode to expose a new pkvm_create_mappings_locked() function. This will be used in later patches to allow walking and changing the hypervisor stage-1 without releasing the lock. 
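(A usage sketch with a hypothetical caller; the point is that both mappings are created under a single critical section on pkvm_pgd_lock, which the diff below makes possible.)

static int map_two_ranges(void *a_from, void *a_to, void *b_from, void *b_to)
{
	int ret;

	hyp_spin_lock(&pkvm_pgd_lock);
	ret = pkvm_create_mappings_locked(a_from, a_to, PAGE_HYP);
	if (!ret)
		ret = pkvm_create_mappings_locked(b_from, b_to, PAGE_HYP_RO);
	hyp_spin_unlock(&pkvm_pgd_lock);

	return ret;
}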
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-20-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mm.h | 1 + arch/arm64/kvm/hyp/nvhe/mm.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 8ec3a5a7744b..c76d7136ed9b 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -23,6 +23,7 @@ int hyp_map_vectors(void); int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); +int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); int __pkvm_create_mappings(unsigned long start, unsigned long size, unsigned long phys, enum kvm_pgtable_prot prot); unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index a8efdf0f9003..6fbe8e8030f6 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -67,13 +67,15 @@ out: return addr; } -int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) +int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot) { unsigned long start = (unsigned long)from; unsigned long end = (unsigned long)to; unsigned long virt_addr; phys_addr_t phys; + hyp_assert_lock_held(&pkvm_pgd_lock); + start = start & PAGE_MASK; end = PAGE_ALIGN(end); @@ -81,7 +83,8 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) int err; phys = hyp_virt_to_phys((void *)virt_addr); - err = __pkvm_create_mappings(virt_addr, PAGE_SIZE, phys, prot); + err = kvm_pgtable_hyp_map(&pkvm_pgtable, virt_addr, PAGE_SIZE, + phys, prot); if (err) return err; } @@ -89,6 +92,17 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) return 0; } +int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) +{ + int ret; + + hyp_spin_lock(&pkvm_pgd_lock); + ret = pkvm_create_mappings_locked(from, to, prot); + hyp_spin_unlock(&pkvm_pgd_lock); + + return ret; +} + int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back) { unsigned long start, end; -- cgit From 66c57edd3bc79e3527daaae8123f72ecd1e3fa25 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:47 +0100 Subject: KVM: arm64: Restrict EL2 stage-1 changes in protected mode The host kernel is currently able to change EL2 stage-1 mappings without restrictions thanks to the __pkvm_create_mappings() hypercall. But in a world where the host is no longer part of the TCB, this clearly poses a problem. To fix this, introduce a new hypercall to allow the host to share a physical memory page with the hypervisor, and remove the __pkvm_create_mappings() variant. The new hypercall implements ownership and permission checks before allowing the sharing operation, and it annotates the shared page in the hypervisor stage-1 and host stage-2 page-tables. 
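(On the host side the operation reduces to a single hypercall; the wrapper name is hypothetical, while the hypercall and kvm_call_hyp_nvhe() are from the series.)

static int share_page_with_hyp(phys_addr_t phys)
{
	return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, phys >> PAGE_SHIFT);
}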
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-21-qperret@google.com --- arch/arm64/include/asm/kvm_asm.h | 2 +- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 11 ++-- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 88 +++++++++++++++++++++++++++ arch/arm64/kvm/mmu.c | 28 +++++++-- 5 files changed, 118 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 432a9ea1f02e..aed2aa61766a 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -59,7 +59,7 @@ #define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14 #define __KVM_HOST_SMCCC_FUNC___pkvm_init 15 -#define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings 16 +#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp 16 #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 0118527b07b0..03e604f842e2 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -49,6 +49,7 @@ extern struct host_kvm host_kvm; extern const u8 pkvm_hyp_id; int __pkvm_prot_finalize(void); +int __pkvm_host_share_hyp(u64 pfn); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 7900d5b66ba3..2da6aa8da868 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -140,14 +140,11 @@ static void handle___pkvm_cpu_set_vector(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = pkvm_cpu_set_vector(slot); } -static void handle___pkvm_create_mappings(struct kvm_cpu_context *host_ctxt) +static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt) { - DECLARE_REG(unsigned long, start, host_ctxt, 1); - DECLARE_REG(unsigned long, size, host_ctxt, 2); - DECLARE_REG(unsigned long, phys, host_ctxt, 3); - DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4); + DECLARE_REG(u64, pfn, host_ctxt, 1); - cpu_reg(host_ctxt, 1) = __pkvm_create_mappings(start, size, phys, prot); + cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn); } static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt) @@ -185,7 +182,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__vgic_v3_restore_aprs), HANDLE_FUNC(__pkvm_init), HANDLE_FUNC(__pkvm_cpu_set_vector), - HANDLE_FUNC(__pkvm_create_mappings), + HANDLE_FUNC(__pkvm_host_share_hyp), HANDLE_FUNC(__pkvm_create_private_mapping), HANDLE_FUNC(__pkvm_prot_finalize), }; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 2991dc6996b9..8165390d3ec9 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -339,6 +339,94 @@ unlock: return ret; } +static inline bool check_prot(enum kvm_pgtable_prot prot, + enum kvm_pgtable_prot required, + enum kvm_pgtable_prot denied) +{ + return (prot & (required | denied)) == required; +} + +int __pkvm_host_share_hyp(u64 pfn) +{ + phys_addr_t addr = hyp_pfn_to_phys(pfn); + enum kvm_pgtable_prot prot, cur; + void *virt = __hyp_va(addr); + enum 
pkvm_page_state state; + kvm_pte_t pte; + int ret; + + if (!addr_is_memory(addr)) + return -EINVAL; + + hyp_spin_lock(&host_kvm.lock); + hyp_spin_lock(&pkvm_pgd_lock); + + ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL); + if (ret) + goto unlock; + if (!pte) + goto map_shared; + + /* + * Check attributes in the host stage-2 PTE. We need the page to be: + * - mapped RWX as we're sharing memory; + * - not borrowed, as that implies absence of ownership. + * Otherwise, we can't let it got through + */ + cur = kvm_pgtable_stage2_pte_prot(pte); + prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED); + if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) { + ret = -EPERM; + goto unlock; + } + + state = pkvm_getstate(cur); + if (state == PKVM_PAGE_OWNED) + goto map_shared; + + /* + * Tolerate double-sharing the same page, but this requires + * cross-checking the hypervisor stage-1. + */ + if (state != PKVM_PAGE_SHARED_OWNED) { + ret = -EPERM; + goto unlock; + } + + ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL); + if (ret) + goto unlock; + + /* + * If the page has been shared with the hypervisor, it must be + * already mapped as SHARED_BORROWED in its stage-1. + */ + cur = kvm_pgtable_hyp_pte_prot(pte); + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); + if (!check_prot(cur, prot, ~prot)) + ret = EPERM; + goto unlock; + +map_shared: + /* + * If the page is not yet shared, adjust mappings in both page-tables + * while both locks are held. + */ + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); + ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot); + BUG_ON(ret); + + prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); + ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot); + BUG_ON(ret); + +unlock: + hyp_spin_unlock(&pkvm_pgd_lock); + hyp_spin_unlock(&host_kvm.lock); + + return ret; +} + void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) { struct kvm_vcpu_fault_info fault; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0625bf2353c2..cbab146cda6a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -259,10 +259,8 @@ static int __create_hyp_mappings(unsigned long start, unsigned long size, { int err; - if (!kvm_host_owns_hyp_mappings()) { - return kvm_call_hyp_nvhe(__pkvm_create_mappings, - start, size, phys, prot); - } + if (WARN_ON(!kvm_host_owns_hyp_mappings())) + return -EINVAL; mutex_lock(&kvm_hyp_pgd_mutex); err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot); @@ -282,6 +280,21 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr) } } +static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end) +{ + phys_addr_t addr; + int ret; + + for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) { + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, + __phys_to_pfn(addr)); + if (ret) + return ret; + } + + return 0; +} + /** * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range @@ -302,6 +315,13 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot) if (is_kernel_in_hyp_mode()) return 0; + if (!kvm_host_owns_hyp_mappings()) { + if (WARN_ON(prot != PAGE_HYP)) + return -EPERM; + return pkvm_share_hyp(kvm_kaddr_to_phys(from), + kvm_kaddr_to_phys(to)); + } + start = start & PAGE_MASK; end = PAGE_ALIGN(end); -- cgit From 64a80fb766f9a91e26930bfc56d8e7c12425df12 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:48 +0100 Subject: KVM: arm64: Make 
__pkvm_create_mappings static The __pkvm_create_mappings() function is no longer used outside of nvhe/mm.c, so make it static. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-22-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mm.h | 2 -- arch/arm64/kvm/hyp/nvhe/mm.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index c76d7136ed9b..c9a8f535212e 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -24,8 +24,6 @@ int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); -int __pkvm_create_mappings(unsigned long start, unsigned long size, - unsigned long phys, enum kvm_pgtable_prot prot); unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 6fbe8e8030f6..2fabeceb889a 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -23,8 +23,8 @@ u64 __io_map_base; struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS]; unsigned int hyp_memblock_nr; -int __pkvm_create_mappings(unsigned long start, unsigned long size, - unsigned long phys, enum kvm_pgtable_prot prot) +static int __pkvm_create_mappings(unsigned long start, unsigned long size, + unsigned long phys, enum kvm_pgtable_prot prot) { int err; -- cgit From 6fadc1241c33fe0228c94bc6a1aa6c1da8872e8b Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 11 Aug 2021 08:57:06 +0530 Subject: KVM: arm64: perf: Replace '0xf' instances with ID_AA64DFR0_PMUVER_IMP_DEF ID_AA64DFR0_PMUVER_IMP_DEF, which indicates an implementation-defined PMU, never actually gets used even though there are '0xf' instances scattered all around. Just do the macro replacement to improve readability.
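For context, PMUVer is a four-bit field of ID_AA64DFR0_EL1; the relevant encodings, quoted from the Arm ARM only as a reminder of why 0xf is special:

    /*
     * ID_AA64DFR0_EL1.PMUVer:
     *   0x0  no PMU extension implemented
     *   0x1  PMUv3 (later architecture revisions use 0x4, 0x5, 0x6, ...)
     *   0xf  IMPLEMENTATION DEFINED PMU, not architected, which is why
     *        KVM treats it the same way as "no usable PMU"
     */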
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Peter Zijlstra Cc: Marc Zyngier Cc: linux-perf-users@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier --- arch/arm64/kvm/perf.c | 2 +- arch/arm64/kvm/pmu-emul.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c index 151c31fb9860..f9bb3b14130e 100644 --- a/arch/arm64/kvm/perf.c +++ b/arch/arm64/kvm/perf.c @@ -50,7 +50,7 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { int kvm_perf_init(void) { - if (kvm_pmu_probe_pmuver() != 0xf && !is_protected_kvm_enabled()) + if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled()) static_branch_enable(&kvm_arm_pmu_available); return perf_register_guest_info_callbacks(&kvm_guest_cbs); diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index f33825c995cb..60f89bdbeebb 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -745,7 +745,7 @@ int kvm_pmu_probe_pmuver(void) struct perf_event_attr attr = { }; struct perf_event *event; struct arm_pmu *pmu; - int pmuver = 0xf; + int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF; /* * Create a dummy event that only counts user cycles. As we'll never @@ -770,7 +770,7 @@ int kvm_pmu_probe_pmuver(void) if (IS_ERR(event)) { pr_err_once("kvm: pmu event creation failed %ld\n", PTR_ERR(event)); - return 0xf; + return ID_AA64DFR0_PMUVER_IMP_DEF; } if (event->pmu) { @@ -923,7 +923,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!vcpu->kvm->arch.pmuver) vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver(); - if (vcpu->kvm->arch.pmuver == 0xf) + if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF) return -ENODEV; switch (attr->attr) { -- cgit From b31578f627177bda5c16894e3170a7a6a1236136 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 10 Aug 2021 09:59:42 +0530 Subject: arm64/mm: Define ID_AA64MMFR0_TGRAN_2_SHIFT Streamline the Stage-2 TGRAN value extraction from ID_AA64MMFR0 register by adding a page size agnostic ID_AA64MMFR0_TGRAN_2_SHIFT. This is similar to the existing Stage-1 TGRAN shift i.e ID_AA64MMFR0_TGRAN_SHIFT. 
Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628569782-30213-1-git-send-email-anshuman.khandual@arm.com --- arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/kvm/reset.c | 17 ++--------------- 2 files changed, 5 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7b9c3acba684..943d31d92b5b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1028,14 +1028,17 @@ #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN4_2_SHIFT #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 #elif defined(CONFIG_ARM64_16K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT +#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN16_2_SHIFT #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0xF #elif defined(CONFIG_ARM64_64K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN64_2_SHIFT #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 #endif diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index cba7872d69a8..20588220fe66 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -311,7 +311,7 @@ u32 get_kvm_ipa_limit(void) int kvm_set_ipa_limit(void) { - unsigned int parange, tgran_2; + unsigned int parange; u64 mmfr0; mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); @@ -322,20 +322,7 @@ int kvm_set_ipa_limit(void) * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at * Stage-2. If not, things will stop very quickly. */ - switch (PAGE_SIZE) { - default: - case SZ_4K: - tgran_2 = ID_AA64MMFR0_TGRAN4_2_SHIFT; - break; - case SZ_16K: - tgran_2 = ID_AA64MMFR0_TGRAN16_2_SHIFT; - break; - case SZ_64K: - tgran_2 = ID_AA64MMFR0_TGRAN64_2_SHIFT; - break; - } - - switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) { + switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN_2_SHIFT)) { case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE: kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n"); return -EINVAL; -- cgit From 5e5df9571c319fb107d7a523cc96fcc99961ee70 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 11 Aug 2021 16:41:15 +0530 Subject: KVM: arm64: Restrict IPA size to maximum 48 bits on 4K and 16K page size Even though ID_AA64MMFR0.PARANGE reports 52 bit PA size support, it cannot be enabled as guest IPA size on 4K or 16K page size configurations. Hence kvm_ipa_limit must be restricted to 48 bits. This change achieves required IPA capping. Before the commit c9b69a0cf0b4 ("KVM: arm64: Don't constrain maximum IPA size based on host configuration"), the problem here would have been just latent via PHYS_MASK_SHIFT (which earlier in turn capped kvm_ipa_limit), which remains capped at 48 bits on 4K and 16K configs. 
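To make the clamp concrete, here is a worked example assuming a 4K-page host whose ID_AA64MMFR0_EL1.PARANGE reads 0x6; the encodings are from the Arm ARM and the fragment simply mirrors the hunk below:

    /*
     * PARANGE encodings: 0x0 = 32, 0x1 = 36, 0x2 = 40, 0x3 = 42,
     * 0x4 = 44, 0x5 = 48, 0x6 = 52 bits of physical address.
     */
    parange = 0x6;                  /* hardware reports 52 bits */
    if (PAGE_SIZE != SZ_64K)        /* 4K (or 16K) page kernel */
            parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48);
    /* parange is now 0x5, so the IPA limit derived from it is 48 bits. */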
Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Fixes: c9b69a0cf0b4 ("KVM: arm64: Don't constrain maximum IPA size based on host configuration") Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628680275-16578-1-git-send-email-anshuman.khandual@arm.com --- arch/arm64/kvm/reset.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 20588220fe66..18ffc6ad67b8 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -317,6 +317,14 @@ int kvm_set_ipa_limit(void) mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); parange = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_PARANGE_SHIFT); + /* + * IPA size beyond 48 bits could not be supported + * on either 4K or 16K page size. Hence let's cap + * it to 48 bits, in case it's reported as larger + * on the system. + */ + if (PAGE_SIZE != SZ_64K) + parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48); /* * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at -- cgit From ed5aacc81cd41efc4d561e14af408d1003f7b855 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 26 May 2021 00:03:37 -0700 Subject: xtensa: fix kconfig unmet dependency warning for HAVE_FUTEX_CMPXCHG XTENSA should only select HAVE_FUTEX_CMPXCHG when FUTEX is set/enabled. This prevents a kconfig warning. WARNING: unmet direct dependencies detected for HAVE_FUTEX_CMPXCHG Depends on [n]: FUTEX [=n] Selected by [y]: - XTENSA [=y] && !MMU [=n] Fixes: d951ba21b959 ("xtensa: nommu: select HAVE_FUTEX_CMPXCHG") Signed-off-by: Randy Dunlap Cc: Max Filippov Cc: Chris Zankel Cc: linux-xtensa@linux-xtensa.org Message-Id: <20210526070337.28130-1-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 3878880469d1..b843902ad9fd 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -30,7 +30,7 @@ config XTENSA select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD select HAVE_FUNCTION_TRACER - select HAVE_FUTEX_CMPXCHG if !MMU + select HAVE_FUTEX_CMPXCHG if !MMU && FUTEX select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING select HAVE_PCI -- cgit From 43ba2237281a59b40ea2393da9e89ea3a68de2a5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 9 Jul 2021 04:13:23 -0700 Subject: xtensa: add fairness to IRQ handling Track which IRQs have been served at each level to make sure that no IRQ is served more than once while other IRQs at the same level are pending. 
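The effect of the mask is easier to see in isolation. Below is a small user-space toy model of the idea (an illustration written for this document, not code from the patch): with bits 3 and 5 pending at one level, each gets serviced once per round instead of the lowest bit being served repeatedly while the other starves.

    #include <stdio.h>

    int main(void)
    {
            unsigned int pending = 0x28;    /* IRQs 3 and 5 pending */
            unsigned int unhandled = ~0u;   /* nobody served this round */

            while (pending) {
                    unsigned int candidates = pending & unhandled;

                    if (!candidates) {      /* all pending IRQs served once */
                            unhandled = ~0u;        /* start a new round */
                            continue;
                    }
                    /* serve the lowest candidate, exclude it for this round */
                    unhandled ^= candidates & -candidates;
                    printf("servicing IRQ %d\n", __builtin_ctz(candidates));
                    pending &= ~(candidates & -candidates); /* toy: it deasserts */
            }
            return 0;
    }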
Signed-off-by: Max Filippov --- arch/xtensa/kernel/traps.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index efc3a29cde80..874b6efc6fb3 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -268,6 +268,7 @@ void do_interrupt(struct pt_regs *regs) XCHAL_INTLEVEL7_MASK, }; struct pt_regs *old_regs; + unsigned unhandled = ~0u; trace_hardirqs_off(); @@ -283,6 +284,10 @@ void do_interrupt(struct pt_regs *regs) for (level = LOCKLEVEL; level > 0; --level) { if (int_at_level & int_level_mask[level]) { int_at_level &= int_level_mask[level]; + if (int_at_level & unhandled) + int_at_level &= unhandled; + else + unhandled |= int_level_mask[level]; break; } } @@ -290,6 +295,8 @@ void do_interrupt(struct pt_regs *regs) if (level == 0) break; + /* clear lowest pending irq in the unhandled mask */ + unhandled ^= (int_at_level & -int_at_level); do_IRQ(__ffs(int_at_level), regs); } -- cgit From 13066c303769b5c6bc67c0b990a4eb80058fb1b4 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 10 Aug 2021 16:08:06 -0700 Subject: xtensa: ISS: don't use string pointer before NULL check Move strlen call inside the if block that checks string pointer for NULL. While at it also fix the following coccicheck warning: ./arch/xtensa/platforms/iss/console.c:204:10-11: WARNING comparing pointer to 0. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Max Filippov --- arch/xtensa/platforms/iss/console.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index 21184488c277..f6e5fc5579be 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -186,10 +186,10 @@ late_initcall(rs_init); static void iss_console_write(struct console *co, const char *s, unsigned count) { - int len = strlen(s); - - if (s != 0 && *s != 0) + if (s && *s != 0) { + int len = strlen(s); simc_write(1, s, count < len ? count : len); + } } static struct tty_driver* iss_console_device(struct console *c, int *index) -- cgit From ef71db4845c02fe9f898e09046857ab1a9ff4be8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:37:28 +0900 Subject: xtensa: remove unneeded exports These are not used in any of subdirectories. Signed-off-by: Masahiro Yamada Message-Id: <20210811163731.186125-1-masahiroy@kernel.org> Signed-off-by: Max Filippov --- arch/xtensa/Makefile | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index e9c8f064c44d..6fab7fc87579 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -17,7 +17,6 @@ variant-y := $(patsubst "%",%,$(CONFIG_XTENSA_VARIANT_NAME)) VARIANT = $(variant-y) -export VARIANT ifneq ($(VARIANT),) ifdef cross_compiling @@ -34,7 +33,6 @@ platform-$(CONFIG_XTENSA_PLATFORM_ISS) := iss platform-$(CONFIG_XTENSA_PLATFORM_XTFPGA) := xtfpga PLATFORM = $(platform-y) -export PLATFORM # temporarily until string.h is fixed KBUILD_CFLAGS += -ffreestanding -D__linux__ -- cgit From c548584438d1a447900797a40571d95fb36a605f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:37:29 +0900 Subject: xtensa: do not build variants directory None of arch/xtensa/variants/*/ has Makefile, so 'buildvar' is always empty. 
Perhaps, downstream variant code might be dropped in, but given the fact that none of upstream variants builds anything in their variant directory, I doubt this is needed. Signed-off-by: Masahiro Yamada Message-Id: <20210811163731.186125-2-masahiroy@kernel.org> Signed-off-by: Max Filippov --- arch/xtensa/Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 6fab7fc87579..3c0573fe6761 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -55,9 +55,7 @@ KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(vardirs) $(plfdirs)) KBUILD_DEFCONFIG := iss_defconfig -# Only build variant and/or platform if it includes a Makefile - -buildvar := $(shell test -e $(srctree)/arch/xtensa/variants/$(VARIANT)/Makefile && echo arch/xtensa/variants/$(VARIANT)/) +# Only build platform if it includes a Makefile buildplf := $(shell test -e $(srctree)/arch/xtensa/platforms/$(PLATFORM)/Makefile && echo arch/xtensa/platforms/$(PLATFORM)/) # Find libgcc.a @@ -66,7 +64,7 @@ LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) head-y := arch/xtensa/kernel/head.o core-y += arch/xtensa/kernel/ arch/xtensa/mm/ -core-y += $(buildvar) $(buildplf) +core-y += $(buildplf) core-y += arch/xtensa/boot/dts/ libs-y += arch/xtensa/lib/ $(LIBGCC) -- cgit From 59210499a02a58e5b2c6da763d08adecb760d74b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:37:30 +0900 Subject: xtensa: build platform directories unconditionally All of arch/xtensa/platforms/*/ have Makefile. You do not need to check the presence of Makefile. Signed-off-by: Masahiro Yamada Message-Id: <20210811163731.186125-3-masahiroy@kernel.org> Signed-off-by: Max Filippov --- arch/xtensa/Makefile | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 3c0573fe6761..093e87b889be 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -32,8 +32,6 @@ platform-$(CONFIG_XTENSA_PLATFORM_XT2000) := xt2000 platform-$(CONFIG_XTENSA_PLATFORM_ISS) := iss platform-$(CONFIG_XTENSA_PLATFORM_XTFPGA) := xtfpga -PLATFORM = $(platform-y) - # temporarily until string.h is fixed KBUILD_CFLAGS += -ffreestanding -D__linux__ KBUILD_CFLAGS += -pipe -mlongcalls -mtext-section-literals @@ -55,16 +53,13 @@ KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(vardirs) $(plfdirs)) KBUILD_DEFCONFIG := iss_defconfig -# Only build platform if it includes a Makefile -buildplf := $(shell test -e $(srctree)/arch/xtensa/platforms/$(PLATFORM)/Makefile && echo arch/xtensa/platforms/$(PLATFORM)/) - # Find libgcc.a LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) head-y := arch/xtensa/kernel/head.o core-y += arch/xtensa/kernel/ arch/xtensa/mm/ -core-y += $(buildplf) +core-y += arch/xtensa/platforms/$(platform-y)/ core-y += arch/xtensa/boot/dts/ libs-y += arch/xtensa/lib/ $(LIBGCC) -- cgit From 7b7cec477fc3cd42ce565dfc3e53f144504fc95c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:37:31 +0900 Subject: xtensa: move core-y in arch/xtensa/Makefile to arch/xtensa/Kbuild Use obj-y to clean up Makefile. 
Signed-off-by: Masahiro Yamada Message-Id: <20210811163731.186125-4-masahiroy@kernel.org> Signed-off-by: Max Filippov --- arch/xtensa/Kbuild | 1 + arch/xtensa/Makefile | 3 --- arch/xtensa/platforms/Makefile | 4 ++++ 3 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 arch/xtensa/platforms/Makefile (limited to 'arch') diff --git a/arch/xtensa/Kbuild b/arch/xtensa/Kbuild index a4e40e534e6a..fd12f61745ba 100644 --- a/arch/xtensa/Kbuild +++ b/arch/xtensa/Kbuild @@ -1 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-y += kernel/ mm/ platforms/ boot/dts/ diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 093e87b889be..96714ef7c89e 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -58,9 +58,6 @@ KBUILD_DEFCONFIG := iss_defconfig LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) head-y := arch/xtensa/kernel/head.o -core-y += arch/xtensa/kernel/ arch/xtensa/mm/ -core-y += arch/xtensa/platforms/$(platform-y)/ -core-y += arch/xtensa/boot/dts/ libs-y += arch/xtensa/lib/ $(LIBGCC) diff --git a/arch/xtensa/platforms/Makefile b/arch/xtensa/platforms/Makefile new file mode 100644 index 000000000000..e2e7e0726979 --- /dev/null +++ b/arch/xtensa/platforms/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_XTENSA_PLATFORM_XT2000) += xt2000/ +obj-$(CONFIG_XTENSA_PLATFORM_ISS) += iss/ +obj-$(CONFIG_XTENSA_PLATFORM_XTFPGA) += xtfpga/ -- cgit From 12593568d7319c34c72038ea799ab1bd0f0eb01c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 11 Aug 2021 18:36:25 +0100 Subject: KVM: arm64: Return -EPERM from __pkvm_host_share_hyp() Fix the error code returned by __pkvm_host_share_hyp() when the host attempts to share with EL2 a page that has already been shared with another entity. Reported-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210811173630.2536721-1-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 8165390d3ec9..6ec695311498 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -404,7 +404,7 @@ int __pkvm_host_share_hyp(u64 pfn) cur = kvm_pgtable_hyp_pte_prot(pte); prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); if (!check_prot(cur, prot, ~prot)) - ret = EPERM; + ret = -EPERM; goto unlock; map_shared: -- cgit From 59a27e1122133831111f9e2e40fec2307d742487 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 21 Jul 2021 09:59:37 +0200 Subject: riscv: Optimize kernel virtual address conversion macro The current test in kernel_mapping_va_to_pa only applies when CONFIG_XIP_KERNEL is set, so use IS_ENABLED to optimize this macro at compile-time in standard kernels that do not require this test. 
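For background, IS_ENABLED(CONFIG_FOO) expands to a compile-time constant 0 or 1, so a "constant && runtime test" condition folds away completely when the option is off. A sketch of the idea with made-up symbols (xip_boundary, xip_offset and linear_offset are placeholders, not the real kernel variables):

    /*
     * With CONFIG_XIP_KERNEL=n, IS_ENABLED(CONFIG_XIP_KERNEL) is the
     * constant 0, the && short-circuits at compile time, and only the
     * plain linear-offset subtraction is emitted.
     */
    #define example_va_to_pa(va)                                        \
            ((IS_ENABLED(CONFIG_XIP_KERNEL) && (va) < xip_boundary) ?   \
                    ((va) - xip_offset) :                               \
                    ((va) - linear_offset))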
Signed-off-by: Alexandre Ghiti Tested-by: Emil Renner Berthing Reviewed-by: Jisheng Zhang Reviewed-By: Vitaly Wool Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index cca8764aed83..d40d77d76d82 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -122,7 +122,7 @@ extern struct kernel_mapping kernel_map; #define linear_mapping_va_to_pa(x) ((unsigned long)(x) - kernel_map.va_pa_offset) #define kernel_mapping_va_to_pa(y) ({ \ unsigned long _y = y; \ - (_y < kernel_map.virt_addr + XIP_OFFSET) ? \ + (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \ ((unsigned long)(_y) - kernel_map.va_kernel_xip_pa_offset) : \ ((unsigned long)(_y) - kernel_map.va_kernel_pa_offset - XIP_OFFSET); \ }) -- cgit From 0aba691a7443a7541c1dc56423e0c92cc6ea7164 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 23 Jul 2021 15:01:24 +0200 Subject: riscv: Introduce va_kernel_pa_offset for 32-bit kernel va_kernel_pa_offset was only used for 64-bit as the kernel mapping lies in the linear mapping for 32-bit kernel and then only the offset between the PAGE_OFFSET and the kernel load address is needed. But this distinction complexifies the code with #ifdefs and especially with a separate definition of the address conversions macros. Simplify the code by defining this variable for both 32-bit and 64-bit. Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 15 ++------------- arch/riscv/mm/init.c | 3 --- 2 files changed, 2 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index d40d77d76d82..4da92cf28a19 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -91,10 +91,8 @@ struct kernel_mapping { uintptr_t size; /* Offset between linear mapping virtual address and kernel load address */ unsigned long va_pa_offset; -#ifdef CONFIG_64BIT /* Offset between kernel mapping virtual address and kernel load address */ unsigned long va_kernel_pa_offset; -#endif unsigned long va_kernel_xip_pa_offset; #ifdef CONFIG_XIP_KERNEL uintptr_t xiprom; @@ -104,11 +102,11 @@ struct kernel_mapping { extern struct kernel_mapping kernel_map; -#ifdef CONFIG_64BIT #define is_kernel_mapping(x) \ ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size)) + #define is_linear_mapping(x) \ - ((x) >= PAGE_OFFSET && (x) < kernel_map.virt_addr) + ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < kernel_map.virt_addr)) #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) #define kernel_mapping_pa_to_va(y) ({ \ @@ -132,15 +130,6 @@ extern struct kernel_mapping kernel_map; is_linear_mapping(_x) ? 
\ linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x); \ }) -#else -#define is_kernel_mapping(x) \ - ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size)) -#define is_linear_mapping(x) \ - ((x) >= PAGE_OFFSET) - -#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + kernel_map.va_pa_offset)) -#define __va_to_pa_nodebug(x) ((unsigned long)(x) - kernel_map.va_pa_offset) -#endif /* CONFIG_64BIT */ #ifdef CONFIG_DEBUG_VIRTUAL extern phys_addr_t __virt_to_phys(unsigned long x); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 269fc648ef3d..263ae055e81a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -552,11 +552,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.phys_addr = (uintptr_t)(&_start); kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr; #endif - kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr; -#ifdef CONFIG_64BIT kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; -#endif pfn_base = PFN_DOWN(kernel_map.phys_addr); -- cgit From 526f83df1d83b9c95e571ea5d4dff12be1b215ec Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 23 Jul 2021 15:01:25 +0200 Subject: riscv: Get rid of map_size parameter to create_kernel_page_table The kernel must always be mapped using PMD_SIZE, and this is already the case, this just simplifies create_kernel_page_table. Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 263ae055e81a..83d0ed915914 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -495,36 +495,35 @@ static __init pgprot_t pgprot_from_va(uintptr_t va) #endif #ifdef CONFIG_XIP_KERNEL -static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size, +static void __init create_kernel_page_table(pgd_t *pgdir, __always_unused bool early) { uintptr_t va, end_va; /* Map the flash resident part */ end_va = kernel_map.virt_addr + kernel_map.xiprom_sz; - for (va = kernel_map.virt_addr; va < end_va; va += map_size) + for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, kernel_map.xiprom + (va - kernel_map.virt_addr), - map_size, PAGE_KERNEL_EXEC); + PMD_SIZE, PAGE_KERNEL_EXEC); /* Map the data in RAM */ end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size; - for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += map_size) + for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)), - map_size, PAGE_KERNEL); + PMD_SIZE, PAGE_KERNEL); } #else -static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size, - bool early) +static void __init create_kernel_page_table(pgd_t *pgdir, bool early) { uintptr_t va, end_va; end_va = kernel_map.virt_addr + kernel_map.size; - for (va = kernel_map.virt_addr; va < end_va; va += map_size) + for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, kernel_map.phys_addr + (va - kernel_map.virt_addr), - map_size, + PMD_SIZE, early ? 
PAGE_KERNEL_EXEC : pgprot_from_va(va)); } @@ -533,7 +532,6 @@ static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size, asmlinkage void __init setup_vm(uintptr_t dtb_pa) { uintptr_t __maybe_unused pa; - uintptr_t map_size; #ifndef __PAGETABLE_PMD_FOLDED pmd_t fix_bmap_spmd, fix_bmap_epmd; #endif @@ -557,15 +555,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pfn_base = PFN_DOWN(kernel_map.phys_addr); - /* - * Enforce boot alignment requirements of RV32 and - * RV64 by only allowing PMD or PGD mappings. - */ - map_size = PMD_SIZE; - /* Sanity check alignment and size */ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); - BUG_ON((kernel_map.phys_addr % map_size) != 0); + BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0); pt_ops.alloc_pte = alloc_pte_early; pt_ops.get_pte_virt = get_pte_virt_early; @@ -602,7 +594,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) * us to reach paging_init(). We map all memory banks later * in setup_vm_final() below. */ - create_kernel_page_table(early_pg_dir, map_size, true); + create_kernel_page_table(early_pg_dir, true); #ifndef __PAGETABLE_PMD_FOLDED /* Setup early PMD for DTB */ @@ -718,7 +710,7 @@ static void __init setup_vm_final(void) #ifdef CONFIG_64BIT /* Map the kernel */ - create_kernel_page_table(swapper_pg_dir, PMD_SIZE, false); + create_kernel_page_table(swapper_pg_dir, false); #endif /* Clear fixmap PTE and PMD mappings */ -- cgit From 6f3e5fd241c33d2407f7aaa930b141af6ad7c35b Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 23 Jul 2021 15:01:26 +0200 Subject: riscv: Use __maybe_unused instead of #ifdefs around variable declarations This allows to simplify the code and make it more readable. Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 83d0ed915914..a456b5a68d99 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -532,9 +532,7 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early) asmlinkage void __init setup_vm(uintptr_t dtb_pa) { uintptr_t __maybe_unused pa; -#ifndef __PAGETABLE_PMD_FOLDED - pmd_t fix_bmap_spmd, fix_bmap_epmd; -#endif + pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd; kernel_map.virt_addr = KERNEL_LINK_ADDR; -- cgit From 977765ce319b98939205cf07aa1d76150713c69b Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 23 Jul 2021 15:01:27 +0200 Subject: riscv: Simplify BUILTIN_DTB device tree mapping handling __PAGETABLE_PMD_FOLDED defines a 2-level page table that is only used in 32-bit kernel, so there is no need to check for CONFIG_64BIT in #ifndef __PAGETABLE_PMD_FOLDED and vice-versa. Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index a456b5a68d99..a93822df9d81 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -607,18 +607,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); #else /* CONFIG_BUILTIN_DTB */ -#ifdef CONFIG_64BIT /* * __va can't be used since it would return a linear mapping address * whereas dtb_early_va will be used before setup_vm_final installs * the linear mapping. 
*/ dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa)); #endif /* CONFIG_BUILTIN_DTB */ #else /* __PAGETABLE_PMD_FOLDED */ #ifndef CONFIG_BUILTIN_DTB /* Create two consecutive PGD mappings for FDT early scan */ pa = dtb_pa & ~(PGDIR_SIZE - 1); @@ -628,13 +624,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); #else /* CONFIG_BUILTIN_DTB */ -#ifdef CONFIG_64BIT - dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa)); -#else dtb_early_va = __va(dtb_pa); -#endif /* CONFIG_64BIT */ #endif /* CONFIG_BUILTIN_DTB */ -#endif +#endif /* __PAGETABLE_PMD_FOLDED */ dtb_early_pa = dtb_pa; /* -- cgit From fe45ffa4c505783637233609b677446020738b87 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 23 Jul 2021 15:01:28 +0200 Subject: riscv: Move early fdt mapping creation in its own function The code that handles the early fdt mapping is hard to read, and the mapping size it creates depends on the kernel: - for 64-bit, 2 PMD entries are used, which amounts to a 4MB mapping - for 32-bit, 2 PGDIR entries are used, which amounts to an 8MB mapping So keep using 2 PMD entries for 64-bit and use only one PGD entry for 32-bit, which is enough to cover 4MB. Move that into a new function called create_fdt_early_page_table, which follows the same naming as create_kernel_page_table. Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 76 +++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index a93822df9d81..fdf093d01c6f 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -222,6 +222,7 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); +static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) @@ -302,7 +303,6 @@ static void __init create_pte_mapping(pte_t *ptep, static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss; static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); -static pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL #define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd)) @@ -388,6 +388,7 @@ static void __init create_pmd_mapping(pmd_t *pmdp, #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ create_pte_mapping(__nextp, __va, __pa, __sz, __prot) #define fixmap_pgd_next fixmap_pte +#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) #endif void __init create_pgd_mapping(pgd_t *pgdp, @@ -529,9 +530,44 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early) } #endif +/* + * Setup a 4MB mapping that encompasses the device tree: for 64-bit kernel, + * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR + * entry. + */ +static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) +{ +#ifndef CONFIG_BUILTIN_DTB + uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); + + create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, + IS_ENABLED(CONFIG_64BIT) ?
(uintptr_t)early_dtb_pmd : pa, + PGDIR_SIZE, + IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL); + + if (IS_ENABLED(CONFIG_64BIT)) { + create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, + pa, PMD_SIZE, PAGE_KERNEL); + create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, + pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); + } + + dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); +#else + /* + * For 64-bit kernel, __va can't be used since it would return a linear + * mapping address whereas dtb_early_va will be used before + * setup_vm_final installs the linear mapping. For 32-bit kernel, as the + * kernel is mapped in the linear mapping, that makes no difference. + */ + dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa)); +#endif + + dtb_early_pa = dtb_pa; +} + asmlinkage void __init setup_vm(uintptr_t dtb_pa) { - uintptr_t __maybe_unused pa; pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd; kernel_map.virt_addr = KERNEL_LINK_ADDR; @@ -594,40 +630,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) */ create_kernel_page_table(early_pg_dir, true); -#ifndef __PAGETABLE_PMD_FOLDED - /* Setup early PMD for DTB */ - create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, - (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE); -#ifndef CONFIG_BUILTIN_DTB - /* Create two consecutive PMD mappings for FDT early scan */ - pa = dtb_pa & ~(PMD_SIZE - 1); - create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, - pa, PMD_SIZE, PAGE_KERNEL); - create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, - pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); - dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); -#else /* CONFIG_BUILTIN_DTB */ - /* - * __va can't be used since it would return a linear mapping address - * whereas dtb_early_va will be used before setup_vm_final installs - * the linear mapping. - */ - dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa)); -#endif /* CONFIG_BUILTIN_DTB */ -#else /* __PAGETABLE_PMD_FOLDED */ -#ifndef CONFIG_BUILTIN_DTB - /* Create two consecutive PGD mappings for FDT early scan */ - pa = dtb_pa & ~(PGDIR_SIZE - 1); - create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, - pa, PGDIR_SIZE, PAGE_KERNEL); - create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE, - pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); - dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); -#else /* CONFIG_BUILTIN_DTB */ - dtb_early_va = __va(dtb_pa); -#endif /* CONFIG_BUILTIN_DTB */ -#endif /* __PAGETABLE_PMD_FOLDED */ - dtb_early_pa = dtb_pa; + /* Setup early mapping for FDT early scan */ + create_fdt_early_page_table(early_pg_dir, dtb_pa); /* * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap -- cgit From 666173ee32e2d6f2853bd54b11b8127ac97eb019 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 23 Jul 2021 10:25:35 +0800 Subject: MIPS: generic: Allow generating FIT image for Marduk board Marduk is based on the IMG Pistachio SoC. The platform uses the MIPS UHI boot protocol and has a proper devicetree implementation, so it can be part of the generic kernel.
Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/Makefile | 1 + arch/mips/boot/dts/img/Makefile | 2 ++ arch/mips/generic/Kconfig | 6 ++++++ arch/mips/generic/Platform | 1 + arch/mips/generic/board-marduk.its.S | 22 ++++++++++++++++++++++ 5 files changed, 32 insertions(+) create mode 100644 arch/mips/generic/board-marduk.its.S (limited to 'arch') diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile index 60bd7d2a9ad8..188301164d9e 100644 --- a/arch/mips/boot/dts/Makefile +++ b/arch/mips/boot/dts/Makefile @@ -2,6 +2,7 @@ subdir-$(CONFIG_BMIPS_GENERIC) += brcm subdir-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon subdir-$(CONFIG_MACH_PISTACHIO) += img +subdir-$(CONFIG_FIT_IMAGE_FDT_MARDUK) += img subdir-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += img subdir-$(CONFIG_MACH_INGENIC) += ingenic subdir-$(CONFIG_LANTIQ) += lantiq diff --git a/arch/mips/boot/dts/img/Makefile b/arch/mips/boot/dts/img/Makefile index 441a3c16efb0..24f6bbeadd48 100644 --- a/arch/mips/boot/dts/img/Makefile +++ b/arch/mips/boot/dts/img/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += boston.dtb +dtb-$(CONFIG_FIT_IMAGE_FDT_MARDUK) += pistachio_marduk.dtb + dtb-$(CONFIG_MACH_PISTACHIO) += pistachio_marduk.dtb obj-$(CONFIG_MACH_PISTACHIO) += pistachio_marduk.dtb.o diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig index 657dd93c5e76..7dc5b3821cc6 100644 --- a/arch/mips/generic/Kconfig +++ b/arch/mips/generic/Kconfig @@ -58,6 +58,12 @@ config FIT_IMAGE_FDT_BOSTON enable this if you wish to boot on a MIPS Boston board, as it is expected by the bootloader. +config FIT_IMAGE_FDT_MARDUK + bool "Include FDT for IMG Pistachio Marduk (CI40) boards" + help + Enable this to include the FDT for the IMG Pistachio Marduk (CI40) + from Imagination Technologies in the FIT kernel image. + config FIT_IMAGE_FDT_NI169445 bool "Include FDT for NI 169445" help diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform index b871af16b5b6..e1abc113b409 100644 --- a/arch/mips/generic/Platform +++ b/arch/mips/generic/Platform @@ -24,3 +24,4 @@ its-$(CONFIG_FIT_IMAGE_FDT_LUTON) += board-luton.its.S its-$(CONFIG_FIT_IMAGE_FDT_JAGUAR2) += board-jaguar2.its.S its-$(CONFIG_FIT_IMAGE_FDT_SERVAL) += board-serval.its.S its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += board-xilfpga.its.S +its-$(CONFIG_FIT_IMAGE_FDT_MARDUK) += board-marduk.its.S diff --git a/arch/mips/generic/board-marduk.its.S b/arch/mips/generic/board-marduk.its.S new file mode 100644 index 000000000000..4f633794db90 --- /dev/null +++ b/arch/mips/generic/board-marduk.its.S @@ -0,0 +1,22 @@ +/ { + images { + fdt-marduk { + description = "img,pistachio-marduk Device Tree"; + data = /incbin/("boot/dts/img/pistachio_marduk.dtb"); + type = "flat_dt"; + arch = "mips"; + compression = "none"; + hash { + algo = "sha1"; + }; + }; + }; + + configurations { + conf-marduk { + description = "Marduk Linux kernel"; + kernel = "kernel"; + fdt = "fdt-marduk"; + }; + }; +}; -- cgit From d32524a2d05791181756bb4ab4f6e0628471fde2 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 23 Jul 2021 10:25:36 +0800 Subject: MIPS: DTS: Pistachio add missing cpc and cdmm CPC and CDMM addresses are adjustable, and we should tell the kernel how to place them in the devicetree. Note that the MACH_PISTACHIO code hardcoded the CDMM base to 0x1bdd0000; however, that would collide with the GIC address range. As we don't have any CDMM device on this platform, it won't be a problem.
I found another spare range, 0x1bdf0000~0x1be00000 to place CDMM instead. Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/img/pistachio.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/mips/boot/dts/img/pistachio.dtsi b/arch/mips/boot/dts/img/pistachio.dtsi index dc3b7909de73..b1db8b8f446f 100644 --- a/arch/mips/boot/dts/img/pistachio.dtsi +++ b/arch/mips/boot/dts/img/pistachio.dtsi @@ -900,6 +900,16 @@ }; }; + cpc: cpc@1bde0000 { + compatible = "mti,mips-cpc"; + reg = <0x1bde0000 0x10000>; + }; + + cdmm: cdmm@1bdf0000 { + compatible = "mti,mips-cdmm"; + reg = <0x1bdf0000 0x10000>; + }; + usb_phy: usb-phy { compatible = "img,pistachio-usb-phy"; clocks = <&clk_core CLK_USB_PHY>; -- cgit From 917b64f1df2b9cec0a0859bd8d96b24679038f78 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 23 Jul 2021 10:25:41 +0800 Subject: MIPS: config: generic: Add config for Marduk board The config is minimal config to allow it boot from SD Card. Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/configs/generic/board-marduk.config | 53 +++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 arch/mips/configs/generic/board-marduk.config (limited to 'arch') diff --git a/arch/mips/configs/generic/board-marduk.config b/arch/mips/configs/generic/board-marduk.config new file mode 100644 index 000000000000..05ca34cd5a73 --- /dev/null +++ b/arch/mips/configs/generic/board-marduk.config @@ -0,0 +1,53 @@ +CONFIG_FIT_IMAGE_FDT_MARDUK=y + +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y + +CONFIG_CLKSRC_PISTACHIO=y + +CONFIG_COMMON_CLK_PISTACHIO=y + +CONFIG_DMADEVICES=y +CONFIG_IMG_MDC_DMA=y + +CONFIG_GPIOLIB=y +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_PCH=y + +CONFIG_I2C=y +CONFIG_I2C_IMG=y + +CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_PLTFM=y + +CONFIG_NETDEVICES=y +CONFIG_STMMAC_ETH=y +CONFIG_STMMAC_PLATFORM=y + +CONFIG_PHY_PISTACHIO_USB=y + +CONFIG_PINCTRL=y +CONFIG_PINCTRL_PISTACHIO=y + +CONFIG_RESET_PISTACHIO=y + +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SERIAL_8250_DW=y + +CONFIG_SPI=y +CONFIG_SRAM=y + +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_DWC2=y + +CONFIG_CRYPTO_DEV_IMGTEC_HASH=y +CONFIG_IMGPDC_WDT=y +CONFIG_IR_IMG=y +CONFIG_CC10001_ADC=y +CONFIG_SND_SOC_IMG=y -- cgit From 104f942b2832ab1340dab34ae2dad8b31b772788 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 23 Jul 2021 10:25:42 +0800 Subject: MIPS: Retire MACH_PISTACHIO Now it can be replaced by generic kernel. 
Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kbuild.platforms | 1 - arch/mips/Kconfig | 30 ---- arch/mips/boot/dts/Makefile | 1 - arch/mips/boot/dts/img/Makefile | 3 - arch/mips/configs/pistachio_defconfig | 316 ---------------------------------- arch/mips/pistachio/Kconfig | 14 -- arch/mips/pistachio/Makefile | 2 - arch/mips/pistachio/Platform | 6 - arch/mips/pistachio/init.c | 125 -------------- arch/mips/pistachio/irq.c | 24 --- arch/mips/pistachio/time.c | 55 ------ 11 files changed, 577 deletions(-) delete mode 100644 arch/mips/configs/pistachio_defconfig delete mode 100644 arch/mips/pistachio/Kconfig delete mode 100644 arch/mips/pistachio/Makefile delete mode 100644 arch/mips/pistachio/Platform delete mode 100644 arch/mips/pistachio/init.c delete mode 100644 arch/mips/pistachio/irq.c delete mode 100644 arch/mips/pistachio/time.c (limited to 'arch') diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index e4f6e49417a9..584081df89c2 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -21,7 +21,6 @@ platform-$(CONFIG_MIPS_MALTA) += mti-malta/ platform-$(CONFIG_MACH_NINTENDO64) += n64/ platform-$(CONFIG_NLM_COMMON) += netlogic/ platform-$(CONFIG_PIC32MZDA) += pic32/ -platform-$(CONFIG_MACH_PISTACHIO) += pistachio/ platform-$(CONFIG_RALINK) += ralink/ platform-$(CONFIG_MIKROTIK_RB532) += rb532/ platform-$(CONFIG_SGI_IP22) += sgi-ip22/ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cee6087cd686..b783264243e4 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -513,35 +513,6 @@ config MACH_LOONGSON64 and Loongson-2F which will be removed), developed by the Institute of Computing Technology (ICT), Chinese Academy of Sciences (CAS). -config MACH_PISTACHIO - bool "IMG Pistachio SoC based boards" - select BOOT_ELF32 - select BOOT_RAW - select CEVT_R4K - select CLKSRC_MIPS_GIC - select COMMON_CLK - select CSRC_R4K - select DMA_NONCOHERENT - select GPIOLIB - select IRQ_MIPS_CPU - select MFD_SYSCON - select MIPS_CPU_SCACHE - select MIPS_GIC - select PINCTRL - select REGULATOR - select SYS_HAS_CPU_MIPS32_R2 - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_MIPS_CPS - select SYS_SUPPORTS_MULTITHREADING - select SYS_SUPPORTS_RELOCATABLE - select SYS_SUPPORTS_ZBOOT - select SYS_HAS_EARLY_PRINTK - select USE_GENERIC_EARLY_PRINTK_8250 - select USE_OF - help - This enables support for the IMG Pistachio SoC platform. 
- config MIPS_MALTA bool "MIPS Malta board" select ARCH_MAY_HAVE_PC_FDC @@ -1088,7 +1059,6 @@ source "arch/mips/ingenic/Kconfig" source "arch/mips/jazz/Kconfig" source "arch/mips/lantiq/Kconfig" source "arch/mips/pic32/Kconfig" -source "arch/mips/pistachio/Kconfig" source "arch/mips/ralink/Kconfig" source "arch/mips/sgi-ip27/Kconfig" source "arch/mips/sibyte/Kconfig" diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile index 188301164d9e..be96d35eb582 100644 --- a/arch/mips/boot/dts/Makefile +++ b/arch/mips/boot/dts/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 subdir-$(CONFIG_BMIPS_GENERIC) += brcm subdir-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon -subdir-$(CONFIG_MACH_PISTACHIO) += img subdir-$(CONFIG_FIT_IMAGE_FDT_MARDUK) += img subdir-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += img subdir-$(CONFIG_MACH_INGENIC) += ingenic diff --git a/arch/mips/boot/dts/img/Makefile b/arch/mips/boot/dts/img/Makefile index 24f6bbeadd48..ebb47490b04b 100644 --- a/arch/mips/boot/dts/img/Makefile +++ b/arch/mips/boot/dts/img/Makefile @@ -2,6 +2,3 @@ dtb-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += boston.dtb dtb-$(CONFIG_FIT_IMAGE_FDT_MARDUK) += pistachio_marduk.dtb - -dtb-$(CONFIG_MACH_PISTACHIO) += pistachio_marduk.dtb -obj-$(CONFIG_MACH_PISTACHIO) += pistachio_marduk.dtb.o diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig deleted file mode 100644 index b9adf15ebbec..000000000000 --- a/arch/mips/configs/pistachio_defconfig +++ /dev/null @@ -1,316 +0,0 @@ -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="localhost" -CONFIG_SYSVIPC=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_PREEMPT_VOLUNTARY=y -CONFIG_IKCONFIG=m -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=18 -CONFIG_CGROUPS=y -CONFIG_CGROUP_SCHED=y -CONFIG_CFS_BANDWIDTH=y -CONFIG_CGROUP_FREEZER=y -CONFIG_NAMESPACES=y -CONFIG_USER_NS=y -CONFIG_BLK_DEV_INITRD=y -# CONFIG_RD_BZIP2 is not set -# CONFIG_RD_LZMA is not set -# CONFIG_RD_LZO is not set -# CONFIG_RD_LZ4 is not set -CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_EMBEDDED=y -# CONFIG_COMPAT_BRK is not set -CONFIG_PROFILING=y -CONFIG_MACH_PISTACHIO=y -CONFIG_MIPS_CPS=y -CONFIG_NR_CPUS=4 -CONFIG_PM_DEBUG=y -CONFIG_PM_ADVANCED_DEBUG=y -CONFIG_CPU_IDLE=y -# CONFIG_MIPS_CPS_CPUIDLE is not set -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_COMPACTION is not set -CONFIG_DEFAULT_MMAP_MIN_ADDR=32768 -CONFIG_ZSMALLOC=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_NET_KEY=m -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_DIAG is not set -CONFIG_TCP_CONG_ADVANCED=y -# CONFIG_TCP_CONG_BIC is not set -# CONFIG_TCP_CONG_WESTWOOD is not set -# CONFIG_TCP_CONG_HTCP is not set -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_MD5SIG=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -CONFIG_IPV6_SIT=m -CONFIG_NETWORK_SECMARK=y -CONFIG_NETFILTER=y -# CONFIG_BRIDGE_NETFILTER is not set -CONFIG_NF_CONNTRACK=y -CONFIG_NF_CT_NETLINK=y -CONFIG_NETFILTER_XT_MARK=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y 
-CONFIG_NETFILTER_XT_TARGET_DSCP=y -CONFIG_NETFILTER_XT_TARGET_NFLOG=y -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y -CONFIG_NETFILTER_XT_TARGET_SECMARK=y -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y -CONFIG_NETFILTER_XT_MATCH_DSCP=y -CONFIG_NETFILTER_XT_MATCH_POLICY=y -CONFIG_NETFILTER_XT_MATCH_STATE=y -CONFIG_NF_NAT_IPV4=m -CONFIG_IP_NF_IPTABLES=y -CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_IP_NF_MANGLE=y -CONFIG_NF_NAT_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_BRIDGE=m -CONFIG_VLAN_8021Q=m -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_MARK=y -CONFIG_BT=m -CONFIG_BT_RFCOMM=m -CONFIG_BT_HCIBTUSB=m -CONFIG_BT_HCIBFUSB=m -CONFIG_BT_HCIVHCI=m -CONFIG_CFG80211=m -CONFIG_NL80211_TESTMODE=y -CONFIG_CFG80211_DEBUGFS=y -CONFIG_CFG80211_WEXT=y -CONFIG_MAC80211=m -CONFIG_MAC80211_LEDS=y -CONFIG_MAC80211_DEBUGFS=y -CONFIG_MAC80211_DEBUG_MENU=y -CONFIG_MAC80211_VERBOSE_DEBUG=y -CONFIG_RFKILL=y -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_DEBUG_DEVRES=y -CONFIG_CONNECTOR=y -CONFIG_MTD=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_SPI_NOR=y -CONFIG_MTD_UBI=y -CONFIG_MTD_UBI_BLOCK=y -CONFIG_ZRAM=m -CONFIG_BLK_DEV_LOOP=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_BLK_DEV_SR=m -CONFIG_SCSI_SPI_ATTRS=y -CONFIG_MD=y -CONFIG_BLK_DEV_DM=y -CONFIG_DM_CRYPT=y -CONFIG_DM_VERITY=y -CONFIG_NETDEVICES=y -CONFIG_TUN=m -CONFIG_VETH=m -# CONFIG_NET_VENDOR_MARVELL is not set -# CONFIG_NET_VENDOR_MICREL is not set -# CONFIG_NET_VENDOR_MICROCHIP is not set -# CONFIG_NET_VENDOR_NATSEMI is not set -# CONFIG_NET_VENDOR_SEEQ is not set -# CONFIG_NET_VENDOR_SMSC is not set -CONFIG_STMMAC_ETH=y -# CONFIG_NET_VENDOR_VIA is not set -CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_RTL8152=m -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_SMSC75XX=m -CONFIG_USB_NET_SMSC95XX=m -CONFIG_USB_NET_MCS7830=m -# CONFIG_USB_NET_CDC_SUBSET is not set -# CONFIG_USB_NET_ZAURUS is not set -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_LIBERTAS_THINFIRM=m -CONFIG_RT2X00=m -CONFIG_RT2800USB=m -CONFIG_MAC80211_HWSIM=m -CONFIG_USB_NET_RNDIS_WLAN=m -CONFIG_INPUT_EVDEV=y -# CONFIG_KEYBOARD_ATKBD is not set -CONFIG_KEYBOARD_GPIO=y -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -# CONFIG_LEGACY_PTYS is not set -CONFIG_SERIAL_8250=y -# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_DW=y -CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_HW_RANDOM=y -CONFIG_TCG_TPM=y -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_IMG=y -CONFIG_I2C_STUB=m -CONFIG_SPI=y -CONFIG_SPI_BITBANG=m -CONFIG_SPI_IMG_SPFI=y -CONFIG_SPI_SPIDEV=y -CONFIG_DEBUG_GPIO=y -CONFIG_GPIO_SYSFS=y -CONFIG_POWER_SUPPLY=y -CONFIG_THERMAL=y -CONFIG_WATCHDOG=y -CONFIG_IMGPDC_WDT=y -CONFIG_REGULATOR_FIXED_VOLTAGE=y -CONFIG_REGULATOR_GPIO=y -CONFIG_RC_CORE=y -CONFIG_RC_DEVICES=y -CONFIG_IR_IMG=y -CONFIG_IR_IMG_NEC=y -CONFIG_IR_IMG_JVC=y -CONFIG_IR_IMG_SONY=y -CONFIG_IR_IMG_SHARP=y -CONFIG_IR_IMG_SANYO=y -CONFIG_IR_IMG_RC5=y -CONFIG_IR_IMG_RC6=y -CONFIG_MEDIA_SUPPORT=y -CONFIG_FB=y -CONFIG_FB_MODE_HELPERS=y -# CONFIG_LCD_CLASS_DEVICE is not set -CONFIG_BACKLIGHT_CLASS_DEVICE=y -CONFIG_SOUND=y -CONFIG_SND=y -CONFIG_SND_HRTIMER=m -CONFIG_SND_DYNAMIC_MINORS=y -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -# CONFIG_SND_SPI is not 
set -CONFIG_SND_USB_AUDIO=m -CONFIG_USB=y -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -# CONFIG_USB_DEFAULT_PERSIST is not set -CONFIG_USB_MON=y -CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_ACM=y -CONFIG_USB_STORAGE=y -CONFIG_USB_DWC2=y -CONFIG_USB_SERIAL=y -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_CP210X=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_OTI6858=m -CONFIG_USB_SERIAL_QUALCOMM=m -CONFIG_USB_SERIAL_SIERRAWIRELESS=m -CONFIG_USB_SERIAL_OPTION=m -CONFIG_MMC=y -CONFIG_MMC_BLOCK_MINORS=16 -CONFIG_MMC_TEST=m -CONFIG_MMC_DW=y -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_RTC_CLASS=y -CONFIG_DMADEVICES=y -CONFIG_IMG_MDC_DMA=y -CONFIG_STAGING=y -CONFIG_ASHMEM=y -# CONFIG_IOMMU_SUPPORT is not set -CONFIG_MEMORY=y -CONFIG_IIO=y -CONFIG_CC10001_ADC=y -CONFIG_PWM=y -CONFIG_PWM_IMG=y -CONFIG_PHY_PISTACHIO_USB=y -CONFIG_ANDROID=y -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -# CONFIG_DNOTIFY is not set -CONFIG_FUSE_FS=m -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_VFAT_FS=m -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_ECRYPT_FS=y -CONFIG_HFSPLUS_FS=m -CONFIG_UBIFS_FS=y -CONFIG_SQUASHFS=y -CONFIG_SQUASHFS_FILE_DIRECT=y -CONFIG_SQUASHFS_LZO=y -CONFIG_PSTORE=y -CONFIG_PSTORE_CONSOLE=y -CONFIG_PSTORE_RAM=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y -CONFIG_SECURITY_YAMA=y -CONFIG_CRYPTO_AUTHENC=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_ARC4=y -CONFIG_CRYPTO_DES=y -CONFIG_CRC_CCITT=y -CONFIG_CRC_T10DIF=m -CONFIG_CRC7=m -# CONFIG_XZ_DEC_X86 is not set -CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0 -# CONFIG_SCHED_DEBUG is not set -CONFIG_SCHEDSTATS=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_CREDENTIALS=y -CONFIG_FUNCTION_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_LKDTM=y -CONFIG_TEST_UDELAY=m diff --git a/arch/mips/pistachio/Kconfig b/arch/mips/pistachio/Kconfig deleted file mode 100644 index 9a0e06c95184..000000000000 --- a/arch/mips/pistachio/Kconfig +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -config PISTACHIO_GPTIMER_CLKSRC - bool "Enable General Purpose Timer based clocksource" - depends on MACH_PISTACHIO - select CLKSRC_PISTACHIO - select MIPS_EXTERNAL_TIMER - help - This option enables a clocksource driver based on a Pistachio - SoC General Purpose external timer. - - If you want to enable the CPUFreq, you need to enable - this option. - - If you don't want to enable CPUFreq, you can leave this disabled. 
diff --git a/arch/mips/pistachio/Makefile b/arch/mips/pistachio/Makefile deleted file mode 100644 index 66f4af17fb66..000000000000 --- a/arch/mips/pistachio/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-y += init.o irq.o time.o diff --git a/arch/mips/pistachio/Platform b/arch/mips/pistachio/Platform deleted file mode 100644 index c59de86dbddf..000000000000 --- a/arch/mips/pistachio/Platform +++ /dev/null @@ -1,6 +0,0 @@ -# -# IMG Pistachio SoC -# -load-$(CONFIG_MACH_PISTACHIO) += 0xffffffff80400000 -zload-$(CONFIG_MACH_PISTACHIO) += 0xffffffff81000000 -all-$(CONFIG_MACH_PISTACHIO) := uImage.gz diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c deleted file mode 100644 index e0bacfc3c6b4..000000000000 --- a/arch/mips/pistachio/init.c +++ /dev/null @@ -1,125 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Pistachio platform setup - * - * Copyright (C) 2014 Google, Inc. - * Copyright (C) 2016 Imagination Technologies - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -/* - * Core revision register decoding - * Bits 23 to 20: Major rev - * Bits 15 to 8: Minor rev - * Bits 7 to 0: Maintenance rev - */ -#define PISTACHIO_CORE_REV_REG 0xB81483D0 -#define PISTACHIO_CORE_REV_A1 0x00100006 -#define PISTACHIO_CORE_REV_B0 0x00100106 - -const char *get_system_type(void) -{ - u32 core_rev; - const char *sys_type; - - core_rev = __raw_readl((const void *)PISTACHIO_CORE_REV_REG); - - switch (core_rev) { - case PISTACHIO_CORE_REV_B0: - sys_type = "IMG Pistachio SoC (B0)"; - break; - - case PISTACHIO_CORE_REV_A1: - sys_type = "IMG Pistachio SoC (A1)"; - break; - - default: - sys_type = "IMG Pistachio SoC"; - break; - } - - return sys_type; -} - -void __init *plat_get_fdt(void) -{ - if (fw_arg0 != -2) - panic("Device-tree not present"); - return (void *)fw_arg1; -} - -void __init plat_mem_setup(void) -{ - __dt_setup_arch(plat_get_fdt()); -} - -#define DEFAULT_CPC_BASE_ADDR 0x1bde0000 -#define DEFAULT_CDMM_BASE_ADDR 0x1bdd0000 - -phys_addr_t mips_cpc_default_phys_base(void) -{ - return DEFAULT_CPC_BASE_ADDR; -} - -phys_addr_t mips_cdmm_phys_base(void) -{ - return DEFAULT_CDMM_BASE_ADDR; -} - -static void __init mips_nmi_setup(void) -{ - void *base; - - base = cpu_has_veic ? - (void *)(CAC_BASE + 0xa80) : - (void *)(CAC_BASE + 0x380); - memcpy(base, except_vec_nmi, 0x80); - flush_icache_range((unsigned long)base, - (unsigned long)base + 0x80); -} - -static void __init mips_ejtag_setup(void) -{ - void *base; - extern char except_vec_ejtag_debug[]; - - base = cpu_has_veic ? - (void *)(CAC_BASE + 0xa00) : - (void *)(CAC_BASE + 0x300); - memcpy(base, except_vec_ejtag_debug, 0x80); - flush_icache_range((unsigned long)base, - (unsigned long)base + 0x80); -} - -void __init prom_init(void) -{ - board_nmi_handler_setup = mips_nmi_setup; - board_ejtag_handler_setup = mips_ejtag_setup; - - mips_cm_probe(); - mips_cpc_probe(); - register_cps_smp_ops(); - - pr_info("SoC Type: %s\n", get_system_type()); -} - -void __init device_tree_init(void) -{ - if (!initial_boot_params) - return; - - unflatten_and_copy_device_tree(); -} diff --git a/arch/mips/pistachio/irq.c b/arch/mips/pistachio/irq.c deleted file mode 100644 index 437c3101ac45..000000000000 --- a/arch/mips/pistachio/irq.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Pistachio IRQ setup - * - * Copyright (C) 2014 Google, Inc. 
- */ - -#include -#include -#include - -#include -#include - -void __init arch_init_irq(void) -{ - pr_info("EIC is %s\n", cpu_has_veic ? "on" : "off"); - pr_info("VINT is %s\n", cpu_has_vint ? "on" : "off"); - - if (!cpu_has_veic) - mips_cpu_irq_init(); - - irqchip_init(); -} diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c deleted file mode 100644 index de64751dec40..000000000000 --- a/arch/mips/pistachio/time.c +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Pistachio clocksource/timer setup - * - * Copyright (C) 2014 Google, Inc. - */ - -#include -#include -#include -#include -#include - -#include -#include - -unsigned int get_c0_compare_int(void) -{ - return gic_get_c0_compare_int(); -} - -int get_c0_perfcount_int(void) -{ - return gic_get_c0_perfcount_int(); -} -EXPORT_SYMBOL_GPL(get_c0_perfcount_int); - -int get_c0_fdc_int(void) -{ - return gic_get_c0_fdc_int(); -} - -void __init plat_time_init(void) -{ - struct device_node *np; - struct clk *clk; - - of_clk_init(NULL); - timer_probe(); - - np = of_get_cpu_node(0, NULL); - if (!np) { - pr_err("Failed to get CPU node\n"); - return; - } - - clk = of_clk_get(np, 0); - if (IS_ERR(clk)) { - pr_err("Failed to get CPU clock: %ld\n", PTR_ERR(clk)); - return; - } - - mips_hpt_frequency = clk_get_rate(clk) / 2; - clk_put(clk); -} -- cgit From 3f66601ef3f3e336c1f8181f183001b58be62067 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 23 Jul 2021 10:25:43 +0800 Subject: MIPS: Make an alias for pistachio_defconfig For those who miss the old defconfig, make an alias to the generic kernel. Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 4e942b7ef022..6eaeee3fb46c 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -560,6 +560,9 @@ sead3micro_defconfig-y := micro32r2el_defconfig BOARDS=sead-3 legacy_defconfigs += xilfpga_defconfig xilfpga_defconfig-y := 32r2el_defconfig BOARDS=xilfpga +legacy_defconfigs += pistachio_defconfig +pistachio_defconfig-y := 32r2el_defconfig BOARDS=marduk + .PHONY: $(legacy_defconfigs) $(legacy_defconfigs): $(Q)$(MAKE) -f $(srctree)/Makefile $($@-y) -- cgit From 4d2ee1be4c2a5552f083b6d7db4be224f96313b5 Mon Sep 17 00:00:00 2001 From: Huilong Deng Date: Wed, 11 Aug 2021 12:36:15 +0800 Subject: MIPS: generic: Return true/false (not 1/0) from bool functions ./arch/mips/generic/board-ocelot.c:29:9-10: WARNING: return of 0/1 in function 'ocelot_detect' with return type bool Signed-off-by: Huilong Deng Signed-off-by: Thomas Bogendoerfer --- arch/mips/generic/board-ocelot.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/generic/board-ocelot.c b/arch/mips/generic/board-ocelot.c index c238e95190ac..7115410acb4f 100644 --- a/arch/mips/generic/board-ocelot.c +++ b/arch/mips/generic/board-ocelot.c @@ -26,13 +26,13 @@ static __init bool ocelot_detect(void) tlb_probe_hazard(); idx = read_c0_index(); if (idx < 0) - return 0; + return false; /* A TLB entry exists, lets assume its usable and check the CHIP ID */ rev = __raw_readl((void __iomem *)DEVCPU_GCB_CHIP_REGS_CHIP_ID); if ((rev & CHIP_ID_PART_ID) != OCELOT_PART_ID) - return 0; + return false; /* Copy command line from bootloader early for Initrd detection */ if (fw_arg0 < 10 && (fw_arg1 & 0xFFF00000) == 0x80000000) { @@ -44,7 +44,7 @@ static __init bool ocelot_detect(void) strcpy(arcs_cmdline, prom_argv[1]); } - return 
1; + return true; } static void __init ocelot_earlyprintk_init(void) -- cgit From e15ac2080ec2fde6ebc59f1c8afdf7c52374035e Mon Sep 17 00:00:00 2001 From: Krzysztof Wilczyński Date: Thu, 12 Aug 2021 17:17:17 +0000 Subject: x86/PCI: Add pci_numachip_init() declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit numachip.c defines pci_numachip_init(), but neglected to include its declaration, causing the following sparse and compile time warnings: arch/x86/pci/numachip.c:108:12: warning: no previous prototype for function 'pci_numachip_init' [-Wmissing-prototypes] arch/x86/pci/numachip.c:108:12: warning: symbol 'pci_numachip_init' was not declared. Should it be static? Include asm/numachip/numachip.h, which includes the missing declaration. Link: https://lore.kernel.org/r/20210812171717.1471243-1-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- arch/x86/pci/numachip.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c index 01a085d9135a..4f0147d4e225 100644 --- a/arch/x86/pci/numachip.c +++ b/arch/x86/pci/numachip.c @@ -12,6 +12,7 @@ #include #include +#include static u8 limit __read_mostly; -- cgit From 34e9f860071f717965f1816171a11eaf2d378ee0 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 10 Aug 2021 01:43:05 +0800 Subject: KVM: X86: Remove unneeded KVM_DEBUGREG_RELOAD Commit ae561edeb421 ("KVM: x86: DR0-DR3 are not clear on reset") added code to ensure eff_db are updated when they're modified through non-standard paths. But there is no reason to also update hardware DRs unless hardware breakpoints are active or DR exiting is disabled, and in those cases updating hardware is handled by KVM_DEBUGREG_WONT_EXIT and KVM_DEBUGREG_BP_ENABLED. KVM_DEBUGREG_RELOAD just causes unnecessary loads of hardware DRs and is better removed.
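For reference, the guest-entry gating this change relies on reduces to roughly the following (abridged from the vcpu_enter_guest() hunk in the diff below; not a complete listing):

        if (unlikely(vcpu->arch.switch_db_regs)) {
                /* Breakpoints are active or DR exiting is disabled, so the
                 * effective debug registers must actually reach hardware. */
                set_debugreg(vcpu->arch.eff_db[0], 0);
                set_debugreg(vcpu->arch.eff_db[1], 1);
                set_debugreg(vcpu->arch.eff_db[2], 2);
                set_debugreg(vcpu->arch.eff_db[3], 3);
                set_debugreg(vcpu->arch.dr6, 6);
        } else if (unlikely(hw_breakpoint_active())) {
                /* Only the host uses breakpoints: keep DR7 clear for the guest. */
                set_debugreg(0, 7);
        }

With KVM_DEBUGREG_RELOAD gone, kvm_update_dr0123() merely refreshes the eff_db[] cache, and the two surviving flags decide when the loads above actually happen.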
Suggested-by: Sean Christopherson Signed-off-by: Lai Jiangshan Message-Id: <20210809174307.145263-1-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/x86.c | 3 --- 2 files changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 07d2652c6d61..5e35334980a6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -523,7 +523,6 @@ struct kvm_pmu_ops; enum { KVM_DEBUGREG_BP_ENABLED = 1, KVM_DEBUGREG_WONT_EXIT = 2, - KVM_DEBUGREG_RELOAD = 4, }; struct kvm_mtrr_range { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3cedc7cc132a..eca5f005a7fc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1181,7 +1181,6 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu) if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { for (i = 0; i < KVM_NR_DB_REGS; i++) vcpu->arch.eff_db[i] = vcpu->arch.db[i]; - vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; } } @@ -9604,7 +9603,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[2], 2); set_debugreg(vcpu->arch.eff_db[3], 3); set_debugreg(vcpu->arch.dr6, 6); - vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } else if (unlikely(hw_breakpoint_active())) { set_debugreg(0, 7); } @@ -9634,7 +9632,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) static_call(kvm_x86_sync_dirty_debug_regs)(vcpu); kvm_update_dr0123(vcpu); kvm_update_dr7(vcpu); - vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } /* -- cgit From 375e28ffc0cf4fc48862c03994ec4a93254cf1c6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Aug 2021 06:07:06 -0400 Subject: KVM: X86: Set host DR6 only on VMX and for KVM_DEBUGREG_WONT_EXIT Commit c77fb5fe6f03 ("KVM: x86: Allow the guest to run with dirty debug registers") allows the guest to access DRs without exiting when KVM_DEBUGREG_WONT_EXIT, and we need to ensure that they are synchronized on entry to the guest---including DR6, which was not synced before the commit. But the commit sets the hardware DR6 not only when KVM_DEBUGREG_WONT_EXIT, but also when KVM_DEBUGREG_BP_ENABLED. The second case is unnecessary and just adds one more case that leaks stale DR6 to the host, which then has to be resolved by unconditionally resetting DR6 in kvm_arch_vcpu_put(). Even with KVM_DEBUGREG_WONT_EXIT, however, setting the host DR6 only matters on VMX because SVM always uses the DR6 value from the VMCB. So move this line to vmx.c and make it conditional on KVM_DEBUGREG_WONT_EXIT. Reported-by: Lai Jiangshan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 4 ++++ arch/x86/kvm/x86.c | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ae8e62df16dd..7c1c8ef1a286 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6625,6 +6625,10 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->host_state.cr4 = cr4; } + /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ + if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) + set_debugreg(vcpu->arch.dr6, 6); + /* When single-stepping over STI and MOV SS, we must clear the * corresponding interruptibility bits in the guest state. 
Otherwise * vmentry fails as it then expects bit 14 (BS) in pending debug diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eca5f005a7fc..15ac8645e532 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9602,7 +9602,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[1], 1); set_debugreg(vcpu->arch.eff_db[2], 2); set_debugreg(vcpu->arch.eff_db[3], 3); - set_debugreg(vcpu->arch.dr6, 6); } else if (unlikely(hw_breakpoint_active())) { set_debugreg(0, 7); } -- cgit From 1ccb6f983a063e794daeb03f90b3517f87dfae8f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Aug 2021 06:11:35 -0400 Subject: KVM: VMX: Reset DR6 only when KVM_DEBUGREG_WONT_EXIT The commit efdab992813fb ("KVM: x86: fix escape of guest dr6 to the host") fixed a bug by resetting DR6 unconditionally when the vcpu is being scheduled out. But writing to debug registers is slow, and it can be visible in perf results sometimes, even if neither the host nor the guest activate breakpoints. Since KVM_DEBUGREG_WONT_EXIT on Intel processors is the only case where DR6 gets the guest value, and it never happens at all on SVM, the register can be cleared in vmx.c right after reading it. Reported-by: Lai Jiangshan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 6 ++++++ arch/x86/kvm/x86.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7c1c8ef1a286..123ecca91c27 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5110,6 +5110,12 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); + + /* + * exc_debug expects dr6 to be cleared after it runs, avoid that it sees + * a stale dr6 from the guest. + */ + set_debugreg(DR6_RESERVED, 6); } static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 15ac8645e532..70d42b50199a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4310,12 +4310,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static_call(kvm_x86_vcpu_put)(vcpu); vcpu->arch.last_host_tsc = rdtsc(); - /* - * If userspace has set any breakpoints or watchpoints, dr6 is restored - * on every vmexit, but if not, we might have a stale dr6 from the - * guest. do_debug expects dr6 to be cleared after it runs, do the same. - */ - set_debugreg(0, 6); } static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, -- cgit From 389ab25216c9d09e0d335e764eeeb84c2089614f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 10:19:50 -0700 Subject: KVM: nVMX: Pull KVM L0's desired controls directly from vmcs01 When preparing controls for vmcs02, grab KVM's desired controls from vmcs01's shadow state instead of recalculating the controls from scratch, or, in the secondary execution controls, instead of using the dedicated cache. Calculating secondary exec controls is eye-poppingly expensive due to the guest CPUID checks, hence the dedicated cache, but the other calculations aren't exactly free either. Explicitly clear several bits (x2APIC, DESC exiting, and load EFER on exit) as appropriate as they may be set in vmcs01, whereas the previous implementation relied on dynamic bits being cleared in the calculator. Intentionally propagate VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL from vmcs01 to vmcs02. 
Whether or not PERF_GLOBAL_CTRL is loaded depends on whether or not perf itself is active, so unless perf stops between the exit from L1 and entry to L2, vmcs01 will hold the desired value. This is purely an optimization as atomic_switch_perf_msrs() will set/clear the control as needed at VM-Enter, i.e. it avoids two extra VMWRITEs in the case where perf is active (versus starting with the bits clear in vmcs02, which was the previous behavior). Cc: Zeng Guang Signed-off-by: Sean Christopherson Message-Id: <20210810171952.2758100-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 25 ++++++++++++++++--------- arch/x86/kvm/vmx/vmx.h | 6 +++++- 2 files changed, 21 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0d0dd6580cfd..264a9f4c9179 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2170,7 +2170,8 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, } } -static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) +static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01, + struct vmcs12 *vmcs12) { u32 exec_control; u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12); @@ -2181,7 +2182,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) /* * PIN CONTROLS */ - exec_control = vmx_pin_based_exec_ctrl(vmx); + exec_control = __pin_controls_get(vmcs01); exec_control |= (vmcs12->pin_based_vm_exec_control & ~PIN_BASED_VMX_PREEMPTION_TIMER); @@ -2197,7 +2198,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) /* * EXEC CONTROLS */ - exec_control = vmx_exec_control(vmx); /* L0's desires */ + exec_control = __exec_controls_get(vmcs01); /* L0's desires */ exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING; exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING; exec_control &= ~CPU_BASED_TPR_SHADOW; @@ -2234,10 +2235,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * SECONDARY EXEC CONTROLS */ if (cpu_has_secondary_exec_ctrls()) { - exec_control = vmx->secondary_exec_control; + exec_control = __secondary_exec_controls_get(vmcs01); /* Take the following fields only from vmcs12 */ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_ENABLE_RDTSCP | SECONDARY_EXEC_XSAVES | @@ -2245,7 +2247,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_ENABLE_VMFUNC | - SECONDARY_EXEC_TSC_SCALING); + SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_DESC); + if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) exec_control |= vmcs12->secondary_vm_exec_control; @@ -2285,8 +2289,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * on the related bits (if supported by the CPU) in the hope that * we can avoid VMWrites during vmx_set_efer(). 
*/ - exec_control = (vmcs12->vm_entry_controls | vmx_vmentry_ctrl()) & - ~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER; + exec_control = __vm_entry_controls_get(vmcs01); + exec_control |= vmcs12->vm_entry_controls; + exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER); if (cpu_has_load_ia32_efer()) { if (guest_efer & EFER_LMA) exec_control |= VM_ENTRY_IA32E_MODE; @@ -2302,9 +2307,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER * bits may be modified by vmx_set_efer() in prepare_vmcs02(). */ - exec_control = vmx_vmexit_ctrl(); + exec_control = __vm_exit_controls_get(vmcs01); if (cpu_has_load_ia32_efer() && guest_efer != host_efer) exec_control |= VM_EXIT_LOAD_IA32_EFER; + else + exec_control &= ~VM_EXIT_LOAD_IA32_EFER; vm_exit_controls_set(vmx, exec_control); /* @@ -3347,7 +3354,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); - prepare_vmcs02_early(vmx, vmcs12); + prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12); if (from_vmentry) { if (unlikely(!nested_get_vmcs12_pages(vcpu))) { diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 4f151175d45a..414b440de9ac 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -418,9 +418,13 @@ static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \ vmx->loaded_vmcs->controls_shadow.lname = val; \ } \ } \ +static inline u32 __##lname##_controls_get(struct loaded_vmcs *vmcs) \ +{ \ + return vmcs->controls_shadow.lname; \ +} \ static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \ { \ - return vmx->loaded_vmcs->controls_shadow.lname; \ + return __##lname##_controls_get(vmx->loaded_vmcs); \ } \ static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \ { \ -- cgit From b6247686b7571003eca2305b2096f59e1e1ce976 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 10:19:51 -0700 Subject: KVM: VMX: Drop caching of KVM's desired sec exec controls for vmcs01 Remove the secondary execution controls cache now that it's effectively dead code; it is only read immediately after it is written. No functional change intended. 
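The __##lname##_controls_get() accessors added to vmx.h by the previous patch are what let callers read another VMCS's shadow state. A minimal, self-contained userspace sketch of that token-pasting pattern (struct layout, field names, and values here are illustrative, not KVM's actual definitions):

#include <stdio.h>

struct controls_shadow { unsigned int pin; unsigned int exec; };
struct loaded_vmcs { struct controls_shadow controls_shadow; };

/* Generates __pin_controls_get(), __exec_controls_get(), ... */
#define BUILD_CONTROLS_GET(lname)                                       \
static inline unsigned int                                              \
__##lname##_controls_get(struct loaded_vmcs *vmcs)                      \
{                                                                       \
        return vmcs->controls_shadow.lname;                             \
}

BUILD_CONTROLS_GET(pin)
BUILD_CONTROLS_GET(exec)

int main(void)
{
        struct loaded_vmcs vmcs01 = { .controls_shadow = { .pin = 0x16, .exec = 0x8401e172 } };

        /* Nested setup can read L0's desired controls straight from the shadow. */
        printf("pin=%#x exec=%#x\n",
               __pin_controls_get(&vmcs01), __exec_controls_get(&vmcs01));
        return 0;
}

Because prepare_vmcs02_early() now takes a vmcs01 pointer and reads these cached shadows directly, vmx->secondary_exec_control has no reader left beyond the write that immediately precedes it, hence its removal here.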
Signed-off-by: Sean Christopherson Message-Id: <20210810171952.2758100-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 20 ++++++++------------ arch/x86/kvm/vmx/vmx.h | 3 +-- 2 files changed, 9 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 123ecca91c27..f2622dd335ce 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4212,7 +4212,7 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control, #define vmx_adjust_sec_exec_exiting(vmx, exec_control, lname, uname) \ vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, uname##_EXITING, true) -static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) +u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { struct kvm_vcpu *vcpu = &vmx->vcpu; @@ -4298,7 +4298,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (!vcpu->kvm->arch.bus_lock_detection_enabled) exec_control &= ~SECONDARY_EXEC_BUS_LOCK_DETECTION; - vmx->secondary_exec_control = exec_control; + return exec_control; } #define VMX_XSS_EXIT_BITMAP 0 @@ -4322,10 +4322,8 @@ static void init_vmcs(struct vcpu_vmx *vmx) exec_controls_set(vmx, vmx_exec_control(vmx)); - if (cpu_has_secondary_exec_ctrls()) { - vmx_compute_secondary_exec_control(vmx); - secondary_exec_controls_set(vmx, vmx->secondary_exec_control); - } + if (cpu_has_secondary_exec_ctrls()) + secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx)); if (kvm_vcpu_apicv_active(&vmx->vcpu)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); @@ -6992,7 +6990,7 @@ exit: return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat; } -static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx) +static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl) { /* * These bits in the secondary execution controls field @@ -7006,7 +7004,6 @@ static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx) SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_DESC; - u32 new_ctl = vmx->secondary_exec_control; u32 cur_ctl = secondary_exec_controls_get(vmx); secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask)); @@ -7151,10 +7148,9 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmx_setup_uret_msrs(vmx); - if (cpu_has_secondary_exec_ctrls()) { - vmx_compute_secondary_exec_control(vmx); - vmcs_set_secondary_exec_control(vmx); - } + if (cpu_has_secondary_exec_ctrls()) + vmcs_set_secondary_exec_control(vmx, + vmx_secondary_exec_control(vmx)); if (nested_vmx_allowed(vcpu)) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 414b440de9ac..2bd07867e9da 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -263,8 +263,6 @@ struct vcpu_vmx { u64 spec_ctrl; u32 msr_ia32_umwait_control; - u32 secondary_exec_control; - /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a * non-nested (L1) guest, it always points to vmcs01. 
For a nested @@ -477,6 +475,7 @@ static inline u32 vmx_vmexit_ctrl(void) } u32 vmx_exec_control(struct vcpu_vmx *vmx); +u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx); u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx); static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) { return container_of(kvm, struct kvm_vmx, kvm); -- cgit From 2fba4fc155280727b4997c6ee86f24c260dd9155 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 10:19:52 -0700 Subject: KVM: VMX: Hide VMCS control calculators in vmx.c Now that nested VMX pulls KVM's desired VMCS controls from vmcs01 instead of re-calculating on the fly, bury the helpers that do the calculations in vmx.c. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210810171952.2758100-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 30 +++++++++++++++++++++++++++--- arch/x86/kvm/vmx/vmx.h | 26 -------------------------- 2 files changed, 27 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f2622dd335ce..cd913100b300 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4095,7 +4095,7 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) vmcs_writel(CR4_GUEST_HOST_MASK, ~vcpu->arch.cr4_guest_owned_bits); } -u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) { u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; @@ -4111,6 +4111,30 @@ u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) return pin_based_exec_ctrl; } +static u32 vmx_vmentry_ctrl(void) +{ + u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; + + if (vmx_pt_mode_is_system()) + vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | + VM_ENTRY_LOAD_IA32_RTIT_CTL); + /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ + return vmentry_ctrl & + ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); +} + +static u32 vmx_vmexit_ctrl(void) +{ + u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; + + if (vmx_pt_mode_is_system()) + vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | + VM_EXIT_CLEAR_IA32_RTIT_CTL); + /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ + return vmexit_ctrl & + ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); +} + static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4130,7 +4154,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) vmx_update_msr_bitmap_x2apic(vcpu); } -u32 vmx_exec_control(struct vcpu_vmx *vmx) +static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_exec_ctrl; @@ -4212,7 +4236,7 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control, #define vmx_adjust_sec_exec_exiting(vmx, exec_control, lname, uname) \ vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, uname##_EXITING, true) -u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) +static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { struct kvm_vcpu *vcpu = &vmx->vcpu; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 2bd07867e9da..4858c5fd95f2 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -452,32 +452,6 @@ static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu) vcpu->arch.regs_dirty = 0; } -static inline u32 vmx_vmentry_ctrl(void) -{ - u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; - if (vmx_pt_mode_is_system()) - vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | - VM_ENTRY_LOAD_IA32_RTIT_CTL); - /* Loading of EFER and 
PERF_GLOBAL_CTRL are toggled dynamically */ - return vmentry_ctrl & - ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); -} - -static inline u32 vmx_vmexit_ctrl(void) -{ - u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; - if (vmx_pt_mode_is_system()) - vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | - VM_EXIT_CLEAR_IA32_RTIT_CTL); - /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ - return vmexit_ctrl & - ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); -} - -u32 vmx_exec_control(struct vcpu_vmx *vmx); -u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx); -u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx); - static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) { return container_of(kvm, struct kvm_vmx, kvm); -- cgit From ad0577c375299a2cc426913c141086c0e9033c78 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 9 Aug 2021 10:39:54 -0700 Subject: KVM: x86: Kill off __ex() and __kvm_handle_fault_on_reboot() Remove the __kvm_handle_fault_on_reboot() and __ex() macros now that all VMX and SVM instructions use asm goto to handle the fault (or in the case of VMREAD, completely custom logic). Drop kvm_spurious_fault()'s asmlinkage annotation as __kvm_handle_fault_on_reboot() was the only flow that invoked it from assembly code. Cc: Uros Bizjak Cc: Like Xu Signed-off-by: Sean Christopherson Message-Id: <20210809173955.1710866-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 25 +------------------------ arch/x86/kvm/svm/sev.c | 2 -- arch/x86/kvm/svm/svm.c | 2 -- arch/x86/kvm/vmx/vmx_ops.h | 2 -- arch/x86/kvm/x86.c | 9 ++++++++- 5 files changed, 9 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5e35334980a6..8c8aa8ab5f0b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1811,30 +1811,7 @@ enum { #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) -asmlinkage void kvm_spurious_fault(void); - -/* - * Hardware virtualization extension instructions may fault if a - * reboot turns off virtualization while processes are running. - * Usually after catching the fault we just panic; during reboot - * instead the instruction is ignored. - */ -#define __kvm_handle_fault_on_reboot(insn) \ - "666: \n\t" \ - insn "\n\t" \ - "jmp 668f \n\t" \ - "667: \n\t" \ - "1: \n\t" \ - ".pushsection .discard.instr_begin \n\t" \ - ".long 1b - . \n\t" \ - ".popsection \n\t" \ - "call kvm_spurious_fault \n\t" \ - "1: \n\t" \ - ".pushsection .discard.instr_end \n\t" \ - ".long 1b - . 
\n\t" \ - ".popsection \n\t" \ - "668: \n\t" \ - _ASM_EXTABLE(666b, 667b) +void kvm_spurious_fault(void); #define KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 81601c31f549..75e0b21ad07c 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -28,8 +28,6 @@ #include "cpuid.h" #include "trace.h" -#define __ex(x) __kvm_handle_fault_on_reboot(x) - #ifndef CONFIG_KVM_AMD_SEV /* * When this config is not defined, SEV feature is not supported and APIs in diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9d72b1df426e..2b6632d4c76f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -46,8 +46,6 @@ #include "kvm_onhyperv.h" #include "svm_onhyperv.h" -#define __ex(x) __kvm_handle_fault_on_reboot(x) - MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 164b64f65a8f..c0d74b994b56 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -10,8 +10,6 @@ #include "evmcs.h" #include "vmcs.h" -#define __ex(x) __kvm_handle_fault_on_reboot(x) - asmlinkage void vmread_error(unsigned long field, bool fault); __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, bool fault); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 70d42b50199a..58a72c7d3330 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -486,7 +486,14 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } EXPORT_SYMBOL_GPL(kvm_set_apic_base); -asmlinkage __visible noinstr void kvm_spurious_fault(void) +/* + * Handle a fault on a hardware virtualization (VMX or SVM) instruction. + * + * Hardware virtualization extension instructions may fault if a reboot turns + * off virtualization while processes are running. Usually after catching the + * fault we just panic; during reboot instead the instruction is ignored. + */ +noinstr void kvm_spurious_fault(void) { /* Fault while not rebooting. We want the trace. */ BUG_ON(!kvm_rebooting); -- cgit From 65297341d8e15b04cc9e206597a3d7c407c346f6 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 9 Aug 2021 10:39:55 -0700 Subject: KVM: x86: Move declaration of kvm_spurious_fault() to x86.h Move the declaration of kvm_spurious_fault() to KVM's "private" x86.h, it should never be called by anything other than low level KVM code. Cc: Paolo Bonzini Cc: Sean Christopherson Signed-off-by: Uros Bizjak [sean: rebased to a series without __ex()/__kvm_handle_fault_on_reboot()] Signed-off-by: Sean Christopherson Message-Id: <20210809173955.1710866-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 -- arch/x86/kvm/svm/svm_ops.h | 2 +- arch/x86/kvm/vmx/vmx_ops.h | 2 +- arch/x86/kvm/x86.h | 2 ++ 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8c8aa8ab5f0b..a612d213be5c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1811,8 +1811,6 @@ enum { #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0) #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) -void kvm_spurious_fault(void); - #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h index 8170f2a5a16f..22e2b019de37 100644 --- a/arch/x86/kvm/svm/svm_ops.h +++ b/arch/x86/kvm/svm/svm_ops.h @@ -4,7 +4,7 @@ #include -#include +#include "x86.h" #define svm_asm(insn, clobber...) \ do { \ diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index c0d74b994b56..9e9ef47e988c 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -4,11 +4,11 @@ #include -#include #include #include "evmcs.h" #include "vmcs.h" +#include "x86.h" asmlinkage void vmread_error(unsigned long field, bool fault); __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 44ae10312740..7d66d63dc55a 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -8,6 +8,8 @@ #include "kvm_cache_regs.h" #include "kvm_emulate.h" +void kvm_spurious_fault(void); + static __always_inline void kvm_guest_enter_irqoff(void) { /* -- cgit From c1a527a1de46ad6f0f9d5907b29fc98e50267f8e Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 9 Aug 2021 17:34:08 +0800 Subject: KVM: x86: Clean up redundant ROL16(val, n) macro definition The ROL16(val, n) macro is repeatedly defined in several vmcs-related files, and it has never been used outside the KVM context. Let's move it to vmcs.h without any intended functional changes. Signed-off-by: Like Xu Message-Id: <20210809093410.59304-4-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/evmcs.c | 1 - arch/x86/kvm/vmx/evmcs.h | 4 ---- arch/x86/kvm/vmx/vmcs.h | 2 ++ arch/x86/kvm/vmx/vmcs12.c | 1 - arch/x86/kvm/vmx/vmcs12.h | 4 ---- 5 files changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index 896b2a50b4aa..0dab1b7b529f 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -14,7 +14,6 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs); #if IS_ENABLED(CONFIG_HYPERV) -#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) #define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x) #define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \ {EVMCS1_OFFSET(name), clean_field} diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 2ec9b46f0d0c..152ab0aa82cf 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -73,8 +73,6 @@ struct evmcs_field { extern const struct evmcs_field vmcs_field_to_evmcs_1[]; extern const unsigned int nr_evmcs_1_fields; -#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) - static __always_inline int get_evmcs_offset(unsigned long field, u16 *clean_field) { @@ -95,8 +93,6 @@ static __always_inline int get_evmcs_offset(unsigned long field, return evmcs_field->offset; } -#undef ROL16 - static inline void evmcs_write64(unsigned long field, u64 value) { u16 clean_field; diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 4b9957e2bf5b..6e5de2e2b0da 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -11,6 +11,8 @@ #include "capabilities.h" +#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) + struct vmcs_hdr { u32 revision_id:31; u32 shadow_vmcs:1; diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c index 
d9f5d7c56ae3..cab6ba7a5005 100644 --- a/arch/x86/kvm/vmx/vmcs12.c +++ b/arch/x86/kvm/vmx/vmcs12.c @@ -2,7 +2,6 @@ #include "vmcs12.h" -#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) #define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name) #define FIELD64(number, name) \ diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 5e0e1b39f495..2a45f026ee11 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -364,8 +364,6 @@ static inline void vmx_check_vmcs12_offsets(void) extern const unsigned short vmcs_field_to_offset_table[]; extern const unsigned int nr_vmcs12_fields; -#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) - static inline short vmcs_field_to_offset(unsigned long field) { unsigned short offset; @@ -385,8 +383,6 @@ static inline short vmcs_field_to_offset(unsigned long field) return offset; } -#undef ROL16 - static inline u64 vmcs12_read_any(struct vmcs12 *vmcs12, unsigned long field, u16 offset) { -- cgit From f7782bb8d818d8f47c26b22079db10599922787a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 07:45:26 -0700 Subject: KVM: nVMX: Unconditionally clear nested.pi_pending on nested VM-Enter Clear nested.pi_pending on nested VM-Enter even if L2 will run without posted interrupts enabled. If nested.pi_pending is left set from a previous L2, vmx_complete_nested_posted_interrupt() will pick up the stale flag and exit to userspace with an "internal emulation error" due to the new L2 not having a valid nested.pi_desc. Arguably, vmx_complete_nested_posted_interrupt() should first check for posted interrupts being enabled, but it's also completely reasonable that KVM wouldn't screw up a fundamental flag. Not to mention that the mere existence of nested.pi_pending is a long-standing bug as KVM shouldn't move the posted interrupt out of the IRR until it's actually processed, e.g. KVM effectively drops an interrupt when it performs a nested VM-Exit with a "pending" posted interrupt. Fixing the mess is a future problem. Prior to vmx_complete_nested_posted_interrupt() interpreting a null PI descriptor as an error, this was a benign bug as the null PI descriptor effectively served as a check on PI not being enabled. Even then, the new flow did not become problematic until KVM started checking the result of kvm_check_nested_events(). Fixes: 705699a13994 ("KVM: nVMX: Enable nested posted interrupt processing") Fixes: 966eefb89657 ("KVM: nVMX: Disable vmcs02 posted interrupts if vmcs12 PID isn't mappable") Fixes: 47d3530f86c0 ("KVM: x86: Exit to userspace when kvm_check_nested_events fails") Cc: stable@vger.kernel.org Cc: Jim Mattson Signed-off-by: Sean Christopherson Message-Id: <20210810144526.2662272-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 264a9f4c9179..bc6327950657 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2187,12 +2187,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 ~PIN_BASED_VMX_PREEMPTION_TIMER); /* Posted interrupts setting is only taken from vmcs12. 
*/ - if (nested_cpu_has_posted_intr(vmcs12)) { + vmx->nested.pi_pending = false; + if (nested_cpu_has_posted_intr(vmcs12)) vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; - vmx->nested.pi_pending = false; - } else { + else exec_control &= ~PIN_BASED_POSTED_INTR; - } pin_controls_set(vmx, exec_control); /* -- cgit From b11748e693166679acc13c8a4328a71efe1d4a89 Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Sun, 1 Aug 2021 09:38:20 +0200 Subject: powerpc: wii.dts: Reduce the size of the control area This is wrong, but needed in order to avoid overlapping ranges with the OTP area added in the next commit. A refactor of this part of the device tree is needed: according to Wiibrew[1], this area starts at 0x0d800000 and spans 0x400 bytes (that is, 0x100 32-bit registers), encompassing PIC and GPIO registers, amongst the ones already exposed in this device tree, which should become children of the control@d800000 node. [1] https://wiibrew.org/wiki/Hardware/Hollywood_Registers Signed-off-by: Emmanuel Gil Peyrot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210801073822.12452-4-linkmauve@linkmauve.fr --- arch/powerpc/boot/dts/wii.dts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts index aaa381da1906..c5fb54f8cc02 100644 --- a/arch/powerpc/boot/dts/wii.dts +++ b/arch/powerpc/boot/dts/wii.dts @@ -216,7 +216,13 @@ control@d800100 { compatible = "nintendo,hollywood-control"; - reg = <0x0d800100 0x300>; + /* + * Both the address and length are wrong, according to + * Wiibrew this should be <0x0d800000 0x400>, but it + * requires refactoring the PIC1 and GPIO nodes before + * changing that. + */ + reg = <0x0d800100 0xa0>; }; disk@d806000 { -- cgit From 562a610b4c5119034aed300f6ae212ec7a20c4b4 Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Sun, 1 Aug 2021 09:38:21 +0200 Subject: powerpc: wii.dts: Expose the OTP on this platform This can be used by the newly-added nintendo-otp nvmem module. Signed-off-by: Emmanuel Gil Peyrot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210801073822.12452-5-linkmauve@linkmauve.fr --- arch/powerpc/boot/dts/wii.dts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts index c5fb54f8cc02..e9c945b123c6 100644 --- a/arch/powerpc/boot/dts/wii.dts +++ b/arch/powerpc/boot/dts/wii.dts @@ -219,12 +219,17 @@ /* * Both the address and length are wrong, according to * Wiibrew this should be <0x0d800000 0x400>, but it - * requires refactoring the PIC1 and GPIO nodes before - * changing that. + * requires refactoring the PIC1, GPIO and OTP nodes + * before changing that. */ reg = <0x0d800100 0xa0>; }; + otp@d8001ec { + compatible = "nintendo,hollywood-otp"; + reg = <0x0d8001ec 0x8>; + }; + disk@d806000 { compatible = "nintendo,hollywood-di"; reg = <0x0d806000 0x40>; -- cgit From 140a89b7bfe65e9649c4a3678f74c32556834ec1 Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Sun, 1 Aug 2021 09:38:22 +0200 Subject: powerpc: wii_defconfig: Enable OTP by default This selects the nintendo-otp module when building for this platform. 
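As a consumer-side illustration only: once the OTP is registered as an nvmem provider, a hypothetical driver could read it through the generic nvmem API. This sketch is an assumption for illustration (the device lookup, offset, and length are invented), not code from the nintendo-otp driver:

#include <linux/nvmem-consumer.h>
#include <linux/device.h>
#include <linux/err.h>

/* Hypothetical consumer: pull the first 32 bytes out of the Hollywood OTP. */
static int wii_otp_sample_read(struct device *dev)
{
        struct nvmem_device *otp;
        u8 buf[32];
        int ret;

        otp = devm_nvmem_device_get(dev, NULL); /* assumes the lookup is wired up */
        if (IS_ERR(otp))
                return PTR_ERR(otp);

        ret = nvmem_device_read(otp, 0, sizeof(buf), buf);
        return ret < 0 ? ret : 0;
}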
Signed-off-by: Emmanuel Gil Peyrot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210801073822.12452-6-linkmauve@linkmauve.fr --- arch/powerpc/configs/wii_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig index 379c171f3ddd..a0c45bf2bfb1 100644 --- a/arch/powerpc/configs/wii_defconfig +++ b/arch/powerpc/configs/wii_defconfig @@ -99,6 +99,7 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_PANIC=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=y +CONFIG_NVMEM_NINTENDO_OTP=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y CONFIG_FUSE_FS=m -- cgit From 3e188b1ae8807f26cc5a530a9d55f3f643fe050a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:58:30 +0530 Subject: powerpc/book3s64/radix: make tlb_single_page_flush_ceiling a debugfs entry Similar to x86/s390 add a debugfs file to tune tlb_single_page_flush_ceiling. Also add a debugfs entry for tlb_local_single_page_flush_ceiling. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132831.233794-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index aefc100d79a7..1fa2bc6a969e 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "internal.h" @@ -1106,8 +1107,8 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range); * invalidating a full PID, so it has a far lower threshold to change from * individual page flushes to full-pid flushes. */ -static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; -static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; +static u32 tlb_single_page_flush_ceiling __read_mostly = 33; +static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; static inline void __radix__flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end) @@ -1524,3 +1525,14 @@ void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ + +static int __init create_tlb_single_page_flush_ceiling(void) +{ + debugfs_create_u32("tlb_single_page_flush_ceiling", 0600, + powerpc_debugfs_root, &tlb_single_page_flush_ceiling); + debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600, + powerpc_debugfs_root, &tlb_local_single_page_flush_ceiling); + return 0; +} +late_initcall(create_tlb_single_page_flush_ceiling); + -- cgit From dbf77fed8b302e87561c7c2fc06050c88f4d3120 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:58:31 +0530 Subject: powerpc: rename powerpc_debugfs_root to arch_debugfs_dir No functional change in this patch. arch_debugfs_dir is the generic kernel name declared in linux/debugfs.h for arch-specific debugfs directory. Architectures like x86/s390 already use the name. Rename powerpc specific powerpc_debugfs_root to arch_debugfs_dir. 
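The knob-creation pattern is unchanged by the rename; only the parent dentry's symbol name differs. A minimal sketch of how an arch file exposes a tunable under the shared directory, mirroring create_tlb_single_page_flush_ceiling() above (the knob name and variable are made up for illustration):

#include <linux/debugfs.h>
#include <linux/init.h>
#include <linux/cache.h>

static u32 example_flush_ceiling __read_mostly = 33;    /* hypothetical tunable */

static int __init create_example_debugfs(void)
{
        /* Shows up as /sys/kernel/debug/powerpc/example_flush_ceiling. */
        debugfs_create_u32("example_flush_ceiling", 0600,
                           arch_debugfs_dir, &example_flush_ceiling);
        return 0;
}
late_initcall(create_example_debugfs);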
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132831.233794-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/debugfs.h | 13 ------------- arch/powerpc/kernel/Makefile | 3 ++- arch/powerpc/kernel/dawr.c | 3 +-- arch/powerpc/kernel/eeh.c | 16 ++++++++-------- arch/powerpc/kernel/eeh_cache.c | 4 ++-- arch/powerpc/kernel/fadump.c | 4 ++-- arch/powerpc/kernel/hw_breakpoint.c | 1 - arch/powerpc/kernel/kdebugfs.c | 14 ++++++++++++++ arch/powerpc/kernel/security.c | 16 ++++++++-------- arch/powerpc/kernel/setup-common.c | 13 ------------- arch/powerpc/kernel/setup_64.c | 1 - arch/powerpc/kernel/traps.c | 4 ++-- arch/powerpc/kvm/book3s_xics.c | 6 +++--- arch/powerpc/kvm/book3s_xive.c | 3 +-- arch/powerpc/kvm/book3s_xive_native.c | 3 +-- arch/powerpc/mm/book3s64/hash_utils.c | 4 ++-- arch/powerpc/mm/book3s64/pgtable.c | 4 ++-- arch/powerpc/mm/book3s64/radix_tlb.c | 6 +++--- arch/powerpc/mm/ptdump/bats.c | 4 ++-- arch/powerpc/mm/ptdump/segment_regs.c | 4 ++-- arch/powerpc/platforms/cell/axon_msi.c | 4 ++-- arch/powerpc/platforms/powernv/memtrace.c | 3 +-- arch/powerpc/platforms/powernv/opal-imc.c | 4 ++-- arch/powerpc/platforms/powernv/opal-lpc.c | 4 ++-- arch/powerpc/platforms/powernv/opal-xscom.c | 4 ++-- arch/powerpc/platforms/powernv/pci-ioda.c | 4 ++-- arch/powerpc/platforms/pseries/dtl.c | 4 ++-- arch/powerpc/platforms/pseries/lpar.c | 5 +++-- arch/powerpc/sysdev/xive/common.c | 3 +-- arch/powerpc/xmon/xmon.c | 6 +++--- 30 files changed, 75 insertions(+), 92 deletions(-) delete mode 100644 arch/powerpc/include/asm/debugfs.h create mode 100644 arch/powerpc/kernel/kdebugfs.c (limited to 'arch') diff --git a/arch/powerpc/include/asm/debugfs.h b/arch/powerpc/include/asm/debugfs.h deleted file mode 100644 index 2c5c48571d75..000000000000 --- a/arch/powerpc/include/asm/debugfs.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_DEBUGFS_H -#define _ASM_POWERPC_DEBUGFS_H - -/* - * Copyright 2017, Michael Ellerman, IBM Corporation. 
- */ - -#include - -extern struct dentry *powerpc_debugfs_root; - -#endif /* _ASM_POWERPC_DEBUGFS_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index f66b63e81c3b..7be36c1e1db6 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -46,7 +46,8 @@ obj-y := cputable.o syscalls.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o firmware.o \ - hw_breakpoint_constraints.o interrupt.o + hw_breakpoint_constraints.o interrupt.o \ + kdebugfs.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ paca.o nvram_64.o note.o diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c index cdc2dccb987d..64e423d2fe0f 100644 --- a/arch/powerpc/kernel/dawr.c +++ b/arch/powerpc/kernel/dawr.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -101,7 +100,7 @@ static int __init dawr_force_setup(void) if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) { /* Turn DAWR off by default, but allow admin to turn it on */ debugfs_create_file_unsafe("dawr_enable_dangerous", 0600, - powerpc_debugfs_root, + arch_debugfs_dir, &dawr_force_enable, &dawr_enable_fops); } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 3bbdcc86d01b..e9b597ed423c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -21,9 +21,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -1901,24 +1901,24 @@ static int __init eeh_init_proc(void) proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show); #ifdef CONFIG_DEBUG_FS debugfs_create_file_unsafe("eeh_enable", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &eeh_enable_dbgfs_ops); debugfs_create_u32("eeh_max_freezes", 0600, - powerpc_debugfs_root, &eeh_max_freezes); + arch_debugfs_dir, &eeh_max_freezes); debugfs_create_bool("eeh_disable_recovery", 0600, - powerpc_debugfs_root, + arch_debugfs_dir, &eeh_debugfs_no_recover); debugfs_create_file_unsafe("eeh_dev_check", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &eeh_dev_check_fops); debugfs_create_file_unsafe("eeh_dev_break", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &eeh_dev_break_fops); debugfs_create_file_unsafe("eeh_force_recover", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &eeh_force_recover_fops); debugfs_create_file_unsafe("eeh_dev_can_recover", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &eeh_dev_can_recover_fops); eeh_cache_debugfs_init(); #endif diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index bf3270426d82..9bdaaf7fddc9 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -12,8 +12,8 @@ #include #include #include +#include #include -#include #include @@ -283,6 +283,6 @@ DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache); void eeh_cache_debugfs_init(void) { debugfs_create_file_unsafe("eeh_address_cache", 0400, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &eeh_addr_cache_fops); } diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index b990075285f5..b7ceb041743c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -24,8 +24,8 @@ #include #include #include +#include -#include #include #include #include @@ -1557,7 +1557,7 @@ static void fadump_init_files(void) return; } - debugfs_create_file("fadump_region", 0444, powerpc_debugfs_root, NULL, + debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL, &fadump_region_fops); if 
(fw_dump.dump_active) { diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 21a638aff72f..91a3be14808b 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/kdebugfs.c b/arch/powerpc/kernel/kdebugfs.c new file mode 100644 index 000000000000..36d3124d5a8b --- /dev/null +++ b/arch/powerpc/kernel/kdebugfs.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +struct dentry *arch_debugfs_dir; +EXPORT_SYMBOL(arch_debugfs_dir); + +static int __init arch_kdebugfs_init(void) +{ + arch_debugfs_dir = debugfs_create_dir("powerpc", NULL); + return 0; +} +arch_initcall(arch_kdebugfs_init); diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index cc51fa52e783..1a998490fe60 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -11,10 +11,10 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -106,7 +106,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_barrier_nospec, barrier_nospec_get, static __init int barrier_nospec_debugfs_init(void) { debugfs_create_file_unsafe("barrier_nospec", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &fops_barrier_nospec); return 0; } @@ -114,7 +114,7 @@ device_initcall(barrier_nospec_debugfs_init); static __init int security_feature_debugfs_init(void) { - debugfs_create_x64("security_features", 0400, powerpc_debugfs_root, + debugfs_create_x64("security_features", 0400, arch_debugfs_dir, &powerpc_security_features); return 0; } @@ -420,7 +420,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, static __init int stf_barrier_debugfs_init(void) { - debugfs_create_file_unsafe("stf_barrier", 0600, powerpc_debugfs_root, + debugfs_create_file_unsafe("stf_barrier", 0600, arch_debugfs_dir, NULL, &fops_stf_barrier); return 0; } @@ -748,7 +748,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, static __init int count_cache_flush_debugfs_init(void) { debugfs_create_file_unsafe("count_cache_flush", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &fops_count_cache_flush); return 0; } @@ -834,9 +834,9 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set static __init int rfi_flush_debugfs_init(void) { - debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); - debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); - debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); + debugfs_create_file("rfi_flush", 0600, arch_debugfs_dir, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, arch_debugfs_dir, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, arch_debugfs_dir, NULL, &fops_uaccess_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index aa9c2d01424a..b1e43b69a559 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -773,18 +772,6 @@ static int __init check_cache_coherency(void) late_initcall(check_cache_coherency); #endif /* CONFIG_CHECK_CACHE_COHERENCY */ -#ifdef CONFIG_DEBUG_FS -struct dentry *powerpc_debugfs_root; 
-EXPORT_SYMBOL(powerpc_debugfs_root); - -static int powerpc_debugfs_init(void) -{ - powerpc_debugfs_root = debugfs_create_dir("powerpc", NULL); - return 0; -} -arch_initcall(powerpc_debugfs_init); -#endif - void ppc_printk_progress(char *s, unsigned short hex) { pr_info("%s\n", s); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1ff258f6c76c..eaa79a0996d1 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -32,7 +32,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index dfbce527c98e..c8f648727d36 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -37,10 +37,10 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -2267,7 +2267,7 @@ static int __init ppc_warn_emulated_init(void) struct ppc_emulated_entry *entries = (void *)&ppc_emulated; dir = debugfs_create_dir("emulated_instructions", - powerpc_debugfs_root); + arch_debugfs_dir); debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated); diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 303e3cb096db..ebd5d920de8c 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -10,13 +10,13 @@ #include #include #include - +#include #include + #include #include #include #include -#include #include #include @@ -1024,7 +1024,7 @@ static void xics_debugfs_init(struct kvmppc_xics *xics) return; } - xics->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, + xics->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, xics, &xics_debug_fops); pr_debug("%s: created %s\n", __func__, name); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 912c1e9eef6b..a18db9e16ea4 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -2360,7 +2359,7 @@ static void xive_debugfs_init(struct kvmppc_xive *xive) return; } - xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, + xive->dentry = debugfs_create_file(name, S_IRUGO, arch_debugfs_dir, xive, &xive_debug_fops); pr_debug("%s: created %s\n", __func__, name); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index af65ea21bde7..99db9ac49901 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -1268,7 +1267,7 @@ static void xive_native_debugfs_init(struct kvmppc_xive *xive) return; } - xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, + xive->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, xive, &xive_native_debug_fops); pr_debug("%s: created %s\n", __func__, name); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index ac5720371c0d..c145776d3ae5 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -36,8 +36,8 @@ #include #include #include +#include -#include #include #include #include @@ -2072,7 +2072,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n") static int __init hash64_debugfs(void) { - debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root, NULL, + debugfs_create_file("hpt_order", 0600, arch_debugfs_dir, NULL, &fops_hpt_order); return 0; } diff --git 
a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 300099de553b..9e16c7b1a6c5 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -6,9 +6,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -520,7 +520,7 @@ static int __init pgtable_debugfs_setup(void) * invalidated as expected. */ debugfs_create_bool("tlbie_enabled", 0600, - powerpc_debugfs_root, + arch_debugfs_dir, &tlbie_enabled); return 0; diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 1fa2bc6a969e..7724af19ed7e 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -17,7 +18,6 @@ #include #include #include -#include #include "internal.h" @@ -1529,9 +1529,9 @@ EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); static int __init create_tlb_single_page_flush_ceiling(void) { debugfs_create_u32("tlb_single_page_flush_ceiling", 0600, - powerpc_debugfs_root, &tlb_single_page_flush_ceiling); + arch_debugfs_dir, &tlb_single_page_flush_ceiling); debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600, - powerpc_debugfs_root, &tlb_local_single_page_flush_ceiling); + arch_debugfs_dir, &tlb_local_single_page_flush_ceiling); return 0; } late_initcall(create_tlb_single_page_flush_ceiling); diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index c4c628b03cf8..8bf7383fb26c 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -7,7 +7,7 @@ */ #include -#include +#include #include #include "ptdump.h" @@ -103,7 +103,7 @@ static const struct file_operations bats_fops = { static int __init bats_init(void) { debugfs_create_file("block_address_translation", 0400, - powerpc_debugfs_root, NULL, &bats_fops); + arch_debugfs_dir, NULL, &bats_fops); return 0; } device_initcall(bats_init); diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c index 565048a0c9be..9223dfb85c51 100644 --- a/arch/powerpc/mm/ptdump/segment_regs.c +++ b/arch/powerpc/mm/ptdump/segment_regs.c @@ -6,7 +6,7 @@ * This dumps the content of Segment Registers */ -#include +#include static void seg_show(struct seq_file *m, int i) { @@ -55,7 +55,7 @@ static const struct file_operations sr_fops = { static int __init sr_init(void) { - debugfs_create_file("segment_registers", 0400, powerpc_debugfs_root, + debugfs_create_file("segment_registers", 0400, arch_debugfs_dir, NULL, &sr_fops); return 0; } diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index ca2555b8a0c2..82335e364c44 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -12,8 +12,8 @@ #include #include #include +#include -#include #include #include #include @@ -480,6 +480,6 @@ void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic) snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn)); - debugfs_create_file(name, 0600, powerpc_debugfs_root, msic, &fops_msic); + debugfs_create_file(name, 0600, arch_debugfs_dir, msic, &fops_msic); } #endif /* DEBUG */ diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 537a4daed614..877720c64515 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -18,7 +18,6 @@ #include #include #include -#include #include /* This enables us to keep track 
of the memory removed from each node. */ @@ -330,7 +329,7 @@ DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get, static int memtrace_init(void) { memtrace_debugfs_dir = debugfs_create_dir("memtrace", - powerpc_debugfs_root); + arch_debugfs_dir); debugfs_create_file("enable", 0600, memtrace_debugfs_dir, NULL, &memtrace_init_fops); diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index ba02a75c1410..05d3832019b9 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -13,11 +13,11 @@ #include #include #include +#include #include #include #include #include -#include static struct dentry *imc_debugfs_parent; @@ -56,7 +56,7 @@ static void export_imc_mode_and_cmd(struct device_node *node, u32 cb_offset; struct imc_mem_info *ptr = pmu_ptr->mem_info; - imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root); + imc_debugfs_parent = debugfs_create_dir("imc", arch_debugfs_dir); if (of_property_read_u32(node, "cb_offset", &cb_offset)) cb_offset = IMC_CNTL_BLK_OFFSET; diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index 608569082ba0..1e5d51db40f8 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -10,13 +10,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include static int opal_lpc_chip_id = -1; @@ -371,7 +371,7 @@ static int opal_lpc_init_debugfs(void) if (opal_lpc_chip_id < 0) return -ENODEV; - root = debugfs_create_dir("lpc", powerpc_debugfs_root); + root = debugfs_create_dir("lpc", arch_debugfs_dir); rc |= opal_lpc_debugfs_create_type(root, "io", OPAL_LPC_IO); rc |= opal_lpc_debugfs_create_type(root, "mem", OPAL_LPC_MEM); diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c index fd510d961b8c..6b4eed2ef4fa 100644 --- a/arch/powerpc/platforms/powernv/opal-xscom.c +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -14,11 +14,11 @@ #include #include #include +#include #include #include #include -#include #include static u64 opal_scom_unmangle(u64 addr) @@ -189,7 +189,7 @@ static int scom_debug_init(void) if (!firmware_has_feature(FW_FEATURE_OPAL)) return 0; - root = debugfs_create_dir("scom", powerpc_debugfs_root); + root = debugfs_create_dir("scom", arch_debugfs_dir); if (!root) return -1; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2389cd79c3c8..3dd35c327d1c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -2475,7 +2475,7 @@ static void pnv_pci_ioda_create_dbgfs(void) phb = hose->private_data; sprintf(name, "PCI%04x", hose->global_number); - phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); + phb->dbgfs = debugfs_create_dir(name, arch_debugfs_dir); debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs, phb, &pnv_pci_diag_data_fops); diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 982f069e4c31..352af5b14a0f 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -11,10 +11,10 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -338,7 +338,7 @@ static int 
dtl_init(void) /* set up common debugfs structure */ - dtl_dir = debugfs_create_dir("dtl", powerpc_debugfs_root); + dtl_dir = debugfs_create_dir("dtl", arch_debugfs_dir); debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask); debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 869ef638698a..bd1fcb0881ea 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include #include #include @@ -39,7 +41,6 @@ #include #include #include -#include #include #include "pseries.h" @@ -2019,7 +2020,7 @@ static int __init vpa_debugfs_init(void) if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return 0; - vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root); + vpa_dir = debugfs_create_dir("vpa", arch_debugfs_dir); /* set up the per-cpu vpa file*/ for_each_possible_cpu(i) { diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 4018964bbd69..458645c7a72b 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -21,7 +21,6 @@ #include #include -#include #include #include #include @@ -1769,7 +1768,7 @@ DEFINE_SHOW_ATTRIBUTE(xive_core_debug); int xive_core_debug_init(void) { if (xive_enabled()) - debugfs_create_file("xive", 0400, powerpc_debugfs_root, + debugfs_create_file("xive", 0400, arch_debugfs_dir, NULL, &xive_core_debug_fops); return 0; } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index da4d7f225a40..ead460b80905 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -26,8 +26,8 @@ #include #include #include +#include -#include #include #include #include @@ -4077,8 +4077,8 @@ DEFINE_SIMPLE_ATTRIBUTE(xmon_dbgfs_ops, xmon_dbgfs_get, static int __init setup_xmon_dbgfs(void) { - debugfs_create_file("xmon", 0600, powerpc_debugfs_root, NULL, - &xmon_dbgfs_ops); + debugfs_create_file("xmon", 0600, arch_debugfs_dir, NULL, + &xmon_dbgfs_ops); return 0; } device_initcall(setup_xmon_dbgfs); -- cgit From 7e35ef662ca05c42dbc2f401bb76d9219dd7fd02 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:52:19 +0530 Subject: powerpc/pseries: rename min_common_depth to primary_domain_index No functional change in this patch. 
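For context, primary_domain_index is the 1-based position within an ibm,associativity device tree property at which the kernel reads the NUMA node id, so the new name describes what the value indexes rather than how it is found. A minimal sketch of that lookup (hypothetical helper name and property values, not code from this patch; assumes of_read_number() from linux/of.h and NUMA_NO_NODE from linux/numa.h):

/*
 * An ibm,associativity property is laid out as { length, domain1, domain2, ... }.
 * With a primary domain index of 4 and a property of { 4, 2, 1, 0, 7 },
 * the node id read out below is 7.
 */
static int example_associativity_to_nid(const __be32 *prop, int primary_index)
{
	int len = of_read_number(prop, 1);	/* first cell: number of domain entries */

	if (len < primary_index)
		return NUMA_NO_NODE;

	return of_read_number(&prop[primary_index], 1);	/* 1-based entry */
}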
Signed-off-by: Aneesh Kumar K.V Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132223.225214-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/numa.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 094a1076fd1f..79132744b728 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL(numa_cpu_lookup_table); EXPORT_SYMBOL(node_to_cpumask_map); EXPORT_SYMBOL(node_data); -static int min_common_depth; +static int primary_domain_index; static int n_mem_addr_cells, n_mem_size_cells; static int form1_affinity; @@ -232,8 +232,8 @@ static int associativity_to_nid(const __be32 *associativity) if (!numa_enabled) goto out; - if (of_read_number(associativity, 1) >= min_common_depth) - nid = of_read_number(&associativity[min_common_depth], 1); + if (of_read_number(associativity, 1) >= primary_domain_index) + nid = of_read_number(&associativity[primary_domain_index], 1); /* POWER4 LPAR uses 0xffff as invalid node */ if (nid == 0xffff || nid >= nr_node_ids) @@ -284,9 +284,9 @@ int of_node_to_nid(struct device_node *device) } EXPORT_SYMBOL(of_node_to_nid); -static int __init find_min_common_depth(void) +static int __init find_primary_domain_index(void) { - int depth; + int index; struct device_node *root; if (firmware_has_feature(FW_FEATURE_OPAL)) @@ -326,7 +326,7 @@ static int __init find_min_common_depth(void) } if (form1_affinity) { - depth = of_read_number(distance_ref_points, 1); + index = of_read_number(distance_ref_points, 1); } else { if (distance_ref_points_depth < 2) { printk(KERN_WARNING "NUMA: " @@ -334,7 +334,7 @@ static int __init find_min_common_depth(void) goto err; } - depth = of_read_number(&distance_ref_points[1], 1); + index = of_read_number(&distance_ref_points[1], 1); } /* @@ -348,7 +348,7 @@ static int __init find_min_common_depth(void) } of_node_put(root); - return depth; + return index; err: of_node_put(root); @@ -437,16 +437,16 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb) int nid = default_nid; int rc, index; - if ((min_common_depth < 0) || !numa_enabled) + if ((primary_domain_index < 0) || !numa_enabled) return default_nid; rc = of_get_assoc_arrays(&aa); if (rc) return default_nid; - if (min_common_depth <= aa.array_sz && + if (primary_domain_index <= aa.array_sz && !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) { - index = lmb->aa_index * aa.array_sz + min_common_depth - 1; + index = lmb->aa_index * aa.array_sz + primary_domain_index - 1; nid = of_read_number(&aa.arrays[index], 1); if (nid == 0xffff || nid >= nr_node_ids) @@ -708,18 +708,18 @@ static int __init parse_numa_properties(void) return -1; } - min_common_depth = find_min_common_depth(); + primary_domain_index = find_primary_domain_index(); - if (min_common_depth < 0) { + if (primary_domain_index < 0) { /* - * if we fail to parse min_common_depth from device tree + * if we fail to parse primary_domain_index from device tree * mark the numa disabled, boot with numa disabled. 
*/ numa_enabled = false; - return min_common_depth; + return primary_domain_index; } - dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); + dbg("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index); /* * Even though we connect cpus to numa domains later in SMP @@ -924,7 +924,7 @@ static void __init find_possible_nodes(void) goto out; } - max_nodes = of_read_number(&domains[min_common_depth], 1); + max_nodes = of_read_number(&domains[primary_domain_index], 1); pr_info("Partition configured for %d NUMA nodes.\n", max_nodes); for (i = 0; i < max_nodes; i++) { @@ -933,7 +933,7 @@ static void __init find_possible_nodes(void) } prop_length /= sizeof(int); - if (prop_length > min_common_depth + 2) + if (prop_length > primary_domain_index + 2) coregroup_enabled = 1; out: @@ -1266,7 +1266,7 @@ int cpu_to_coregroup_id(int cpu) goto out; index = of_read_number(associativity, 1); - if (index > min_common_depth + 1) + if (index > primary_domain_index + 1) return of_read_number(&associativity[index - 1], 1); out: -- cgit From 0eacd06bb8adea8dd9edb0a30144166d9f227e64 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:52:20 +0530 Subject: powerpc/pseries: Rename TYPE1_AFFINITY to FORM1_AFFINITY Also make related code cleanups that will allow adding FORM2_AFFINITY in later patches. No functional change in this patch. Signed-off-by: Aneesh Kumar K.V Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132223.225214-3-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/firmware.h | 4 ++-- arch/powerpc/include/asm/prom.h | 2 +- arch/powerpc/kernel/prom_init.c | 2 +- arch/powerpc/mm/numa.c | 35 ++++++++++++++++++------------- arch/powerpc/platforms/pseries/firmware.c | 2 +- 5 files changed, 26 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 7604673787d6..60b631161360 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -44,7 +44,7 @@ #define FW_FEATURE_OPAL ASM_CONST(0x0000000010000000) #define FW_FEATURE_SET_MODE ASM_CONST(0x0000000040000000) #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000) -#define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) +#define FW_FEATURE_FORM1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) #define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000) #define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000) @@ -69,7 +69,7 @@ enum { FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO | FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | - FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | + FW_FEATURE_FORM1_AFFINITY | FW_FEATURE_PRRN | FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 324a13351749..df9fec9d232c 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -147,7 +147,7 @@ extern int of_read_drc_info_cell(struct property **prop, #define OV5_MSI 0x0201 /* PCIe/MSI support */ #define OV5_CMO 0x0480 /* Cooperative Memory Overcommitment */ #define OV5_XCMO 0x0440 /* Page Coalescing */ -#define OV5_TYPE1_AFFINITY 0x0580 /* Type 1 NUMA affinity */ +#define OV5_FORM1_AFFINITY 0x0580 /* FORM1 NUMA affinity */ #define OV5_PRRN 0x0540 /* Platform
Resource Reassignment */ #define OV5_HP_EVT 0x0604 /* Hot Plug Event support */ #define OV5_RESIZE_HPT 0x0601 /* Hash Page Table resizing */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a5bf355ce1d6..57db605ad33a 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1096,7 +1096,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { #else 0, #endif - .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), + .associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN), .bin_opts = OV5_FEAT(OV5_RESIZE_HPT) | OV5_FEAT(OV5_HP_EVT), .micro_checkpoint = 0, .reserved0 = 0, diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 79132744b728..0bad11b3e929 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -53,7 +53,10 @@ EXPORT_SYMBOL(node_data); static int primary_domain_index; static int n_mem_addr_cells, n_mem_size_cells; -static int form1_affinity; + +#define FORM0_AFFINITY 0 +#define FORM1_AFFINITY 1 +static int affinity_form; #define MAX_DISTANCE_REF_POINTS 4 static int distance_ref_points_depth; @@ -190,7 +193,7 @@ int __node_distance(int a, int b) int i; int distance = LOCAL_DISTANCE; - if (!form1_affinity) + if (affinity_form == FORM0_AFFINITY) return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE); for (i = 0; i < distance_ref_points_depth; i++) { @@ -210,7 +213,7 @@ static void initialize_distance_lookup_table(int nid, { int i; - if (!form1_affinity) + if (affinity_form != FORM1_AFFINITY) return; for (i = 0; i < distance_ref_points_depth; i++) { @@ -289,6 +292,17 @@ static int __init find_primary_domain_index(void) int index; struct device_node *root; + /* + * Check for which form of affinity. + */ + if (firmware_has_feature(FW_FEATURE_OPAL)) { + affinity_form = FORM1_AFFINITY; + } else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) { + dbg("Using form 1 affinity\n"); + affinity_form = FORM1_AFFINITY; + } else + affinity_form = FORM0_AFFINITY; + if (firmware_has_feature(FW_FEATURE_OPAL)) root = of_find_node_by_path("/ibm,opal"); else @@ -318,23 +332,16 @@ static int __init find_primary_domain_index(void) } distance_ref_points_depth /= sizeof(int); - - if (firmware_has_feature(FW_FEATURE_OPAL) || - firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) { - dbg("Using form 1 affinity\n"); - form1_affinity = 1; - } - - if (form1_affinity) { - index = of_read_number(distance_ref_points, 1); - } else { + if (affinity_form == FORM0_AFFINITY) { if (distance_ref_points_depth < 2) { printk(KERN_WARNING "NUMA: " - "short ibm,associativity-reference-points\n"); + "short ibm,associativity-reference-points\n"); goto err; } index = of_read_number(&distance_ref_points[1], 1); + } else { + index = of_read_number(distance_ref_points, 1); } /* diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 4c7b7f5a2ebc..5d4c2bc20bba 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -119,7 +119,7 @@ struct vec5_fw_feature { static __initdata struct vec5_fw_feature vec5_fw_features_table[] = { - {FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY}, + {FW_FEATURE_FORM1_AFFINITY, OV5_FORM1_AFFINITY}, {FW_FEATURE_PRRN, OV5_PRRN}, {FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2}, {FW_FEATURE_DRC_INFO, OV5_DRC_INFO}, -- cgit From 8ddc6448ec5a5ef50eaa581a7dec0e12a02850ff Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:52:21 +0530 Subject: powerpc/pseries: 
Consolidate different NUMA distance update code paths The associativity details of the newly added resources are collected from the hypervisor via the "ibm,configure-connector" RTAS call. Update the NUMA distance details of the newly added NUMA node after the above call. Instead of updating the NUMA distance every time we look up a node id from the associativity property, add helpers that can be used during boot which do this only once. Also remove the distance update from the node id lookup helpers. Currently, we duplicate parsing code for ibm,associativity and ibm,associativity-lookup-arrays in the kernel. The associativity arrays provided by these device tree properties are very similar, and hence a helper can be used to parse the node id and NUMA distance details. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132223.225214-4-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/topology.h | 2 + arch/powerpc/mm/numa.c | 212 +++++++++++++++++------- arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 + arch/powerpc/platforms/pseries/hotplug-memory.c | 2 + 4 files changed, 161 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index e4db64c0e184..a6425a70c37b 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -64,6 +64,7 @@ static inline int early_cpu_to_node(int cpu) } int of_drconf_to_nid_single(struct drmem_lmb *lmb); +void update_numa_distance(struct device_node *node); #else @@ -93,6 +94,7 @@ static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb) return first_online_node; } +static inline void update_numa_distance(struct device_node *node) {} #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 0bad11b3e929..3cda37c52f26 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -208,50 +208,35 @@ int __node_distance(int a, int b) } EXPORT_SYMBOL(__node_distance); -static void initialize_distance_lookup_table(int nid, - const __be32 *associativity) +static int __associativity_to_nid(const __be32 *associativity, + int max_array_sz) { - int i; + int nid; + /* + * primary_domain_index is 1 based array index. + */ + int index = primary_domain_index - 1; - if (affinity_form != FORM1_AFFINITY) - return; + if (!numa_enabled || index >= max_array_sz) + return NUMA_NO_NODE; - for (i = 0; i < distance_ref_points_depth; i++) { - const __be32 *entry; + nid = of_read_number(&associativity[index], 1); - entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1]; - distance_lookup_table[nid][i] = of_read_number(entry, 1); - } + /* POWER4 LPAR uses 0xffff as invalid node */ + if (nid == 0xffff || nid >= nr_node_ids) + nid = NUMA_NO_NODE; + return nid; } - /* * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA * info is found.
*/ static int associativity_to_nid(const __be32 *associativity) { - int nid = NUMA_NO_NODE; - - if (!numa_enabled) - goto out; - - if (of_read_number(associativity, 1) >= primary_domain_index) - nid = of_read_number(&associativity[primary_domain_index], 1); - - /* POWER4 LPAR uses 0xffff as invalid node */ - if (nid == 0xffff || nid >= nr_node_ids) - nid = NUMA_NO_NODE; - - if (nid > 0 && - of_read_number(associativity, 1) >= distance_ref_points_depth) { - /* - * Skip the length field and send start of associativity array - */ - initialize_distance_lookup_table(nid, associativity + 1); - } + int array_sz = of_read_number(associativity, 1); -out: - return nid; + /* Skip the first element in the associativity array */ + return __associativity_to_nid((associativity + 1), array_sz); } /* Returns the nid associated with the given device tree node, @@ -287,6 +272,60 @@ int of_node_to_nid(struct device_node *device) } EXPORT_SYMBOL(of_node_to_nid); +static void __initialize_form1_numa_distance(const __be32 *associativity, + int max_array_sz) +{ + int i, nid; + + if (affinity_form != FORM1_AFFINITY) + return; + + nid = __associativity_to_nid(associativity, max_array_sz); + if (nid != NUMA_NO_NODE) { + for (i = 0; i < distance_ref_points_depth; i++) { + const __be32 *entry; + int index = be32_to_cpu(distance_ref_points[i]) - 1; + + /* + * broken hierarchy, return with broken distance table + */ + if (WARN(index >= max_array_sz, "Broken ibm,associativity property")) + return; + + entry = &associativity[index]; + distance_lookup_table[nid][i] = of_read_number(entry, 1); + } + } +} + +static void initialize_form1_numa_distance(const __be32 *associativity) +{ + int array_sz; + + array_sz = of_read_number(associativity, 1); + /* Skip the first element in the associativity array */ + __initialize_form1_numa_distance(associativity + 1, array_sz); +} + +/* + * Used to update distance information w.r.t newly added node. + */ +void update_numa_distance(struct device_node *node) +{ + if (affinity_form == FORM0_AFFINITY) + return; + else if (affinity_form == FORM1_AFFINITY) { + const __be32 *associativity; + + associativity = of_get_associativity(node); + if (!associativity) + return; + + initialize_form1_numa_distance(associativity); + return; + } +} + static int __init find_primary_domain_index(void) { int index; @@ -433,6 +472,38 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa) return 0; } +static int get_nid_and_numa_distance(struct drmem_lmb *lmb) +{ + struct assoc_arrays aa = { .arrays = NULL }; + int default_nid = NUMA_NO_NODE; + int nid = default_nid; + int rc, index; + + if ((primary_domain_index < 0) || !numa_enabled) + return default_nid; + + rc = of_get_assoc_arrays(&aa); + if (rc) + return default_nid; + + if (primary_domain_index <= aa.array_sz && + !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) { + const __be32 *associativity; + + index = lmb->aa_index * aa.array_sz; + associativity = &aa.arrays[index]; + nid = __associativity_to_nid(associativity, aa.array_sz); + if (nid > 0 && affinity_form == FORM1_AFFINITY) { + /* + * lookup array associativity entries have + * no length of the array as the first element. + */ + __initialize_form1_numa_distance(associativity, aa.array_sz); + } + } + return nid; +} + /* * This is like of_node_to_nid_single() for memory represented in the * ibm,dynamic-reconfiguration-memory node. 
@@ -453,26 +524,19 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb) if (primary_domain_index <= aa.array_sz && !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) { - index = lmb->aa_index * aa.array_sz + primary_domain_index - 1; - nid = of_read_number(&aa.arrays[index], 1); - - if (nid == 0xffff || nid >= nr_node_ids) - nid = default_nid; + const __be32 *associativity; - if (nid > 0) { - index = lmb->aa_index * aa.array_sz; - initialize_distance_lookup_table(nid, - &aa.arrays[index]); - } + index = lmb->aa_index * aa.array_sz; + associativity = &aa.arrays[index]; + nid = __associativity_to_nid(associativity, aa.array_sz); } - return nid; } #ifdef CONFIG_PPC_SPLPAR -static int vphn_get_nid(long lcpu) + +static int __vphn_get_associativity(long lcpu, __be32 *associativity) { - __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; long rc, hwid; /* @@ -492,12 +556,30 @@ static int vphn_get_nid(long lcpu) rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity); if (rc == H_SUCCESS) - return associativity_to_nid(associativity); + return 0; } + return -1; +} + +static int vphn_get_nid(long lcpu) +{ + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; + + + if (!__vphn_get_associativity(lcpu, associativity)) + return associativity_to_nid(associativity); + return NUMA_NO_NODE; + } #else + +static int __vphn_get_associativity(long lcpu, __be32 *associativity) +{ + return -1; +} + static int vphn_get_nid(long unused) { return NUMA_NO_NODE; @@ -692,7 +774,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, size = read_n_cells(n_mem_size_cells, usm); } - nid = of_drconf_to_nid_single(lmb); + nid = get_nid_and_numa_distance(lmb); fake_numa_create_new_node(((base + size) >> PAGE_SHIFT), &nid); node_set_online(nid); @@ -709,6 +791,7 @@ static int __init parse_numa_properties(void) struct device_node *memory; int default_nid = 0; unsigned long i; + const __be32 *associativity; if (numa_enabled == 0) { printk(KERN_WARNING "NUMA disabled by user\n"); @@ -734,18 +817,30 @@ static int __init parse_numa_properties(void) * each node to be onlined must have NODE_DATA etc backing it. */ for_each_present_cpu(i) { + __be32 vphn_assoc[VPHN_ASSOC_BUFSIZE]; struct device_node *cpu; - int nid = vphn_get_nid(i); + int nid = NUMA_NO_NODE; - /* - * Don't fall back to default_nid yet -- we will plug - * cpus into nodes once the memory scan has discovered - * the topology. - */ - if (nid == NUMA_NO_NODE) { + memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32)); + + if (__vphn_get_associativity(i, vphn_assoc) == 0) { + nid = associativity_to_nid(vphn_assoc); + initialize_form1_numa_distance(vphn_assoc); + } else { + + /* + * Don't fall back to default_nid yet -- we will plug + * cpus into nodes once the memory scan has discovered + * the topology. + */ cpu = of_get_cpu_node(i, NULL); BUG_ON(!cpu); - nid = of_node_to_nid_single(cpu); + + associativity = of_get_associativity(cpu); + if (associativity) { + nid = associativity_to_nid(associativity); + initialize_form1_numa_distance(associativity); + } of_node_put(cpu); } @@ -781,8 +876,11 @@ new_range: * have associativity properties. If none, then * everything goes to default_nid. 
*/ - nid = of_node_to_nid_single(memory); - if (nid < 0) + associativity = of_get_associativity(memory); + if (associativity) { + nid = associativity_to_nid(associativity); + initialize_form1_numa_distance(associativity); + } else nid = default_nid; fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid); diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index e1f224320102..1ef40ef699a6 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -580,6 +580,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index) return saved_rc; } + update_numa_distance(dn); + rc = dlpar_online_cpu(dn); if (rc) { saved_rc = rc; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 0beb3ca2b549..14fccd7b9c99 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -180,6 +180,8 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb) return -ENODEV; } + update_numa_distance(lmb_node); + dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (!dr_node) { dlpar_free_cc_nodes(lmb_node); -- cgit From ef31cb83d19c4c589d650747cd5a7e502be9f665 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:52:22 +0530 Subject: powerpc/pseries: Add a helper for form1 cpu distance This helper is only used with the dispatch trace log collection. A later patch will add Form2 affinity support, and this change helps keep that simpler. Also add a comment explaining that we don't expect the code to be called with FORM0. Signed-off-by: Aneesh Kumar K.V Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132223.225214-5-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/topology.h | 4 ++-- arch/powerpc/mm/numa.c | 10 +++++++++- arch/powerpc/platforms/pseries/lpar.c | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index a6425a70c37b..a4712ecad3e9 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -36,7 +36,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) cpu_all_mask : \ cpumask_of_node(pcibus_to_node(bus))) -extern int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc); +int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc); extern int __node_distance(int, int); #define node_distance(a, b) __node_distance(a, b) @@ -84,7 +84,7 @@ static inline void sysfs_remove_device_from_node(struct device *dev, static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} -static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) +static inline int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) { return 0; } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3cda37c52f26..fe3a9a38eddf 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -166,7 +166,7 @@ static void unmap_cpu_from_node(unsigned long cpu) } #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */ -int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) +static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) { int dist = 0; @@ -182,6 +182,14 @@ int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) return dist; } +int cpu_relative_distance(__be32 *cpu1_assoc, __be32
*cpu2_assoc) +{ + /* We should not get called with FORM0 */ + VM_WARN_ON(affinity_form == FORM0_AFFINITY); + + return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc); +} + /* must hold reference to node during call */ static const __be32 *of_get_associativity(struct device_node *dev) { diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index bd1fcb0881ea..3df6bdfea475 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -262,7 +262,7 @@ static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu) if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc) return -EIO; - return cpu_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc); + return cpu_relative_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc); } static int cpu_home_node_dispatch_distance(int disp_cpu) @@ -282,7 +282,7 @@ static int cpu_home_node_dispatch_distance(int disp_cpu) if (!disp_cpu_assoc || !vcpu_assoc) return -EIO; - return cpu_distance(disp_cpu_assoc, vcpu_assoc); + return cpu_relative_distance(disp_cpu_assoc, vcpu_assoc); } static void update_vcpu_disp_stat(int disp_cpu) -- cgit From 1c6b5a7e74052768977855f95d6b8812f6e7772c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:52:23 +0530 Subject: powerpc/pseries: Add support for FORM2 associativity The PAPR interface currently supports two different ways of communicating resource grouping details to the OS. These are referred to as Form 0 and Form 1 associativity grouping. Form 0 is the older format and is now considered deprecated. This patch adds another resource grouping named FORM2. Signed-off-by: Daniel Henrique Barboza Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132223.225214-6-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/firmware.h | 3 +- arch/powerpc/include/asm/prom.h | 1 + arch/powerpc/kernel/prom_init.c | 3 +- arch/powerpc/mm/numa.c | 187 ++++++++++++++++++------ arch/powerpc/platforms/pseries/firmware.c | 1 + 5 files changed, 158 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 60b631161360..97a3bd9ffeb9 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -53,6 +53,7 @@ #define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000) #define FW_FEATURE_STUFF_TCE ASM_CONST(0x0000008000000000) #define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000) +#define FW_FEATURE_FORM2_AFFINITY ASM_CONST(0x0000020000000000) #ifndef __ASSEMBLY__ @@ -73,7 +74,7 @@ enum { FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | - FW_FEATURE_RPT_INVALIDATE, + FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index df9fec9d232c..5c80152e8f18 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -149,6 +149,7 @@ extern int of_read_drc_info_cell(struct property **prop, #define OV5_XCMO 0x0440 /* Page Coalescing */ #define OV5_FORM1_AFFINITY 0x0580 /* FORM1 NUMA affinity */ #define OV5_PRRN 0x0540 /* Platform Resource Reassignment */ #define OV5_FORM2_AFFINITY 0x0520 /* Form2 NUMA affinity */ #define OV5_HP_EVT 0x0604 /* Hot
Plug Event support */ #define OV5_RESIZE_HPT 0x0601 /* Hash Page Table resizing */ #define OV5_PFO_HW_RNG 0x1180 /* PFO Random Number Generator */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 57db605ad33a..95a42d49e291 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1096,7 +1096,8 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { #else 0, #endif - .associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN), + .associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN) | + OV5_FEAT(OV5_FORM2_AFFINITY), .bin_opts = OV5_FEAT(OV5_RESIZE_HPT) | OV5_FEAT(OV5_HP_EVT), .micro_checkpoint = 0, .reserved0 = 0, diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index fe3a9a38eddf..fe9c6ced40b2 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -56,12 +56,17 @@ static int n_mem_addr_cells, n_mem_size_cells; #define FORM0_AFFINITY 0 #define FORM1_AFFINITY 1 +#define FORM2_AFFINITY 2 static int affinity_form; #define MAX_DISTANCE_REF_POINTS 4 static int distance_ref_points_depth; static const __be32 *distance_ref_points; static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; +static int numa_distance_table[MAX_NUMNODES][MAX_NUMNODES] = { + [0 ... MAX_NUMNODES - 1] = { [0 ... MAX_NUMNODES - 1] = -1 } +}; +static int numa_id_index_table[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE }; /* * Allocate node_to_cpumask_map based on number of available nodes @@ -166,6 +171,54 @@ static void unmap_cpu_from_node(unsigned long cpu) } #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */ +static int __associativity_to_nid(const __be32 *associativity, + int max_array_sz) +{ + int nid; + /* + * primary_domain_index is 1 based array index. + */ + int index = primary_domain_index - 1; + + if (!numa_enabled || index >= max_array_sz) + return NUMA_NO_NODE; + + nid = of_read_number(&associativity[index], 1); + + /* POWER4 LPAR uses 0xffff as invalid node */ + if (nid == 0xffff || nid >= nr_node_ids) + nid = NUMA_NO_NODE; + return nid; +} +/* + * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA + * info is found. 
+ */ +static int associativity_to_nid(const __be32 *associativity) +{ + int array_sz = of_read_number(associativity, 1); + + /* Skip the first element in the associativity array */ + return __associativity_to_nid((associativity + 1), array_sz); +} + +static int __cpu_form2_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) +{ + int dist; + int node1, node2; + + node1 = associativity_to_nid(cpu1_assoc); + node2 = associativity_to_nid(cpu2_assoc); + + dist = numa_distance_table[node1][node2]; + if (dist <= LOCAL_DISTANCE) + return 0; + else if (dist <= REMOTE_DISTANCE) + return 1; + else + return 2; +} + static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) { int dist = 0; @@ -186,8 +239,9 @@ int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) { /* We should not get called with FORM0 */ VM_WARN_ON(affinity_form == FORM0_AFFINITY); - - return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc); + if (affinity_form == FORM1_AFFINITY) + return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc); + return __cpu_form2_relative_distance(cpu1_assoc, cpu2_assoc); } /* must hold reference to node during call */ @@ -201,7 +255,9 @@ int __node_distance(int a, int b) int i; int distance = LOCAL_DISTANCE; - if (affinity_form == FORM0_AFFINITY) + if (affinity_form == FORM2_AFFINITY) + return numa_distance_table[a][b]; + else if (affinity_form == FORM0_AFFINITY) return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE); for (i = 0; i < distance_ref_points_depth; i++) { @@ -216,37 +272,6 @@ int __node_distance(int a, int b) } EXPORT_SYMBOL(__node_distance); -static int __associativity_to_nid(const __be32 *associativity, - int max_array_sz) -{ - int nid; - /* - * primary_domain_index is 1 based array index. - */ - int index = primary_domain_index - 1; - - if (!numa_enabled || index >= max_array_sz) - return NUMA_NO_NODE; - - nid = of_read_number(&associativity[index], 1); - - /* POWER4 LPAR uses 0xffff as invalid node */ - if (nid == 0xffff || nid >= nr_node_ids) - nid = NUMA_NO_NODE; - return nid; -} -/* - * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA - * info is found. - */ -static int associativity_to_nid(const __be32 *associativity) -{ - int array_sz = of_read_number(associativity, 1); - - /* Skip the first element in the associativity array */ - return __associativity_to_nid((associativity + 1), array_sz); -} - /* Returns the nid associated with the given device tree node, * or -1 if not found. */ @@ -320,6 +345,8 @@ static void initialize_form1_numa_distance(const __be32 *associativity) */ void update_numa_distance(struct device_node *node) { + int nid; + if (affinity_form == FORM0_AFFINITY) return; else if (affinity_form == FORM1_AFFINITY) { @@ -332,6 +359,84 @@ void update_numa_distance(struct device_node *node) initialize_form1_numa_distance(associativity); return; } + + /* FORM2 affinity */ + nid = of_node_to_nid_single(node); + if (nid == NUMA_NO_NODE) + return; + + /* + * With FORM2 we expect NUMA distance of all possible NUMA + * nodes to be provided during boot. + */ + WARN(numa_distance_table[nid][nid] == -1, + "NUMA distance details for node %d not provided\n", nid); +} + +/* + * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN} + * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... 
N elements} + */ +static void initialize_form2_numa_distance_lookup_table(void) +{ + int i, j; + struct device_node *root; + const __u8 *numa_dist_table; + const __be32 *numa_lookup_index; + int numa_dist_table_length; + int max_numa_index, distance_index; + + if (firmware_has_feature(FW_FEATURE_OPAL)) + root = of_find_node_by_path("/ibm,opal"); + else + root = of_find_node_by_path("/rtas"); + if (!root) + root = of_find_node_by_path("/"); + + numa_lookup_index = of_get_property(root, "ibm,numa-lookup-index-table", NULL); + max_numa_index = of_read_number(&numa_lookup_index[0], 1); + + /* first element of the array is the size and is encode-int */ + numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL); + numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1); + /* Skip the size which is encoded int */ + numa_dist_table += sizeof(__be32); + + pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n", + numa_dist_table_length, max_numa_index); + + for (i = 0; i < max_numa_index; i++) + /* +1 skip the max_numa_index in the property */ + numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1); + + + if (numa_dist_table_length != max_numa_index * max_numa_index) { + WARN(1, "Wrong NUMA distance information\n"); + /* consider everybody else just remote. */ + for (i = 0; i < max_numa_index; i++) { + for (j = 0; j < max_numa_index; j++) { + int nodeA = numa_id_index_table[i]; + int nodeB = numa_id_index_table[j]; + + if (nodeA == nodeB) + numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE; + else + numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE; + } + } + } + + distance_index = 0; + for (i = 0; i < max_numa_index; i++) { + for (j = 0; j < max_numa_index; j++) { + int nodeA = numa_id_index_table[i]; + int nodeB = numa_id_index_table[j]; + + numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++]; + pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]); + } + } + of_node_put(root); } static int __init find_primary_domain_index(void) @@ -344,6 +449,9 @@ static int __init find_primary_domain_index(void) */ if (firmware_has_feature(FW_FEATURE_OPAL)) { affinity_form = FORM1_AFFINITY; + } else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) { + dbg("Using form 2 affinity\n"); + affinity_form = FORM2_AFFINITY; } else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) { dbg("Using form 1 affinity\n"); affinity_form = FORM1_AFFINITY; @@ -388,9 +496,12 @@ static int __init find_primary_domain_index(void) index = of_read_number(&distance_ref_points[1], 1); } else { + /* + * Both FORM1 and FORM2 affinity find the primary domain details + * at the same offset. + */ index = of_read_number(distance_ref_points, 1); } - /* * Warn and cap if the hardware supports more than * MAX_DISTANCE_REF_POINTS domains. @@ -819,6 +930,12 @@ static int __init parse_numa_properties(void) dbg("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index); + /* + * If it is FORM2 initialize the distance table here. + */ + if (affinity_form == FORM2_AFFINITY) + initialize_form2_numa_distance_lookup_table(); + /* * Even though we connect cpus to numa domains later in SMP * init, we need to know the node ids now. 
This is because diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 5d4c2bc20bba..f162156b7b68 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -123,6 +123,7 @@ vec5_fw_features_table[] = { {FW_FEATURE_PRRN, OV5_PRRN}, {FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2}, {FW_FEATURE_DRC_INFO, OV5_DRC_INFO}, + {FW_FEATURE_FORM2_AFFINITY, OV5_FORM2_AFFINITY}, }; static void __init fw_vec5_feature_init(const char *vec5, unsigned long len) -- cgit From fb31f0a499332a053477ed57312b214e42476e6d Mon Sep 17 00:00:00 2001 From: Kenneth Lee Date: Wed, 28 Jul 2021 15:15:57 +0800 Subject: riscv: fix the global name pfn_base conflict RISC-V uses a global variable pfn_base for page/pfn translation. But this is a common name and will be used elsewhere. In those cases, the page-pfn macros which refer to this name will refer to the local/input variable instead (such as in vfio_pin_pages_remote). This makes everything wrong. This patch changes the name from pfn_base to riscv_pfn_base to fix this problem. Signed-off-by: Kenneth Lee Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 4 ++-- arch/riscv/mm/init.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 4da92cf28a19..d34327be7574 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -79,8 +79,8 @@ typedef struct page *pgtable_t; #endif #ifdef CONFIG_MMU -extern unsigned long pfn_base; -#define ARCH_PFN_OFFSET (pfn_base) +extern unsigned long riscv_pfn_base; +#define ARCH_PFN_OFFSET (riscv_pfn_base) #else #define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) #endif /* CONFIG_MMU */ diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fdf093d01c6f..888dff9530b8 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -214,8 +214,8 @@ static struct pt_alloc_ops _pt_ops __initdata; #define pt_ops _pt_ops #endif -unsigned long pfn_base __ro_after_init; -EXPORT_SYMBOL(pfn_base); +unsigned long riscv_pfn_base __ro_after_init; +EXPORT_SYMBOL(riscv_pfn_base); pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; @@ -587,7 +587,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr; kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; - pfn_base = PFN_DOWN(kernel_map.phys_addr); + riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr); /* Sanity check alignment and size */ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); -- cgit From 8ba1a8b77ba1eb3aef441ed2caf28ab2b1261f5f Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 30 Jul 2021 20:48:41 +0800 Subject: riscv: Support allocating gigantic hugepages using CMA This patch adds support for allocating gigantic hugepages using CMA by specifying the hugetlb_cma= kernel parameter. This is only supported on RV64.
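As a usage sketch (figures assume RV64 with Sv39 translation and 4 KiB base pages; they are illustrative, not taken from this patch): gigantic pages are PUD-sized, so the order passed to hugetlb_cma_reserve() is PUD_SHIFT - PAGE_SHIFT = 30 - 12 = 18, i.e. each gigantic page covers 2^18 * 4 KiB = 1 GiB. Booting with "hugetlb_cma=4G" then sets aside a 4 GiB CMA area, from which up to four such pages can be allocated after boot:

/* From setup_bootmem(): reserve the CMA area sized by the hugetlb_cma= parameter */
if (IS_ENABLED(CONFIG_64BIT))
	hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);

/*
 * Later, from userspace (hypothetical session):
 *   echo 4 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
 */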
Reviewed-by: Alexandre Ghiti Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 888dff9530b8..248f9ec393c1 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -202,6 +203,8 @@ static void __init setup_bootmem(void) early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(dma32_phys_limit); + if (IS_ENABLED(CONFIG_64BIT)) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); memblock_allow_resize(); } -- cgit From db87a7199229b75c9996bf78117eceb81854fce2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 13 Apr 2021 16:38:09 +0000 Subject: powerpc/bug: Remove specific powerpc BUG_ON() and WARN_ON() on PPC32 powerpc BUG_ON() and WARN_ON() are based on using the twnei instruction. For catching simple conditions like a variable having value 0, this is efficient because it does the test and the trap at the same time. But most conditions used with BUG_ON or WARN_ON are more complex and force GCC to format the condition into a 0 or 1 value in a register. This will usually require 2 to 3 instructions. The most efficient solution would be to use __builtin_trap() because GCC is able to optimise the use of the different trap instructions based on the requested condition, but this is complex, if not impossible, for the following reasons: - __builtin_trap() is a non-recoverable instruction, so it can't be used for WARN_ON - Knowing which line of code generated the trap would require the analysis of DWARF information. This is not a feature we have today. As mentioned in commit 8d4fbcfbe0a4 ("Fix WARN_ON() on bitfield ops") the way WARN_ON() is implemented is suboptimal. That commit also mentions an issue with 'long long' conditions. It fixed it for WARN_ON() but the same problem still exists today with BUG_ON() on PPC32. It will be fixed by using the generic implementation. By using the generic implementation, gcc will naturally generate a branch to the unconditional trap generated by BUG(). As modern powerpc implementations have zero-cycle branches, that's even more efficient. And for the functions using WARN_ON() and its return, the test on return from WARN_ON() is now also used for the WARN_ON() itself. On PPC64 we don't want it because we want to be able to use the CFAR register to track how we entered the code that trapped. The CFAR register would be clobbered by the branch. A simple test function: unsigned long test9w(unsigned long a, unsigned long b) { if (WARN_ON(!b)) return 0; return a / b; } Before the patch: 0000046c <test9w>: 46c: 7c 89 00 34 cntlzw r9,r4 470: 55 29 d9 7e rlwinm r9,r9,27,5,31 474: 0f 09 00 00 twnei r9,0 478: 2c 04 00 00 cmpwi r4,0 47c: 41 82 00 0c beq 488 480: 7c 63 23 96 divwu r3,r3,r4 484: 4e 80 00 20 blr 488: 38 60 00 00 li r3,0 48c: 4e 80 00 20 blr After the patch: 00000468 <test9w>: 468: 2c 04 00 00 cmpwi r4,0 46c: 41 82 00 0c beq 478 470: 7c 63 23 96 divwu r3,r3,r4 474: 4e 80 00 20 blr 478: 0f e0 00 00 twui r0,0 47c: 38 60 00 00 li r3,0 480: 4e 80 00 20 blr So we see before the patch we need 3 instructions on the likely path to handle the WARN_ON(). With the patch the trap goes on the unlikely path. See below the difference at the entry of system_call_exception, where we have several BUG_ON(), although less impressive. With the patch: 00000000 <system_call_exception>: 0: 81 6a 00 84 lwz r11,132(r10) 4: 90 6a 00 88 stw r3,136(r10) 8: 71 60 00 02 andi.
r0,r11,2 c: 41 82 00 70 beq 7c 10: 71 60 40 00 andi. r0,r11,16384 14: 41 82 00 6c beq 80 18: 71 6b 80 00 andi. r11,r11,32768 1c: 41 82 00 68 beq 84 20: 94 21 ff e0 stwu r1,-32(r1) 24: 93 e1 00 1c stw r31,28(r1) 28: 7d 8c 42 e6 mftb r12 ... 7c: 0f e0 00 00 twui r0,0 80: 0f e0 00 00 twui r0,0 84: 0f e0 00 00 twui r0,0 Without the patch: 00000000 <system_call_exception>: 0: 94 21 ff e0 stwu r1,-32(r1) 4: 93 e1 00 1c stw r31,28(r1) 8: 90 6a 00 88 stw r3,136(r10) c: 81 6a 00 84 lwz r11,132(r10) 10: 69 60 00 02 xori r0,r11,2 14: 54 00 ff fe rlwinm r0,r0,31,31,31 18: 0f 00 00 00 twnei r0,0 1c: 69 60 40 00 xori r0,r11,16384 20: 54 00 97 fe rlwinm r0,r0,18,31,31 24: 0f 00 00 00 twnei r0,0 28: 69 6b 80 00 xori r11,r11,32768 2c: 55 6b 8f fe rlwinm r11,r11,17,31,31 30: 0f 0b 00 00 twnei r11,0 34: 7d 8c 42 e6 mftb r12 Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b286e07fb771a664b631cd07a40b09c06f26e64b.1618331881.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/bug.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 0b2162890d8b..d844de5adfcb 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -68,7 +68,11 @@ BUG_ENTRY("twi 31, 0, 0", 0); \ unreachable(); \ } while (0) +#define HAVE_ARCH_BUG + +#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags)) +#ifdef CONFIG_PPC64 #define BUG_ON(x) do { \ if (__builtin_constant_p(x)) { \ if (x) \ @@ -78,8 +82,6 @@ } \ } while (0) -#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags)) - #define WARN_ON(x) ({ \ int __ret_warn_on = !!(x); \ if (__builtin_constant_p(__ret_warn_on)) { \ if (__ret_warn_on) \ @@ -93,9 +95,10 @@ unlikely(__ret_warn_on); \ }) -#define HAVE_ARCH_BUG #define HAVE_ARCH_BUG_ON #define HAVE_ARCH_WARN_ON +#endif + #endif /* __ASSEMBLY__ */ #else #ifdef __ASSEMBLY__ -- cgit From 1e688dd2a3d6759d416616ff07afc4bb836c4213 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 13 Apr 2021 16:38:10 +0000 Subject: powerpc/bug: Provide better flexibility to WARN_ON/__WARN_FLAGS() with asm goto Using asm goto in __WARN_FLAGS() and WARN_ON() allows more flexibility to GCC. For that, add an entry to the exception table so that program_check_exception() knows where to resume execution after a WARNING. Here are two examples. The first one is done on PPC32 (which benefits from the previous patch), the second is on PPC64. unsigned long test(struct pt_regs *regs) { int ret; WARN_ON(regs->msr & MSR_PR); return regs->gpr[3]; } unsigned long test9w(unsigned long a, unsigned long b) { if (WARN_ON(!b)) return 0; return a / b; } Before the patch: 000003a8 <test>: 3a8: 81 23 00 84 lwz r9,132(r3) 3ac: 71 29 40 00 andi.
r9,r9,16384 3b0: 40 82 00 0c bne 3bc <test+0x14> 3b4: 80 63 00 0c lwz r3,12(r3) 3b8: 4e 80 00 20 blr 3bc: 0f e0 00 00 twui r0,0 3c0: 80 63 00 0c lwz r3,12(r3) 3c4: 4e 80 00 20 blr 0000000000000bf0 <.test9w>: bf0: 7c 89 00 74 cntlzd r9,r4 bf4: 79 29 d1 82 rldicl r9,r9,58,6 bf8: 0b 09 00 00 tdnei r9,0 bfc: 2c 24 00 00 cmpdi r4,0 c00: 41 82 00 0c beq c0c <.test9w+0x1c> c04: 7c 63 23 92 divdu r3,r3,r4 c08: 4e 80 00 20 blr c0c: 38 60 00 00 li r3,0 c10: 4e 80 00 20 blr After the patch: 000003a8 <test>: 3a8: 81 23 00 84 lwz r9,132(r3) 3ac: 71 29 40 00 andi. r9,r9,16384 3b0: 40 82 00 0c bne 3bc <test+0x14> 3b4: 80 63 00 0c lwz r3,12(r3) 3b8: 4e 80 00 20 blr 3bc: 0f e0 00 00 twui r0,0 0000000000000c50 <.test9w>: c50: 7c 89 00 74 cntlzd r9,r4 c54: 79 29 d1 82 rldicl r9,r9,58,6 c58: 0b 09 00 00 tdnei r9,0 c5c: 7c 63 23 92 divdu r3,r3,r4 c60: 4e 80 00 20 blr c70: 38 60 00 00 li r3,0 c74: 4e 80 00 20 blr In the first example, we see GCC doesn't need to duplicate what happens after the trap. In the second example, we see that GCC doesn't need to emit a test and a branch in the likely path in addition to the trap. We've got some WARN_ON() in the .softirqentry.text section, so it needs to be added to OTHER_TEXT_SECTIONS in modpost.c. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/389962b1b702e3c78d169e59bcfac56282889173.1618331882.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/64/kup.h | 2 +- arch/powerpc/include/asm/bug.h | 54 ++++++++++++++++++++++++++------ arch/powerpc/include/asm/extable.h | 14 +++++++++ arch/powerpc/include/asm/ppc_asm.h | 11 +------ arch/powerpc/kernel/entry_64.S | 2 +- arch/powerpc/kernel/misc_32.S | 2 +- arch/powerpc/kernel/traps.c | 9 ++++-- 7 files changed, 70 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index a1cc73a88710..170339969b7c 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -90,7 +90,7 @@ /* Prevent access to userspace using any key values */ LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED) 999: tdne \gpr1, \gpr2 - EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) #endif .endm diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index d844de5adfcb..1ee0f22313ee 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -4,6 +4,7 @@ #ifdef __KERNEL__ #include +#include #ifdef CONFIG_BUG @@ -30,6 +31,11 @@ .endm #endif /* verbose */ +.macro EMIT_WARN_ENTRY addr,file,line,flags + EX_TABLE(\addr,\addr+4) + EMIT_BUG_ENTRY \addr,\file,\line,\flags +.endm + #else /* !__ASSEMBLY__ */ /* _EMIT_BUG_ENTRY expects args %0,%1,%2,%3 to be FILE, LINE, flags and sizeof(struct bug_entry), respectively */ @@ -58,6 +64,16 @@ "i" (sizeof(struct bug_entry)), \ ##__VA_ARGS__) +#define WARN_ENTRY(insn, flags, label, ...) \ + asm_volatile_goto( \ + "1: " insn "\n" \ + EX_TABLE(1b, %l[label]) \ + _EMIT_BUG_ENTRY \ + : : "i" (__FILE__), "i" (__LINE__), \ + "i" (flags), \ + "i" (sizeof(struct bug_entry)), \ + ##__VA_ARGS__ : : label) + /* * BUG_ON() and WARN_ON() do their best to cooperate with compile-time * optimisations.
However depending on the complexity of the condition @@ -70,7 +86,15 @@ } while (0) #define HAVE_ARCH_BUG -#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags)) +#define __WARN_FLAGS(flags) do { \ + __label__ __label_warn_on; \ + \ + WARN_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags), __label_warn_on); \ + unreachable(); \ + \ +__label_warn_on: \ + break; \ +} while (0) #ifdef CONFIG_PPC64 #define BUG_ON(x) do { \ @@ -83,15 +107,24 @@ } while (0) #define WARN_ON(x) ({ \ - int __ret_warn_on = !!(x); \ - if (__builtin_constant_p(__ret_warn_on)) { \ - if (__ret_warn_on) \ + bool __ret_warn_on = false; \ + do { \ + if (__builtin_constant_p((x))) { \ + if (!(x)) \ + break; \ __WARN(); \ - } else { \ - BUG_ENTRY(PPC_TLNEI " %4, 0", \ - BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \ - "r" (__ret_warn_on)); \ - } \ + __ret_warn_on = true; \ + } else { \ + __label__ __label_warn_on; \ + \ + WARN_ENTRY(PPC_TLNEI " %4, 0", \ + BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \ + __label_warn_on, "r" (x)); \ + break; \ +__label_warn_on: \ + __ret_warn_on = true; \ + } \ + } while (0); \ unlikely(__ret_warn_on); \ }) @@ -104,8 +137,11 @@ #ifdef __ASSEMBLY__ .macro EMIT_BUG_ENTRY addr,file,line,flags .endm +.macro EMIT_WARN_ENTRY addr,file,line,flags +.endm #else /* !__ASSEMBLY__ */ #define _EMIT_BUG_ENTRY +#define _EMIT_WARN_ENTRY #endif #endif /* CONFIG_BUG */ diff --git a/arch/powerpc/include/asm/extable.h b/arch/powerpc/include/asm/extable.h index eb91b2d2935a..26ce2e5c0fa8 100644 --- a/arch/powerpc/include/asm/extable.h +++ b/arch/powerpc/include/asm/extable.h @@ -17,6 +17,8 @@ #define ARCH_HAS_RELATIVE_EXTABLE +#ifndef __ASSEMBLY__ + struct exception_table_entry { int insn; int fixup; @@ -28,3 +30,15 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) } #endif + +/* + * Helper macro for exception table entries + */ +#define EX_TABLE(_fault, _target) \ + stringify_in_c(.section __ex_table,"a";)\ + stringify_in_c(.balign 4;) \ + stringify_in_c(.long (_fault) - . ;) \ + stringify_in_c(.long (_target) - . ;) \ + stringify_in_c(.previous) + +#endif diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 116c1519728a..ffe712307e11 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef __ASSEMBLY__ @@ -752,16 +753,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #endif /* __ASSEMBLY__ */ -/* - * Helper macro for exception table entries - */ -#define EX_TABLE(_fault, _target) \ - stringify_in_c(.section __ex_table,"a";)\ - stringify_in_c(.balign 4;) \ - stringify_in_c(.long (_fault) - . ;) \ - stringify_in_c(.long (_target) - . 
;) \ - stringify_in_c(.previous) - #define SOFT_MASK_TABLE(_start, _end) \ stringify_in_c(.section __soft_mask_table,"a";)\ stringify_in_c(.balign 8;) \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 15720f8661a1..70cff7b49e17 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -309,7 +309,7 @@ _GLOBAL(enter_rtas) */ lbz r0,PACAIRQSOFTMASK(r13) 1: tdeqi r0,IRQS_ENABLED - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING + EMIT_WARN_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif /* Hard-disable interrupts */ diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 39ab15419592..d8645efff902 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -237,7 +237,7 @@ _GLOBAL(copy_page) addi r3,r3,-4 0: twnei r5, 0 /* WARN if r3 is not cache aligned */ - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING + EMIT_WARN_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING addi r4,r4,-4 diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index c8f648727d36..51d4f5faf425 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1477,8 +1477,13 @@ static void do_program_check(struct pt_regs *regs) if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { - regs_add_return_ip(regs, 4); - return; + const struct exception_table_entry *entry; + + entry = search_exception_tables(bugaddr); + if (entry) { + regs_set_return_ip(regs, extable_fixup(entry) + regs->nip - bugaddr); + return; + } } _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); return; -- cgit From 0355785313e2191be4e1108cdbda94ddb0238c48 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 13 Aug 2021 13:05:11 -0700 Subject: powerpc: Add "-z notext" flag to disable diagnostic Object files used to link .tmp_vmlinux.kallsyms1 have many R_PPC64_ADDR64 relocations in non-SHF_WRITE sections. A -pie link contains many text relocations (e.g. in the .rela___ksymtab_gpl+* and .rela__mcount_loc sections), which are disallowed by LLD: ld.lld: error: can't create dynamic relocation R_PPC64_ADDR64 against local symbol in readonly segment; recompile object files with -fPIC or pass '-Wl,-z,notext' to allow text relocations in the output >>> defined in arch/powerpc/kernel/head_64.o >>> referenced by arch/powerpc/kernel/head_64.o:(__restart_table+0x10) Newer GNU ld configured with "--enable-textrel-check=error" will report an error as well: $ ld-new -EL -m elf64lppc -pie ... -o .tmp_vmlinux.kallsyms1 ... ld-new: read-only segment has dynamic relocations Add "-z notext" to suppress the errors. Non-CONFIG_RELOCATABLE builds use the default -no-pie mode, and thus R_PPC64_ADDR64 relocations can be resolved at link-time.
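As an aside, the class of relocation LLD rejects here can be illustrated with a small C sketch; the symbol and section names below are invented for illustration and are not taken from the kernel:

	/* A pointer constant in a read-only section needs an absolute
	 * relocation (R_PPC64_ADDR64 on ppc64le). When the object is not
	 * built with -fPIC but is linked with -pie, this becomes a dynamic
	 * relocation against a read-only segment - a "text relocation" -
	 * which LLD refuses unless -z notext is passed.
	 */
	extern void some_symbol(void);

	const void *const table_entry
		__attribute__((section("__example_table"))) = &some_symbol;

"-z notext" only relaxes the linker diagnostic; that is presumably acceptable for a relocatable vmlinux because the kernel applies its own relocations at early boot rather than relying on a dynamic loader.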
Reported-by: Itaru Kitayama Co-developed-by: Bill Wendling Signed-off-by: Fangrui Song Signed-off-by: Bill Wendling Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210813200511.1905703-1-morbo@google.com --- arch/powerpc/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 9aaf1abbc641..aa6808e70647 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -122,6 +122,7 @@ endif LDFLAGS_vmlinux-y := -Bstatic LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie +LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) += -z notext LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) ifdef CONFIG_PPC64 -- cgit From 8b893ef190b0c440877de04f767efca4bf4d6af8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 16 Aug 2021 12:30:11 +1000 Subject: powerpc/pseries: Fix build error when NUMA=n As reported by lkp, if NUMA=n we see a build error: arch/powerpc/platforms/pseries/hotplug-cpu.c: In function 'pseries_cpu_hotplug_init': arch/powerpc/platforms/pseries/hotplug-cpu.c:1022:8: error: 'node_to_cpumask_map' undeclared 1022 | node_to_cpumask_map[node]); Use cpumask_of_node() which has an empty stub for NUMA=n, and when NUMA=y does a lookup from node_to_cpumask_map[]. Fixes: bd1dd4c5f528 ("powerpc/pseries: Prevent free CPU ids being reused on another node") Reported-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210816041032.2839343-1-mpe@ellerman.id.au --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 1ef40ef699a6..d646c22e94ab 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -1021,7 +1021,7 @@ static int __init pseries_cpu_hotplug_init(void) /* Record ids of CPU added at boot time */ cpumask_or(node_recorded_ids_map[node], node_recorded_ids_map[node], - node_to_cpumask_map[node]); + cpumask_of_node(node)); } of_reconfig_notifier_register(&pseries_smp_nb); -- cgit From 47c258d71ebfc832a760a1dc6540cf3c33968023 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 15 Aug 2021 15:23:34 -0700 Subject: powerpc/head_check: use stderr for error messages Prefer stderr instead of stdout for error messages. This is good practice and can help CI error detection and reporting (0day in this case).
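The same convention carries over to C tools; a minimal sketch (an editor's illustration, not part of the patch) of keeping diagnostics on stderr so that a CI harness capturing stdout still sees them:

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		/* stdout stays clean for machine-readable output; errors go
		 * to stderr so "tool > out.log" still surfaces the failure. */
		fprintf(stderr, "ERROR: unexpected head code address\n");
		return EXIT_FAILURE;
	}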
Signed-off-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210815222334.9575-1-rdunlap@infradead.org --- arch/powerpc/tools/head_check.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/head_check.sh b/arch/powerpc/tools/head_check.sh index e32d3162e5ed..e477837fdc58 100644 --- a/arch/powerpc/tools/head_check.sh +++ b/arch/powerpc/tools/head_check.sh @@ -56,9 +56,9 @@ expected_start_head_addr=$vma start_head_addr=$(cat .tmp_symbols.txt | grep " t start_first_256B$" | cut -d' ' -f1) if [ "$start_head_addr" != "$expected_start_head_addr" ]; then - echo "ERROR: head code starts at $start_head_addr, should be $expected_start_head_addr" - echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" - echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" + echo "ERROR: head code starts at $start_head_addr, should be $expected_start_head_addr" 1>&2 + echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" 1>&2 + echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" 1>&2 exit 1 fi @@ -70,9 +70,9 @@ expected_start_text_addr=$(cat .tmp_symbols.txt | grep " a text_start$" | cut -d start_text_addr=$(cat .tmp_symbols.txt | grep " t start_text$" | cut -d' ' -f1) if [ "$start_text_addr" != "$expected_start_text_addr" ]; then - echo "ERROR: start_text address is $start_text_addr, should be $expected_start_text_addr" - echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" - echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" + echo "ERROR: start_text address is $start_text_addr, should be $expected_start_text_addr" 1>&2 + echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" 1>&2 + echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" 1>&2 exit 1 fi -- cgit From 4aae683f132777a92b80d6971d56174d5f4cb3f3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 31 Jul 2021 14:22:32 +0900 Subject: tracing: Refactor TRACE_IRQFLAGS_SUPPORT in Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make architectures select TRACE_IRQFLAGS_SUPPORT instead of having many defines. 
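The pattern being consolidated is the usual one for architecture capability flags: a single shared definition lives in arch/Kconfig and each architecture opts in via select, as the diffs below show. A generic sketch, where MYARCH is a placeholder symbol rather than a real one:

	# arch/Kconfig: the one shared definition
	config TRACE_IRQFLAGS_SUPPORT
		bool

	# arch/myarch/Kconfig: opt in with select
	config MYARCH
		bool
		select TRACE_IRQFLAGS_SUPPORT

This replaces the per-architecture "config TRACE_IRQFLAGS_SUPPORT / def_bool y" stanzas that the patch removes.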
Link: https://lkml.kernel.org/r/20210731052233.4703-2-masahiroy@kernel.org Acked-by: Heiko Carstens Acked-by: Vineet Gupta    #arch/arc Acked-by: Michael Ellerman (powerpc) Acked-by: Catalin Marinas Acked-by: Max Filippov Signed-off-by: Masahiro Yamada Signed-off-by: Steven Rostedt (VMware) --- arch/Kconfig | 3 +++ arch/arc/Kconfig | 4 +--- arch/arm/Kconfig | 5 +---- arch/arm64/Kconfig | 4 +--- arch/csky/Kconfig | 4 +--- arch/hexagon/Kconfig | 4 +--- arch/microblaze/Kconfig | 1 + arch/microblaze/Kconfig.debug | 5 ----- arch/mips/Kconfig | 1 + arch/mips/Kconfig.debug | 4 ---- arch/nds32/Kconfig | 4 +--- arch/nios2/Kconfig | 3 --- arch/openrisc/Kconfig | 4 +--- arch/parisc/Kconfig | 1 + arch/parisc/Kconfig.debug | 3 --- arch/powerpc/Kconfig | 5 +---- arch/riscv/Kconfig | 4 +--- arch/s390/Kconfig | 1 + arch/s390/Kconfig.debug | 3 --- arch/sh/Kconfig | 1 + arch/sh/Kconfig.debug | 3 --- arch/sparc/Kconfig | 1 + arch/sparc/Kconfig.debug | 4 ---- arch/um/Kconfig | 5 +---- arch/x86/Kconfig | 1 + arch/x86/Kconfig.debug | 3 --- arch/xtensa/Kconfig | 4 +--- 27 files changed, 21 insertions(+), 64 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 129df498a8e1..9471a0feecaf 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -197,6 +197,9 @@ config HAVE_FUNCTION_ERROR_INJECTION config HAVE_NMI bool +config TRACE_IRQFLAGS_SUPPORT + bool + # # An arch should select this if it provides all these things: # diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index d8f51eb8963b..0c81df3a5c7a 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -49,9 +49,7 @@ config ARC select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32 select SET_FS - -config TRACE_IRQFLAGS_SUPPORT - def_bool y + select TRACE_IRQFLAGS_SUPPORT config LOCKDEP_SUPPORT def_bool y diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 82f908fa5676..3564647283e1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -128,6 +128,7 @@ config ARM select RTC_LIB select SET_FS select SYS_SUPPORTS_APM_EMULATION + select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M # Above selects are sorted alphabetically; please add new ones # according to that. Thanks. help @@ -191,10 +192,6 @@ config LOCKDEP_SUPPORT bool default y -config TRACE_IRQFLAGS_SUPPORT - bool - default !CPU_V7M - config ARCH_HAS_ILOG2_U32 bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b5b13a932561..67b04ae5d010 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -221,6 +221,7 @@ config ARM64 select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD + select TRACE_IRQFLAGS_SUPPORT help ARM 64-bit (AArch64) Linux support. 
@@ -288,9 +289,6 @@ config ILLEGAL_POINTER_VALUE config LOCKDEP_SUPPORT def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config GENERIC_BUG def_bool y depends on BUG diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 2716f6395ba7..9d4d898df76b 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -82,6 +82,7 @@ config CSKY select PCI_SYSCALL if PCI select PCI_MSI if PCI select SET_FS + select TRACE_IRQFLAGS_SUPPORT config LOCKDEP_SUPPORT def_bool y @@ -139,9 +140,6 @@ config STACKTRACE_SUPPORT config TIME_LOW_RES def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config CPU_TLB_SIZE int default "128" if (CPU_CK610 || CPU_CK807 || CPU_CK810) diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index e5a852080730..f993c4deaf23 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -31,6 +31,7 @@ config HEXAGON select GENERIC_CPU_DEVICES select SET_FS select ARCH_WANT_LD_ORPHAN_WARN + select TRACE_IRQFLAGS_SUPPORT help Qualcomm Hexagon is a processor architecture designed for high performance and low power across a wide variety of applications. @@ -52,9 +53,6 @@ config EARLY_PRINTK config MMU def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config GENERIC_CSUM def_bool y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 14a67a42fcae..59798e43cdb0 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -44,6 +44,7 @@ config MICROBLAZE select SPARSE_IRQ select SET_FS select ZONE_DMA + select TRACE_IRQFLAGS_SUPPORT # Endianness selection choice diff --git a/arch/microblaze/Kconfig.debug b/arch/microblaze/Kconfig.debug index 865527ac332a..a4e40e534e6a 100644 --- a/arch/microblaze/Kconfig.debug +++ b/arch/microblaze/Kconfig.debug @@ -1,6 +1 @@ # SPDX-License-Identifier: GPL-2.0-only -# For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.rst. - -config TRACE_IRQFLAGS_SUPPORT - def_bool y diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cee6087cd686..3017c9e8bb07 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -97,6 +97,7 @@ config MIPS select PCI_MSI_ARCH_FALLBACKS if PCI_MSI select RTC_LIB select SYSCTL_EXCEPTION_TRACE + select TRACE_IRQFLAGS_SUPPORT select VIRT_TO_BUS select ARCH_HAS_ELFCORE_COMPAT diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index 43dbf5930796..f4ae7900fcd3 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_SUPPORT - bool - default y - config EARLY_PRINTK bool "Early printk" if EXPERT depends on SYS_HAS_EARLY_PRINTK diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 62313902d75d..dd50123335d8 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -48,6 +48,7 @@ config NDS32 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select SET_FS + select TRACE_IRQFLAGS_SUPPORT help Andes(nds32) Linux support. 
@@ -64,9 +65,6 @@ config GENERIC_LOCKBREAK def_bool y depends on PREEMPTION -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config STACKTRACE_SUPPORT def_bool y diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index c24955c81c92..eabdc41f67f6 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -43,9 +43,6 @@ config NO_IOPORT_MAP config FPU def_bool n -config TRACE_IRQFLAGS_SUPPORT - def_bool n - menu "Kernel features" source "kernel/Kconfig.hz" diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 591acc5990dc..ebc5e7eafc5c 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -39,6 +39,7 @@ config OPENRISC select GENERIC_IRQ_MULTI_HANDLER select MMU_GATHER_NO_RANGE if MMU select SET_FS + select TRACE_IRQFLAGS_SUPPORT config CPU_BIG_ENDIAN def_bool y @@ -52,9 +53,6 @@ config GENERIC_HWEIGHT config NO_IOPORT_MAP def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - # For now, use generic checksum functions #These can be reimplemented in assembly later if so inclined config GENERIC_CSUM diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index bde9907bc5b2..57a0d0896ef6 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -67,6 +67,7 @@ config PARISC select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS select SET_FS + select TRACE_IRQFLAGS_SUPPORT help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug index 1478ded0e247..f66554cd5c45 100644 --- a/arch/parisc/Kconfig.debug +++ b/arch/parisc/Kconfig.debug @@ -1,4 +1 @@ # SPDX-License-Identifier: GPL-2.0 - -config TRACE_IRQFLAGS_SUPPORT - def_bool y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d01e3401581d..76a28452c042 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -94,10 +94,6 @@ config STACKTRACE_SUPPORT bool default y -config TRACE_IRQFLAGS_SUPPORT - bool - default y - config LOCKDEP_SUPPORT bool default y @@ -271,6 +267,7 @@ config PPC select STRICT_KERNEL_RWX if STRICT_MODULE_RWX select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK + select TRACE_IRQFLAGS_SUPPORT select VIRT_TO_BUS if !PPC64 # # Please keep this list sorted alphabetically. 
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8fcceb8eda07..15f012b9c6d7 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -109,6 +109,7 @@ config RISCV select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK + select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU select ZONE_DMA32 if 64BIT @@ -178,9 +179,6 @@ config ARCH_SUPPORTS_UPROBES config STACKTRACE_SUPPORT def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config GENERIC_BUG def_bool y depends on BUG diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a0e2130f0100..8bce206e93a3 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -207,6 +207,7 @@ config S390 select SWIOTLB select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK + select TRACE_IRQFLAGS_SUPPORT select TTY select VIRT_CPU_ACCOUNTING select ZONE_DMA diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 9ea6e61d5858..e94a2a7f6bf4 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config EARLY_PRINTK def_bool y diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 45a0549421cd..8efeffcdf4cf 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -72,6 +72,7 @@ config SUPERH select RTC_LIB select SET_FS select SPARSE_IRQ + select TRACE_IRQFLAGS_SUPPORT help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug index 28a43d63bde1..958f790273ab 100644 --- a/arch/sh/Kconfig.debug +++ b/arch/sh/Kconfig.debug @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config SH_STANDARD_BIOS bool "Use LinuxSH standard BIOS" help diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c5fa7932b550..fb8bdd40c72a 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -50,6 +50,7 @@ config SPARC select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH select SET_FS + select TRACE_IRQFLAGS_SUPPORT config SPARC32 def_bool !64BIT diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index 50a918d496c8..6b2bec1888b3 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_SUPPORT - bool - default y - config DEBUG_DCFLUSH bool "D-cache flush debugging" depends on SPARC64 && DEBUG_KERNEL diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 0561b73cfd9a..0a699440c4c5 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -20,6 +20,7 @@ config UML select GENERIC_CPU_DEVICES select HAVE_GCC_PLUGINS select SET_FS + select TRACE_IRQFLAGS_SUPPORT select TTY # Needed for line.c config MMU @@ -50,10 +51,6 @@ config ISA config SBUS bool -config TRACE_IRQFLAGS_SUPPORT - bool - default y - config LOCKDEP_SUPPORT bool default y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 49270655e827..171faee05035 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -261,6 +261,7 @@ config X86 select STACK_VALIDATION if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE) select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK + select TRACE_IRQFLAGS_SUPPORT select USER_STACKTRACE_SUPPORT select VIRT_TO_BUS select HAVE_ARCH_KCSAN if X86_64 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 80b57e7f4947..d3a6f74a94bd 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -1,8 +1,5 @@ # 
SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config TRACE_IRQFLAGS_NMI_SUPPORT def_bool y diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 2332b2156993..90aabb27f7d7 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -41,6 +41,7 @@ config XTENSA select MODULES_USE_ELF_RELA select PERF_USE_VMALLOC select SET_FS + select TRACE_IRQFLAGS_SUPPORT select VIRT_TO_BUS help Xtensa processors are 32-bit RISC machines designed by Tensilica @@ -72,9 +73,6 @@ config LOCKDEP_SUPPORT config STACKTRACE_SUPPORT def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config MMU def_bool n -- cgit From e95ad5f21693a37c0d318b5988c5a0de324eb3e3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 16 Aug 2021 16:36:02 +1000 Subject: powerpc/head_check: Fix shellcheck errors Replace "cat file | grep pattern" with "grep pattern file", and quote a few variables. Together that fixes all shellcheck errors. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817125154.3369884-1-mpe@ellerman.id.au --- arch/powerpc/tools/head_check.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/head_check.sh b/arch/powerpc/tools/head_check.sh index e477837fdc58..689907cda996 100644 --- a/arch/powerpc/tools/head_check.sh +++ b/arch/powerpc/tools/head_check.sh @@ -49,11 +49,11 @@ vmlinux="$2" $nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" > .tmp_symbols.txt -vma=$(cat .tmp_symbols.txt | grep -e " [TA] _stext$" | cut -d' ' -f1) +vma=$(grep -e " [TA] _stext$" .tmp_symbols.txt | cut -d' ' -f1) -expected_start_head_addr=$vma +expected_start_head_addr="$vma" -start_head_addr=$(cat .tmp_symbols.txt | grep " t start_first_256B$" | cut -d' ' -f1) +start_head_addr=$(grep " t start_first_256B$" .tmp_symbols.txt | cut -d' ' -f1) if [ "$start_head_addr" != "$expected_start_head_addr" ]; then echo "ERROR: head code starts at $start_head_addr, should be $expected_start_head_addr" 1>&2 @@ -63,11 +63,11 @@ if [ "$start_head_addr" != "$expected_start_head_addr" ]; then exit 1 fi -top_vma=$(echo $vma | cut -d'0' -f1) +top_vma=$(echo "$vma" | cut -d'0' -f1) -expected_start_text_addr=$(cat .tmp_symbols.txt | grep " a text_start$" | cut -d' ' -f1 | sed "s/^0/$top_vma/") +expected_start_text_addr=$(grep " a text_start$" .tmp_symbols.txt | cut -d' ' -f1 | sed "s/^0/$top_vma/") -start_text_addr=$(cat .tmp_symbols.txt | grep " t start_text$" | cut -d' ' -f1) +start_text_addr=$(grep " t start_text$" .tmp_symbols.txt | cut -d' ' -f1) if [ "$start_text_addr" != "$expected_start_text_addr" ]; then echo "ERROR: start_text address is $start_text_addr, should be $expected_start_text_addr" 1>&2 -- cgit From c5ac55b6cbc604ad4144c380feae20f69453df91 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 17 Aug 2021 14:24:05 +0930 Subject: powerpc/config: Fix IPV6 warning in mpc885_ads When building this config there's a warning: 79:warning: override: reassigning to symbol IPV6 Commit 9a1762a4a4ff ("powerpc/8xx: Update mpc885_ads_defconfig to improve CI") added CONFIG_IPV6=y, but left '# CONFIG_IPV6 is not set' in. IPV6 is default y, so remove both to clean up the build.
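For clarity, the colliding defconfig lines looked like this (reconstructed from the description above; the later assignment, at line 79, is what triggers kconfig's override warning):

	# CONFIG_IPV6 is not set
	...
	CONFIG_IPV6=y

Since IPV6 defaults to y anyway, removing both lines leaves the resulting configuration unchanged.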
Fixes: 9a1762a4a4ff ("powerpc/8xx: Update mpc885_ads_defconfig to improve CI") Signed-off-by: Joel Stanley Acked-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817045407.2445664-2-joel@jms.id.au --- arch/powerpc/configs/mpc885_ads_defconfig | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index d21f266cea9a..5cd17adf903f 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -21,7 +21,6 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_SYN_COOKIES=y -# CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y @@ -76,7 +75,6 @@ CONFIG_PERF_EVENTS=y CONFIG_MATH_EMULATION=y CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y CONFIG_STRICT_KERNEL_RWX=y -CONFIG_IPV6=y CONFIG_BPF_JIT=y CONFIG_DEBUG_VM_PGTABLE=y CONFIG_BDI_SWITCH=y -- cgit From d0e28a6145c3455b69991245e7f6147eb914b34a Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 17 Aug 2021 14:24:06 +0930 Subject: powerpc/config: Re-enable MTD_PHYSMAP_OF CONFIG_MTD_PHYSMAP_OF is no longer enabled as it depends on MTD_PHYSMAP which is not enabled. This is a regression from commit 642b1e8dbed7 ("mtd: maps: Merge physmap_of.c into physmap-core.c"), which added the extra dependency. Add CONFIG_MTD_PHYSMAP=y so this stays in the config, as Christophe said it is useful for build coverage. Fixes: 642b1e8dbed7 ("mtd: maps: Merge physmap_of.c into physmap-core.c") Signed-off-by: Joel Stanley Acked-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817045407.2445664-3-joel@jms.id.au --- arch/powerpc/configs/mpc885_ads_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 5cd17adf903f..cd08f9ed2c8d 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -33,6 +33,7 @@ CONFIG_MTD_CFI_GEOMETRY=y # CONFIG_MTD_CFI_I2 is not set CONFIG_MTD_CFI_I4=y CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_PHYSMAP=y CONFIG_MTD_PHYSMAP_OF=y # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y -- cgit From 87e0d46bf68913f4c87dba94aadc00da658a874b Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 17 Aug 2021 14:24:07 +0930 Subject: powerpc/configs: Regenerate mpc885_ads_defconfig Regenerate atop v5.14-rc6 by doing a make savedefconfig.
The changes are a re-ordering, except for the following (which are still set indirectly): - CONFIG_DEBUG_KERNEL=y selected by EXPERT - CONFIG_PPC_EARLY_DEBUG_CPM_ADDR=0xff002008 which is the default setting Signed-off-by: Joel Stanley Acked-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817045407.2445664-4-joel@jms.id.au --- arch/powerpc/configs/mpc885_ads_defconfig | 46 +++++++++++++++---------------- 1 file changed, 22 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index cd08f9ed2c8d..c74dc76b1d0d 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -1,19 +1,30 @@ -CONFIG_PPC_8xx=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_JIT=y +CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set # CONFIG_FUTEX is not set +CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set -# CONFIG_BLK_DEV_BSG is not set -CONFIG_PARTITION_ADVANCED=y +CONFIG_PPC_8xx=y +CONFIG_8xx_GPIO=y +CONFIG_SMC_UCODE_PATCH=y +CONFIG_PIN_TLB=y CONFIG_GEN_RTC=y CONFIG_HZ_100=y +CONFIG_MATH_EMULATION=y +CONFIG_PPC_16K_PAGES=y +CONFIG_ADVANCED_OPTIONS=y # CONFIG_SECCOMP is not set +CONFIG_STRICT_KERNEL_RWX=y +CONFIG_MODULES=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -46,38 +57,25 @@ CONFIG_DAVICOM_PHY=y # CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y +CONFIG_SPI=y +CONFIG_SPI_FSL_SPI=y # CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_8xxx_WDT=y # CONFIG_USB_SUPPORT is not set # CONFIG_DNOTIFY is not set CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y +CONFIG_CRYPTO=y +CONFIG_CRYPTO_DEV_TALITOS=y CONFIG_CRC32_SLICEBY4=y CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_PPC_16K_PAGES=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_FS=y -CONFIG_PPC_PTDUMP=y -CONFIG_MODULES=y -CONFIG_SPI=y -CONFIG_SPI_FSL_SPI=y -CONFIG_CRYPTO=y -CONFIG_CRYPTO_DEV_TALITOS=y -CONFIG_8xx_GPIO=y -CONFIG_WATCHDOG=y -CONFIG_8xxx_WDT=y -CONFIG_SMC_UCODE_PATCH=y -CONFIG_ADVANCED_OPTIONS=y -CONFIG_PIN_TLB=y -CONFIG_PERF_EVENTS=y -CONFIG_MATH_EMULATION=y -CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y -CONFIG_STRICT_KERNEL_RWX=y -CONFIG_BPF_JIT=y CONFIG_DEBUG_VM_PGTABLE=y +CONFIG_DETECT_HUNG_TASK=y CONFIG_BDI_SWITCH=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_PPC_EARLY_DEBUG_CPM_ADDR=0xff002008 +CONFIG_PPC_PTDUMP=y -- cgit From e225c4d6bc389701f2f63fc246420a1da3465ab5 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 23 Mar 2021 14:29:05 +0800 Subject: powerpc: Remove duplicate includes interrupt.c: asm/interrupt.h has been included at line 12, so remove the duplicate one at line 10. time.c: linux/sched/clock.h has been included at line 33, so remove the duplicate one at line 56 and move sched/cputime.h under the sched include segment.
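The shape of the cleanup, as a generic C sketch (only asm/interrupt.h is taken from the commit above; the other header name is illustrative):

	/* Before: the same header pulled in twice. The include guard makes
	 * the duplicate harmless, but it is pure noise. */
	#include <asm/interrupt.h>
	#include <linux/context_tracking.h>
	#include <asm/interrupt.h>	/* duplicate - dropped */

	/* After: each header appears exactly once. */
	#include <asm/interrupt.h>
	#include <linux/context_tracking.h>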
Signed-off-by: Wan Jiabing Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210323062916.295346-1-wanjiabing@vivo.com --- arch/powerpc/kernel/interrupt.c | 1 - arch/powerpc/kernel/time.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 21bbd615ca41..abc2b3dc3f20 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e45ce427bffb..77bae85e2fae 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -52,8 +53,6 @@ #include #include #include -#include -#include #include #include -- cgit From 504c6295b998effa682089747a96d7bb5933d4db Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Aug 2021 10:39:50 +0530 Subject: arm64/mm: Add remaining ID_AA64MMFR0_PARANGE_ macros Currently there are macros only for the 48 and 52 bit parange values extracted from the ID_AA64MMFR0.PARANGE field. This change completes the enumeration and updates the helper id_aa64mmfr0_parange_to_phys_shift(). While here it also defines ARM64_MIN_PARANGE_BITS as the absolute minimum PA range shift value that could be supported on a given platform. Cc: Marc Zyngier Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628744994-16623-2-git-send-email-anshuman.khandual@arm.com --- arch/arm64/include/asm/cpufeature.h | 14 +++++++------- arch/arm64/include/asm/sysreg.h | 7 +++++++ 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9bb9d11750d7..8633bdb21f33 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -781,13 +781,13 @@ extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) { switch (parange) { - case 0: return 32; - case 1: return 36; - case 2: return 40; - case 3: return 42; - case 4: return 44; - case 5: return 48; - case 6: return 52; + case ID_AA64MMFR0_PARANGE_32: return 32; + case ID_AA64MMFR0_PARANGE_36: return 36; + case ID_AA64MMFR0_PARANGE_40: return 40; + case ID_AA64MMFR0_PARANGE_42: return 42; + case ID_AA64MMFR0_PARANGE_44: return 44; + case ID_AA64MMFR0_PARANGE_48: return 48; + case ID_AA64MMFR0_PARANGE_52: return 52; /* * A future PE could use a value unknown to the kernel.
* However, by the "D10.1.4 Principles of the ID scheme diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 943d31d92b5b..1972e4b9be5c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -853,9 +853,16 @@ #define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 #define ID_AA64MMFR0_TGRAN16_NI 0x0 #define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 +#define ID_AA64MMFR0_PARANGE_32 0x0 +#define ID_AA64MMFR0_PARANGE_36 0x1 +#define ID_AA64MMFR0_PARANGE_40 0x2 +#define ID_AA64MMFR0_PARANGE_42 0x3 +#define ID_AA64MMFR0_PARANGE_44 0x4 #define ID_AA64MMFR0_PARANGE_48 0x5 #define ID_AA64MMFR0_PARANGE_52 0x6 +#define ARM64_MIN_PARANGE_BITS 32 + #define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0 #define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1 #define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2 -- cgit From 9788c14060f3c179c376b2a87af1a430d4d84973 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Aug 2021 10:39:51 +0530 Subject: KVM: arm64: Use ARM64_MIN_PARANGE_BITS as the minimum supported IPA Drop the hard-coded value for the minimum supported IPA range bits (i.e. 32). Instead, use ARM64_MIN_PARANGE_BITS, which improves code readability. Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628744994-16623-3-git-send-email-anshuman.khandual@arm.com --- arch/arm64/kvm/reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 18ffc6ad67b8..08fd487d5f95 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -364,7 +364,7 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); if (phys_shift) { if (phys_shift > kvm_ipa_limit || - phys_shift < 32) + phys_shift < ARM64_MIN_PARANGE_BITS) return -EINVAL; } else { phys_shift = KVM_PHYS_SHIFT; -- cgit From bf249d9e362f1011a839d57e771b4b1a7eed9656 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Aug 2021 10:39:52 +0530 Subject: KVM: arm64: Drop init_common_resources() We can do without this additional indirection via init_common_resources() by just calling kvm_set_ipa_limit() directly instead.
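Tying the parange pieces of this series together, a call site using the new named constants could look like the following sketch (based on the helpers shown above; error handling elided):

	/* Read ID_AA64MMFR0_EL1.PARange and convert it to a physical
	 * address width via the helper updated earlier in the series. */
	static int example_phys_shift(void)
	{
		u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
		int parange = cpuid_feature_extract_unsigned_field(mmfr0,
					ID_AA64MMFR0_PARANGE_SHIFT);

		return id_aa64mmfr0_parange_to_phys_shift(parange);
	}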
Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628744994-16623-4-git-send-email-anshuman.khandual@arm.com --- arch/arm64/kvm/arm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..19560e457c11 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1696,11 +1696,6 @@ static bool init_psci_relay(void) return true; } -static int init_common_resources(void) -{ - return kvm_set_ipa_limit(); -} - static int init_subsystems(void) { int err = 0; @@ -2102,7 +2097,7 @@ int kvm_arch_init(void *opaque) } } - err = init_common_resources(); + err = kvm_set_ipa_limit(); if (err) return err; -- cgit From 6b7982fefc1fdcaa31b712f5fbc2e993cc99ad23 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Aug 2021 10:39:53 +0530 Subject: KVM: arm64: Drop check_kvm_target_cpu() based percpu probe kvm_target_cpu() never returns a negative error code, so check_kvm_target_cpu() would never have 'ret' filled with a negative error code. Hence the percpu probe via check_kvm_target_cpu() does not make sense as it's never going to find an unsupported CPU, forcing kvm_arch_init() to exit early. Hence let's just drop this percpu probe (and also check_kvm_target_cpu()) altogether. While here, this also changes kvm_target_cpu() return type to a u32, making it explicit that an error code will not be returned from this function. Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Acked-by: Will Deacon Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628744994-16623-5-git-send-email-anshuman.khandual@arm.com --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/arm.c | 16 +--------------- arch/arm64/kvm/guest.c | 4 ++-- 3 files changed, 4 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 41911585ae0c..26bbd84a02de 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -66,7 +66,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern unsigned int kvm_sve_max_vl; int kvm_arm_init_sve(void); -int __attribute_const__ kvm_target_cpu(void); +u32 __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 19560e457c11..ee9b1166f330 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1035,7 +1035,7 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init) { unsigned int i, ret; - int phys_target = kvm_target_cpu(); + u32 phys_target = kvm_target_cpu(); if (init->target != phys_target) return -EINVAL; @@ -2010,11 +2010,6 @@ static int finalize_hyp_mode(void) return 0; } -static void check_kvm_target_cpu(void *ret) -{ - *(int *)ret = kvm_target_cpu(); -} - struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) { struct kvm_vcpu *vcpu; @@ -2074,7 +2069,6 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
int kvm_arch_init(void *opaque) { int err; - int ret, cpu; bool in_hyp_mode; if (!is_hyp_mode_available()) { @@ -2089,14 +2083,6 @@ int kvm_arch_init(void *opaque) kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ "Only trusted guests should be used on this system.\n"); - for_each_online_cpu(cpu) { - smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); - if (ret < 0) { - kvm_err("Error, CPU %d not supported!\n", cpu); - return -ENODEV; - } - } - err = kvm_set_ipa_limit(); if (err) return err; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 1dfb83578277..8ce850fa7b49 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -842,7 +842,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, return 0; } -int __attribute_const__ kvm_target_cpu(void) +u32 __attribute_const__ kvm_target_cpu(void) { unsigned long implementor = read_cpuid_implementor(); unsigned long part_number = read_cpuid_part_number(); @@ -874,7 +874,7 @@ int __attribute_const__ kvm_target_cpu(void) int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { - int target = kvm_target_cpu(); + u32 target = kvm_target_cpu(); if (target < 0) return -ENODEV; -- cgit From 9329752bc8659e3934e2b13434b2fddb0df0bb13 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Aug 2021 10:39:54 +0530 Subject: KVM: arm64: Drop unused REQUIRES_VIRT This seems like a residue from the past. REQUIRES_VIRT is no longer available. Hence it can just be dropped along with the related code section. Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628744994-16623-6-git-send-email-anshuman.khandual@arm.com --- arch/arm64/kvm/arm.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ee9b1166f330..d779a29c5607 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -42,10 +42,6 @@ #include #include -#ifdef REQUIRES_VIRT -__asm__(".arch_extension virt"); -#endif - static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); -- cgit From ccac96977243d7916053550f62e6489760ad0adc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Aug 2021 14:03:36 +0100 Subject: KVM: arm64: Make hyp_panic() more robust when protected mode is enabled When protected mode is enabled, the host is unable to access most parts of the EL2 hypervisor image, including 'hyp_physvirt_offset' and the contents of the hypervisor's '.rodata.str' section. Unfortunately, nvhe_hyp_panic_handler() tries to read from both of these locations when handling a BUG() triggered at EL2; the former for converting the ELR to a physical address and the latter for displaying the name of the source file where the BUG() occurred. Hack the EL2 panic asm to pass both physical and virtual ELR values to the host and utilise the newly introduced CONFIG_NVHE_EL2_DEBUG so that we disable stage-2 protection for the host before returning to the EL1 panic handler. If the debug option is not enabled, display the address instead of the source file:line information.
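The crux is where the virtual-to-physical conversion of the ELR happens; condensed from the diff below, with the old line kept for contrast:

	/* Before: the host converts the ELR itself, but __hyp_pa() reads
	 * hyp_physvirt_offset, which a protected-mode host cannot access. */
	u64 elr_in_kimg = __phys_to_kimg(__hyp_pa(elr));

	/* After: EL2 passes elr_phys alongside the virtual value, so the
	 * host only performs the phys-to-kimage translation. */
	u64 elr_in_kimg = __phys_to_kimg(elr_phys);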
Cc: Andrew Scull Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210813130336.8139-1-will@kernel.org --- arch/arm64/kvm/handle_exit.c | 23 ++++++++++++++--------- arch/arm64/kvm/hyp/nvhe/host.S | 21 +++++++++++++++++---- 2 files changed, 31 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 6f48336b1d86..04ebab299aa4 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -292,11 +292,12 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index) kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu)); } -void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr, +void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, + u64 elr_virt, u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar) { - u64 elr_in_kimg = __phys_to_kimg(__hyp_pa(elr)); - u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr; + u64 elr_in_kimg = __phys_to_kimg(elr_phys); + u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt; u64 mode = spsr & PSR_MODE_MASK; /* @@ -309,20 +310,24 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr, kvm_err("Invalid host exception to nVHE hyp!\n"); } else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && (esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) { - struct bug_entry *bug = find_bug(elr_in_kimg); const char *file = NULL; unsigned int line = 0; /* All hyp bugs, including warnings, are treated as fatal. */ - if (bug) - bug_get_file_line(bug, &file, &line); + if (!is_protected_kvm_enabled() || + IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) { + struct bug_entry *bug = find_bug(elr_in_kimg); + + if (bug) + bug_get_file_line(bug, &file, &line); + } if (file) kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line); else - kvm_err("nVHE hyp BUG at: %016llx!\n", elr + hyp_offset); + kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset); } else { - kvm_err("nVHE hyp panic at: %016llx!\n", elr + hyp_offset); + kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset); } /* @@ -334,5 +339,5 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr, kvm_err("Hyp Offset: 0x%llx\n", hyp_offset); panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n", - spsr, elr, esr, far, hpfar, par, vcpu); + spsr, elr_virt, esr, far, hpfar, par, vcpu); } diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 2b23400e0fb3..4b652ffb591d 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -85,12 +86,24 @@ SYM_FUNC_START(__hyp_do_panic) mov x29, x0 +#ifdef CONFIG_NVHE_EL2_DEBUG + /* Ensure host stage-2 is disabled */ + mrs x0, hcr_el2 + bic x0, x0, #HCR_VM + msr hcr_el2, x0 + isb + tlbi vmalls12e1 + dsb nsh +#endif + /* Load the panic arguments into x0-7 */ mrs x0, esr_el2 - get_vcpu_ptr x4, x5 - mrs x5, far_el2 - mrs x6, hpfar_el2 - mov x7, xzr // Unused argument + mov x4, x3 + mov x3, x2 + hyp_pa x3, x6 + get_vcpu_ptr x5, x6 + mrs x6, far_el2 + mrs x7, hpfar_el2 /* Enter the host, conditionally restoring the host context. 
*/ cbz x29, __host_enter_without_restoring -- cgit From 51ed00e71f0130e0f3534b8e7d78facd16829426 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 18 Aug 2021 08:47:28 +0000 Subject: powerpc/32: Remove unnecessary calculations in load_up_{fpu/altivec} No need to re-read SPRN_SPRG_THREAD; we can calculate the thread address from current (r2). And remove a reload of value 1 into r4 as r4 is already 1. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c04cce578b97a76a9e69a096698b1d89f721768a.1629276437.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/fpu.S | 3 +-- arch/powerpc/kernel/vector.S | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 6010adcee16e..ba4afe3b5a9c 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -91,8 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) isync /* enable use of FP after return */ #ifdef CONFIG_PPC32 - mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ - tovirt(r5, r5) + addi r5,r2,THREAD lwz r4,THREAD_FPEXC_MODE(r5) ori r9,r9,MSR_FP /* enable FP for current */ or r9,r9,r4 diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index fc120fac1910..ba03eedfdcd8 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -65,9 +65,8 @@ _GLOBAL(load_up_altivec) 1: /* enable use of VMX after return */ #ifdef CONFIG_PPC32 - mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ + addi r5,r2,THREAD oris r9,r9,MSR_VEC@h - tovirt(r5, r5) #else ld r4,PACACURRENT(r13) addi r5,r4,THREAD /* Get THREAD */ @@ -81,7 +80,6 @@ _GLOBAL(load_up_altivec) li r4,1 stb r4,THREAD_LOAD_VEC(r5) addi r6,r5,THREAD_VRSTATE - li r4,1 li r10,VRSTATE_VSCR stw r4,THREAD_USED_VR(r5) lvx v0,r10,r6 -- cgit From 31b089bbc15a53ca7c90c77ef51390d87f612088 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 14:04:48 +0200 Subject: ARM/nommu: use the generic dma-direct code for non-coherent devices Select the right options to just use the generic dma-direct code instead of reimplementing it.
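With the generic dma-direct code in charge, a non-coherent device's streaming DMA funnels into the two arch_sync_dma_* hooks this patch keeps. A hedged sketch of a driver-side caller (the device handling is invented for illustration):

	#include <linux/dma-mapping.h>

	/* dma_map_single() on a non-coherent device ends up calling
	 * arch_sync_dma_for_device(); no arm_nommu_dma_ops indirection
	 * is involved any more. */
	static int example_start_tx(struct device *dev, void *buf, size_t len)
	{
		dma_addr_t dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

		if (dma_mapping_error(dev, dma))
			return -ENOMEM;

		/* ... program the device with 'dma' and start the transfer ... */

		dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
		return 0;
	}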
Signed-off-by: Christoph Hellwig Tested-by: Dillon Min --- arch/arm/Kconfig | 5 +- arch/arm/mm/dma-mapping-nommu.c | 173 ++-------------------------------------- 2 files changed, 9 insertions(+), 169 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2fb7012c3246..4faa11d02521 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -18,8 +18,8 @@ config ARM select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU - select ARCH_HAS_SYNC_DMA_FOR_DEVICE if SWIOTLB - select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB + select ARCH_HAS_SYNC_DMA_FOR_DEVICE if SWIOTLB || !MMU + select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB || !MMU select ARCH_HAS_TEARDOWN_DMA_OPS if MMU select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H @@ -44,6 +44,7 @@ config ARM select CPU_PM if SUSPEND || CPU_IDLE select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS select DMA_DECLARE_COHERENT + select DMA_GLOBAL_POOL if !MMU select DMA_OPS select DMA_REMAP if MMU select EDAC_SUPPORT diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 6bfd2b884505..cfd9c933d2f0 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -5,12 +5,7 @@ * Copyright (C) 2000-2004 Russell King */ -#include -#include -#include #include -#include - #include #include #include @@ -18,65 +13,8 @@ #include "dma.h" -/* - * The generic direct mapping code is used if - * - MMU/MPU is off - * - cpu is v7m w/o cache support - * - device is coherent - * otherwise arm_nommu_dma_ops is used. - * - * arm_nommu_dma_ops rely on consistent DMA memory (please, refer to - * [1] on how to declare such memory). - * - * [1] Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt - */ - -static void *arm_nommu_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) - -{ - void *ret = dma_alloc_from_global_coherent(dev, size, dma_handle); - - /* - * dma_alloc_from_global_coherent() may fail because: - * - * - no consistent DMA region has been defined, so we can't - * continue. - * - there is no space left in consistent DMA region, so we - * only can fallback to generic allocator if we are - * advertised that consistency is not required. 
- */ - - WARN_ON_ONCE(ret == NULL); - return ret; -} - -static void arm_nommu_dma_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - int ret = dma_release_from_global_coherent(get_order(size), cpu_addr); - - WARN_ON_ONCE(ret == 0); -} - -static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - int ret; - - if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret)) - return ret; - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - return -ENXIO; -} - - -static void __dma_page_cpu_to_dev(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { dmac_map_area(__va(paddr), size, dir); @@ -86,8 +24,8 @@ static void __dma_page_cpu_to_dev(phys_addr_t paddr, size_t size, outer_clean_range(paddr, paddr + size); } -static void __dma_page_dev_to_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { if (dir != DMA_TO_DEVICE) { outer_inv_range(paddr, paddr + size); @@ -95,102 +33,6 @@ static void __dma_page_dev_to_cpu(phys_addr_t paddr, size_t size, } } -static dma_addr_t arm_nommu_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t handle = page_to_phys(page) + offset; - - __dma_page_cpu_to_dev(handle, size, dir); - - return handle; -} - -static void arm_nommu_dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - __dma_page_dev_to_cpu(handle, size, dir); -} - - -static int arm_nommu_dma_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - sg_dma_address(sg) = sg_phys(sg); - sg_dma_len(sg) = sg->length; - __dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir); - } - - return nents; -} - -static void arm_nommu_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - __dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir); -} - -static void arm_nommu_dma_sync_single_for_device(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - __dma_page_cpu_to_dev(handle, size, dir); -} - -static void arm_nommu_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - __dma_page_cpu_to_dev(handle, size, dir); -} - -static void arm_nommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - __dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir); -} - -static void arm_nommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - __dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir); -} - -const struct dma_map_ops arm_nommu_dma_ops = { - .alloc = arm_nommu_dma_alloc, - .free = arm_nommu_dma_free, - .alloc_pages 
= dma_direct_alloc_pages, - .free_pages = dma_direct_free_pages, - .mmap = arm_nommu_dma_mmap, - .map_page = arm_nommu_dma_map_page, - .unmap_page = arm_nommu_dma_unmap_page, - .map_sg = arm_nommu_dma_map_sg, - .unmap_sg = arm_nommu_dma_unmap_sg, - .sync_single_for_device = arm_nommu_dma_sync_single_for_device, - .sync_single_for_cpu = arm_nommu_dma_sync_single_for_cpu, - .sync_sg_for_device = arm_nommu_dma_sync_sg_for_device, - .sync_sg_for_cpu = arm_nommu_dma_sync_sg_for_cpu, -}; -EXPORT_SYMBOL(arm_nommu_dma_ops); - void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { @@ -201,14 +43,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, * enough to check if MPU is in use or not since in absense of * MPU system memory map is used. */ - dev->archdata.dma_coherent = (cacheid) ? coherent : true; + dev->dma_coherent = cacheid ? coherent : true; } else { /* * Assume coherent DMA in case MMU/MPU has not been set up. */ - dev->archdata.dma_coherent = (get_cr() & CR_M) ? coherent : true; + dev->dma_coherent = (get_cr() & CR_M) ? coherent : true; } - - if (!dev->archdata.dma_coherent) - set_dma_ops(dev, &arm_nommu_dma_ops); } -- cgit From 39f75da7bcc829ddc4d40bb60d0e95520de7898b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 2 Aug 2021 23:40:31 +0300 Subject: isystem: trim/fixup stdarg.h and other headers Delete/fixup few includes in anticipation of global -isystem compile option removal. Note: crypto/aegis128-neon-inner.c keeps due to redefinition of uintptr_t error (one definition comes from , another from ). Signed-off-by: Alexey Dobriyan Signed-off-by: Masahiro Yamada --- arch/arm/kernel/process.c | 2 -- arch/arm/mach-bcm/bcm_kona_smc.c | 2 -- arch/arm64/kernel/process.c | 3 --- arch/openrisc/kernel/process.c | 2 -- arch/parisc/kernel/process.c | 3 --- arch/powerpc/kernel/prom.c | 1 - arch/sparc/kernel/process_32.c | 3 --- arch/sparc/kernel/process_64.c | 3 --- arch/um/drivers/rtc_user.c | 1 + arch/um/drivers/vector_user.c | 1 + arch/um/include/shared/irq_user.h | 1 - arch/um/include/shared/os.h | 1 - arch/um/os-Linux/signal.c | 2 +- arch/um/os-Linux/util.c | 1 + 14 files changed, 4 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index fc9e8b37eaa8..bb5ad8a6a4c3 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -5,8 +5,6 @@ * Copyright (C) 1996-2000 Russell King - Converted to ARM. * Original Copyright (C) 1995 Linus Torvalds */ -#include - #include #include #include diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c index 43a16f922b53..43829e49ad93 100644 --- a/arch/arm/mach-bcm/bcm_kona_smc.c +++ b/arch/arm/mach-bcm/bcm_kona_smc.c @@ -10,8 +10,6 @@ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ - -#include #include #include #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index c8989b999250..5f7ac9a0f9a3 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -6,9 +6,6 @@ * Copyright (C) 1996-2000 Russell King - Converted to ARM. * Copyright (C) 2012 ARM Ltd. 
*/ - -#include - #include #include #include diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index eb62429681fc..b0698d9ce14f 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -14,8 +14,6 @@ */ #define __KERNEL_SYSCALLS__ -#include - #include #include #include diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 184ec3c1eae4..38ec4ae81239 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -17,9 +17,6 @@ * Copyright (C) 2001-2014 Helge Deller * Copyright (C) 2002 Randolph Chung */ - -#include - #include #include #include diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f620e04dc9bf..a1e7ba0fad09 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -11,7 +11,6 @@ #undef DEBUG -#include #include #include #include diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 93983d6d431d..bbbe0cfef746 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -8,9 +8,6 @@ /* * This file handles the architecture-dependent parts of process handling.. */ - -#include - #include #include #include diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index d33c58a58d4f..0cabcdfb23fd 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -9,9 +9,6 @@ /* * This file handles the architecture-dependent parts of process handling.. */ - -#include - #include #include #include diff --git a/arch/um/drivers/rtc_user.c b/arch/um/drivers/rtc_user.c index 4016bc1d577e..7c3cec4c68cf 100644 --- a/arch/um/drivers/rtc_user.c +++ b/arch/um/drivers/rtc_user.c @@ -3,6 +3,7 @@ * Copyright (C) 2020 Intel Corporation * Author: Johannes Berg */ +#include #include #include #include diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c index bae53220ce26..e4ffeb9a1fa4 100644 --- a/arch/um/drivers/vector_user.c +++ b/arch/um/drivers/vector_user.c @@ -3,6 +3,7 @@ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ +#include #include #include #include diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h index 065829f443ae..86a8a573b65c 100644 --- a/arch/um/include/shared/irq_user.h +++ b/arch/um/include/shared/irq_user.h @@ -7,7 +7,6 @@ #define __IRQ_USER_H__ #include -#include enum um_irq_type { IRQ_READ, diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 60b84edc8a68..96d400387c93 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -8,7 +8,6 @@ #ifndef __OS_H__ #define __OS_H__ -#include #include #include #include diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 6de99bb16113..6cf098c23a39 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -67,7 +67,7 @@ int signals_enabled; #ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT static int signals_blocked; #else -#define signals_blocked false +#define signals_blocked 0 #endif static unsigned int signals_pending; static unsigned int signals_active = 0; diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 07327425d06e..41297ec404bf 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -3,6 +3,7 @@ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ +#include #include #include #include -- cgit From c0891ac15f0428ffa81b2e818d416bdf3cb74ab6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 2 Aug 2021 
23:40:32 +0300 Subject: isystem: ship and use stdarg.h Ship minimal stdarg.h (1 type, 4 macros) as <linux/stdarg.h>. stdarg.h is the only userspace header commonly used in the kernel. GPL 2 version of <stdarg.h> can be extracted from http://archive.debian.org/debian/pool/main/g/gcc-4.2/gcc-4.2_4.2.4.orig.tar.gz Signed-off-by: Alexey Dobriyan Acked-by: Rafael J. Wysocki Acked-by: Ard Biesheuvel Signed-off-by: Masahiro Yamada --- arch/parisc/kernel/firmware.c | 2 +- arch/powerpc/kernel/prom_init.c | 2 +- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/kernel/udbg.c | 2 +- arch/s390/boot/pgm_check_info.c | 2 +- arch/x86/boot/boot.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 665b70086685..7034227dbdf3 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -51,7 +51,7 @@ * prumpf 991016 */ -#include <stdarg.h> +#include <linux/stdarg.h> #include #include diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a5bf355ce1d6..10664633f7e3 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -14,7 +14,7 @@ /* we cannot use FORTIFY as it brings in new symbols */ #define __NO_FORTIFY -#include <stdarg.h> +#include <linux/stdarg.h> #include #include #include diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 99f2cce635fb..ff80bbad22a5 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -7,7 +7,7 @@ * Copyright (C) 2001 IBM. */ -#include <stdarg.h> +#include <linux/stdarg.h> #include #include #include diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 01595e8cafe7..b1544b2f6321 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -5,7 +5,7 @@ * c 2001 PPC 64 Team, IBM Corp */ -#include <stdarg.h> +#include <linux/stdarg.h> #include #include #include diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 3a46abed2549..b7d8dd88bbf2 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include <linux/stdarg.h> #include #include #include @@ -8,7 +9,6 @@ #include #include #include -#include <stdarg.h> #include "boot.h" const char hex_asc[] = "0123456789abcdef"; diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index ca866f1cca2e..34c9dbb6a47d 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -18,7 +18,7 @@ #ifndef __ASSEMBLY__ -#include <stdarg.h> +#include <linux/stdarg.h> #include #include #include -- cgit From c1dec343d7abdf8e71aab2a289ab45ce8b1afb7e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 16:11:21 +0200 Subject: hexagon: use the generic global coherent pool Switch hexagon to use the generic code for dma_alloc_coherent from a global pre-filled pool. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Cain --- arch/hexagon/Kconfig | 1 + arch/hexagon/kernel/dma.c | 57 +++++++++-------------------------------- 2 files changed, 12 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index e5a852080730..aab1a40eb653 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -7,6 +7,7 @@ config HEXAGON select ARCH_32BIT_OFF_T select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_PREEMPT + select DMA_GLOBAL_POOL # Other pending projects/to-do items. 
# select HAVE_REGS_AND_STACK_ACCESS_API # select HAVE_HW_BREAKPOINT if PERF_EVENTS diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 00b9a81075dd..882680e81a30 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -7,54 +7,8 @@ #include #include -#include -#include #include -static struct gen_pool *coherent_pool; - - -/* Allocates from a pool of uncached memory that was reserved at boot time */ - -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_addr, - gfp_t flag, unsigned long attrs) -{ - void *ret; - - /* - * Our max_low_pfn should have been backed off by 16MB in - * mm/init.c to create DMA coherent space. Use that as the VA - * for the pool. - */ - - if (coherent_pool == NULL) { - coherent_pool = gen_pool_create(PAGE_SHIFT, -1); - - if (coherent_pool == NULL) - panic("Can't create %s() memory pool!", __func__); - else - gen_pool_add(coherent_pool, - (unsigned long)pfn_to_virt(max_low_pfn), - hexagon_coherent_pool_size, -1); - } - - ret = (void *) gen_pool_alloc(coherent_pool, size); - - if (ret) { - memset(ret, 0, size); - *dma_addr = (dma_addr_t) virt_to_phys(ret); - } else - *dma_addr = ~0; - - return ret; -} - -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) -{ - gen_pool_free(coherent_pool, (unsigned long) vaddr, size); -} - void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { @@ -77,3 +31,14 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, BUG(); } } + +/* + * Our max_low_pfn should have been backed off by 16MB in mm/init.c to create + * DMA coherent space. Use that for the pool. + */ +static int __init hexagon_dma_init(void) +{ + return dma_init_global_coherent(PFN_PHYS(max_low_pfn), + hexagon_coherent_pool_size); +} +core_initcall(hexagon_dma_init); -- cgit From 6654f9dfcb88fea3b9affc180dc3c04333d0f306 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 18 Aug 2021 20:21:30 +0000 Subject: KVM: arm64: Fix read-side race on updates to vcpu reset state KVM correctly serializes writes to a vCPU's reset state, however since we do not take the KVM lock on the read side it is entirely possible to read state from two different reset requests. Cure the race for now by taking the KVM lock when reading the reset_state structure. Fixes: 358b28f09f0a ("arm/arm64: KVM: Allow a VCPU to fully reset itself") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818202133.1106786-2-oupton@google.com --- arch/arm64/kvm/reset.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index cba7872d69a8..d862441b03b1 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -210,10 +210,16 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { + struct vcpu_reset_state reset_state; int ret; bool loaded; u32 pstate; + mutex_lock(&vcpu->kvm->lock); + reset_state = vcpu->arch.reset_state; + WRITE_ONCE(vcpu->arch.reset_state.reset, false); + mutex_unlock(&vcpu->kvm->lock); + /* Reset PMU outside of the non-preemptible section */ kvm_pmu_vcpu_reset(vcpu); @@ -276,8 +282,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) * Additional reset state handling that PSCI may have imposed on us. * Must be done after all the sys_reg reset. 
*/ - if (vcpu->arch.reset_state.reset) { - unsigned long target_pc = vcpu->arch.reset_state.pc; + if (reset_state.reset) { + unsigned long target_pc = reset_state.pc; /* Gracefully handle Thumb2 entry point */ if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) { @@ -286,13 +292,11 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } /* Propagate caller endianness */ - if (vcpu->arch.reset_state.be) + if (reset_state.be) kvm_vcpu_set_be(vcpu); *vcpu_pc(vcpu) = target_pc; - vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0); - - vcpu->arch.reset_state.reset = false; + vcpu_set_reg(vcpu, 0, reset_state.r0); } /* Reset timer */ -- cgit From 6826c6849b46aaa91300201213701eb861af4ba0 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 18 Aug 2021 20:21:31 +0000 Subject: KVM: arm64: Handle PSCI resets before userspace touches vCPU state The CPU_ON PSCI call takes a payload that KVM uses to configure a destination vCPU to run. This payload is non-architectural state and not exposed through any existing UAPI. Effectively, we have a race between CPU_ON and userspace saving/restoring a guest: if the target vCPU isn't run again before the VMM saves its state, the requested PC and context ID are lost. When restored, the target vCPU will be runnable and start executing at its old PC. We can avoid this race by making sure the reset payload is serviced before userspace can access a vCPU's state. Fixes: 358b28f09f0a ("arm/arm64: KVM: Allow a VCPU to fully reset itself") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818202133.1106786-3-oupton@google.com --- arch/arm64/kvm/arm.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..8ce9996bcbfd 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1215,6 +1215,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&reg, argp, sizeof(reg))) break; + /* + * We could owe a reset due to PSCI. Handle the pending reset + * here to ensure userspace register accesses are ordered after + * the reset. + */ + if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) + kvm_reset_vcpu(vcpu); + if (ioctl == KVM_SET_ONE_REG) r = kvm_arm_set_reg(vcpu, &reg); else -- cgit From e10ecb4d6c0761ca545b3946df1707a41f9f845e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 18 Aug 2021 20:21:32 +0000 Subject: KVM: arm64: Enforce reserved bits for PSCI target affinities According to the PSCI specification, ARM DEN 0022D, 5.1.4 "CPU_ON", the CPU_ON function takes a target_cpu argument that is bit-compatible with the affinity fields in MPIDR_EL1. All other bits in the argument are RES0. Note that the same constraints apply to the target_affinity argument for the AFFINITY_INFO call. Enforce the spec by returning INVALID_PARAMS if a guest incorrectly sets a RES0 bit. 
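As a standalone illustration of the rule being enforced (an editor's sketch, not part of the patch; the helper name is invented): on arm64, MPIDR_HWID_BITMASK is 0xff00ffffff, covering Aff0/Aff1/Aff2 in bits [23:0] and Aff3 in bits [39:32], so a conforming target_cpu or target_affinity value must have every other bit clear.

#include <stdbool.h>
#include <stdint.h>

#define MPIDR_HWID_BITMASK 0xff00ffffffULL /* Aff3 [39:32], Aff2/Aff1/Aff0 [23:0] */

/* Mirrors the kvm_psci_valid_affinity() check added by the diff below. */
static bool psci_affinity_is_valid(uint64_t target)
{
	return !(target & ~MPIDR_HWID_BITMASK);
}

Under this rule, 0x100 (Aff1 == 1) remains a valid target, while 0x80000000 (bit 31, outside every affinity field) now gets PSCI_RET_INVALID_PARAMS.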
Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818202133.1106786-4-oupton@google.com --- arch/arm64/kvm/psci.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index db4056ecccfd..74c47d420253 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -59,6 +59,12 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) kvm_vcpu_kick(vcpu); } +static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu, + unsigned long affinity) +{ + return !(affinity & ~MPIDR_HWID_BITMASK); +} + static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct vcpu_reset_state *reset_state; @@ -66,9 +72,9 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) struct kvm_vcpu *vcpu = NULL; unsigned long cpu_id; - cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK; - if (vcpu_mode_is_32bit(source_vcpu)) - cpu_id &= ~((u32) 0); + cpu_id = smccc_get_arg1(source_vcpu); + if (!kvm_psci_valid_affinity(source_vcpu, cpu_id)) + return PSCI_RET_INVALID_PARAMS; vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id); @@ -126,6 +132,9 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) target_affinity = smccc_get_arg1(vcpu); lowest_affinity_level = smccc_get_arg2(vcpu); + if (!kvm_psci_valid_affinity(vcpu, target_affinity)) + return PSCI_RET_INVALID_PARAMS; + /* Determine target affinity mask */ target_affinity_mask = psci_affinity_mask(lowest_affinity_level); if (!target_affinity_mask) -- cgit From fe5161d2c39b8c2801f0e786631460c6e8a1cae4 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 2 Aug 2021 19:28:07 +0000 Subject: KVM: arm64: Record number of signal exits as a vCPU stat Most other architectures that implement KVM record a statistic indicating the number of times a vCPU has exited due to a pending signal. Add support for that stat to arm64. 
Reviewed-by: Jing Zhang Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210802192809.1851010-2-oupton@google.com --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/guest.c | 1 + 3 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 41911585ae0c..70e129f2b574 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -576,6 +576,7 @@ struct kvm_vcpu_stat { u64 wfi_exit_stat; u64 mmio_exit_user; u64 mmio_exit_kernel; + u64 signal_exits; u64 exits; }; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..60d0a546d7fd 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -783,6 +783,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (signal_pending(current)) { ret = -EINTR; run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.signal_exits; } /* diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 1dfb83578277..853d1e8d2e73 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -50,6 +50,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, wfi_exit_stat), STATS_DESC_COUNTER(VCPU, mmio_exit_user), STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), + STATS_DESC_COUNTER(VCPU, signal_exits), STATS_DESC_COUNTER(VCPU, exits) }; static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == -- cgit From 6caa5812e2d126a0aa8a17816c1ba6f0a0c2b309 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 2 Aug 2021 19:28:09 +0000 Subject: KVM: arm64: Use generic KVM xfer to guest work function Clean up handling of checks for pending work by switching to the generic infrastructure to do so. We pick up handling for TIF_NOTIFY_RESUME from this switch, meaning that task work will be correctly handled. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210802192809.1851010-4-oupton@google.com --- arch/arm64/kvm/Kconfig | 1 + arch/arm64/kvm/arm.c | 72 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 45 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index a4eba0908bfa..8bc1fac5fa26 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -26,6 +26,7 @@ menuconfig KVM select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT + select KVM_XFER_TO_GUEST_WORK select SRCU select KVM_VFIO select HAVE_KVM_EVENTFD diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 60d0a546d7fd..8245efc6e88f 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -6,6 +6,7 @@ #include #include +#include <linux/entry-kvm.h> #include #include #include @@ -714,6 +715,45 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) static_branch_unlikely(&arm64_mismatched_32bit_el0); } +/** + * kvm_vcpu_exit_request - returns true if the VCPU should *not* enter the guest + * @vcpu: The VCPU pointer + * @ret: Pointer to write optional return code + * + * Returns: true if the VCPU needs to return to a preemptible + interruptible + * kernel context and skip guest entry. + * + * This function disambiguates between two different types of exits: exits to a + * preemptible + interruptible kernel context and exits to userspace. For an + * exit to userspace, this function will write the return code to ret and return + * true. For an exit to preemptible + interruptible kernel context (i.e. 
check + * for pending work and re-enter), return true without writing to ret. + */ +static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) +{ + struct kvm_run *run = vcpu->run; + + /* + * If we're using a userspace irqchip, then check if we need + * to tell a userspace irqchip about timer or PMU level + * changes and if so, exit to userspace (the actual level + * state gets updated in kvm_timer_update_run and + * kvm_pmu_update_run below). + */ + if (static_branch_unlikely(&userspace_irqchip_in_use)) { + if (kvm_timer_should_notify_user(vcpu) || + kvm_pmu_should_notify_user(vcpu)) { + *ret = -EINTR; + run->exit_reason = KVM_EXIT_INTR; + return true; + } + } + + return kvm_request_pending(vcpu) || + need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) || + xfer_to_guest_mode_work_pending(); +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -757,7 +797,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* * Check conditions before entering the guest */ - cond_resched(); + ret = xfer_to_guest_mode_handle_work(vcpu); + if (!ret) + ret = 1; update_vmid(&vcpu->arch.hw_mmu->vmid); @@ -776,31 +818,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_vgic_flush_hwstate(vcpu); - /* - * Exit if we have a signal pending so that we can deliver the - * signal to user space. - */ - if (signal_pending(current)) { - ret = -EINTR; - run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.signal_exits; - } - - /* - * If we're using a userspace irqchip, then check if we need - * to tell a userspace irqchip about timer or PMU level - * changes and if so, exit to userspace (the actual level - * state gets updated in kvm_timer_update_run and - * kvm_pmu_update_run below). - */ - if (static_branch_unlikely(&userspace_irqchip_in_use)) { - if (kvm_timer_should_notify_user(vcpu) || - kvm_pmu_should_notify_user(vcpu)) { - ret = -EINTR; - run->exit_reason = KVM_EXIT_INTR; - } - } - /* * Ensure we set mode to IN_GUEST_MODE after we disable * interrupts and before the final VCPU requests check. @@ -809,8 +826,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ smp_store_mb(vcpu->mode, IN_GUEST_MODE); - if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) || - kvm_request_pending(vcpu)) { + if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) { vcpu->mode = OUTSIDE_GUEST_MODE; isb(); /* Ensure work in x_flush_hwstate is committed */ kvm_pmu_sync_hwstate(vcpu); -- cgit From b9a51949cebcd57bfb9385d9da62ace52564898c Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Wed, 18 Aug 2021 14:32:05 -0700 Subject: KVM: arm64: vgic: Drop WARN from vgic_get_irq vgic_get_irq(intid) is used all over the vgic code in order to get a reference to a struct irq. It warns whenever intid is not a valid number (like when it's a reserved IRQ number). The issue is that this warning can be triggered from userspace (e.g., KVM_IRQ_LINE for intid 1020). Drop the WARN call from vgic_get_irq. 
Signed-off-by: Ricardo Koller Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818213205.598471-1-ricarkol@google.com --- arch/arm64/kvm/vgic/vgic.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 111bff47e471..81cec508d413 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -106,7 +106,6 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, if (intid >= VGIC_MIN_LPI) return vgic_get_lpi(kvm, intid); - WARN(1, "Looking up struct vgic_irq for reserved INTID"); return NULL; } -- cgit From c26d4c5d4f0df7207da3975458261927f9305465 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 19 Aug 2021 13:39:53 +0200 Subject: powerpc/kvm: Remove obsolete and unneeded select Commit a278e7ea608b ("powerpc: Fix compile issue with force DAWR") selects the non-existing config PPC_DAWR_FORCE_ENABLE for config KVM_BOOK3S_64_HANDLER. As this commit also introduces a config PPC_DAWR and this config PPC_DAWR is selected with PPC if PPC64, there is no need for any further select in the KVM_BOOK3S_64_HANDLER. Remove an obsolete and unneeded select in config KVM_BOOK3S_64_HANDLER. The issue was identified with ./scripts/checkkconfigsymbols.py. Fixes: a278e7ea608b ("powerpc: Fix compile issue with force DAWR") Signed-off-by: Lukas Bulwahn Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210819113954.17515-2-lukas.bulwahn@gmail.com --- arch/powerpc/kvm/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index e45644657d49..ff581d70f20c 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -38,7 +38,6 @@ config KVM_BOOK3S_32_HANDLER config KVM_BOOK3S_64_HANDLER bool select KVM_BOOK3S_HANDLER - select PPC_DAWR_FORCE_ENABLE config KVM_BOOK3S_PR_POSSIBLE bool -- cgit From 6cd717fe9b3a787f8e8f9d0bc9b7634a9c104b3e Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 19 Aug 2021 10:46:54 +1000 Subject: powerpc/tau: Add 'static' storage qualifier to 'tau_work' definition This patch prevents the following sparse warning. arch/powerpc/kernel/tau_6xx.c:199:1: sparse: sparse: symbol 'tau_work' was not declared. Should it be static? Reported-by: kernel test robot Signed-off-by: Finn Thain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/44ab381741916a51e783c4a50d0b186abdd8f280.1629334014.git.fthain@linux-m68k.org --- arch/powerpc/kernel/tau_6xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index b9a047d92ec0..8e83d19fe8fa 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -164,7 +164,7 @@ static void tau_work_func(struct work_struct *work) queue_work(tau_workq, work); } -DECLARE_WORK(tau_work, tau_work_func); +static DECLARE_WORK(tau_work, tau_work_func); /* * setup the TAU -- cgit From f9addd85fbfacf0d155e83dbee8696d6df5ed0c7 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 13 Aug 2021 13:51:58 +0530 Subject: powerpc/perf/hv-gpci: Fix counter value parsing H_GetPerformanceCounterInfo (0xF080) hcall returns the counter data in the result buffer. The result buffer has a specific format defined in the PAPR specification. Among its fields are the offset and width of the counter data returned. Counter data are returned in an unsigned char array in big endian byte order. 
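Concretely (an editor's sketch, not from the patch: a counter of 'length' bytes starting at 'offset' in the returned array, with byte 0 of the field the most significant), assembling the value looks like this:

#include <stdint.h>

static uint64_t gpci_read_counter(const uint8_t *bytes, unsigned int offset,
				  unsigned int length)
{
	uint64_t count = 0;
	unsigned int i;

	/* Big endian: the first byte of the field lands in the top byte. */
	for (i = offset; i < offset + length; i++)
		count |= (uint64_t)bytes[i] << ((length - 1 - (i - offset)) * 8);

	return count;
}

For a two-byte field { 0x40, 0x40 } this yields 0x4040, where the pre-fix logic computed 0x40 | (0x40 << 1) = 0xc0.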
To get the final counter data, the values must be left-shifted a byte at a time. But commit 220a0c609ad17 ("powerpc/perf: Add support for the hv gpci (get performance counter info) interface") made the shifting bitwise and also assumed little endian order. Because of that, hcall counter values are reported incorrectly. In particular this can lead to counters going backwards, which messes up the counter prev vs now calculation and leads to huge counter values being reported:

#: perf stat -e hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ -C 0 -I 1000
time counts unit events
1.000078854 18,446,744,073,709,535,232 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
2.000213293 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
3.000320107 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
4.000428392 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
5.000537864 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
6.000649087 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
7.000760312 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
8.000865218 16,448 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
9.000978985 18,446,744,073,709,535,232 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
10.001088891 16,384 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
11.001201435 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
12.001307937 18,446,744,073,709,535,232 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/

Fix the shifting logic to correctly match the format, i.e. read bytes in big endian order. Fixes: e4f226b1580b ("powerpc/perf/hv-gpci: Increase request buffer size") Cc: stable@vger.kernel.org # v4.6+ Reported-by: Nageswara R Sastry Signed-off-by: Kajol Jain Tested-by: Nageswara R Sastry Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210813082158.429023-1-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index d48413e28c39..c756228a081f 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -175,7 +175,7 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index, */ count = 0; for (i = offset; i < offset + length; i++) - count |= arg->bytes[i] << (i - offset); + count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); *value = count; out: -- cgit From 3134cc8beb69d0db9de651081707c4651c011621 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 19 Aug 2021 19:03:05 +0100 Subject: KVM: arm64: vgic: Resample HW pending state on deactivation When a mapped level interrupt (a timer, for example) is deactivated by the guest, the corresponding host interrupt is equally deactivated. However, the fate of the pending state still needs to be dealt with in SW. This is especially true when the interrupt was in the active+pending state in the virtual distributor at the point where the guest was entered. On exit, the pending state is potentially stale (the guest may have put the interrupt in a non-pending state). If we don't do anything, the interrupt will be spuriously injected in the guest. Although this shouldn't have any ill effect (spurious interrupts are always possible), we can improve the emulation by detecting the deactivation-while-pending case and resampling the interrupt. 
While we're at it, move the logic into a common helper that can be shared between the two GIC implementations. Fixes: e40cc57bac79 ("KVM: arm/arm64: vgic: Support level-triggered mapped interrupts") Reported-by: Raghavendra Rao Ananta Tested-by: Raghavendra Rao Ananta Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210819180305.1670525-1-maz@kernel.org --- arch/arm64/kvm/vgic/vgic-v2.c | 36 +++++------------------------------- arch/arm64/kvm/vgic/vgic-v3.c | 36 +++++------------------------------- arch/arm64/kvm/vgic/vgic.c | 38 ++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/vgic/vgic.h | 2 ++ 4 files changed, 50 insertions(+), 62 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 2c580204f1dc..95a18cec14a3 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -60,6 +60,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) u32 val = cpuif->vgic_lr[lr]; u32 cpuid, intid = val & GICH_LR_VIRTUALID; struct vgic_irq *irq; + bool deactivated; /* Extract the source vCPU id from the LR */ cpuid = val & GICH_LR_PHYSID_CPUID; @@ -75,7 +76,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) raw_spin_lock(&irq->irq_lock); - /* Always preserve the active bit */ + /* Always preserve the active bit, note deactivation */ + deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT); irq->active = !!(val & GICH_LR_ACTIVE_BIT); if (irq->active && vgic_irq_is_sgi(intid)) @@ -96,36 +98,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE)) irq->pending_latch = false; - /* - * Level-triggered mapped IRQs are special because we only - * observe rising edges as input to the VGIC. - * - * If the guest never acked the interrupt we have to sample - * the physical line and set the line level, because the - * device state could have changed or we simply need to - * process the still pending interrupt later. - * - * If this causes us to lower the level, we have to also clear - * the physical active state, since we will otherwise never be - * told when the interrupt becomes asserted again. - * - * Another case is when the interrupt requires a helping hand - * on deactivation (no HW deactivation, for example). 
- */ - if (vgic_irq_is_mapped_level(irq)) { - bool resample = false; - - if (val & GICH_LR_PENDING_BIT) { - irq->line_level = vgic_get_phys_line_level(irq); - resample = !irq->line_level; - } else if (vgic_irq_needs_resampling(irq) && - !(irq->active || irq->pending_latch)) { - resample = true; - } - - if (resample) - vgic_irq_set_phys_active(irq, false); - } + /* Handle resampling for mapped interrupts if required */ + vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT); raw_spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 66004f61cd83..21a6207fb2ee 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -46,6 +46,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) u32 intid, cpuid; struct vgic_irq *irq; bool is_v2_sgi = false; + bool deactivated; cpuid = val & GICH_LR_PHYSID_CPUID; cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; @@ -68,7 +69,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) raw_spin_lock(&irq->irq_lock); - /* Always preserve the active bit */ + /* Always preserve the active bit, note deactivation */ + deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT); irq->active = !!(val & ICH_LR_ACTIVE_BIT); if (irq->active && is_v2_sgi) @@ -89,36 +91,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) irq->pending_latch = false; - /* - * Level-triggered mapped IRQs are special because we only - * observe rising edges as input to the VGIC. - * - * If the guest never acked the interrupt we have to sample - * the physical line and set the line level, because the - * device state could have changed or we simply need to - * process the still pending interrupt later. - * - * If this causes us to lower the level, we have to also clear - * the physical active state, since we will otherwise never be - * told when the interrupt becomes asserted again. - * - * Another case is when the interrupt requires a helping hand - * on deactivation (no HW deactivation, for example). - */ - if (vgic_irq_is_mapped_level(irq)) { - bool resample = false; - - if (val & ICH_LR_PENDING_BIT) { - irq->line_level = vgic_get_phys_line_level(irq); - resample = !irq->line_level; - } else if (vgic_irq_needs_resampling(irq) && - !(irq->active || irq->pending_latch)) { - resample = true; - } - - if (resample) - vgic_irq_set_phys_active(irq, false); - } + /* Handle resampling for mapped interrupts if required */ + vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT); raw_spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 81cec508d413..5dad4996cfb2 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -1021,3 +1021,41 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) return map_is_active; } + +/* + * Level-triggered mapped IRQs are special because we only observe rising + * edges as input to the VGIC. + * + * If the guest never acked the interrupt we have to sample the physical + * line and set the line level, because the device state could have changed + * or we simply need to process the still pending interrupt later. + * + * We could also have entered the guest with the interrupt active+pending. + * On the next exit, we need to re-evaluate the pending state, as it could + * otherwise result in a spurious interrupt by injecting a now potentially + * stale pending state. 
+ * + * If this causes us to lower the level, we have to also clear the physical + * active state, since we will otherwise never be told when the interrupt + * becomes asserted again. + * + * Another case is when the interrupt requires a helping hand on + * deactivation (no HW deactivation, for example). + */ +void vgic_irq_handle_resampling(struct vgic_irq *irq, + bool lr_deactivated, bool lr_pending) +{ + if (vgic_irq_is_mapped_level(irq)) { + bool resample = false; + + if (unlikely(vgic_irq_needs_resampling(irq))) { + resample = !(irq->active || irq->pending_latch); + } else if (lr_pending || (lr_deactivated && irq->line_level)) { + irq->line_level = vgic_get_phys_line_level(irq); + resample = !irq->line_level; + } + + if (resample) + vgic_irq_set_phys_active(irq, false); + } +} diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index dc1f3d1657ee..14a9218641f5 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -169,6 +169,8 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, unsigned long flags); void vgic_kick_vcpus(struct kvm *kvm); +void vgic_irq_handle_resampling(struct vgic_irq *irq, + bool lr_deactivated, bool lr_pending); int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, phys_addr_t addr, phys_addr_t alignment); -- cgit From 923a547d71b967c808a596968cf8022102f8b5b2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 6 Aug 2021 12:31:06 +0100 Subject: KVM: arm64: Move kern_hyp_va() usage in __load_guest_stage2() into the callers It is a bit awkward to use kern_hyp_va() in __load_guest_stage2(), especially as the helper is shared between VHE and nVHE. Instead, move the use of kern_hyp_va() into the nVHE code, and pass a pointer to the kvm->arch structure instead. Although this may look a bit awkward, it allows for some further simplification. 
Cc: Catalin Marinas Cc: Jade Alglave Cc: Shameer Kolothum Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20210806113109.2475-4-will@kernel.org --- arch/arm64/include/asm/kvm_mmu.h | 5 +++-- arch/arm64/kvm/hyp/nvhe/switch.c | 4 +++- arch/arm64/kvm/hyp/nvhe/tlb.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/tlb.c | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b52c5c4b9a3d..05e089653a1a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -280,9 +280,10 @@ static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); } -static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) +static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu, + struct kvm_arch *arch) { - __load_stage2(mmu, kern_hyp_va(mmu->arch)->vtcr); + __load_stage2(mmu, arch->vtcr); } static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f7af9688c1f7..e50a49082923 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -170,6 +170,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; + struct kvm_s2_mmu *mmu; bool pmu_switch_needed; u64 exit_code; @@ -213,7 +214,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg32_restore_state(vcpu); __sysreg_restore_state_nvhe(guest_ctxt); - __load_guest_stage2(kern_hyp_va(vcpu->arch.hw_mmu)); + mmu = kern_hyp_va(vcpu->arch.hw_mmu); + __load_guest_stage2(mmu, kern_hyp_va(mmu->arch)); __activate_traps(vcpu); __hyp_vgic_restore_state(vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 38ed0f6f2703..76229407d8f0 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -39,7 +39,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * ensuring that we always have an ISB, but not two ISBs back * to back. */ - __load_guest_stage2(mmu); + __load_guest_stage2(mmu, kern_hyp_va(mmu->arch)); asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index b3229924d243..0cb7523a501a 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -128,7 +128,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) * __load_guest_stage2 configures stage 2 translation, and * __activate_traps clear HCR_EL2.TGE (among other things). */ - __load_guest_stage2(vcpu->arch.hw_mmu); + __load_guest_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); __activate_traps(vcpu); __kvm_adjust_pc(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index 66f17349f0c3..5e9fb3989e0b 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -53,7 +53,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * place before clearing TGE. __load_guest_stage2() already * has an ISB in order to deal with this. 
*/ - __load_guest_stage2(mmu); + __load_guest_stage2(mmu, mmu->arch); val = read_sysreg(hcr_el2); val &= ~HCR_TGE; write_sysreg(val, hcr_el2); -- cgit From 4efc0ede4f31d7ec25c3dee0c8f07f93735cee6d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 6 Aug 2021 12:31:07 +0100 Subject: KVM: arm64: Unify stage-2 programming behind __load_stage2() The protected mode relies on a separate helper to load the S2 context. Move over to the __load_guest_stage2() helper instead, and rename it to __load_stage2() to present a unified interface. Cc: Catalin Marinas Cc: Jade Alglave Cc: Shameer Kolothum Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20210806113109.2475-5-will@kernel.org --- arch/arm64/include/asm/kvm_mmu.h | 11 +++-------- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/nvhe/tlb.c | 4 ++-- arch/arm64/kvm/hyp/vhe/switch.c | 6 +++--- arch/arm64/kvm/hyp/vhe/tlb.c | 4 ++-- 7 files changed, 13 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 05e089653a1a..08bc81f6944b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -267,9 +267,10 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) * Must be called from hyp code running at EL2 with an updated VTTBR * and interrupts disabled. */ -static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long vtcr) +static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, + struct kvm_arch *arch) { - write_sysreg(vtcr, vtcr_el2); + write_sysreg(arch->vtcr, vtcr_el2); write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* @@ -280,12 +281,6 @@ static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); } -static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu, - struct kvm_arch *arch) -{ - __load_stage2(mmu, arch->vtcr); -} - static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) { return container_of(mmu->arch, struct kvm, arch); diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 9c227d87c36d..8901dc95d7de 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -29,7 +29,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); static __always_inline void __load_host_stage2(void) { if (static_branch_likely(&kvm_protected_mode_initialized)) - __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr); + __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch); else write_sysreg(0, vttbr_el2); } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d938ce95d3bd..36aea13c9e5a 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -126,7 +126,7 @@ int __pkvm_prot_finalize(void) kvm_flush_dcache_to_poc(params, sizeof(*params)); write_sysreg(params->hcr_el2, hcr_el2); - __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr); + __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch); /* * Make sure to have an ISB before the TLB maintenance below but only diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index e50a49082923..3e7ad32b3f0d 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ 
-215,7 +215,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(guest_ctxt); mmu = kern_hyp_va(vcpu->arch.hw_mmu); - __load_guest_stage2(mmu, kern_hyp_va(mmu->arch)); + __load_stage2(mmu, kern_hyp_va(mmu->arch)); __activate_traps(vcpu); __hyp_vgic_restore_state(vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 76229407d8f0..d296d617f589 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -34,12 +34,12 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, } /* - * __load_guest_stage2() includes an ISB only when the AT + * __load_stage2() includes an ISB only when the AT * workaround is applied. Take care of the opposite condition, * ensuring that we always have an ISB, but not two ISBs back * to back. */ - __load_guest_stage2(mmu, kern_hyp_va(mmu->arch)); + __load_stage2(mmu, kern_hyp_va(mmu->arch)); asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 0cb7523a501a..709f6438283e 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -124,11 +124,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) * * We have already configured the guest's stage 1 translation in * kvm_vcpu_load_sysregs_vhe above. We must now call - * __load_guest_stage2 before __activate_traps, because - * __load_guest_stage2 configures stage 2 translation, and + * __load_stage2 before __activate_traps, because + * __load_stage2 configures stage 2 translation, and * __activate_traps clear HCR_EL2.TGE (among other things). */ - __load_guest_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); + __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); __activate_traps(vcpu); __kvm_adjust_pc(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index 5e9fb3989e0b..24cef9b87f9e 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -50,10 +50,10 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * * ARM erratum 1165522 requires some special handling (again), * as we need to make sure both stages of translation are in - * place before clearing TGE. __load_guest_stage2() already + * place before clearing TGE. __load_stage2() already * has an ISB in order to deal with this. */ - __load_guest_stage2(mmu, mmu->arch); + __load_stage2(mmu, mmu->arch); val = read_sysreg(hcr_el2); val &= ~HCR_TGE; write_sysreg(val, hcr_el2); -- cgit From cf364e08ea1c5dd217afb658d510aaef7d0cc6f4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 6 Aug 2021 12:31:08 +0100 Subject: KVM: arm64: Upgrade VMID accesses to {READ,WRITE}_ONCE Since TLB invalidation can run in parallel with VMID allocation, we need to be careful and avoid any sort of load/store tearing. Use {READ,WRITE}_ONCE consistently to avoid any surprise. 
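The pattern being imposed, as a kernel-style sketch (struct and helper names invented for illustration): plain concurrent loads and stores may be split, merged, or refetched by the compiler, while the _ONCE accessors force one full-width access.

#include <linux/compiler.h>
#include <linux/types.h>

struct vmid_info {
	u32 vmid; /* read locklessly, e.g. on a TLB invalidation path */
};

static void publish_vmid(struct vmid_info *v, u32 next)
{
	WRITE_ONCE(v->vmid, next); /* single full-width store, no tearing */
}

static u32 snapshot_vmid(struct vmid_info *v)
{
	return READ_ONCE(v->vmid); /* single full-width load, no refetch */
}

Note the accessors only rule out tearing; ordering against page-table updates still comes from a prior DSB, as the comment added to kvm_get_vttbr() below spells out.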
Cc: Catalin Marinas Cc: Jade Alglave Cc: Shameer Kolothum Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Reviewed-by: Quentin Perret Link: https://lore.kernel.org/r/20210806113109.2475-6-will@kernel.org --- arch/arm64/include/asm/kvm_mmu.h | 7 ++++++- arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 4 ++-- arch/arm64/kvm/mmu.c | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 08bc81f6944b..02d378887743 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -252,6 +252,11 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) +/* + * When this is (directly or indirectly) used on the TLB invalidation + * path, we rely on a previously issued DSB so that page table updates + * and VMID reads are correctly ordered. + */ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) { struct kvm_vmid *vmid = &mmu->vmid; @@ -259,7 +264,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0; baddr = mmu->pgd_phys; - vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; + vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT; return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..658f76067f46 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -571,7 +571,7 @@ static void update_vmid(struct kvm_vmid *vmid) kvm_call_hyp(__kvm_flush_vm_context); } - vmid->vmid = kvm_next_vmid; + WRITE_ONCE(vmid->vmid, kvm_next_vmid); kvm_next_vmid++; kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 36aea13c9e5a..7a0c4af186a0 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -109,8 +109,8 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); mmu->arch = &host_kvm.arch; mmu->pgt = &host_kvm.pgt; - mmu->vmid.vmid_gen = 0; - mmu->vmid.vmid = 0; + WRITE_ONCE(mmu->vmid.vmid_gen, 0); + WRITE_ONCE(mmu->vmid.vmid, 0); return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0625bf2353c2..d9152717cbd9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -485,7 +485,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) mmu->arch = &kvm->arch; mmu->pgt = pgt; mmu->pgd_phys = __pa(pgt->pgd); - mmu->vmid.vmid_gen = 0; + WRITE_ONCE(mmu->vmid.vmid_gen, 0); return 0; out_destroy_pgtable: -- cgit From 2ea7f655800b00b109951f22539fe2025add210b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:20 +0100 Subject: KVM: arm64: placeholder to check if VM is protected Add a function to check whether a VM is protected (under pKVM). Since the creation of protected VMs isn't enabled yet, this is a placeholder that always returns false. The intention is for this to become a check for protected VMs in the future (see Will's RFC). No functional change intended. 
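A hypothetical call site, to show the usage pattern the placeholder enables (illustrative only; neither this function nor the policy is part of the patch):

static int example_read_vcpu_state(struct kvm *kvm)
{
	/*
	 * Once protected VMs can actually be created, checks like this
	 * would fence the host out of protected-guest state.
	 */
	if (kvm_vm_is_protected(kvm))
		return -EPERM;

	return 0; /* ... normal, non-protected access path ... */
}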
Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/kvmarm/20210603183347.1695-1-will@kernel.org/ Link: https://lore.kernel.org/r/20210817081134.2918285-2-tabba@google.com --- arch/arm64/include/asm/kvm_host.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 41911585ae0c..347781f99b6a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -771,6 +771,11 @@ void kvm_arch_free_vm(struct kvm *kvm); int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type); +static inline bool kvm_vm_is_protected(struct kvm *kvm) +{ + return false; +} + int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); -- cgit From e6bc555c96990046d680ff92c8e2e7b6b43b509f Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:21 +0100 Subject: KVM: arm64: Remove trailing whitespace in comment Remove trailing whitespace from comment in trap_dbgauthstatus_el1(). No functional change intended. Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-3-tabba@google.com --- arch/arm64/kvm/sys_regs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f6f126eb6ac1..80a6e41cadad 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -318,14 +318,14 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, /* * We want to avoid world-switching all the DBG registers all the * time: - * + * * - If we've touched any debug register, it is likely that we're * going to touch more of them. It then makes sense to disable the * traps and start doing the save/restore dance * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is * then mandatory to save/restore the registers, as the guest * depends on them. - * + * * For this, we use a DIRTY bit, indicating the guest has modified the * debug registers, used as follow: * -- cgit From d6c850dd6ce9ce4b410142a600d8c34dc041d860 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:22 +0100 Subject: KVM: arm64: MDCR_EL2 is a 64-bit register Fix the places in KVM that treat MDCR_EL2 as a 32-bit register. More recent features (e.g., FEAT_SPEv1p2) use bits above 31. No functional change intended. 
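The bug class being fixed, as a self-contained sketch (bit 36 is only an illustrative position for a field above bit 31):

#include <stdint.h>
#include <stdio.h>

#define FIELD_LOW  (UINT64_C(1) << 19) /* reachable even with int-typed math */
#define FIELD_HIGH (UINT64_C(1) << 36) /* needs 64-bit constants and storage */

int main(void)
{
	uint64_t reg = FIELD_LOW | FIELD_HIGH;
	uint32_t narrow = (uint32_t)reg; /* the old u32 mdcr_el2 storage */

	/* prints full=1000080000 narrow=80000: bit 36 is silently lost */
	printf("full=%llx narrow=%x\n", (unsigned long long)reg, narrow);
	return 0;
}

With an int-typed (1 << n), the high field is not merely lost: a shift by 36 on a 32-bit int is undefined behaviour, which is why the hunk below upgrades every constant to UL(1).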
Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-4-tabba@google.com --- arch/arm64/include/asm/kvm_arm.h | 20 ++++++++++---------- arch/arm64/include/asm/kvm_asm.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/debug.c | 2 +- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 2 +- arch/arm64/kvm/hyp/vhe/debug-sr.c | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index d436831dd706..6a523ec83415 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -281,18 +281,18 @@ /* Hyp Debug Configuration Register bits */ #define MDCR_EL2_E2TB_MASK (UL(0x3)) #define MDCR_EL2_E2TB_SHIFT (UL(24)) -#define MDCR_EL2_TTRF (1 << 19) -#define MDCR_EL2_TPMS (1 << 14) +#define MDCR_EL2_TTRF (UL(1) << 19) +#define MDCR_EL2_TPMS (UL(1) << 14) #define MDCR_EL2_E2PB_MASK (UL(0x3)) #define MDCR_EL2_E2PB_SHIFT (UL(12)) -#define MDCR_EL2_TDRA (1 << 11) -#define MDCR_EL2_TDOSA (1 << 10) -#define MDCR_EL2_TDA (1 << 9) -#define MDCR_EL2_TDE (1 << 8) -#define MDCR_EL2_HPME (1 << 7) -#define MDCR_EL2_TPM (1 << 6) -#define MDCR_EL2_TPMCR (1 << 5) -#define MDCR_EL2_HPMN_MASK (0x1F) +#define MDCR_EL2_TDRA (UL(1) << 11) +#define MDCR_EL2_TDOSA (UL(1) << 10) +#define MDCR_EL2_TDA (UL(1) << 9) +#define MDCR_EL2_TDE (UL(1) << 8) +#define MDCR_EL2_HPME (UL(1) << 7) +#define MDCR_EL2_TPM (UL(1) << 6) +#define MDCR_EL2_TPMCR (UL(1) << 5) +#define MDCR_EL2_HPMN_MASK (UL(0x1F)) /* For compatibility with fault code shared with 32-bit */ #define FSC_FAULT ESR_ELx_FSC_FAULT diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9f0bf2109be7..63ead9060ab5 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -210,7 +210,7 @@ extern u64 __vgic_v3_read_vmcr(void); extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); -extern u32 __kvm_get_mdcr_el2(void); +extern u64 __kvm_get_mdcr_el2(void); #define __KVM_EXTABLE(from, to) \ " .pushsection __kvm_ex_table, \"a\"\n" \ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 347781f99b6a..4d2d974c1522 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -289,7 +289,7 @@ struct kvm_vcpu_arch { /* HYP configuration */ u64 hcr_el2; - u32 mdcr_el2; + u64 mdcr_el2; /* Exception Information */ struct kvm_vcpu_fault_info fault; diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index d5e79d7ee6e9..db9361338b2a 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -21,7 +21,7 @@ DBG_MDSCR_KDE | \ DBG_MDSCR_MDE) -static DEFINE_PER_CPU(u32, mdcr_el2); +static DEFINE_PER_CPU(u64, mdcr_el2); /** * save/restore_guest_debug_regs diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 7d3f25868cae..df361d839902 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -109,7 +109,7 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu) __debug_switch_to_host_common(vcpu); } -u32 __kvm_get_mdcr_el2(void) +u64 __kvm_get_mdcr_el2(void) { return read_sysreg(mdcr_el2); } diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c index f1e2e5a00933..289689b2682d 100644 --- a/arch/arm64/kvm/hyp/vhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c @@ -20,7 +20,7 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu) 
__debug_switch_to_host_common(vcpu); } -u32 __kvm_get_mdcr_el2(void) +u64 __kvm_get_mdcr_el2(void) { return read_sysreg(mdcr_el2); } -- cgit From dabb1667d8573302712a75530cccfee8f3ffff84 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:23 +0100 Subject: KVM: arm64: Fix names of config register fields Change the names of hcr_el2 register fields to match the Arm Architecture Reference Manual. Easier for cross-referencing and for grepping. Also, change the name of CPTR_EL2_RES1 to CPTR_NVHE_EL2_RES1, because res1 bits are different for VHE. No functional change intended. Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-5-tabba@google.com --- arch/arm64/include/asm/kvm_arm.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 6a523ec83415..a928b2dc0b0f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -32,9 +32,9 @@ #define HCR_TVM (UL(1) << 26) #define HCR_TTLB (UL(1) << 25) #define HCR_TPU (UL(1) << 24) -#define HCR_TPC (UL(1) << 23) +#define HCR_TPC (UL(1) << 23) /* HCR_TPCP if FEAT_DPB */ #define HCR_TSW (UL(1) << 22) -#define HCR_TAC (UL(1) << 21) +#define HCR_TACR (UL(1) << 21) #define HCR_TIDCP (UL(1) << 20) #define HCR_TSC (UL(1) << 19) #define HCR_TID3 (UL(1) << 18) @@ -61,7 +61,7 @@ * The bits we set in HCR: * TLOR: Trap LORegion register accesses * RW: 64bit by default, can be overridden for 32bit VMs - * TAC: Trap ACTLR + * TACR: Trap ACTLR * TSC: Trap SMC * TSW: Trap cache operations by set/way * TWE: Trap WFE @@ -76,7 +76,7 @@ * PTW: Take a stage2 fault if a stage1 walk steps in device memory */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ - HCR_BSU_IS | HCR_FB | HCR_TAC | \ + HCR_BSU_IS | HCR_FB | HCR_TACR | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ HCR_FMO | HCR_IMO | HCR_PTW ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) @@ -275,8 +275,8 @@ #define CPTR_EL2_TTA (1 << 20) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) #define CPTR_EL2_TZ (1 << 8) -#define CPTR_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 */ -#define CPTR_EL2_DEFAULT CPTR_EL2_RES1 +#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */ +#define CPTR_EL2_DEFAULT CPTR_NVHE_EL2_RES1 /* Hyp Debug Configuration Register bits */ #define MDCR_EL2_E2TB_MASK (UL(0x3)) -- cgit From f76f89e2f73d93720cfcad7fb7b24d022b2846bf Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:24 +0100 Subject: KVM: arm64: Refactor sys_regs.h,c for nVHE reuse Refactor sys_regs.h and sys_regs.c to make it easier to reuse common code. It will be used in nVHE in a later patch. Note that the refactored code uses __inline_bsearch for find_reg instead of bsearch to avoid copying the bsearch code for nVHE. No functional change intended. 
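For context, the lookup that moves into the header works because the descriptor tables are kept sorted by their (Op0, Op1, CRn, CRm, Op2) encoding, so finding a register is a plain binary search. A hedged user-space model of the same idea, with toy types and libc bsearch() standing in for __inline_bsearch():

    #include <stdint.h>
    #include <stdlib.h>

    struct desc {
            uint32_t encoding;      /* packed (Op0,Op1,CRn,CRm,Op2) */
            const char *name;
    };

    /* Compare the searched-for encoding against one table entry. */
    static int cmp_desc(const void *key, const void *elt)
    {
            uint32_t k = (uint32_t)(uintptr_t)key;
            const struct desc *d = elt;

            return (k > d->encoding) - (k < d->encoding);
    }

    /* O(log n) lookup over a table sorted by ascending encoding. */
    static const struct desc *find_desc(uint32_t enc,
                                        const struct desc *table, size_t num)
    {
            return bsearch((void *)(uintptr_t)enc, table, num,
                           sizeof(table[0]), cmp_desc);
    }

Keeping the search and its comparator inline is what lets the nVHE hypervisor object, which cannot call the kernel's out-of-line bsearch(), reuse the code unchanged.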
Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-6-tabba@google.com --- arch/arm64/include/asm/sysreg.h | 5 ++++ arch/arm64/kvm/sys_regs.c | 60 +++++++++++------------------------------ arch/arm64/kvm/sys_regs.h | 31 +++++++++++++++++++++ 3 files changed, 52 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7b9c3acba684..53a93a9c5253 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1153,6 +1153,11 @@ #define ICH_VTR_A3V_SHIFT 21 #define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) +#define ARM64_FEATURE_FIELD_BITS 4 + +/* Create a mask for the feature bits of the specified feature. */ +#define ARM64_FEATURE_MASK(x) (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT)) + #ifdef __ASSEMBLY__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 80a6e41cadad..b6a2f8e890db 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -44,10 +44,6 @@ * 64bit interface. */ -#define reg_to_encoding(x) \ - sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ - (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2) - static bool read_from_write_only(struct kvm_vcpu *vcpu, struct sys_reg_params *params, const struct sys_reg_desc *r) @@ -1026,8 +1022,6 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } -#define FEATURE(x) (GENMASK_ULL(x##_SHIFT + 3, x##_SHIFT)) - /* Read a sanitised cpufeature ID register by sys_reg_desc */ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r, bool raz) @@ -1038,40 +1032,40 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, switch (id) { case SYS_ID_AA64PFR0_EL1: if (!vcpu_has_sve(vcpu)) - val &= ~FEATURE(ID_AA64PFR0_SVE); - val &= ~FEATURE(ID_AA64PFR0_AMU); - val &= ~FEATURE(ID_AA64PFR0_CSV2); - val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2); - val &= ~FEATURE(ID_AA64PFR0_CSV3); - val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_AMU); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); break; case SYS_ID_AA64PFR1_EL1: - val &= ~FEATURE(ID_AA64PFR1_MTE); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); if (kvm_has_mte(vcpu->kvm)) { u64 pfr, mte; pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT); - val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), mte); } break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(FEATURE(ID_AA64ISAR1_APA) | - FEATURE(ID_AA64ISAR1_API) | - FEATURE(ID_AA64ISAR1_GPA) | - FEATURE(ID_AA64ISAR1_GPI)); + val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ - val &= ~FEATURE(ID_AA64DFR0_DEBUGVER); - val |= FIELD_PREP(FEATURE(ID_AA64DFR0_DEBUGVER), 6); + val &= 
~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), 6); /* Limit guests to PMUv3 for ARMv8.4 */ val = cpuid_feature_cap_perfmon_field(val, ID_AA64DFR0_PMUVER_SHIFT, kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVER_8_4 : 0); /* Hide SPE from guests */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER); break; case SYS_ID_DFR0_EL1: /* Limit guests to PMUv3 for ARMv8.4 */ @@ -2106,23 +2100,6 @@ static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, return 0; } -static int match_sys_reg(const void *key, const void *elt) -{ - const unsigned long pval = (unsigned long)key; - const struct sys_reg_desc *r = elt; - - return pval - reg_to_encoding(r); -} - -static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, - const struct sys_reg_desc table[], - unsigned int num) -{ - unsigned long pval = reg_to_encoding(params); - - return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); -} - int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu) { kvm_inject_undefined(vcpu); @@ -2365,13 +2342,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) trace_kvm_handle_sys_reg(esr); - params.Op0 = (esr >> 20) & 3; - params.Op1 = (esr >> 14) & 0x7; - params.CRn = (esr >> 10) & 0xf; - params.CRm = (esr >> 1) & 0xf; - params.Op2 = (esr >> 17) & 0x7; + params = esr_sys64_to_params(esr); params.regval = vcpu_get_reg(vcpu, Rt); - params.is_write = !(esr & 1); ret = emulate_sys_reg(vcpu, &params); diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 9d0621417c2a..cc0cc95a0280 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -11,6 +11,12 @@ #ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__ #define __ARM64_KVM_SYS_REGS_LOCAL_H__ +#include + +#define reg_to_encoding(x) \ + sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ + (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2) + struct sys_reg_params { u8 Op0; u8 Op1; @@ -21,6 +27,14 @@ struct sys_reg_params { bool is_write; }; +#define esr_sys64_to_params(esr) \ + ((struct sys_reg_params){ .Op0 = ((esr) >> 20) & 3, \ + .Op1 = ((esr) >> 14) & 0x7, \ + .CRn = ((esr) >> 10) & 0xf, \ + .CRm = ((esr) >> 1) & 0xf, \ + .Op2 = ((esr) >> 17) & 0x7, \ + .is_write = !((esr) & 1) }) + struct sys_reg_desc { /* Sysreg string for debug */ const char *name; @@ -152,6 +166,23 @@ static inline int cmp_sys_reg(const struct sys_reg_desc *i1, return i1->Op2 - i2->Op2; } +static inline int match_sys_reg(const void *key, const void *elt) +{ + const unsigned long pval = (unsigned long)key; + const struct sys_reg_desc *r = elt; + + return pval - reg_to_encoding(r); +} + +static inline const struct sys_reg_desc * +find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[], + unsigned int num) +{ + unsigned long pval = reg_to_encoding(params); + + return __inline_bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); +} + const struct sys_reg_desc *find_reg_by_id(u64 id, struct sys_reg_params *params, const struct sys_reg_desc table[], -- cgit From 1460b4b25fde52cbee746c11a4b1d3185f2e2847 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:25 +0100 Subject: KVM: arm64: Restore mdcr_el2 from vcpu On deactivating traps, restore the value of mdcr_el2 from the newly created and preserved host value in the vcpu context, rather than directly reading the hardware register. Up until and including this patch the two values are the same, i.e., the hardware register and the vcpu one.
A future patch will be changing the value of mdcr_el2 on activating traps, and this ensures that its value will be restored. No functional change intended. Signed-off-by: Fuad Tabba Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-7-tabba@google.com --- arch/arm64/include/asm/kvm_host.h | 5 ++++- arch/arm64/include/asm/kvm_hyp.h | 2 +- arch/arm64/kvm/hyp/include/hyp/switch.h | 6 +++++- arch/arm64/kvm/hyp/nvhe/switch.c | 13 +++++-------- arch/arm64/kvm/hyp/vhe/switch.c | 14 +++++--------- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 2 +- 6 files changed, 21 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4d2d974c1522..76462c6a91ee 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -287,10 +287,13 @@ struct kvm_vcpu_arch { /* Stage 2 paging state used by the hardware on next switch */ struct kvm_s2_mmu *hw_mmu; - /* HYP configuration */ + /* Values of trap registers for the guest. */ u64 hcr_el2; u64 mdcr_el2; + /* Values of trap registers for the host before guest entry. */ + u64 mdcr_el2_host; + /* Exception Information */ struct kvm_vcpu_fault_info fault; diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 9d60b3006efc..657d0c94cf82 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -95,7 +95,7 @@ void __sve_restore_state(void *sve_pffr, u32 *fpsr); #ifndef __KVM_NVHE_HYPERVISOR__ void activate_traps_vhe_load(struct kvm_vcpu *vcpu); -void deactivate_traps_vhe_put(void); +void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu); #endif u64 __guest_enter(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index e4a2f295a394..a0e78a6027be 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -92,11 +92,15 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(0, pmselr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); } + + vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); } -static inline void __deactivate_traps_common(void) +static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { + write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); + write_sysreg(0, hstr_el2); if (kvm_arm_support_pmu_v3()) write_sysreg(0, pmuserenr_el0); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f7af9688c1f7..2ea764a48958 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -69,12 +69,10 @@ static void __activate_traps(struct kvm_vcpu *vcpu) static void __deactivate_traps(struct kvm_vcpu *vcpu) { extern char __kvm_hyp_host_vector[]; - u64 mdcr_el2, cptr; + u64 cptr; ___deactivate_traps(vcpu); - mdcr_el2 = read_sysreg(mdcr_el2); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; @@ -92,13 +90,12 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) isb(); } - __deactivate_traps_common(); + vcpu->arch.mdcr_el2_host &= MDCR_EL2_HPMN_MASK | + MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | + MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; - mdcr_el2 &= MDCR_EL2_HPMN_MASK; - mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; - mdcr_el2 |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; + __deactivate_traps_common(vcpu); - write_sysreg(mdcr_el2, mdcr_el2); 
write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); cptr = CPTR_EL2_DEFAULT; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index b3229924d243..ec158fa41ae6 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -91,17 +91,13 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu) __activate_traps_common(vcpu); } -void deactivate_traps_vhe_put(void) +void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) { - u64 mdcr_el2 = read_sysreg(mdcr_el2); + vcpu->arch.mdcr_el2_host &= MDCR_EL2_HPMN_MASK | + MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | + MDCR_EL2_TPMS; - mdcr_el2 &= MDCR_EL2_HPMN_MASK | - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | - MDCR_EL2_TPMS; - - write_sysreg(mdcr_el2, mdcr_el2); - - __deactivate_traps_common(); + __deactivate_traps_common(vcpu); } /* Switch to the guest for VHE systems running in EL2 */ diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 2a0b8c88d74f..007a12dd4351 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -101,7 +101,7 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *host_ctxt; host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; - deactivate_traps_vhe_put(); + deactivate_traps_vhe_put(vcpu); __sysreg_save_el1_state(guest_ctxt); __sysreg_save_user_state(guest_ctxt); -- cgit From 12849badc6d2456f15f8f2c93037628d5176810b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:26 +0100 Subject: KVM: arm64: Keep mdcr_el2's value as set by __init_el2_debug __init_el2_debug configures mdcr_el2 at initialization based on, among other things, available hardware support. Trap deactivation doesn't check that, so keep the initial value. No functional change intended. Signed-off-by: Fuad Tabba Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-8-tabba@google.com --- arch/arm64/kvm/hyp/nvhe/switch.c | 4 ---- arch/arm64/kvm/hyp/vhe/switch.c | 4 ---- 2 files changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 2ea764a48958..1778593a08a9 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -90,10 +90,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) isb(); } - vcpu->arch.mdcr_el2_host &= MDCR_EL2_HPMN_MASK | - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | - MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; - __deactivate_traps_common(vcpu); write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index ec158fa41ae6..0d0c9550fb08 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -93,10 +93,6 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu) void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) { - vcpu->arch.mdcr_el2_host &= MDCR_EL2_HPMN_MASK | - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | - MDCR_EL2_TPMS; - __deactivate_traps_common(vcpu); } -- cgit From cd496228fd8de2e82b6636d3d89105631ea2b69c Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:27 +0100 Subject: KVM: arm64: Track value of cptr_el2 in struct kvm_vcpu_arch Track the baseline guest value for cptr_el2 in struct kvm_vcpu_arch, similar to the other registers that control traps. Use this value when setting cptr_el2 for the guest. 
Currently this value is unchanged (CPTR_EL2_DEFAULT), but future patches will set trapping bits based on features supported for the guest. No functional change intended. Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-9-tabba@google.com --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 76462c6a91ee..ac67d5699c68 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -290,6 +290,7 @@ struct kvm_vcpu_arch { /* Values of trap registers for the guest. */ u64 hcr_el2; u64 mdcr_el2; + u64 cptr_el2; /* Values of trap registers for the host before guest entry. */ u64 mdcr_el2_host; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..14b12f2c08c0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1104,6 +1104,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, } vcpu_reset_hcr(vcpu); + vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT; /* * Handle the "start in power-off" case. diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 1778593a08a9..86f3d6482935 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -41,7 +41,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) ___activate_traps(vcpu); __activate_traps_common(vcpu); - val = CPTR_EL2_DEFAULT; + val = vcpu->arch.cptr_el2; val |= CPTR_EL2_TTA | CPTR_EL2_TAM; if (!update_fp_enabled(vcpu)) { val |= CPTR_EL2_TFP | CPTR_EL2_TZ; -- cgit From 95b54c3e4c92b9185b15c83e8baab9ba312195f6 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:28 +0100 Subject: KVM: arm64: Add feature register flag definitions Add feature register flag definitions to clarify which features might be supported. Consolidate the various ID_AA64PFR0_ELx flags for all ELs. No functional change intended. 
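For context, every ID register field involved here is a uniform 4-bit slice (ARM64_FEATURE_FIELD_BITS == 4), which is what makes one generic mask macro workable. A stand-alone sketch of the mask-and-extract pattern, simplified from the kernel's (which also handles signed fields):

    #include <stdint.h>

    #define FEATURE_FIELD_BITS  4
    #define FEATURE_MASK(shift) \
            (((UINT64_C(1) << FEATURE_FIELD_BITS) - 1) << (shift))

    /* Read one unsigned 4-bit ID register field. */
    static inline unsigned int feature_field(uint64_t reg, unsigned int shift)
    {
            return (unsigned int)((reg & FEATURE_MASK(shift)) >> shift);
    }

With the consolidated values, a field reading 0x1 (ELx_64BIT_ONLY) or 0x2 (ELx_32BIT_64BIT) then means the same thing for EL0 and EL1; only the shift differs.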
Signed-off-by: Fuad Tabba Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-10-tabba@google.com --- arch/arm64/include/asm/cpufeature.h | 4 ++-- arch/arm64/include/asm/sysreg.h | 12 ++++++++---- arch/arm64/kernel/cpufeature.c | 8 ++++---- 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9bb9d11750d7..b7d9bb17908d 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -602,14 +602,14 @@ static inline bool id_aa64pfr0_32bit_el1(u64 pfr0) { u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT); - return val == ID_AA64PFR0_EL1_32BIT_64BIT; + return val == ID_AA64PFR0_ELx_32BIT_64BIT; } static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) { u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT); - return val == ID_AA64PFR0_EL0_32BIT_64BIT; + return val == ID_AA64PFR0_ELx_32BIT_64BIT; } static inline bool id_aa64pfr0_sve(u64 pfr0) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 53a93a9c5253..f84a00f5874d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -784,14 +784,13 @@ #define ID_AA64PFR0_AMU 0x1 #define ID_AA64PFR0_SVE 0x1 #define ID_AA64PFR0_RAS_V1 0x1 +#define ID_AA64PFR0_RAS_V1P1 0x2 #define ID_AA64PFR0_FP_NI 0xf #define ID_AA64PFR0_FP_SUPPORTED 0x0 #define ID_AA64PFR0_ASIMD_NI 0xf #define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 -#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1 -#define ID_AA64PFR0_EL1_32BIT_64BIT 0x2 -#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 -#define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 +#define ID_AA64PFR0_ELx_64BIT_ONLY 0x1 +#define ID_AA64PFR0_ELx_32BIT_64BIT 0x2 /* id_aa64pfr1 */ #define ID_AA64PFR1_MPAMFRAC_SHIFT 16 @@ -847,12 +846,16 @@ #define ID_AA64MMFR0_ASID_SHIFT 4 #define ID_AA64MMFR0_PARANGE_SHIFT 0 +#define ID_AA64MMFR0_ASID_8 0x0 +#define ID_AA64MMFR0_ASID_16 0x2 + #define ID_AA64MMFR0_TGRAN4_NI 0xf #define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 #define ID_AA64MMFR0_TGRAN64_NI 0xf #define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 #define ID_AA64MMFR0_TGRAN16_NI 0x0 #define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 +#define ID_AA64MMFR0_PARANGE_40 0x2 #define ID_AA64MMFR0_PARANGE_48 0x5 #define ID_AA64MMFR0_PARANGE_52 0x6 @@ -900,6 +903,7 @@ #define ID_AA64MMFR2_CNP_SHIFT 0 /* id_aa64dfr0 */ +#define ID_AA64DFR0_MTPMU_SHIFT 48 #define ID_AA64DFR0_TRBE_SHIFT 44 #define ID_AA64DFR0_TRACE_FILT_SHIFT 40 #define ID_AA64DFR0_DOUBLELOCK_SHIFT 36 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0ead8bfedf20..5b59fe5e26e4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -239,8 +239,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 
ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY), ARM64_FTR_END, }; @@ -1956,7 +1956,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL0_SHIFT, - .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, + .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, #ifdef CONFIG_KVM { @@ -1967,7 +1967,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL1_SHIFT, - .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT, + .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, { .desc = "Protected KVM", -- cgit From 2d701243b9f231b5d7f9a8cb81870650d3eb32bc Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:29 +0100 Subject: KVM: arm64: Add config register bit definitions Add hardware configuration register bit definitions for HCR_EL2 and MDCR_EL2. Future patches toggle these hyp configuration register bits to trap on certain accesses. No functional change intended. Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-11-tabba@google.com --- arch/arm64/include/asm/kvm_arm.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index a928b2dc0b0f..327120c0089f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -12,8 +12,13 @@ #include /* Hyp Configuration Register (HCR) bits */ + +#define HCR_TID5 (UL(1) << 58) +#define HCR_DCT (UL(1) << 57) #define HCR_ATA_SHIFT 56 #define HCR_ATA (UL(1) << HCR_ATA_SHIFT) +#define HCR_AMVOFFEN (UL(1) << 51) +#define HCR_FIEN (UL(1) << 47) #define HCR_FWB (UL(1) << 46) #define HCR_API (UL(1) << 41) #define HCR_APK (UL(1) << 40) @@ -56,6 +61,7 @@ #define HCR_PTW (UL(1) << 2) #define HCR_SWIO (UL(1) << 1) #define HCR_VM (UL(1) << 0) +#define HCR_RES0 ((UL(1) << 48) | (UL(1) << 39)) /* * The bits we set in HCR: @@ -277,11 +283,21 @@ #define CPTR_EL2_TZ (1 << 8) #define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */ #define CPTR_EL2_DEFAULT CPTR_NVHE_EL2_RES1 +#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \ + GENMASK(29, 21) | \ + GENMASK(19, 14) | \ + BIT(11)) /* Hyp Debug Configuration Register bits */ #define MDCR_EL2_E2TB_MASK (UL(0x3)) #define MDCR_EL2_E2TB_SHIFT (UL(24)) +#define MDCR_EL2_HPMFZS (UL(1) << 36) +#define MDCR_EL2_HPMFZO (UL(1) << 29) +#define MDCR_EL2_MTPME (UL(1) << 28) +#define MDCR_EL2_TDCC (UL(1) << 27) +#define MDCR_EL2_HCCD (UL(1) << 23) #define MDCR_EL2_TTRF (UL(1) << 19) +#define MDCR_EL2_HPMD (UL(1) << 17) #define MDCR_EL2_TPMS (UL(1) << 14) #define MDCR_EL2_E2PB_MASK (UL(0x3)) #define MDCR_EL2_E2PB_SHIFT (UL(12)) @@ -293,6 +309,12 @@ #define MDCR_EL2_TPM (UL(1) << 6) #define MDCR_EL2_TPMCR (UL(1) << 5) #define MDCR_EL2_HPMN_MASK (UL(0x1F)) +#define MDCR_EL2_RES0 (GENMASK(63, 37) | \ + GENMASK(35, 30) | \ + GENMASK(25, 24) | \ + GENMASK(22, 20) | \ + BIT(18) | \ + GENMASK(16, 15)) /* For compatibility with fault code shared with 32-bit */ #define FSC_FAULT ESR_ELx_FSC_FAULT -- cgit From 411d63d8c64c2f3b0c497fe4658f13b3bca951e2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Aug 2021 10:46:42 +0100 Subject: KVM: arm64: Upgrade trace_kvm_arm_set_dreg32() to 64bit A number of registers passed to trace_kvm_arm_set_dreg32() are actually 64bit.
Upgrade the tracepoint to take a 64bit value, despite the name... Signed-off-by: Marc Zyngier --- arch/arm64/kvm/trace_handle_exit.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h index 8d78acc4fba7..064a58c19f48 100644 --- a/arch/arm64/kvm/trace_handle_exit.h +++ b/arch/arm64/kvm/trace_handle_exit.h @@ -78,13 +78,17 @@ TRACE_EVENT(kvm_arm_clear_debug, TP_printk("flags: 0x%08x", __entry->guest_debug) ); +/* + * The dreg32 name is a leftover from a distant past. This will really + * output a 64bit value... + */ TRACE_EVENT(kvm_arm_set_dreg32, - TP_PROTO(const char *name, __u32 value), + TP_PROTO(const char *name, __u64 value), TP_ARGS(name, value), TP_STRUCT__entry( __field(const char *, name) - __field(__u32, value) + __field(__u64, value) ), TP_fast_assign( @@ -92,7 +96,7 @@ TRACE_EVENT(kvm_arm_set_dreg32, __entry->value = value; ), - TP_printk("%s: 0x%08x", __entry->name, __entry->value) + TP_printk("%s: 0x%llx", __entry->name, __entry->value) ); TRACE_DEFINE_SIZEOF(__u64); -- cgit From 344179fc7ef427910de438affbf3703fed51fe5a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:19 +0100 Subject: ARM: 9106/1: traps: use get_kernel_nofault instead of set_fs() ARM uses set_fs() and __get_user() to allow the stack dumping code to access possibly invalid pointers carefully. These can be changed to the simpler get_kernel_nofault(), and allow the eventual removal of set_fs(). dump_instr() will print either kernel or user space pointers, depending on how it was called. For dump_mem(), I assume we are only interested in kernel pointers, and the only time that this is called with user_mode(regs)==true is when the regs themselves are unreliable as a result of the condition that caused the trap. Reviewed-by: Christoph Hellwig Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/traps.c | 47 ++++++++++++++++------------------------------- 1 file changed, 16 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 64308e3a5d0c..10dd3ef1f398 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -122,17 +122,8 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, unsigned long top) { unsigned long first; - mm_segment_t fs; int i; - /* - * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. 
- */ - fs = get_fs(); - set_fs(KERNEL_DS); - printk("%s%s(0x%08lx to 0x%08lx)\n", lvl, str, bottom, top); for (first = bottom & ~31; first < top; first += 32) { @@ -145,7 +136,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, for (p = first, i = 0; i < 8 && p < top; i++, p += 4) { if (p >= bottom && p < top) { unsigned long val; - if (__get_user(val, (unsigned long *)p) == 0) + if (get_kernel_nofault(val, (unsigned long *)p)) sprintf(str + i * 9, " %08lx", val); else sprintf(str + i * 9, " ????????"); @@ -153,11 +144,9 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, } printk("%s%04lx:%s\n", lvl, first & 0xffff, str); } - - set_fs(fs); } -static void __dump_instr(const char *lvl, struct pt_regs *regs) +static void dump_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); const int thumb = thumb_mode(regs); @@ -173,10 +162,20 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs) for (i = -4; i < 1 + !!thumb; i++) { unsigned int val, bad; - if (thumb) - bad = get_user(val, &((u16 *)addr)[i]); - else - bad = get_user(val, &((u32 *)addr)[i]); + if (!user_mode(regs)) { + if (thumb) { + u16 val16; + bad = get_kernel_nofault(val16, &((u16 *)addr)[i]); + val = val16; + } else { + bad = get_kernel_nofault(val, &((u32 *)addr)[i]); + } + } else { + if (thumb) + bad = get_user(val, &((u16 *)addr)[i]); + else + bad = get_user(val, &((u32 *)addr)[i]); + } if (!bad) p += sprintf(p, i == 0 ? "(%0*x) " : "%0*x ", @@ -189,20 +188,6 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs) printk("%sCode: %s\n", lvl, str); } -static void dump_instr(const char *lvl, struct pt_regs *regs) -{ - mm_segment_t fs; - - if (!user_mode(regs)) { - fs = get_fs(); - set_fs(KERNEL_DS); - __dump_instr(lvl, regs); - set_fs(fs); - } else { - __dump_instr(lvl, regs); - } -} - #ifdef CONFIG_ARM_UNWIND static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, const char *loglvl) -- cgit From b6e47f3c11c17965acb2a12001af3b1cd5658f37 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:20 +0100 Subject: ARM: 9109/1: oabi-compat: add epoll_pwait handler The epoll_wait() syscall has a special version for OABI compat mode to convert the arguments to the EABI structure layout of the kernel. However, the later epoll_pwait() syscall was added in arch/arm in linux-2.6.32 without this conversion. Use the same kind of handler for both. 
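The underlying problem, reduced to a compilable sketch: EABI aligns a 64-bit member to 8 bytes while OABI aligned it to 4, so the two ABIs disagree on both the size of the event structure and the offset of its data field. The struct names below are illustrative; the attribute mirrors how the kernel declares its OABI compat structs:

    #include <stdint.h>
    #include <stdio.h>
    #include <stddef.h>

    struct eabi_event {             /* natural layout: size 16, data at 8 */
            uint32_t events;
            uint64_t data;
    };

    struct oabi_event {             /* legacy layout: size 12, data at 4 */
            uint32_t events;
            uint64_t data;
    } __attribute__((packed, aligned(4)));

    int main(void)
    {
            printf("eabi: size %zu, data at %zu\n",
                   sizeof(struct eabi_event),
                   offsetof(struct eabi_event, data));
            printf("oabi: size %zu, data at %zu\n",
                   sizeof(struct oabi_event),
                   offsetof(struct oabi_event, data));
            return 0;
    }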
Fixes: 369842658a36 ("ARM: 5677/1: ARM support for TIF_RESTORE_SIGMASK/pselect6/ppoll/epoll_pwait") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/sys_oabi-compat.c | 38 +++++++++++++++++++++++++++++++++++--- arch/arm/tools/syscall.tbl | 2 +- 2 files changed, 36 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 075a2e0ed2c1..443203fafb6b 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -265,9 +265,8 @@ asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, return do_epoll_ctl(epfd, op, fd, &kernel, false); } -asmlinkage long sys_oabi_epoll_wait(int epfd, - struct oabi_epoll_event __user *events, - int maxevents, int timeout) +static long do_oabi_epoll_wait(int epfd, struct oabi_epoll_event __user *events, + int maxevents, int timeout) { struct epoll_event *kbuf; struct oabi_epoll_event e; @@ -314,6 +313,39 @@ asmlinkage long sys_oabi_epoll_wait, } #endif +SYSCALL_DEFINE4(oabi_epoll_wait, int, epfd, + struct oabi_epoll_event __user *, events, + int, maxevents, int, timeout) +{ + return do_oabi_epoll_wait(epfd, events, maxevents, timeout); +} + +/* + * Implement the event wait interface for the eventpoll file. It is the kernel + * part of the user space epoll_pwait(2). + */ +SYSCALL_DEFINE6(oabi_epoll_pwait, int, epfd, + struct oabi_epoll_event __user *, events, int, maxevents, + int, timeout, const sigset_t __user *, sigmask, + size_t, sigsetsize) +{ + int error; + + /* + * If the caller wants a certain signal mask to be set during the wait, + * we apply it here. + */ + error = set_user_sigmask(sigmask, sigsetsize); + if (error) + return error; + + error = do_oabi_epoll_wait(epfd, events, maxevents, timeout); + restore_saved_sigmask_unless(error == -EINTR); + + return error; +} +#endif + struct oabi_sembuf { unsigned short sem_num; short sem_op; diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index c5df1179fc5d..11d0b960b2c2 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -360,7 +360,7 @@ 343 common vmsplice sys_vmsplice 344 common move_pages sys_move_pages 345 common getcpu sys_getcpu -346 common epoll_pwait sys_epoll_pwait +346 common epoll_pwait sys_epoll_pwait sys_oabi_epoll_pwait 347 common kexec_load sys_kexec_load 348 common utimensat sys_utimensat_time32 349 common signalfd sys_signalfd -- cgit From 4e57a4ddf6b0d9cce1cf2ffd153df1ad3c2c9cc2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:21 +0100 Subject: ARM: 9107/1: syscall: always store thread_info->abi_syscall The system call number is used in a couple of places, in particular ptrace, seccomp and /proc//syscall. The last one apparently never worked reliably on ARM for tasks that are not currently getting traced. Storing the syscall number in the normal entry path makes it work, as well as allowing us to see if the current system call is for OABI compat mode, which is the next thing I want to hook into. Since the thread_info->syscall field is not just the number any more, it is now renamed to abi_syscall. In kernels that enable both OABI and EABI, the upper bits of this field encode 0x900000 (__NR_OABI_SYSCALL_BASE) for OABI tasks, while normal EABI tasks do not set the upper bits. This makes it possible to implement the in_oabi_syscall() helper later.
All other users of thread_info->syscall go through the syscall_get_nr() helper, which in turn filters out the ABI bits. Note that the ABI information is lost with PTRACE_SET_SYSCALL, so one cannot set the internal number to a particular version, but this was already the case. We could change it to let gdb encode the ABI type along with the syscall in a CONFIG_OABI_COMPAT-enabled kernel, but that itself would be a (backwards-compatible) ABI change, so I don't do it here. Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/syscall.h | 5 ++++- arch/arm/include/asm/thread_info.h | 2 +- arch/arm/include/uapi/asm/unistd.h | 1 + arch/arm/kernel/asm-offsets.c | 1 + arch/arm/kernel/entry-common.S | 8 ++++++-- arch/arm/kernel/ptrace.c | 14 ++++++++------ 6 files changed, 21 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index fd02761ba06c..f055e846a5cc 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -22,7 +22,10 @@ extern const unsigned long sys_call_table[]; static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return task_thread_info(task)->syscall; + if (IS_ENABLED(CONFIG_AEABI) && !IS_ENABLED(CONFIG_OABI_COMPAT)) + return task_thread_info(task)->abi_syscall; + + return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK; } static inline void syscall_rollback(struct task_struct *task, diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 70d4cbc49ae1..17c56051747b 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -62,7 +62,7 @@ struct thread_info { unsigned long stack_canary; #endif struct cpu_context_save cpu_context; /* cpu context */ - __u32 syscall; /* syscall number */ + __u32 abi_syscall; /* ABI type and syscall nr */ __u8 used_cp[16]; /* thread used copro */ unsigned long tp_value[2]; /* TLS registers */ #ifdef CONFIG_CRUNCH diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index ae7749e15726..a1149911464c 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -15,6 +15,7 @@ #define _UAPI__ASM_ARM_UNISTD_H #define __NR_OABI_SYSCALL_BASE 0x900000 +#define __NR_SYSCALL_MASK 0x0fffff #if defined(__thumb__) || defined(__ARM_EABI__) #define __NR_SYSCALL_BASE 0 diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 70993af22d80..a0945b898ca3 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -48,6 +48,7 @@ int main(void) DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); + DEFINE(TI_ABI_SYSCALL, offsetof(struct thread_info, abi_syscall)); DEFINE(TI_USED_CP, offsetof(struct thread_info, used_cp)); DEFINE(TI_TP_VALUE, offsetof(struct thread_info, tp_value)); DEFINE(TI_FPSTATE, offsetof(struct thread_info, fpstate)); diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 7f0b7aba1498..e837af90cd44 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -226,6 +226,7 @@ ENTRY(vector_swi) /* saved_psr and saved_pc are now dead */ uaccess_disable tbl + get_thread_info tsk adr tbl, sys_call_table @ load syscall table pointer @@ -237,13 +238,17 @@ ENTRY(vector_swi) * get the old ABI syscall table 
address. */ bics r10, r10, #0xff000000 + strne r10, [tsk, #TI_ABI_SYSCALL] + streq scno, [tsk, #TI_ABI_SYSCALL] eorne scno, r10, #__NR_OABI_SYSCALL_BASE ldrne tbl, =sys_oabi_call_table #elif !defined(CONFIG_AEABI) bic scno, scno, #0xff000000 @ mask off SWI op-code + str scno, [tsk, #TI_ABI_SYSCALL] eor scno, scno, #__NR_SYSCALL_BASE @ check OS number +#else + str scno, [tsk, #TI_ABI_SYSCALL] #endif - get_thread_info tsk /* * Reload the registers that may have been corrupted on entry to * the syscall assembly (by tracing or context tracking.) @@ -288,7 +293,6 @@ ENDPROC(vector_swi) * context switches, and waiting for our parent to respond. */ __sys_trace: - mov r1, scno add r0, sp, #S_OFF bl syscall_trace_enter mov scno, r0 diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 2771e682220b..d886ea8910cb 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -25,6 +25,7 @@ #include #include +#include #include #define CREATE_TRACE_POINTS @@ -811,7 +812,8 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_SET_SYSCALL: - task_thread_info(child)->syscall = data; + task_thread_info(child)->abi_syscall = data & + __NR_SYSCALL_MASK; ret = 0; break; @@ -880,14 +882,14 @@ static void tracehook_report_syscall(struct pt_regs *regs, if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) - current_thread_info()->syscall = -1; + current_thread_info()->abi_syscall = -1; regs->ARM_ip = ip; } -asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) +asmlinkage int syscall_trace_enter(struct pt_regs *regs) { - current_thread_info()->syscall = scno; + int scno; if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); @@ -898,11 +900,11 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) return -1; #else /* XXX: remove this once OABI gets fixed */ - secure_computing_strict(current_thread_info()->syscall); + secure_computing_strict(syscall_get_nr(current, regs)); #endif /* Tracer or seccomp may have changed syscall. */ - scno = current_thread_info()->syscall; + scno = syscall_get_nr(current, regs); if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, scno); -- cgit From 249dbe74d3c4b568a623fb55c56cddf19fdf0b89 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:22 +0100 Subject: ARM: 9108/1: oabi-compat: rework epoll_wait/epoll_pwait emulation The epoll_wait() system call wrapper is one of the remaining users of the set_fs() infrastructure for Arm. Changing it to not require set_fs() is rather complex, unfortunately. The approach I'm taking here is to allow architectures to override the code that copies the output to user space, and let the oabi-compat implementation check whether it is getting called from an EABI or OABI system call based on the thread_info->syscall value. The in_oabi_syscall() check here mirrors the in_compat_syscall() and in_x32_syscall() helpers for 32-bit compat implementations on other architectures. Overall, the amount of code goes down, at least with the newly added sys_oabi_epoll_pwait() helper getting removed again. The downside is added complexity in the source code for the native implementation. There should be no difference in runtime performance except for Arm kernels with CONFIG_OABI_COMPAT enabled that now have to go through an external function call to check which of the two variants to use.
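The ABI check this relies on is a single mask test against the value the previous patch started storing. A plain-C model of the two helpers (the constants are copied from the patches; the helper names are illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    #define NR_OABI_SYSCALL_BASE 0x900000   /* __NR_OABI_SYSCALL_BASE */
    #define NR_SYSCALL_MASK      0x0fffff   /* __NR_SYSCALL_MASK */

    /* OABI entry stores a 0x900000-based number; EABI stores the raw one. */
    static inline bool in_oabi(uint32_t abi_syscall)
    {
            return (abi_syscall & NR_OABI_SYSCALL_BASE) != 0;
    }

    /* Strip the ABI bits to recover the plain syscall number. */
    static inline uint32_t syscall_nr(uint32_t abi_syscall)
    {
            return abi_syscall & NR_SYSCALL_MASK;
    }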
Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/syscall.h | 11 ++++++ arch/arm/kernel/sys_oabi-compat.c | 83 ++++++++------------------------------- arch/arm/tools/syscall.tbl | 4 +- 3 files changed, 29 insertions(+), 69 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index f055e846a5cc..24c19d63ff0a 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -28,6 +28,17 @@ static inline int syscall_get_nr(struct task_struct *task, return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK; } +static inline bool __in_oabi_syscall(struct task_struct *task) +{ + return IS_ENABLED(CONFIG_OABI_COMPAT) && + (task_thread_info(task)->abi_syscall & __NR_OABI_SYSCALL_BASE); +} + +static inline bool in_oabi_syscall(void) +{ + return __in_oabi_syscall(current); +} + static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 443203fafb6b..1f6a433200f1 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -83,6 +83,8 @@ #include #include +#include + struct oldabi_stat64 { unsigned long long st_dev; unsigned int __pad1; @@ -264,87 +266,34 @@ asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, return do_epoll_ctl(epfd, op, fd, &kernel, false); } - -static long do_oabi_epoll_wait(int epfd, struct oabi_epoll_event __user *events, - int maxevents, int timeout) -{ - struct epoll_event *kbuf; - struct oabi_epoll_event e; - mm_segment_t fs; - long ret, err, i; - - if (maxevents <= 0 || - maxevents > (INT_MAX/sizeof(*kbuf)) || - maxevents > (INT_MAX/sizeof(*events))) - return -EINVAL; - if (!access_ok(events, sizeof(*events) * maxevents)) - return -EFAULT; - kbuf = kmalloc_array(maxevents, sizeof(*kbuf), GFP_KERNEL); - if (!kbuf) - return -ENOMEM; - fs = get_fs(); - set_fs(KERNEL_DS); - ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout); - set_fs(fs); - err = 0; - for (i = 0; i < ret; i++) { - e.events = kbuf[i].events; - e.data = kbuf[i].data; - err = __copy_to_user(events, &e, sizeof(e)); - if (err) - break; - events++; - } - kfree(kbuf); - return err ? -EFAULT : ret; -} #else asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, struct oabi_epoll_event __user *event) { return -EINVAL; } - -asmlinkage long sys_oabi_epoll_wait(int epfd, - struct oabi_epoll_event __user *events, - int maxevents, int timeout) -{ - return -EINVAL; -} #endif -SYSCALL_DEFINE4(oabi_epoll_wait, int, epfd, - struct oabi_epoll_event __user *, events, - int, maxevents, int, timeout) +struct epoll_event __user * +epoll_put_uevent(__poll_t revents, __u64 data, + struct epoll_event __user *uevent) { - return do_oabi_epoll_wait(epfd, events, maxevents, timeout); -} + if (in_oabi_syscall()) { + struct oabi_epoll_event __user *oevent = (void __user *)uevent; -/* - * Implement the event wait interface for the eventpoll file. It is the kernel - * part of the user space epoll_pwait(2). - */ -SYSCALL_DEFINE6(oabi_epoll_pwait, int, epfd, - struct oabi_epoll_event __user *, events, int, maxevents, - int, timeout, const sigset_t __user *, sigmask, - size_t, sigsetsize) -{ - int error; + if (__put_user(revents, &oevent->events) || + __put_user(data, &oevent->data)) + return NULL; - /* - * If the caller wants a certain signal mask to be set during the wait, - * we apply it here. 
- */ - error = set_user_sigmask(sigmask, sigsetsize); - if (error) - return error; + return (void __user *)(oevent+1); + } - error = do_oabi_epoll_wait(epfd, events, maxevents, timeout); - restore_saved_sigmask_unless(error == -EINTR); + if (__put_user(revents, &uevent->events) || + __put_user(data, &uevent->data)) + return NULL; - return error; + return uevent+1; } -#endif struct oabi_sembuf { unsigned short sem_num; diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 11d0b960b2c2..344424a9611f 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -266,7 +266,7 @@ 249 common lookup_dcookie sys_lookup_dcookie 250 common epoll_create sys_epoll_create 251 common epoll_ctl sys_epoll_ctl sys_oabi_epoll_ctl -252 common epoll_wait sys_epoll_wait sys_oabi_epoll_wait +252 common epoll_wait sys_epoll_wait 253 common remap_file_pages sys_remap_file_pages # 254 for set_thread_area # 255 for get_thread_area @@ -360,7 +360,7 @@ 343 common vmsplice sys_vmsplice 344 common move_pages sys_move_pages 345 common getcpu sys_getcpu -346 common epoll_pwait sys_epoll_pwait sys_oabi_epoll_pwait +346 common epoll_pwait sys_epoll_pwait 347 common kexec_load sys_kexec_load 348 common utimensat sys_utimensat_time32 349 common signalfd sys_signalfd -- cgit From bdec0145286f7e6be9b3134aa35f0f335fa27c38 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:23 +0100 Subject: ARM: 9114/1: oabi-compat: rework sys_semtimedop emulation sys_oabi_semtimedop() is one of the last users of set_fs() on Arm. To remove this one, expose the internal code of the actual implementation that operates on a kernel pointer and call it directly after copying. There should be no measurable impact on the normal execution of this function, and it makes the overly long function a little shorter, which may help readability. While reworking the oabi version, make it behave a little more like the native one, using kvmalloc_array() and restructure the code flow in a similar way. The naming of __do_semtimedop() is not very good, I hope someone can come up with a better name. One regression was spotted by kernel test robot and fixed before the first mailing list submission. 
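Why the array still has to be converted element by element rather than copied wholesale, as a compilable sketch: the legacy OABI layout carries a trailing pad word the native struct lacks, so the element strides differ (8 bytes vs 6). The structs mirror the ones in the patch and the uapi sembuf; the loop is illustrative:

    #include <stddef.h>

    struct oabi_sembuf {            /* legacy ABI: sizeof == 8 */
            unsigned short sem_num;
            short          sem_op;
            short          sem_flg;
            unsigned short __pad;
    };

    struct native_sembuf {          /* like struct sembuf: sizeof == 6 */
            unsigned short sem_num;
            short          sem_op;
            short          sem_flg;
    };

    static void convert_sembufs(struct native_sembuf *dst,
                                const struct oabi_sembuf *src, size_t n)
    {
            for (size_t i = 0; i < n; i++) {
                    dst[i].sem_num = src[i].sem_num;
                    dst[i].sem_op  = src[i].sem_op;
                    dst[i].sem_flg = src[i].sem_flg;
            }
    }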
Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/sys_oabi-compat.c | 60 ++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 1f6a433200f1..5ea365c35ca5 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include @@ -302,46 +303,52 @@ struct oabi_sembuf { unsigned short __pad; }; +#define sc_semopm sem_ctls[2] + +#ifdef CONFIG_SYSVIPC asmlinkage long sys_oabi_semtimedop(int semid, struct oabi_sembuf __user *tsops, unsigned nsops, const struct old_timespec32 __user *timeout) { + struct ipc_namespace *ns; struct sembuf *sops; - struct old_timespec32 local_timeout; long err; int i; + ns = current->nsproxy->ipc_ns; + if (nsops > ns->sc_semopm) + return -E2BIG; if (nsops < 1 || nsops > SEMOPM) return -EINVAL; - if (!access_ok(tsops, sizeof(*tsops) * nsops)) - return -EFAULT; - sops = kmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); + sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); if (!sops) return -ENOMEM; err = 0; for (i = 0; i < nsops; i++) { struct oabi_sembuf osb; - err |= __copy_from_user(&osb, tsops, sizeof(osb)); + err |= copy_from_user(&osb, tsops, sizeof(osb)); sops[i].sem_num = osb.sem_num; sops[i].sem_op = osb.sem_op; sops[i].sem_flg = osb.sem_flg; tsops++; } - if (timeout) { - /* copy this as well before changing domain protection */ - err |= copy_from_user(&local_timeout, timeout, sizeof(*timeout)); - timeout = &local_timeout; - } if (err) { err = -EFAULT; - } else { - mm_segment_t fs = get_fs(); - set_fs(KERNEL_DS); - err = sys_semtimedop_time32(semid, sops, nsops, timeout); - set_fs(fs); + goto out; + } + + if (timeout) { + struct timespec64 ts; + err = get_old_timespec32(&ts, timeout); + if (err) + goto out; + err = __do_semtimedop(semid, sops, nsops, &ts, ns); + goto out; } - kfree(sops); + err = __do_semtimedop(semid, sops, nsops, NULL, ns); +out: + kvfree(sops); return err; } @@ -368,6 +375,27 @@ asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third, return sys_ipc(call, first, second, third, ptr, fifth); } } +#else +asmlinkage long sys_oabi_semtimedop(int semid, + struct oabi_sembuf __user *tsops, + unsigned nsops, + const struct old_timespec32 __user *timeout) +{ + return -ENOSYS; +} + +asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops, + unsigned nsops) +{ + return -ENOSYS; +} + +asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third, + void __user *ptr, long fifth) +{ + return -ENOSYS; +} +#endif asmlinkage long sys_oabi_bind(int fd, struct sockaddr __user *addr, int addrlen) { -- cgit From 7e2d8c29ecdd86afbcedb9d9a977bab8af527add Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:24 +0100 Subject: ARM: 9111/1: oabi-compat: rework fcntl64() emulation This is one of the last users of get_fs(), and this is fairly easy to change, since the infrastructure for it is already there. The replacement here is essentially a copy of the existing fcntl64() syscall entry function. 
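The shape of the rework, as a hedged user-space sketch: convert the foreign record into a native-layout kernel copy, run the native operation, and convert back only for the "get" style commands, leaving no step that needs a set_fs()/KERNEL_DS window. All names are illustrative stand-ins for get_oabi_flock()/fcntl_getlk64()/put_oabi_flock():

    struct oabi_rec {               /* foreign, packed OABI layout */
            short     type;
            long long start;
    } __attribute__((packed, aligned(4)));

    struct native_rec {             /* natural kernel layout */
            short     type;
            long long start;
    };

    static void get_rec(struct native_rec *k, const struct oabi_rec *u)
    {
            k->type  = u->type;     /* field-by-field copy-in */
            k->start = u->start;
    }

    static void put_rec(const struct native_rec *k, struct oabi_rec *u)
    {
            u->type  = k->type;     /* copy-out, only for "get" commands */
            u->start = k->start;
    }

    static int do_native_getlk(struct native_rec *k)
    {
            k->start = 0;           /* stand-in for the real lock query */
            return 0;
    }

    static int compat_getlk(struct oabi_rec *u)
    {
            struct native_rec k;
            int err;

            get_rec(&k, u);              /* convert in */
            err = do_native_getlk(&k);   /* operate on kernel memory only */
            if (!err)
                    put_rec(&k, u);      /* convert out */
            return err;
    }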
Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/sys_oabi-compat.c | 93 +++++++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 5ea365c35ca5..223ee46b6e75 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -194,56 +194,83 @@ struct oabi_flock64 { pid_t l_pid; } __attribute__ ((packed,aligned(4))); -static long do_locks(unsigned int fd, unsigned int cmd, - unsigned long arg) +static int get_oabi_flock(struct flock64 *kernel, struct oabi_flock64 __user *arg) { - struct flock64 kernel; struct oabi_flock64 user; - mm_segment_t fs; - long ret; if (copy_from_user(&user, (struct oabi_flock64 __user *)arg, sizeof(user))) return -EFAULT; - kernel.l_type = user.l_type; - kernel.l_whence = user.l_whence; - kernel.l_start = user.l_start; - kernel.l_len = user.l_len; - kernel.l_pid = user.l_pid; - - fs = get_fs(); - set_fs(KERNEL_DS); - ret = sys_fcntl64(fd, cmd, (unsigned long)&kernel); - set_fs(fs); - - if (!ret && (cmd == F_GETLK64 || cmd == F_OFD_GETLK)) { - user.l_type = kernel.l_type; - user.l_whence = kernel.l_whence; - user.l_start = kernel.l_start; - user.l_len = kernel.l_len; - user.l_pid = kernel.l_pid; - if (copy_to_user((struct oabi_flock64 __user *)arg, - &user, sizeof(user))) - ret = -EFAULT; - } - return ret; + + kernel->l_type = user.l_type; + kernel->l_whence = user.l_whence; + kernel->l_start = user.l_start; + kernel->l_len = user.l_len; + kernel->l_pid = user.l_pid; + + return 0; +} + +static int put_oabi_flock(struct flock64 *kernel, struct oabi_flock64 __user *arg) +{ + struct oabi_flock64 user; + + user.l_type = kernel->l_type; + user.l_whence = kernel->l_whence; + user.l_start = kernel->l_start; + user.l_len = kernel->l_len; + user.l_pid = kernel->l_pid; + + if (copy_to_user((struct oabi_flock64 __user *)arg, + &user, sizeof(user))) + return -EFAULT; + + return 0; } asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg) { + void __user *argp = (void __user *)arg; + struct fd f = fdget_raw(fd); + struct flock64 flock; + long err = -EBADF; + + if (!f.file) + goto out; + switch (cmd) { - case F_OFD_GETLK: - case F_OFD_SETLK: - case F_OFD_SETLKW: case F_GETLK64: + case F_OFD_GETLK: + err = security_file_fcntl(f.file, cmd, arg); + if (err) + break; + err = get_oabi_flock(&flock, argp); + if (err) + break; + err = fcntl_getlk64(f.file, cmd, &flock); + if (!err) + err = put_oabi_flock(&flock, argp); + break; case F_SETLK64: case F_SETLKW64: - return do_locks(fd, cmd, arg); - + case F_OFD_SETLK: + case F_OFD_SETLKW: + err = security_file_fcntl(f.file, cmd, arg); + if (err) + break; + err = get_oabi_flock(&flock, argp); + if (err) + break; + err = fcntl_setlk64(fd, f.file, cmd, &flock); + break; default: - return sys_fcntl64(fd, cmd, arg); + err = sys_fcntl64(fd, cmd, arg); + break; } + fdput(f); +out: + return err; } struct oabi_epoll_event { -- cgit From 2df4c9a741a0b5e2883cc356c1b724d1f739fb55 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:25 +0100 Subject: ARM: 9112/1: uaccess: add __{get,put}_kernel_nofault These mimic the behavior of get_user and put_user, except for domain switching, address limit checking and handling of mismatched sizes, none of which are relevant here. 
To work with pre-Armv6 kernels, this has to avoid TUSER() inside of the new macros, the new approach passes the "t" string along with the opcode, which is a bit uglier but avoids duplicating more code. As there is no __get_user_asm_dword(), I work around it by copying 32 bit at a time, which is possible because the output size is known. Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/uaccess.h | 123 +++++++++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index a13d90206472..4f60638755c4 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -308,11 +308,11 @@ static inline void set_fs(mm_segment_t fs) #define __get_user(x, ptr) \ ({ \ long __gu_err = 0; \ - __get_user_err((x), (ptr), __gu_err); \ + __get_user_err((x), (ptr), __gu_err, TUSER()); \ __gu_err; \ }) -#define __get_user_err(x, ptr, err) \ +#define __get_user_err(x, ptr, err, __t) \ do { \ unsigned long __gu_addr = (unsigned long)(ptr); \ unsigned long __gu_val; \ @@ -321,18 +321,19 @@ do { \ might_fault(); \ __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(ptr))) { \ - case 1: __get_user_asm_byte(__gu_val, __gu_addr, err); break; \ - case 2: __get_user_asm_half(__gu_val, __gu_addr, err); break; \ - case 4: __get_user_asm_word(__gu_val, __gu_addr, err); break; \ + case 1: __get_user_asm_byte(__gu_val, __gu_addr, err, __t); break; \ + case 2: __get_user_asm_half(__gu_val, __gu_addr, err, __t); break; \ + case 4: __get_user_asm_word(__gu_val, __gu_addr, err, __t); break; \ default: (__gu_val) = __get_user_bad(); \ } \ uaccess_restore(__ua_flags); \ (x) = (__typeof__(*(ptr)))__gu_val; \ } while (0) +#endif #define __get_user_asm(x, addr, err, instr) \ __asm__ __volatile__( \ - "1: " TUSER(instr) " %1, [%2], #0\n" \ + "1: " instr " %1, [%2], #0\n" \ "2:\n" \ " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ @@ -348,40 +349,38 @@ do { \ : "r" (addr), "i" (-EFAULT) \ : "cc") -#define __get_user_asm_byte(x, addr, err) \ - __get_user_asm(x, addr, err, ldrb) +#define __get_user_asm_byte(x, addr, err, __t) \ + __get_user_asm(x, addr, err, "ldrb" __t) #if __LINUX_ARM_ARCH__ >= 6 -#define __get_user_asm_half(x, addr, err) \ - __get_user_asm(x, addr, err, ldrh) +#define __get_user_asm_half(x, addr, err, __t) \ + __get_user_asm(x, addr, err, "ldrh" __t) #else #ifndef __ARMEB__ -#define __get_user_asm_half(x, __gu_addr, err) \ +#define __get_user_asm_half(x, __gu_addr, err, __t) \ ({ \ unsigned long __b1, __b2; \ - __get_user_asm_byte(__b1, __gu_addr, err); \ - __get_user_asm_byte(__b2, __gu_addr + 1, err); \ + __get_user_asm_byte(__b1, __gu_addr, err, __t); \ + __get_user_asm_byte(__b2, __gu_addr + 1, err, __t); \ (x) = __b1 | (__b2 << 8); \ }) #else -#define __get_user_asm_half(x, __gu_addr, err) \ +#define __get_user_asm_half(x, __gu_addr, err, __t) \ ({ \ unsigned long __b1, __b2; \ - __get_user_asm_byte(__b1, __gu_addr, err); \ - __get_user_asm_byte(__b2, __gu_addr + 1, err); \ + __get_user_asm_byte(__b1, __gu_addr, err, __t); \ + __get_user_asm_byte(__b2, __gu_addr + 1, err, __t); \ (x) = (__b1 << 8) | __b2; \ }) #endif #endif /* __LINUX_ARM_ARCH__ >= 6 */ -#define __get_user_asm_word(x, addr, err) \ - __get_user_asm(x, addr, err, ldr) -#endif - +#define __get_user_asm_word(x, addr, err, __t) \ + __get_user_asm(x, addr, err, "ldr" __t) #define __put_user_switch(x, ptr, __err, __fn) \ do { \ @@ 
-425,7 +424,7 @@ do { \ #define __put_user_nocheck(x, __pu_ptr, __err, __size) \ do { \ unsigned long __pu_addr = (unsigned long)__pu_ptr; \ - __put_user_nocheck_##__size(x, __pu_addr, __err); \ + __put_user_nocheck_##__size(x, __pu_addr, __err, TUSER());\ } while (0) #define __put_user_nocheck_1 __put_user_asm_byte @@ -433,9 +432,11 @@ do { \ #define __put_user_nocheck_4 __put_user_asm_word #define __put_user_nocheck_8 __put_user_asm_dword +#endif /* !CONFIG_CPU_SPECTRE */ + #define __put_user_asm(x, __pu_addr, err, instr) \ __asm__ __volatile__( \ - "1: " TUSER(instr) " %1, [%2], #0\n" \ + "1: " instr " %1, [%2], #0\n" \ "2:\n" \ " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ @@ -450,36 +451,36 @@ do { \ : "r" (x), "r" (__pu_addr), "i" (-EFAULT) \ : "cc") -#define __put_user_asm_byte(x, __pu_addr, err) \ - __put_user_asm(x, __pu_addr, err, strb) +#define __put_user_asm_byte(x, __pu_addr, err, __t) \ + __put_user_asm(x, __pu_addr, err, "strb" __t) #if __LINUX_ARM_ARCH__ >= 6 -#define __put_user_asm_half(x, __pu_addr, err) \ - __put_user_asm(x, __pu_addr, err, strh) +#define __put_user_asm_half(x, __pu_addr, err, __t) \ + __put_user_asm(x, __pu_addr, err, "strh" __t) #else #ifndef __ARMEB__ -#define __put_user_asm_half(x, __pu_addr, err) \ +#define __put_user_asm_half(x, __pu_addr, err, __t) \ ({ \ unsigned long __temp = (__force unsigned long)(x); \ - __put_user_asm_byte(__temp, __pu_addr, err); \ - __put_user_asm_byte(__temp >> 8, __pu_addr + 1, err); \ + __put_user_asm_byte(__temp, __pu_addr, err, __t); \ + __put_user_asm_byte(__temp >> 8, __pu_addr + 1, err, __t);\ }) #else -#define __put_user_asm_half(x, __pu_addr, err) \ +#define __put_user_asm_half(x, __pu_addr, err, __t) \ ({ \ unsigned long __temp = (__force unsigned long)(x); \ - __put_user_asm_byte(__temp >> 8, __pu_addr, err); \ - __put_user_asm_byte(__temp, __pu_addr + 1, err); \ + __put_user_asm_byte(__temp >> 8, __pu_addr, err, __t); \ + __put_user_asm_byte(__temp, __pu_addr + 1, err, __t); \ }) #endif #endif /* __LINUX_ARM_ARCH__ >= 6 */ -#define __put_user_asm_word(x, __pu_addr, err) \ - __put_user_asm(x, __pu_addr, err, str) +#define __put_user_asm_word(x, __pu_addr, err, __t) \ + __put_user_asm(x, __pu_addr, err, "str" __t) #ifndef __ARMEB__ #define __reg_oper0 "%R2" @@ -489,12 +490,12 @@ do { \ #define __reg_oper1 "%R2" #endif -#define __put_user_asm_dword(x, __pu_addr, err) \ +#define __put_user_asm_dword(x, __pu_addr, err, __t) \ __asm__ __volatile__( \ - ARM( "1: " TUSER(str) " " __reg_oper1 ", [%1], #4\n" ) \ - ARM( "2: " TUSER(str) " " __reg_oper0 ", [%1]\n" ) \ - THUMB( "1: " TUSER(str) " " __reg_oper1 ", [%1]\n" ) \ - THUMB( "2: " TUSER(str) " " __reg_oper0 ", [%1, #4]\n" ) \ + ARM( "1: str" __t " " __reg_oper1 ", [%1], #4\n" ) \ + ARM( "2: str" __t " " __reg_oper0 ", [%1]\n" ) \ + THUMB( "1: str" __t " " __reg_oper1 ", [%1]\n" ) \ + THUMB( "2: str" __t " " __reg_oper0 ", [%1, #4]\n" ) \ "3:\n" \ " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ @@ -510,7 +511,49 @@ do { \ : "r" (x), "i" (-EFAULT) \ : "cc") -#endif /* !CONFIG_CPU_SPECTRE */ +#define HAVE_GET_KERNEL_NOFAULT + +#define __get_kernel_nofault(dst, src, type, err_label) \ +do { \ + const type *__pk_ptr = (src); \ + unsigned long __src = (unsigned long)(__pk_ptr); \ + type __val; \ + int __err = 0; \ + switch (sizeof(type)) { \ + case 1: __get_user_asm_byte(__val, __src, __err, ""); break; \ + case 2: __get_user_asm_half(__val, __src, __err, ""); break; \ + case 4: __get_user_asm_word(__val, __src, __err, ""); break; \ + case 8: { \ + u32 
*__v32 = (u32*)&__val; \ + __get_user_asm_word(__v32[0], __src, __err, ""); \ + if (__err) \ + break; \ + __get_user_asm_word(__v32[1], __src+4, __err, ""); \ + break; \ + } \ + default: __err = __get_user_bad(); break; \ + } \ + *(type *)(dst) = __val; \ + if (__err) \ + goto err_label; \ +} while (0) + +#define __put_kernel_nofault(dst, src, type, err_label) \ +do { \ + const type *__pk_ptr = (dst); \ + unsigned long __dst = (unsigned long)__pk_ptr; \ + int __err = 0; \ + type __val = *(type *)src; \ + switch (sizeof(type)) { \ + case 1: __put_user_asm_byte(__val, __dst, __err, ""); break; \ + case 2: __put_user_asm_half(__val, __dst, __err, ""); break; \ + case 4: __put_user_asm_word(__val, __dst, __err, ""); break; \ + case 8: __put_user_asm_dword(__val, __dst, __err, ""); break; \ + default: __err = __put_user_bad(); break; \ + } \ + if (__err) \ + goto err_label; \ +} while (0) #ifdef CONFIG_MMU extern unsigned long __must_check -- cgit From 8ac6f5d7f84bf362e67591708bcb9788cdc42c50 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:26 +0100 Subject: ARM: 9113/1: uaccess: remove set_fs() implementation There are no remaining callers of set_fs(), so just remove it along with all associated code that operates on thread_info->addr_limit. There are still further optimizations that can be done: - In get_user(), the address check could be moved entirely into the out of line code, rather than passing a constant as an argument, - I assume the DACR handling can be simplified as we now only change it during user access when CONFIG_CPU_SW_DOMAIN_PAN is set, but not during set_fs(). Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/Kconfig | 1 - arch/arm/include/asm/ptrace.h | 1 - arch/arm/include/asm/thread_info.h | 4 ---- arch/arm/include/asm/uaccess-asm.h | 6 ----- arch/arm/include/asm/uaccess.h | 46 ++++---------------------------------- arch/arm/kernel/asm-offsets.c | 2 -- arch/arm/kernel/entry-common.S | 12 ---------- arch/arm/kernel/process.c | 7 +----- arch/arm/kernel/signal.c | 8 ------- arch/arm/lib/copy_from_user.S | 3 +-- arch/arm/lib/copy_to_user.S | 3 +-- 11 files changed, 7 insertions(+), 86 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3ea1c417339f..51897e4cc413 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -126,7 +126,6 @@ config ARM select PCI_SYSCALL if PCI select PERF_USE_VMALLOC select RTC_LIB - select SET_FS select SYS_SUPPORTS_APM_EMULATION # Above selects are sorted alphabetically; please add new ones # according to that. Thanks. 
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 91d6b7856be4..93051e2f402c 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -19,7 +19,6 @@ struct pt_regs { struct svc_pt_regs { struct pt_regs regs; u32 dacr; - u32 addr_limit; }; #define to_svc_pt_regs(r) container_of(r, struct svc_pt_regs, regs) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 17c56051747b..d89931aed59f 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -31,8 +31,6 @@ struct task_struct; #include -typedef unsigned long mm_segment_t; - struct cpu_context_save { __u32 r4; __u32 r5; @@ -54,7 +52,6 @@ struct cpu_context_save { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ - mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ __u32 cpu; /* cpu */ __u32 cpu_domain; /* cpu domain */ @@ -80,7 +77,6 @@ struct thread_info { .task = &tsk, \ .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .addr_limit = KERNEL_DS, \ } /* diff --git a/arch/arm/include/asm/uaccess-asm.h b/arch/arm/include/asm/uaccess-asm.h index e6eb7a2aaf1e..6451a433912c 100644 --- a/arch/arm/include/asm/uaccess-asm.h +++ b/arch/arm/include/asm/uaccess-asm.h @@ -84,12 +84,8 @@ * if \disable is set. */ .macro uaccess_entry, tsk, tmp0, tmp1, tmp2, disable - ldr \tmp1, [\tsk, #TI_ADDR_LIMIT] - ldr \tmp2, =TASK_SIZE - str \tmp2, [\tsk, #TI_ADDR_LIMIT] DACR( mrc p15, 0, \tmp0, c3, c0, 0) DACR( str \tmp0, [sp, #SVC_DACR]) - str \tmp1, [sp, #SVC_ADDR_LIMIT] .if \disable && IS_ENABLED(CONFIG_CPU_SW_DOMAIN_PAN) /* kernel=client, user=no access */ mov \tmp2, #DACR_UACCESS_DISABLE @@ -106,9 +102,7 @@ /* Restore the user access state previously saved by uaccess_entry */ .macro uaccess_exit, tsk, tmp0, tmp1 - ldr \tmp1, [sp, #SVC_ADDR_LIMIT] DACR( ldr \tmp0, [sp, #SVC_DACR]) - str \tmp1, [\tsk, #TI_ADDR_LIMIT] DACR( mcr p15, 0, \tmp0, c3, c0, 0) .endm diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 4f60638755c4..084d1c07c2d0 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -52,32 +52,8 @@ static __always_inline void uaccess_restore(unsigned int flags) extern int __get_user_bad(void); extern int __put_user_bad(void); -/* - * Note that this is actually 0x1,0000,0000 - */ -#define KERNEL_DS 0x00000000 - #ifdef CONFIG_MMU -#define USER_DS TASK_SIZE -#define get_fs() (current_thread_info()->addr_limit) - -static inline void set_fs(mm_segment_t fs) -{ - current_thread_info()->addr_limit = fs; - - /* - * Prevent a mispredicted conditional call to set_fs from forwarding - * the wrong address limit to access_ok under speculation. - */ - dsb(nsh); - isb(); - - modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER); -} - -#define uaccess_kernel() (get_fs() == KERNEL_DS) - /* * We use 33-bit arithmetic here. Success returns zero, failure returns * addr_limit. 
We take advantage that addr_limit will be zero for KERNEL_DS, @@ -89,7 +65,7 @@ static inline void set_fs(mm_segment_t fs) __asm__(".syntax unified\n" \ "adds %1, %2, %3; sbcscc %1, %1, %0; movcc %0, #0" \ : "=&r" (flag), "=&r" (roksum) \ - : "r" (addr), "Ir" (size), "0" (current_thread_info()->addr_limit) \ + : "r" (addr), "Ir" (size), "0" (TASK_SIZE) \ : "cc"); \ flag; }) @@ -120,7 +96,7 @@ static inline void __user *__uaccess_mask_range_ptr(const void __user *ptr, " subshs %1, %1, %2\n" " movlo %0, #0\n" : "+r" (safe_ptr), "=&r" (tmp) - : "r" (size), "r" (current_thread_info()->addr_limit) + : "r" (size), "r" (TASK_SIZE) : "cc"); csdb(); @@ -194,7 +170,7 @@ extern int __get_user_64t_4(void *); #define __get_user_check(x, p) \ ({ \ - unsigned long __limit = current_thread_info()->addr_limit - 1; \ + unsigned long __limit = TASK_SIZE - 1; \ register typeof(*(p)) __user *__p asm("r0") = (p); \ register __inttype(x) __r2 asm("r2"); \ register unsigned long __l asm("r1") = __limit; \ @@ -245,7 +221,7 @@ extern int __put_user_8(void *, unsigned long long); #define __put_user_check(__pu_val, __ptr, __err, __s) \ ({ \ - unsigned long __limit = current_thread_info()->addr_limit - 1; \ + unsigned long __limit = TASK_SIZE - 1; \ register typeof(__pu_val) __r2 asm("r2") = __pu_val; \ register const void __user *__p asm("r0") = __ptr; \ register unsigned long __l asm("r1") = __limit; \ @@ -262,19 +238,8 @@ extern int __put_user_8(void *, unsigned long long); #else /* CONFIG_MMU */ -/* - * uClinux has only one addr space, so has simplified address limits. - */ -#define USER_DS KERNEL_DS - -#define uaccess_kernel() (true) #define __addr_ok(addr) ((void)(addr), 1) #define __range_ok(addr, size) ((void)(addr), 0) -#define get_fs() (KERNEL_DS) - -static inline void set_fs(mm_segment_t fs) -{ -} #define get_user(x, p) __get_user(x, p) #define __put_user_check __put_user_nocheck @@ -283,9 +248,6 @@ static inline void set_fs(mm_segment_t fs) #define access_ok(addr, size) (__range_ok(addr, size) == 0) -#define user_addr_max() \ - (uaccess_kernel() ? 
~0UL : get_fs()) - #ifdef CONFIG_CPU_SPECTRE /* * When mitigating Spectre variant 1, it is not worth fixing the non- diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index a0945b898ca3..c60fefabc868 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -43,7 +43,6 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); @@ -92,7 +91,6 @@ int main(void) DEFINE(S_OLD_R0, offsetof(struct pt_regs, ARM_ORIG_r0)); DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); DEFINE(SVC_DACR, offsetof(struct svc_pt_regs, dacr)); - DEFINE(SVC_ADDR_LIMIT, offsetof(struct svc_pt_regs, addr_limit)); DEFINE(SVC_REGS_SIZE, sizeof(struct svc_pt_regs)); BLANK(); DEFINE(SIGFRAME_RC3_OFFSET, offsetof(struct sigframe, retcode[3])); diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index e837af90cd44..d9c99db50243 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -49,10 +49,6 @@ __ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts - ldr r2, [tsk, #TI_ADDR_LIMIT] - ldr r1, =TASK_SIZE - cmp r2, r1 - blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing movs r1, r1, lsl #16 bne fast_work_pending @@ -87,10 +83,6 @@ __ret_fast_syscall: bl do_rseq_syscall #endif disable_irq_notrace @ disable interrupts - ldr r2, [tsk, #TI_ADDR_LIMIT] - ldr r1, =TASK_SIZE - cmp r2, r1 - blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing movs r1, r1, lsl #16 beq no_work_pending @@ -129,10 +121,6 @@ ret_slow_syscall: #endif disable_irq_notrace @ disable interrupts ENTRY(ret_to_user_from_irq) - ldr r2, [tsk, #TI_ADDR_LIMIT] - ldr r1, =TASK_SIZE - cmp r2, r1 - blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] movs r1, r1, lsl #16 bne slow_work_pending diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index fc9e8b37eaa8..581542fbf5bf 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -108,7 +108,7 @@ void __show_regs(struct pt_regs *regs) unsigned long flags; char buf[64]; #ifndef CONFIG_CPU_V7M - unsigned int domain, fs; + unsigned int domain; #ifdef CONFIG_CPU_SW_DOMAIN_PAN /* * Get the domain register for the parent context. 
In user
@@ -117,14 +117,11 @@
 */
 if (user_mode(regs)) {
 domain = DACR_UACCESS_ENABLE;
- fs = get_fs();
 } else {
 domain = to_svc_pt_regs(regs)->dacr;
- fs = to_svc_pt_regs(regs)->addr_limit;
 }
 #else
 domain = get_domain();
- fs = get_fs();
 #endif
 #endif

@@ -160,8 +157,6 @@ void __show_regs(struct pt_regs *regs)
 if ((domain & domain_mask(DOMAIN_USER)) ==
 domain_val(DOMAIN_USER, DOMAIN_NOACCESS))
 segment = "none";
- else if (fs == KERNEL_DS)
- segment = "kernel";
 else
 segment = "user";

diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index a3a38d0a4c85..3e7ddac086c2 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -711,14 +711,6 @@ struct page *get_signal_page(void)
 return page;
 }

-/* Defer to generic check */
-asmlinkage void addr_limit_check_failed(void)
-{
-#ifdef CONFIG_MMU
- addr_limit_user_check();
-#endif
-}
-
 #ifdef CONFIG_DEBUG_RSEQ
 asmlinkage void do_rseq_syscall(struct pt_regs *regs)
 {

diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index f8016e3db65d..480a20766137 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -109,8 +109,7 @@ ENTRY(arm_copy_from_user)

 #ifdef CONFIG_CPU_SPECTRE
- get_thread_info r3
- ldr r3, [r3, #TI_ADDR_LIMIT]
+ ldr r3, =TASK_SIZE
 uaccess_mask_range_ptr r1, r2, r3, ip
 #endif

diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index ebfe4cb3d912..842ea5ede485 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -109,8 +109,7 @@ ENTRY(__copy_to_user_std)
 WEAK(arm_copy_to_user)

 #ifdef CONFIG_CPU_SPECTRE
- get_thread_info r3
- ldr r3, [r3, #TI_ADDR_LIMIT]
+ ldr r3, =TASK_SIZE
 uaccess_mask_range_ptr r0, r2, r3, ip
 #endif
-- cgit

From da0b9ee43c152401f711e522c19b1468c84907bf Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 11 Aug 2021 08:30:27 +0100
Subject: ARM: 9110/1: oabi-compat: fix oabi epoll sparse warning

As my patches change the oabi epoll definition, I received a report
from the kernel test robot about a pre-existing issue with a
mismatched __poll_t type.

The OABI code was correct when it was initially added in linux-2.6.16,
but a later (also correct) change to the generic __poll_t triggered a
type mismatch warning from sparse.

As __poll_t is always 32 bits wide and otherwise compatible, using
this instead of __u32 in the oabi_epoll_event definition is a valid
workaround.
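For reference, the definition that sparse checks against is the
__bitwise typedef from include/uapi/linux/types.h:

    typedef unsigned __bitwise __poll_t;

so storing these values in a plain __u32 field mixes a bitwise type
with a plain integer, which is exactly what sparse warns about.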
Reported-by: kernel test robot Fixes: 8ced390c2b18 ("define __poll_t, annotate constants") Fixes: ee219b946e4b ("uapi: turn __poll_t sparse checks on by default") Fixes: 687ad0191488 ("[ARM] 3109/1: old ABI compat: syscall wrappers for ABI impedance matching") Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/sys_oabi-compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 223ee46b6e75..68112c172025 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -274,7 +274,7 @@ out: } struct oabi_epoll_event { - __u32 events; + __poll_t events; __u64 data; } __attribute__ ((packed,aligned(4))); -- cgit From 88210317eec69d8c06cac6d6751528160e153ffd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 11 Aug 2021 10:27:03 +0100 Subject: ARM: 9116/1: unified: Remove check for gcc < 4 Since commit cafa0010cd51fb71 ("Raise the minimum required gcc version to 4.6"), the kernel can no longer be compiled using gcc-3. Hence this condition is never true, and the check can thus be removed. Signed-off-by: Geert Uytterhoeven Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/unified.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index 1e2c3eb04353..ce9689118dbb 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -24,10 +24,6 @@ __asm__(".syntax unified"); #ifdef CONFIG_THUMB2_KERNEL -#if __GNUC__ < 4 -#error Thumb-2 kernel requires gcc >= 4 -#endif - /* The CPSR bit describing the instruction set (Thumb) */ #define PSR_ISETSTATE PSR_T_BIT -- cgit From 6c974e79d37608bc8fd5cf6b61b4233b82b60428 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 11 Aug 2021 10:27:02 +0100 Subject: ARM: 9118/1: div64: Remove always-true __div64_const32_is_OK() duplicate Since commit cafa0010cd51fb71 ("Raise the minimum required gcc version to 4.6"), the kernel can no longer be compiled using gcc-3. Hence __div64_const32_is_OK() is always true. Moreover, __div64_const32_is_OK() is defined in the same way in include/asm-generic/div64.h, so the ARM-specific definition can be removed regardless. Signed-off-by: Geert Uytterhoeven Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/div64.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h index 595e538f5bfb..4b69cf850451 100644 --- a/arch/arm/include/asm/div64.h +++ b/arch/arm/include/asm/div64.h @@ -52,17 +52,6 @@ static inline uint32_t __div64_32(uint64_t *n, uint32_t base) #else -/* - * gcc versions earlier than 4.0 are simply too problematic for the - * __div64_const32() code in asm-generic/div64.h. First there is - * gcc PR 15089 that tend to trig on more complex constructs, spurious - * .global __udivsi3 are inserted even if none of those symbols are - * referenced in the generated code, and those gcc versions are not able - * to do constant propagation on long long values anyway. 
- */ - -#define __div64_const32_is_OK (__GNUC__ >= 4) - static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias) { unsigned long long res; -- cgit From 14ecf075fe5be01860927fdf3aa11d7b18023ab2 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 28 Jul 2021 15:32:32 +0000 Subject: KVM: arm64: Minor optimization of range_is_memory Currently range_is_memory finds the corresponding struct memblock_region for both the lower and upper bounds of the given address range with two rounds of binary search, and then checks that the two memblocks are the same. Simplify this by only doing binary search on the lower bound and then checking that the upper bound is in the same memblock. Signed-off-by: David Brazdil Reviewed-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210728153232.1018911-3-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index c11b50dd0050..5af2e28b9cd7 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -204,16 +204,19 @@ bool addr_is_memory(phys_addr_t phys) return find_mem_range(phys, &range); } +static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) +{ + return range->start <= addr && addr < range->end; +} + static bool range_is_memory(u64 start, u64 end) { - struct kvm_mem_range r1, r2; + struct kvm_mem_range r; - if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2)) - return false; - if (r1.start != r2.start) + if (!find_mem_range(start, &r)) return false; - return true; + return is_in_mem_range(end - 1, &r); } static inline int __host_stage2_idmap(u64 start, u64 end, -- cgit From 4cb266074aa17e9cafed3a92e9f43b161516569f Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 19 Aug 2021 14:56:52 +0200 Subject: powerpc/pseries/vas: Declare pseries_vas_fault_thread_fn() as static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. Fixes: 6d0aaf5e0de0 ("powerpc/pseries/vas: Setup IRQ and fault handling") Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210819125656.14498-3-clg@kaod.org --- arch/powerpc/platforms/pseries/vas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index b5c1cf1bc64d..b043e3936d21 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -184,7 +184,7 @@ static int h_get_nx_fault(u32 winid, u64 buffer) * Note: The hypervisor forwards an interrupt for each fault request. * So one fault CRB to process for each H_GET_NX_FAULT hcall. */ -irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) +static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) { struct pseries_vas_window *txwin = data; struct coprocessor_request_block crb; -- cgit From cb53a93e33e108bfec00a13cd12696e1c0ccc8b6 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 19 Aug 2021 14:56:53 +0200 Subject: KVM: PPC: Book3S PR: Declare kvmppc_handle_exit_pr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. 
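For context, W=1 enables -Wmissing-prototypes among other diagnostics:
a function defined with external linkage but no prior declaration trips
it, which is what adding the prototype (or making the function static)
silences. A minimal illustration, with made-up names:

    int foo(void) { return 0; }         /* warning: no previous prototype for 'foo' */
    static int bar(void) { return 0; }  /* ok: internal linkage */
    int baz(void);                      /* prototype precedes definition... */
    int baz(void) { return 0; }         /* ...so this is ok too */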
Signed-off-by: Cédric Le Goater
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210819125656.14498-4-clg@kaod.org
---
 arch/powerpc/kvm/book3s.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 740e51def5a5..58391b4b32ed 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -23,7 +23,8 @@ extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
 extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn,
 ulong *spr_val);
 extern int kvmppc_book3s_init_pr(void);
-extern void kvmppc_book3s_exit_pr(void);
+void kvmppc_book3s_exit_pr(void);
+extern int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr);

 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val);
-- cgit

From b352ddae7b2ccd2cb29f495ca0a7c9b6848b623f Mon Sep 17 00:00:00 2001
From: Cédric Le Goater
Date: Thu, 19 Aug 2021 14:56:54 +0200
Subject: KVM: PPC: Book3S PR: Remove unused variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes a compile error with W=1.

Signed-off-by: Cédric Le Goater
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210819125656.14498-5-clg@kaod.org
---
 arch/powerpc/kvm/book3s_64_mmu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 26b8b27a3755..feee40cb2ba1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -196,7 +196,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 hva_t ptegp;
 u64 pteg[16];
 u64 avpn = 0;
- u64 v, r;
+ u64 r;
 u64 v_val, v_mask;
 u64 eaddr_mask;
 int i;
@@ -285,7 +285,6 @@ do_second:
 goto do_second;
 }

- v = be64_to_cpu(pteg[i]);
 r = be64_to_cpu(pteg[i+1]);
 pp = (r & HPTE_R_PP) | key;
 if (r & HPTE_R_PP0)
-- cgit

From 898a1ef06ad4a2a8e3c9490ce62624fcd1a7b1f8 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Fri, 20 Aug 2021 09:28:19 +0000
Subject: powerpc/audit: Avoid unnecessary #ifdef in syscall_get_arguments()

Use is_32bit_task() which already handles CONFIG_COMPAT.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/ba49cdd574558a0363300c3f6b5b062b397cb071.1629451483.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/syscall.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index ba0f88f3a30d..7ea3c4044186 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,10 +90,9 @@ static inline void syscall_get_arguments(struct task_struct *task,
 unsigned long val, mask = -1UL;
 unsigned int n = 6;

-#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_32BIT))
+ if (is_32bit_task())
 mask = 0xffffffff;
-#endif
+
 while (n--) {
 if (n == 0)
 val = regs->orig_gpr3;
-- cgit

From 770cec16cdc9d15555e67896dd2214a4fec9a3fa Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Fri, 20 Aug 2021 09:39:14 +0000
Subject: powerpc/audit: Simplify syscall_get_arch()

Make use of is_32bit_task() and CONFIG_CPU_LITTLE_ENDIAN to simplify
syscall_get_arch().
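This relies on the audit arch constants already encoding endianness;
for reference, include/uapi/linux/audit.h defines:

    #define AUDIT_ARCH_PPC64   (EM_PPC64|__AUDIT_ARCH_64BIT)
    #define AUDIT_ARCH_PPC64LE (EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)

so returning AUDIT_ARCH_PPC64LE directly folds in the old
arch |= __AUDIT_ARCH_LE step.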
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/4be53b9187a4d8c163968f4d224267e41a7fcc33.1629451479.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/syscall.h | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 7ea3c4044186..c60ebd04b2ed 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -115,16 +115,11 @@ static inline void syscall_set_arguments(struct task_struct *task,

 static inline int syscall_get_arch(struct task_struct *task)
 {
- int arch;
-
- if (IS_ENABLED(CONFIG_PPC64) && !test_tsk_thread_flag(task, TIF_32BIT))
- arch = AUDIT_ARCH_PPC64;
+ if (is_32bit_task())
+ return AUDIT_ARCH_PPC;
+ else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ return AUDIT_ARCH_PPC64LE;
 else
- arch = AUDIT_ARCH_PPC;
-
-#ifdef __LITTLE_ENDIAN__
- arch |= __AUDIT_ARCH_LE;
-#endif
- return arch;
+ return AUDIT_ARCH_PPC64;
 }
 #endif /* _ASM_SYSCALL_H */
-- cgit

From 4139b1972af281e0293c2414a0f1cd59fa5b2980 Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Fri, 30 Jul 2021 18:04:51 -0400
Subject: KVM: X86: Introduce kvm_mmu_slot_lpages() helpers

Introduce kvm_mmu_slot_lpages() to calculate lpage_info and rmap array
size. The other __kvm_mmu_slot_lpages() can take an extra parameter of
npages rather than fetching from the memslot pointer. Start to use the
latter one in kvm_alloc_memslot_metadata().

Signed-off-by: Peter Xu
Message-Id: <20210730220455.26054-4-peterx@redhat.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/include/asm/kvm_host.h | 7 -------
 arch/x86/kvm/mmu.h | 21 +++++++++++++++++++++
 arch/x86/kvm/mmu/page_track.c | 1 +
 arch/x86/kvm/x86.c | 6 ++----
 4 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a612d213be5c..1881f4c92de9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -124,13 +124,6 @@
 #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)

-static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
-{
- /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */
- return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
- (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-}
-
 #define KVM_PERMILLE_MMU_PAGES 20
 #define KVM_MIN_ALLOC_MMU_PAGES 64UL
 #define KVM_MMU_HASH_SHIFT 12
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 83e6c6965f1e..59e831a8ab9d 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -240,4 +240,25 @@ static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
 return smp_load_acquire(&kvm->arch.memslots_have_rmaps);
 }

+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+ /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0.
*/ + return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - + (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); +} + +static inline unsigned long +__kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, unsigned long npages, + int level) +{ + return gfn_to_index(slot->base_gfn + npages - 1, + slot->base_gfn, level) + 1; +} + +static inline unsigned long +kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, int level) +{ + return __kvm_mmu_slot_lpages(slot, slot->npages, level); +} + #endif diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index 91a9f7e0fd91..269f11f92fd0 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -16,6 +16,7 @@ #include +#include "mmu.h" #include "mmu_internal.h" void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 58a72c7d3330..4e97f7cd412e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11349,8 +11349,7 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot, for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { int level = i + 1; - int lpages = gfn_to_index(slot->base_gfn + npages - 1, - slot->base_gfn, level) + 1; + int lpages = __kvm_mmu_slot_lpages(slot, npages, level); WARN_ON(slot->arch.rmap[i]); @@ -11433,8 +11432,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm, int lpages; int level = i + 1; - lpages = gfn_to_index(slot->base_gfn + npages - 1, - slot->base_gfn, level) + 1; + lpages = __kvm_mmu_slot_lpages(slot, npages, level); linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT); if (!linfo) -- cgit From 3bcd0662d66fd07e596d2a7445e6b3215631b901 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 30 Jul 2021 18:04:52 -0400 Subject: KVM: X86: Introduce mmu_rmaps_stat per-vm debugfs file Use this file to dump rmap statistic information. The statistic is done by calculating the rmap count and the result is log-2-based. An example output of this looks like (idle 6GB guest, right after boot linux): Rmap_Count: 0 1 2-3 4-7 8-15 16-31 32-63 64-127 128-255 256-511 512-1023 Level=4K: 3086676 53045 12330 1272 502 121 76 2 0 0 0 Level=2M: 5947 231 0 0 0 0 0 0 0 0 0 Level=1G: 32 0 0 0 0 0 0 0 0 0 0 Signed-off-by: Peter Xu Message-Id: <20210730220455.26054-5-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/debugfs.c | 111 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/mmu/mmu.c | 20 ++++++++ arch/x86/kvm/mmu/mmu_internal.h | 1 + 3 files changed, 132 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 95a98413dc32..54a83a744538 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -7,6 +7,8 @@ #include #include #include "lapic.h" +#include "mmu.h" +#include "mmu/mmu_internal.h" static int vcpu_get_timer_advance_ns(void *data, u64 *val) { @@ -73,3 +75,112 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_ &vcpu_tsc_scaling_frac_fops); } } + +/* + * This covers statistics <1024 (11=log(1024)+1), which should be enough to + * cover RMAP_RECYCLE_THRESHOLD. 
+ */ +#define RMAP_LOG_SIZE 11 + +static const char *kvm_lpage_str[KVM_NR_PAGE_SIZES] = { "4K", "2M", "1G" }; + +static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v) +{ + struct kvm_rmap_head *rmap; + struct kvm *kvm = m->private; + struct kvm_memory_slot *slot; + struct kvm_memslots *slots; + unsigned int lpage_size, index; + /* Still small enough to be on the stack */ + unsigned int *log[KVM_NR_PAGE_SIZES], *cur; + int i, j, k, l, ret; + + ret = -ENOMEM; + memset(log, 0, sizeof(log)); + for (i = 0; i < KVM_NR_PAGE_SIZES; i++) { + log[i] = kcalloc(RMAP_LOG_SIZE, sizeof(unsigned int), GFP_KERNEL); + if (!log[i]) + goto out; + } + + mutex_lock(&kvm->slots_lock); + write_lock(&kvm->mmu_lock); + + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { + slots = __kvm_memslots(kvm, i); + for (j = 0; j < slots->used_slots; j++) { + slot = &slots->memslots[j]; + for (k = 0; k < KVM_NR_PAGE_SIZES; k++) { + rmap = slot->arch.rmap[k]; + lpage_size = kvm_mmu_slot_lpages(slot, k + 1); + cur = log[k]; + for (l = 0; l < lpage_size; l++) { + index = ffs(pte_list_count(&rmap[l])); + if (WARN_ON_ONCE(index >= RMAP_LOG_SIZE)) + index = RMAP_LOG_SIZE - 1; + cur[index]++; + } + } + } + } + + write_unlock(&kvm->mmu_lock); + mutex_unlock(&kvm->slots_lock); + + /* index=0 counts no rmap; index=1 counts 1 rmap */ + seq_printf(m, "Rmap_Count:\t0\t1\t"); + for (i = 2; i < RMAP_LOG_SIZE; i++) { + j = 1 << (i - 1); + k = (1 << i) - 1; + seq_printf(m, "%d-%d\t", j, k); + } + seq_printf(m, "\n"); + + for (i = 0; i < KVM_NR_PAGE_SIZES; i++) { + seq_printf(m, "Level=%s:\t", kvm_lpage_str[i]); + cur = log[i]; + for (j = 0; j < RMAP_LOG_SIZE; j++) + seq_printf(m, "%d\t", cur[j]); + seq_printf(m, "\n"); + } + + ret = 0; +out: + for (i = 0; i < KVM_NR_PAGE_SIZES; i++) + kfree(log[i]); + + return ret; +} + +static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file) +{ + struct kvm *kvm = inode->i_private; + + if (!kvm_get_kvm_safe(kvm)) + return -ENOENT; + + return single_open(file, kvm_mmu_rmaps_stat_show, kvm); +} + +static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file) +{ + struct kvm *kvm = inode->i_private; + + kvm_put_kvm(kvm); + + return single_release(inode, file); +} + +static const struct file_operations mmu_rmaps_stat_fops = { + .open = kvm_mmu_rmaps_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = kvm_mmu_rmaps_stat_release, +}; + +int kvm_arch_create_vm_debugfs(struct kvm *kvm) +{ + debugfs_create_file("mmu_rmaps_stat", 0644, kvm->debugfs_dentry, kvm, + &mmu_rmaps_stat_fops); + return 0; +} diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d282ccf5f0c5..5daa8a910e8b 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1035,6 +1035,26 @@ out: return true; } +unsigned int pte_list_count(struct kvm_rmap_head *rmap_head) +{ + struct pte_list_desc *desc; + unsigned int count = 0; + + if (!rmap_head->val) + return 0; + else if (!(rmap_head->val & 1)) + return 1; + + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + + while (desc) { + count += desc->spte_count; + desc = desc->more; + } + + return count; +} + static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level, const struct kvm_memory_slot *slot) { diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index ca7b7595bbfc..62bb8f758b3f 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -131,6 +131,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, int min_level); void 
kvm_flush_remote_tlbs_with_address(struct kvm *kvm, u64 start_gfn, u64 pages); +unsigned int pte_list_count(struct kvm_rmap_head *rmap_head); /* * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault(). -- cgit From 5a324c24b638d0f3194e1dc8f0cebd28a0745238 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 23:52:36 +0300 Subject: Revert "KVM: x86/mmu: Allow zap gfn range to operate under the mmu read lock" This together with the next patch will fix a future race between kvm_zap_gfn_range and the page fault handler, which will happen when AVIC memslot is going to be only partially disabled. The performance impact is minimal since kvm_zap_gfn_range is only called by users, update_mtrr() and kvm_post_set_cr0(). Both only use it if the guest has non-coherent DMA, in order to honor the guest's UC memtype. MTRR and CD setup only happens at boot, and generally in an area where the page tables should be small (for CD) or should not include the affected GFNs at all (for MTRRs). This is based on a patch suggested by Sean Christopherson: https://lkml.org/lkml/2021/7/22/1025 Signed-off-by: Sean Christopherson Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 19 ++++++++----------- arch/x86/kvm/mmu/tdp_mmu.c | 15 ++++----------- arch/x86/kvm/mmu/tdp_mmu.h | 11 ++++------- 3 files changed, 16 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5daa8a910e8b..9a7199679f62 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5682,8 +5682,9 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) int i; bool flush = false; + write_lock(&kvm->mmu_lock); + if (kvm_memslots_have_rmaps(kvm)) { - write_lock(&kvm->mmu_lock); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { slots = __kvm_memslots(kvm, i); kvm_for_each_memslot(memslot, slots) { @@ -5703,22 +5704,18 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) } if (flush) kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end); - write_unlock(&kvm->mmu_lock); } if (is_tdp_mmu_enabled(kvm)) { - flush = false; - - read_lock(&kvm->mmu_lock); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, gfn_start, - gfn_end, flush, true); - if (flush) - kvm_flush_remote_tlbs_with_address(kvm, gfn_start, - gfn_end); - - read_unlock(&kvm->mmu_lock); + gfn_end, flush); } + + if (flush) + kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end); + + write_unlock(&kvm->mmu_lock); } static bool slot_rmap_write_protect(struct kvm *kvm, diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index a3146434c965..fb1b2dc7a6d1 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -818,21 +818,15 @@ retry: * non-root pages mapping GFNs strictly within that range. Returns true if * SPTEs have been cleared and a TLB flush is needed before releasing the * MMU lock. - * - * If shared is true, this thread holds the MMU lock in read mode and must - * account for the possibility that other threads are modifying the paging - * structures concurrently. If shared is false, this thread should hold the - * MMU in write mode. 
*/ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start, - gfn_t end, bool can_yield, bool flush, - bool shared) + gfn_t end, bool can_yield, bool flush) { struct kvm_mmu_page *root; - for_each_tdp_mmu_root_yield_safe(kvm, root, as_id, shared) + for_each_tdp_mmu_root_yield_safe(kvm, root, as_id, false) flush = zap_gfn_range(kvm, root, start, end, can_yield, flush, - shared); + false); return flush; } @@ -843,8 +837,7 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm) int i; for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull, - flush, false); + flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull, flush); if (flush) kvm_flush_remote_tlbs(kvm); diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index b224d126adf9..358f447d4012 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -20,14 +20,11 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared); bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start, - gfn_t end, bool can_yield, bool flush, - bool shared); + gfn_t end, bool can_yield, bool flush); static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, - gfn_t start, gfn_t end, bool flush, - bool shared) + gfn_t start, gfn_t end, bool flush) { - return __kvm_tdp_mmu_zap_gfn_range(kvm, as_id, start, end, true, flush, - shared); + return __kvm_tdp_mmu_zap_gfn_range(kvm, as_id, start, end, true, flush); } static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp) { @@ -44,7 +41,7 @@ static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp) */ lockdep_assert_held_write(&kvm->mmu_lock); return __kvm_tdp_mmu_zap_gfn_range(kvm, kvm_mmu_page_as_id(sp), - sp->gfn, end, false, false, false); + sp->gfn, end, false, false); } void kvm_tdp_mmu_zap_all(struct kvm *kvm); -- cgit From 2822da446640d82b7bf65800314ef2a825e8df13 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:37 +0300 Subject: KVM: x86/mmu: fix parameters to kvm_flush_remote_tlbs_with_address kvm_flush_remote_tlbs_with_address expects (start gfn, number of pages), and not (start gfn, end gfn) Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 9a7199679f62..3cb2808e1e5c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5703,13 +5703,17 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) } } if (flush) - kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end); + kvm_flush_remote_tlbs_with_address(kvm, gfn_start, + gfn_end - gfn_start); } if (is_tdp_mmu_enabled(kvm)) { for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, gfn_start, gfn_end, flush); + if (flush) + kvm_flush_remote_tlbs_with_address(kvm, gfn_start, + gfn_end - gfn_start); } if (flush) -- cgit From 88f585358b5e6aec8586425bdbaaa2157112ffc2 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:38 +0300 Subject: KVM: x86/mmu: add comment explaining arguments to kvm_zap_gfn_range This comment makes it clear that the range of gfns that this function receives is non inclusive. 
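With the end exclusive, zapping a single gfn becomes (usage sketch,
mirroring a caller added later in this series):

    kvm_zap_gfn_range(kvm, gfn, gfn + 1);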
Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 3cb2808e1e5c..e53d09534113 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5675,6 +5675,10 @@ void kvm_mmu_uninit_vm(struct kvm *kvm) kvm_mmu_uninit_tdp_mmu(kvm); } +/* + * Invalidate (zap) SPTEs that cover GFNs from gfn_start and up to gfn_end + * (not including it) + */ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) { struct kvm_memslots *slots; -- cgit From edb298c663fccad65fe99fcec6a4f96cc344520d Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:39 +0300 Subject: KVM: x86/mmu: bump mmu notifier count in kvm_zap_gfn_range This together with previous patch, ensures that kvm_zap_gfn_range doesn't race with page fault running on another vcpu, and will make this page fault code retry instead. This is based on a patch suggested by Sean Christopherson: https://lkml.org/lkml/2021/7/22/1025 Suggested-by: Sean Christopherson Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-5-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e53d09534113..916083eb4036 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5688,6 +5688,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) write_lock(&kvm->mmu_lock); + kvm_inc_notifier_count(kvm, gfn_start, gfn_end); + if (kvm_memslots_have_rmaps(kvm)) { for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { slots = __kvm_memslots(kvm, i); @@ -5723,6 +5725,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) if (flush) kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end); + kvm_dec_notifier_count(kvm, gfn_start, gfn_end); + write_unlock(&kvm->mmu_lock); } -- cgit From 33a5c0009d14e18ca2fbf610efd6cf2e7e34489a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:40 +0300 Subject: KVM: x86/mmu: rename try_async_pf to kvm_faultin_pfn try_async_pf is a wrong name for this function, since this code is used when asynchronous page fault is not enabled as well. 
This code is based on a patch from Sean Christopherson: https://lkml.org/lkml/2021/7/19/2970 Suggested-by: Sean Christopherson Signed-off-by: Maxim Levitsky Reviewed-by: Paolo Bonzini Message-Id: <20210810205251.424103-6-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++-- arch/x86/kvm/mmu/paging_tmpl.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 916083eb4036..e85ec37a8468 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3884,7 +3884,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); } -static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, +static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva, bool write, bool *writable) { @@ -3954,7 +3954,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); - if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, &hva, + if (kvm_faultin_pfn(vcpu, prefault, gfn, gpa, &pfn, &hva, write, &map_writable)) return RET_PF_RETRY; diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index ee044d357b5f..f349eae69bf3 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -881,7 +881,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); - if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, &hva, + if (kvm_faultin_pfn(vcpu, prefault, walker.gfn, addr, &pfn, &hva, write_fault, &map_writable)) return RET_PF_RETRY; -- cgit From 8f32d5e563cbf0507756aa929134b1a110b47a62 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:41 +0300 Subject: KVM: x86/mmu: allow kvm_faultin_pfn to return page fault handling code This will allow it to return RET_PF_EMULATE for APIC mmio emulation. This code is based on a patch from Sean Christopherson: https://lkml.org/lkml/2021/7/19/2970 Suggested-by: Sean Christopherson Signed-off-by: Maxim Levitsky Reviewed-by: Paolo Bonzini Message-Id: <20210810205251.424103-7-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 17 ++++++++++------- arch/x86/kvm/mmu/paging_tmpl.h | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e85ec37a8468..38e36cff82af 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3886,7 +3886,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva, - bool write, bool *writable) + bool write, bool *writable, int *r) { struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); bool async; @@ -3897,7 +3897,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, * be zapped before KVM inserts a new MMIO SPTE for the gfn. */ if (slot && (slot->flags & KVM_MEMSLOT_INVALID)) - return true; + goto out_retry; /* Don't expose private memslots to L2. 
*/
 if (is_guest_mode(vcpu) && !kvm_is_visible_memslot(slot)) {
 *pfn = KVM_PFN_NOSLOT;
 *writable = false;
@@ -3917,14 +3917,17 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 if (kvm_find_async_pf_gfn(vcpu, gfn)) {
 trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn);
 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
- return true;
+ goto out_retry;
 } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn))
- return true;
+ goto out_retry;
 }

 *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL,
 write, writable, hva);
- return false;
+
+out_retry:
+ *r = RET_PF_RETRY;
+ return true;
 }

 static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
@@ -3955,8 +3958,8 @@
 smp_rmb();

 if (kvm_faultin_pfn(vcpu, prefault, gfn, gpa, &pfn, &hva,
- write, &map_writable))
- return RET_PF_RETRY;
+ write, &map_writable, &r))
+ return r;

 if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, &r))
 return r;
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index f349eae69bf3..7d03e9b7ccfa 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -882,8 +882,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
 smp_rmb();

 if (kvm_faultin_pfn(vcpu, prefault, walker.gfn, addr, &pfn, &hva,
- write_fault, &map_writable))
- return RET_PF_RETRY;
+ write_fault, &map_writable, &r))
+ return r;

 if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
 return r;
-- cgit

From 9cc13d60ba6b9976e01ec6f66fa1ec4a06992929 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky
Date: Tue, 10 Aug 2021 23:52:42 +0300
Subject: KVM: x86/mmu: allow APICv memslot to be enabled but invisible

on AMD, APIC virtualization needs to dynamically inhibit the AVIC in
response to some events, and this is problematic and not efficient to
do by enabling/disabling the memslot that covers APIC's mmio range.
Plus due to SRCU locking, it makes it more complex to request AVIC
inhibition.

Instead, the APIC memslot will be always enabled, but be invisible to
the guest, such that, when it is inhibited, the MMU code will not
install a SPTE for it and will instead jump straight to emulating the
access.

When inhibiting the AVIC, this SPTE will be zapped.

This code is based on a suggestion from Sean Christopherson:
https://lkml.org/lkml/2021/7/19/2970

Suggested-by: Sean Christopherson
Signed-off-by: Maxim Levitsky
Message-Id: <20210810205251.424103-8-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu/mmu.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 38e36cff82af..8568ae42e867 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3899,11 +3899,24 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 if (slot && (slot->flags & KVM_MEMSLOT_INVALID))
 goto out_retry;

- /* Don't expose private memslots to L2. */
- if (is_guest_mode(vcpu) && !kvm_is_visible_memslot(slot)) {
- *pfn = KVM_PFN_NOSLOT;
- *writable = false;
- return false;
+ if (!kvm_is_visible_memslot(slot)) {
+ /* Don't expose private memslots to L2. */
+ if (is_guest_mode(vcpu)) {
+ *pfn = KVM_PFN_NOSLOT;
+ *writable = false;
+ return false;
+ }
+ /*
+ * If the APIC access page exists but is disabled, go directly
+ * to emulation without caching the MMIO access or creating a
+ * MMIO SPTE.
That way the cache doesn't need to be purged + * when the AVIC is re-enabled. + */ + if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT && + !kvm_apicv_activated(vcpu->kvm)) { + *r = RET_PF_EMULATE; + return true; + } } async = false; -- cgit From 36222b117e36d487172c36bec187628085b92575 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:43 +0300 Subject: KVM: x86: don't disable APICv memslot when inhibited Thanks to the former patches, it is now possible to keep the APICv memslot always enabled, and it will be invisible to the guest when it is inhibited This code is based on a suggestion from Sean Christopherson: https://lkml.org/lkml/2021/7/19/2970 Suggested-by: Sean Christopherson Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-9-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm-x86-ops.h | 1 - arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/svm/avic.c | 21 ++++++--------------- arch/x86/kvm/svm/svm.c | 1 - arch/x86/kvm/svm/svm.h | 1 - arch/x86/kvm/x86.c | 21 ++++++++------------- 6 files changed, 14 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index a12a4987154e..cefe1d81e2e8 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -72,7 +72,6 @@ KVM_X86_OP(enable_nmi_window) KVM_X86_OP(enable_irq_window) KVM_X86_OP(update_cr8_intercept) KVM_X86_OP(check_apicv_inhibit_reasons) -KVM_X86_OP_NULL(pre_update_apicv_exec_ctrl) KVM_X86_OP(refresh_apicv_exec_ctrl) KVM_X86_OP(hwapic_irr_update) KVM_X86_OP(hwapic_isr_update) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1881f4c92de9..15eefd9498b4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1352,7 +1352,6 @@ struct kvm_x86_ops { void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); bool (*check_apicv_inhibit_reasons)(ulong bit); - void (*pre_update_apicv_exec_ctrl)(struct kvm *kvm, bool activate); void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index a8ad78a2faa1..d0acbeeab3d6 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -225,31 +225,26 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, * field of the VMCB. Therefore, we set up the * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here. */ -static int avic_update_access_page(struct kvm *kvm, bool activate) +static int avic_alloc_access_page(struct kvm *kvm) { void __user *ret; int r = 0; mutex_lock(&kvm->slots_lock); - /* - * During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger - * APICv mode change, which update APIC_ACCESS_PAGE_PRIVATE_MEMSLOT - * memory region. So, we need to ensure that kvm->mm == current->mm. - */ - if ((kvm->arch.apic_access_memslot_enabled == activate) || - (kvm->mm != current->mm)) + + if (kvm->arch.apic_access_memslot_enabled) goto out; ret = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, APIC_DEFAULT_PHYS_BASE, - activate ? 
PAGE_SIZE : 0); + PAGE_SIZE); if (IS_ERR(ret)) { r = PTR_ERR(ret); goto out; } - kvm->arch.apic_access_memslot_enabled = activate; + kvm->arch.apic_access_memslot_enabled = true; out: mutex_unlock(&kvm->slots_lock); return r; @@ -270,7 +265,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) if (kvm_apicv_activated(vcpu->kvm)) { int ret; - ret = avic_update_access_page(vcpu->kvm, true); + ret = avic_alloc_access_page(vcpu->kvm); if (ret) return ret; } @@ -918,10 +913,6 @@ bool svm_check_apicv_inhibit_reasons(ulong bit) return supported & BIT(bit); } -void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate) -{ - avic_update_access_page(kvm, activate); -} static inline int avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2b6632d4c76f..b1662d6baa71 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4581,7 +4581,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_virtual_apic_mode = svm_set_virtual_apic_mode, .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, .check_apicv_inhibit_reasons = svm_check_apicv_inhibit_reasons, - .pre_update_apicv_exec_ctrl = svm_pre_update_apicv_exec_ctrl, .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_irr_update = svm_hwapic_irr_update, .hwapic_isr_update = svm_hwapic_isr_update, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index bd0fe94c2920..bd41f2a32838 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -534,7 +534,6 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu); void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu); void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); bool svm_check_apicv_inhibit_reasons(ulong bit); -void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate); void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4e97f7cd412e..4d720a0cdd80 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9255,13 +9255,6 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); -/* - * NOTE: Do not hold any lock prior to calling this. - * - * In particular, kvm_request_apicv_update() expects kvm->srcu not to be - * locked, because it calls __x86_set_memory_region() which does - * synchronize_srcu(&kvm->srcu). 
- */ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) { unsigned long old, new, expected; @@ -9282,14 +9275,16 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) old = cmpxchg(&kvm->arch.apicv_inhibit_reasons, expected, new); } while (old != expected); - if (!!old == !!new) - return; + if (!!old != !!new) { + trace_kvm_apicv_update_request(activate, bit); + kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); + if (new) { + unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE); - trace_kvm_apicv_update_request(activate, bit); - if (kvm_x86_ops.pre_update_apicv_exec_ctrl) - static_call(kvm_x86_pre_update_apicv_exec_ctrl)(kvm, activate); + kvm_zap_gfn_range(kvm, gfn, gfn+1); + } + } - kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); } EXPORT_SYMBOL_GPL(kvm_request_apicv_update); -- cgit From b0a1637f64b06586752cc507b94e4aeff02588d6 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:44 +0300 Subject: KVM: x86: APICv: fix race in kvm_request_apicv_update on SVM Currently on SVM, the kvm_request_apicv_update toggles the APICv memslot without doing any synchronization. If there is a mismatch between that memslot state and the AVIC state, on one of the vCPUs, an APIC mmio access can be lost: For example: VCPU0: enable the APIC_ACCESS_PAGE_PRIVATE_MEMSLOT VCPU1: access an APIC mmio register. Since AVIC is still disabled on VCPU1, the access will not be intercepted by it, and neither will it cause MMIO fault, but rather it will just be read/written from/to the dummy page mapped into the APIC_ACCESS_PAGE_PRIVATE_MEMSLOT. Fix that by adding a lock guarding the AVIC state changes, and carefully order the operations of kvm_request_apicv_update to avoid this race: 1. Take the lock 2. Send KVM_REQ_APICV_UPDATE 3. Update the apic inhibit reason 4. Release the lock This ensures that at (2) all vCPUs are kicked out of the guest mode, but don't yet see the new avic state. Then only after (4) all other vCPUs can update their AVIC state and resume. 
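The ordering distills to the sketch below (a simplified view of the patch that follows, not compilable on its own; it assumes the kernel-internal kvm structures and the new arch.apicv_update_lock field):

/*
 * Race-free APICv inhibit update: every vCPU is kicked out of guest
 * mode before the new state is published, and can only re-enter after
 * the new state is visible.
 */
static void apicv_update_ordered(struct kvm *kvm, unsigned long new)
{
	mutex_lock(&kvm->arch.apicv_update_lock);             /* 1. take the lock */
	kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); /* 2. kick vCPUs out of guest mode */
	kvm->arch.apicv_inhibit_reasons = new;                /* 3. update the inhibit reasons */
	mutex_unlock(&kvm->arch.apicv_update_lock);           /* 4. release; vCPUs refresh and resume */
}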
Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-10-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 ++++++ arch/x86/kvm/x86.c | 39 ++++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 15eefd9498b4..20a3ffe14ff2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1053,6 +1053,9 @@ struct kvm_arch { struct kvm_apic_map __rcu *apic_map; atomic_t apic_map_dirty; + /* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */ + struct mutex apicv_update_lock; + bool apic_access_memslot_enabled; unsigned long apicv_inhibit_reasons; @@ -1736,6 +1739,9 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); void kvm_request_apicv_update(struct kvm *kvm, bool activate, unsigned long bit); +void __kvm_request_apicv_update(struct kvm *kvm, bool activate, + unsigned long bit); + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4d720a0cdd80..89e666e5a707 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8579,6 +8579,8 @@ EXPORT_SYMBOL_GPL(kvm_apicv_activated); static void kvm_apicv_init(struct kvm *kvm) { + mutex_init(&kvm->arch.apicv_update_lock); + if (enable_apicv) clear_bit(APICV_INHIBIT_REASON_DISABLE, &kvm->arch.apicv_inhibit_reasons); @@ -9240,6 +9242,8 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) if (!lapic_in_kernel(vcpu)) return; + mutex_lock(&vcpu->kvm->arch.apicv_update_lock); + vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm); kvm_apic_update_apicv(vcpu); static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu); @@ -9252,39 +9256,44 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) */ if (!vcpu->arch.apicv_active) kvm_make_request(KVM_REQ_EVENT, vcpu); + + mutex_unlock(&vcpu->kvm->arch.apicv_update_lock); } EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); -void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) +void __kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) { - unsigned long old, new, expected; + unsigned long old, new; if (!kvm_x86_ops.check_apicv_inhibit_reasons || !static_call(kvm_x86_check_apicv_inhibit_reasons)(bit)) return; - old = READ_ONCE(kvm->arch.apicv_inhibit_reasons); - do { - expected = new = old; - if (activate) - __clear_bit(bit, &new); - else - __set_bit(bit, &new); - if (new == old) - break; - old = cmpxchg(&kvm->arch.apicv_inhibit_reasons, expected, new); - } while (old != expected); + old = new = kvm->arch.apicv_inhibit_reasons; + + if (activate) + __clear_bit(bit, &new); + else + __set_bit(bit, &new); if (!!old != !!new) { trace_kvm_apicv_update_request(activate, bit); kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); + kvm->arch.apicv_inhibit_reasons = new; if (new) { unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE); - kvm_zap_gfn_range(kvm, gfn, gfn+1); } - } + } else + kvm->arch.apicv_inhibit_reasons = new; +} +EXPORT_SYMBOL_GPL(__kvm_request_apicv_update); +void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) +{ + mutex_lock(&kvm->arch.apicv_update_lock); + __kvm_request_apicv_update(kvm, activate, bit); + mutex_unlock(&kvm->arch.apicv_update_lock); } EXPORT_SYMBOL_GPL(kvm_request_apicv_update); -- cgit From 4628efcd4e8963a41e877318cd10346dea9a6e00 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: 
Tue, 10 Aug 2021 23:52:45 +0300 Subject: KVM: SVM: add warning for mismatch between AVIC vcpu state and AVIC inhibition It is never a good idea to enter a guest on a vCPU when the AVIC inhibition state doesn't match the enablement of the AVIC on the vCPU. Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-11-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index b1662d6baa71..22376fdd94f8 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3779,6 +3779,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) pre_svm_run(vcpu); + WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu)); + sync_lapic_to_cr8(vcpu); if (unlikely(svm->asid != svm->vmcb->control.asid)) { -- cgit From 0f250a646382e017725001a552624be0c86527bf Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 10 Aug 2021 23:52:46 +0300 Subject: KVM: x86: hyper-v: Deactivate APICv only when AutoEOI feature is in use APICV_INHIBIT_REASON_HYPERV is currently unconditionally forced upon SynIC activation as SynIC's AutoEOI is incompatible with APICv/AVIC. It is, however, possible to track whether the feature was actually used by the guest and only inhibit APICv/AVIC when needed. TLFS suggests a dedicated 'HV_DEPRECATING_AEOI_RECOMMENDED' flag to let Windows know that the AutoEOI feature should be avoided. While it's up to KVM userspace to set the flag, KVM can help a bit by exposing global APICv/AVIC enablement. Maxim: - always set HV_DEPRECATING_AEOI_RECOMMENDED in kvm_get_hv_cpuid, since this feature can be used regardless of AVIC Paolo: - use arch.apicv_update_lock to protect the hv->synic_auto_eoi_used instead of atomic ops Signed-off-by: Vitaly Kuznetsov Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-12-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 ++++++ arch/x86/kvm/hyperv.c | 32 ++++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 20a3ffe14ff2..28c85c80e831 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -981,6 +981,12 @@ struct kvm_hv { /* How many vCPUs have VP index != vCPU index */ atomic_t num_mismatched_vp_indexes; + /* + * How many SynICs use 'AutoEOI' feature + * (protected by arch.apicv_update_lock) + */ + unsigned int synic_auto_eoi_used; + struct hv_partition_assist_pg *hv_pa_pg; struct kvm_hv_syndbg hv_syndbg; }; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 0b38f944c6b6..fe4a02715266 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -88,6 +88,10 @@ static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, static void synic_update_vector(struct kvm_vcpu_hv_synic *synic, int vector) { + struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic); + struct kvm_hv *hv = to_kvm_hv(vcpu->kvm); + int auto_eoi_old, auto_eoi_new; + if (vector < HV_SYNIC_FIRST_VALID_VECTOR) return; @@ -96,10 +100,30 @@ static void synic_update_vector(struct kvm_vcpu_hv_synic *synic, else __clear_bit(vector, synic->vec_bitmap); + auto_eoi_old = bitmap_weight(synic->auto_eoi_bitmap, 256); + if (synic_has_vector_auto_eoi(synic, vector)) __set_bit(vector, synic->auto_eoi_bitmap); else __clear_bit(vector, synic->auto_eoi_bitmap); + + auto_eoi_new = 
bitmap_weight(synic->auto_eoi_bitmap, 256); + + if (!!auto_eoi_old == !!auto_eoi_new) + return; + + mutex_lock(&vcpu->kvm->arch.apicv_update_lock); + + if (auto_eoi_new) + hv->synic_auto_eoi_used++; + else + hv->synic_auto_eoi_used--; + + __kvm_request_apicv_update(vcpu->kvm, + !hv->synic_auto_eoi_used, + APICV_INHIBIT_REASON_HYPERV); + + mutex_unlock(&vcpu->kvm->arch.apicv_update_lock); } static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, @@ -933,12 +957,6 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) synic = to_hv_synic(vcpu); - /* - * Hyper-V SynIC auto EOI SINT's are - * not compatible with APICV, so request - * to deactivate APICV permanently. - */ - kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV); synic->active = true; synic->dont_zero_synic_pages = dont_zero_synic_pages; synic->control = HV_SYNIC_CONTROL_ENABLE; @@ -2476,6 +2494,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; if (!cpu_smt_possible()) ent->eax |= HV_X64_NO_NONARCH_CORESHARING; + + ent->eax |= HV_DEPRECATING_AEOI_RECOMMENDED; /* * Default number of spinlock retry attempts, matches * HyperV 2016. -- cgit From 30eed56a7e1cbefe933a33d661827e5c72cd136f Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:47 +0300 Subject: KVM: SVM: remove svm_toggle_avic_for_irq_window Now that kvm_request_apicv_update doesn't need to drop the kvm->srcu lock, we can call kvm_request_apicv_update directly. Signed-off-by: Maxim Levitsky Reviewed-by: Paolo Bonzini Message-Id: <20210810205251.424103-13-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 11 ----------- arch/x86/kvm/svm/svm.c | 4 ++-- arch/x86/kvm/svm/svm.h | 1 - 3 files changed, 2 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index d0acbeeab3d6..1def54c26259 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -582,17 +582,6 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu) avic_handle_ldr_update(vcpu); } -void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate) -{ - if (!enable_apicv || !lapic_in_kernel(vcpu)) - return; - - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - kvm_request_apicv_update(vcpu->kvm, activate, - APICV_INHIBIT_REASON_IRQWIN); - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); -} - void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu) { return; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 22376fdd94f8..4e76e78c98ec 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2992,7 +2992,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu) * In this case AVIC was temporarily disabled for * requesting the IRQ window and we have to re-enable it. */ - svm_toggle_avic_for_irq_window(vcpu, true); + kvm_request_apicv_update(vcpu->kvm, true, APICV_INHIBIT_REASON_IRQWIN); ++vcpu->stat.irq_window_exits; return 1; @@ -3544,7 +3544,7 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu) * via AVIC. In such case, we need to temporarily disable AVIC, * and fallback to injecting IRQ via V_IRQ. 
*/ - svm_toggle_avic_for_irq_window(vcpu, false); + kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_IRQWIN); svm_set_vintr(svm); } } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index bd41f2a32838..aae851762b59 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -524,7 +524,6 @@ int avic_ga_log_notifier(u32 ga_tag); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); void avic_init_vmcb(struct vcpu_svm *svm); -void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate); int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu); int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu); int avic_init_vcpu(struct vcpu_svm *svm); -- cgit From 06ef813466c63ff1a61b5f99592e58d049c2c1ac Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:48 +0300 Subject: KVM: SVM: avoid refreshing avic if its state didn't change Since AVIC can be inhibited and uninhibited rapidly it is possible that we have nothing to do by the time the svm_refresh_apicv_exec_ctrl is called. Detect and avoid this, which will be useful when we will start calling avic_vcpu_load/avic_vcpu_put when the avic inhibition state changes. Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-14-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 89e666e5a707..bf8cb1021d11 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9239,12 +9239,18 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm) void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) { + bool activate; + if (!lapic_in_kernel(vcpu)) return; mutex_lock(&vcpu->kvm->arch.apicv_update_lock); - vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm); + activate = kvm_apicv_activated(vcpu->kvm); + if (vcpu->arch.apicv_active == activate) + goto out; + + vcpu->arch.apicv_active = activate; kvm_apic_update_apicv(vcpu); static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu); @@ -9257,6 +9263,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) if (!vcpu->arch.apicv_active) kvm_make_request(KVM_REQ_EVENT, vcpu); +out: mutex_unlock(&vcpu->kvm->arch.apicv_update_lock); } EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); -- cgit From bf5f6b9d7ad6b88df15d691d9759f9a397488c7e Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:49 +0300 Subject: KVM: SVM: move check for kvm_vcpu_apicv_active outside of avic_vcpu_{put|load} No functional change intended. Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-15-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 10 ++++------ arch/x86/kvm/svm/svm.c | 7 +++++-- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 1def54c26259..e7728b16a46f 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -940,9 +940,6 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) int h_physical_id = kvm_cpu_get_apicid(cpu); struct vcpu_svm *svm = to_svm(vcpu); - if (!kvm_vcpu_apicv_active(vcpu)) - return; - /* * Since the host physical APIC id is 8 bits, * we can support host APIC ID upto 255. 
@@ -970,9 +967,6 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) u64 entry; struct vcpu_svm *svm = to_svm(vcpu); - if (!kvm_vcpu_apicv_active(vcpu)) - return; - entry = READ_ONCE(*(svm->avic_physical_id_cache)); if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK) avic_update_iommu_vcpu_affinity(vcpu, -1, 0); @@ -989,6 +983,10 @@ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) struct vcpu_svm *svm = to_svm(vcpu); svm->avic_is_running = is_run; + + if (!kvm_vcpu_apicv_active(vcpu)) + return; + if (is_run) avic_vcpu_load(vcpu, vcpu->cpu); else diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4e76e78c98ec..114c7e29467b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1481,12 +1481,15 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) sd->current_vmcb = svm->vmcb; indirect_branch_prediction_barrier(); } - avic_vcpu_load(vcpu, cpu); + if (kvm_vcpu_apicv_active(vcpu)) + avic_vcpu_load(vcpu, cpu); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) { - avic_vcpu_put(vcpu); + if (kvm_vcpu_apicv_active(vcpu)) + avic_vcpu_put(vcpu); + svm_prepare_host_switch(vcpu); ++vcpu->stat.host_state_reload; -- cgit From df7e4827c5490a6a0cc41341497f5267712511cf Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:50 +0300 Subject: KVM: SVM: call avic_vcpu_load/avic_vcpu_put when enabling/disabling AVIC Currently it is possible to have the following scenario: 1. AVIC is disabled by svm_refresh_apicv_exec_ctrl 2. svm_vcpu_blocking calls avic_vcpu_put which does nothing 3. svm_vcpu_unblocking enables the AVIC (due to KVM_REQ_APICV_UPDATE) and then calls avic_vcpu_load 4. warning is triggered in avic_vcpu_load since AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK was never cleared. While it is possible to just remove the warning, it seems to be more robust to fully disable/enable AVIC in svm_refresh_apicv_exec_ctrl by calling avic_vcpu_load/avic_vcpu_put. Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-16-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index e7728b16a46f..01c0e83e1b71 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -651,6 +651,11 @@ void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) } vmcb_mark_dirty(vmcb, VMCB_AVIC); + if (activated) + avic_vcpu_load(vcpu, vcpu->cpu); + else + avic_vcpu_put(vcpu); + svm_set_pi_irte_mode(vcpu, activated); } -- cgit From 73143035c214f3b0ac5cc2393197f828adeefc1e Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:51 +0300 Subject: KVM: SVM: AVIC: drop unsupported AVIC base relocation code APIC base relocation is not supported anyway and won't work correctly, so just drop the code that handles it and keep the AVIC MMIO bar at the default APIC base. 
Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-17-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 2 ++ arch/x86/kvm/svm/svm.c | 7 ------- arch/x86/kvm/svm/svm.h | 6 ------ 3 files changed, 2 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 01c0e83e1b71..8052d92069e0 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -197,6 +197,8 @@ void avic_init_vmcb(struct vcpu_svm *svm) vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK; vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT; + vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE & VMCB_AVIC_APIC_BAR_MASK; + if (kvm_apicv_activated(svm->vcpu.kvm)) vmcb->control.int_ctl |= AVIC_ENABLE_MASK; else diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 114c7e29467b..7b58e445a967 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1314,9 +1314,6 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) svm->virt_spec_ctrl = 0; init_vmcb(vcpu); - - if (kvm_vcpu_apicv_active(vcpu) && !init_event) - avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE); } void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb) @@ -2967,10 +2964,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->msr_decfg = data; break; } - case MSR_IA32_APICBASE: - if (kvm_vcpu_apicv_active(vcpu)) - avic_update_vapic_bar(to_svm(vcpu), data); - fallthrough; default: return kvm_set_msr_common(vcpu, msr); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index aae851762b59..524d943f3efc 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -503,12 +503,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops; #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL -static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data) -{ - svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK; - vmcb_mark_dirty(svm->vmcb, VMCB_AVIC); -} - static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); -- cgit From f95937ccf5bd5e0a6bbac2b8e65a87982ffae403 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Mon, 2 Aug 2021 16:56:29 +0000 Subject: KVM: stats: Support linear and logarithmic histogram statistics Add new types of KVM stats: linear and logarithmic histograms. Histograms are very useful for observing the value distribution of time- or size-related stats. 
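To see what a logarithmic histogram records, consider how a sample is bucketed: bucket 0 counts zero values and bucket i counts values in [2^(i-1), 2^i). A standalone sketch of that bucketing (an editor's illustration built on the same fls64-style computation; the names here are hypothetical, not the kernel's macro):

#include <stdint.h>
#include <stdio.h>

#define HIST_BUCKETS 64

static void log_hist_update(uint64_t *hist, size_t nbuckets, uint64_t value)
{
	/* index = position of the highest set bit, 1-based; 0 for value 0 */
	size_t index = value ? 64 - __builtin_clzll(value) : 0;

	if (index >= nbuckets)
		index = nbuckets - 1;	/* clamp overflows into the last bucket */
	hist[index]++;
}

int main(void)
{
	uint64_t hist[HIST_BUCKETS] = { 0 };

	log_hist_update(hist, HIST_BUCKETS, 1500);	/* lands in bucket 11: [1024, 2048) */
	printf("bucket 11 = %llu\n", (unsigned long long)hist[11]);
	return 0;
}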
Signed-off-by: Jing Zhang Message-Id: <20210802165633.1866976-2-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/guest.c | 4 ---- arch/mips/kvm/mips.c | 4 ---- arch/powerpc/kvm/book3s.c | 4 ---- arch/powerpc/kvm/booke.c | 4 ---- arch/s390/kvm/kvm-s390.c | 4 ---- arch/x86/kvm/x86.c | 4 ---- 6 files changed, 24 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 1dfb83578277..5188184d25d0 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -31,8 +31,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -52,8 +50,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), STATS_DESC_COUNTER(VCPU, exits) }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = KVM_STATS_NAME_SIZE, diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index af9dd029a4e1..75c6f264c626 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -41,8 +41,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -85,8 +83,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, vz_cpucfg_exits), #endif }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = KVM_STATS_NAME_SIZE, diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 79833f78d1da..5cc6e90095b0 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -43,8 +43,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_ICOUNTER(VM, num_2M_pages), STATS_DESC_ICOUNTER(VM, num_1G_pages) }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -88,8 +86,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, pthru_host), STATS_DESC_COUNTER(VCPU, pthru_bad_aff) }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = KVM_STATS_NAME_SIZE, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 551b30d84aee..5ed6c235e059 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -41,8 +41,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_ICOUNTER(VM, num_2M_pages), STATS_DESC_ICOUNTER(VM, num_1G_pages) }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -79,8 +77,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, pthru_host), STATS_DESC_COUNTER(VCPU, pthru_bad_aff) }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = 
KVM_STATS_NAME_SIZE, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 02574d7b3612..4dc7e966a720 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -66,8 +66,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_COUNTER(VM, inject_service_signal), STATS_DESC_COUNTER(VM, inject_virtio) }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -174,8 +172,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, instruction_diagnose_other), STATS_DESC_COUNTER(VCPU, pfault_sync) }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = KVM_STATS_NAME_SIZE, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bf8cb1021d11..9425589f34ca 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -238,8 +238,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_PCOUNTER(VM, max_mmu_rmap_size), STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions) }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -279,8 +277,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, directed_yield_successful), STATS_DESC_ICOUNTER(VCPU, guest_mode) }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = KVM_STATS_NAME_SIZE, -- cgit From 87bcc5fa092f82a9890f9e73e4f4c7016ef64049 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Mon, 2 Aug 2021 16:56:32 +0000 Subject: KVM: stats: Add halt_wait_ns stats for all architectures Add a simple stat, halt_wait_ns, to record the time a VCPU has spent waiting, for all architectures (not just powerpc). 
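The per-architecture change then reduces to the accounting pattern sketched here (kernel-internal APIs assumed; it mirrors the book3s_hv.c hunk below):

ktime_t start_wait = ktime_get();

/* ... the vCPU blocks until a wakeup event arrives ... */

vcpu->stat.generic.halt_wait_ns +=
	ktime_to_ns(ktime_get()) - ktime_to_ns(start_wait);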
Signed-off-by: Jing Zhang Message-Id: <20210802165633.1866976-5-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/kvm/book3s.c | 1 - arch/powerpc/kvm/book3s_hv.c | 2 +- arch/powerpc/kvm/booke.c | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9f52f282b1aa..4931d03e5799 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -103,7 +103,6 @@ struct kvm_vcpu_stat { u64 emulated_inst_exits; u64 dec_exits; u64 ext_intr_exits; - u64 halt_wait_ns; u64 halt_successful_wait; u64 dbell_exits; u64 gdbell_exits; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 5cc6e90095b0..b785f6772391 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -69,7 +69,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, emulated_inst_exits), STATS_DESC_COUNTER(VCPU, dec_exits), STATS_DESC_COUNTER(VCPU, ext_intr_exits), - STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns), STATS_DESC_COUNTER(VCPU, halt_successful_wait), STATS_DESC_COUNTER(VCPU, dbell_exits), STATS_DESC_COUNTER(VCPU, gdbell_exits), diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1d1fcc290fca..813ca155561b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4144,7 +4144,7 @@ out: /* Attribute wait time */ if (do_sleep) { - vc->runner->stat.halt_wait_ns += + vc->runner->stat.generic.halt_wait_ns += ktime_to_ns(cur) - ktime_to_ns(start_wait); /* Attribute failed poll time */ if (vc->halt_poll_ns) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 5ed6c235e059..977801c83aff 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -67,7 +67,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, emulated_inst_exits), STATS_DESC_COUNTER(VCPU, dec_exits), STATS_DESC_COUNTER(VCPU, ext_intr_exits), - STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns), STATS_DESC_COUNTER(VCPU, halt_successful_wait), STATS_DESC_COUNTER(VCPU, dbell_exits), STATS_DESC_COUNTER(VCPU, gdbell_exits), -- cgit From 8ccba534a1a5c6565220c81113d6157571f380cb Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Mon, 2 Aug 2021 16:56:33 +0000 Subject: KVM: stats: Add halt polling related histogram stats Add three log histogram stats to record the distribution of time spent on successful polling, failed polling and VCPU wait. halt_poll_success_hist: Distribution of time spent on a successful poll. halt_poll_fail_hist: Distribution of time spent on a failed poll. halt_wait_hist: Distribution of time a VCPU has spent waiting. 
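The attribution logic, condensed from the book3s_hv.c hunk that follows (a sketch; kernel-internal stats structures assumed, and poll_ns/wait_ns stand for the ktime deltas computed in the real code):

if (do_sleep) {
	/* We really blocked: account the wait, plus the poll that failed. */
	KVM_STATS_LOG_HIST_UPDATE(stat->generic.halt_wait_hist, wait_ns);
	if (polled)
		KVM_STATS_LOG_HIST_UPDATE(stat->generic.halt_poll_fail_hist, poll_ns);
} else if (polled) {
	/* The poll succeeded, so no wait happened at all. */
	KVM_STATS_LOG_HIST_UPDATE(stat->generic.halt_poll_success_hist, poll_ns);
}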
Signed-off-by: Jing Zhang Message-Id: <20210802165633.1866976-6-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_hv.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 813ca155561b..6d63c8e6d4f0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4146,17 +4146,29 @@ out: if (do_sleep) { vc->runner->stat.generic.halt_wait_ns += ktime_to_ns(cur) - ktime_to_ns(start_wait); + KVM_STATS_LOG_HIST_UPDATE( + vc->runner->stat.generic.halt_wait_hist, + ktime_to_ns(cur) - ktime_to_ns(start_wait)); /* Attribute failed poll time */ - if (vc->halt_poll_ns) + if (vc->halt_poll_ns) { vc->runner->stat.generic.halt_poll_fail_ns += ktime_to_ns(start_wait) - ktime_to_ns(start_poll); + KVM_STATS_LOG_HIST_UPDATE( + vc->runner->stat.generic.halt_poll_fail_hist, + ktime_to_ns(start_wait) - + ktime_to_ns(start_poll)); + } } else { /* Attribute successful poll time */ - if (vc->halt_poll_ns) + if (vc->halt_poll_ns) { vc->runner->stat.generic.halt_poll_success_ns += ktime_to_ns(cur) - ktime_to_ns(start_poll); + KVM_STATS_LOG_HIST_UPDATE( + vc->runner->stat.generic.halt_poll_success_hist, + ktime_to_ns(cur) - ktime_to_ns(start_poll)); + } } /* Adjust poll time */ -- cgit From 4293ddb788c1a98bdfa6479bcfd63ad5ce0a5ce6 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 2 Aug 2021 21:46:05 -0700 Subject: KVM: x86/mmu: Remove redundant spte present check in mmu_set_spte Drop an unnecessary is_shadow_present_pte() check when updating the rmaps after installing a non-MMIO SPTE. set_spte() is used only to create shadow-present SPTEs, e.g. MMIO SPTEs are handled early on, mmu_set_spte() runs with mmu_lock held for write, i.e. the SPTE can't be zapped between writing the SPTE and updating the rmaps. Opportunistically combine the "new SPTE" logic for large pages and rmaps. No functional change intended. 
Suggested-by: Ben Gardon Reviewed-by: David Matlack Reviewed-by: Ben Gardon Reviewed-by: Sean Christopherson Signed-off-by: Mingwei Zhang Message-Id: <20210803044607.599629-2-mizhang@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 8568ae42e867..2f6458bca65d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2776,17 +2776,13 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, pgprintk("%s: setting spte %llx\n", __func__, *sptep); trace_kvm_mmu_set_spte(level, gfn, sptep); - if (!was_rmapped && is_large_pte(*sptep)) - ++vcpu->kvm->stat.lpages; - if (is_shadow_present_pte(*sptep)) { - if (!was_rmapped) { - rmap_count = rmap_add(vcpu, sptep, gfn); - if (rmap_count > vcpu->kvm->stat.max_mmu_rmap_size) - vcpu->kvm->stat.max_mmu_rmap_size = rmap_count; - if (rmap_count > RMAP_RECYCLE_THRESHOLD) - rmap_recycle(vcpu, sptep, gfn); - } + if (!was_rmapped) { + if (is_large_pte(*sptep)) + ++vcpu->kvm->stat.lpages; + rmap_count = rmap_add(vcpu, sptep, gfn); + if (rmap_count > RMAP_RECYCLE_THRESHOLD) + rmap_recycle(vcpu, sptep, gfn); } return ret; -- cgit From 088acd23526647844aec1c39db4ad02552c86c7b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 2 Aug 2021 21:46:06 -0700 Subject: KVM: x86/mmu: Avoid collision with !PRESENT SPTEs in TDP MMU lpage stats Factor in whether or not the old/new SPTEs are shadow-present when adjusting the large page stats in the TDP MMU. A modified MMIO SPTE can toggle the page size bit, as bit 7 is used to store the MMIO generation, i.e. is_large_pte() can get a false positive when called on a MMIO SPTE. Ditto for nuking SPTEs with REMOVED_SPTE, which sets bit 7 in its magic value. Opportunistically move the logic below the check to verify at least one of the old/new SPTEs is shadow present. Use is/was_leaf even though is/was_present would suffice. The code generation is roughly equivalent since all flags need to be computed prior to the code in question, and using the *_leaf flags will minimize the diff in a future enhancement to account all pages, i.e. will change the check to "is_leaf != was_leaf". 
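The collision is easy to reproduce in isolation, since bit 7 is both the page-size (PS) bit of a leaf paging entry and part of the MMIO-generation encoding; a standalone illustration (the SPTE value here is hypothetical, chosen only to set bit 7, and is not KVM's exact MMIO encoding):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PT_PAGE_SIZE_MASK (1ULL << 7)	/* PS bit of a leaf paging entry */

static bool is_large_pte(uint64_t pte)
{
	return pte & PT_PAGE_SIZE_MASK;
}

int main(void)
{
	/* A non-present MMIO SPTE whose generation bits happen to include bit 7. */
	uint64_t mmio_spte = 1ULL << 7;

	/* No page is mapped, yet a naive large-page test fires. */
	printf("is_large_pte(mmio_spte) = %d\n", is_large_pte(mmio_spte));
	return 0;
}

Gating the stat update on was_leaf/is_leaf (i.e. shadow-present and last-level) filters these false positives out.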
Reviewed-by: David Matlack Reviewed-by: Ben Gardon Fixes: 1699f65c8b65 ("kvm/x86: Fix 'lpages' kvm stat for TDM MMU") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Mingwei Zhang Message-Id: <20210803044607.599629-3-mizhang@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index fb1b2dc7a6d1..85f69558b490 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -412,6 +412,7 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, bool was_leaf = was_present && is_last_spte(old_spte, level); bool is_leaf = is_present && is_last_spte(new_spte, level); bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); + bool was_large, is_large; WARN_ON(level > PT64_ROOT_MAX_LEVEL); WARN_ON(level < PG_LEVEL_4K); @@ -445,13 +446,6 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte); - if (is_large_pte(old_spte) != is_large_pte(new_spte)) { - if (is_large_pte(old_spte)) - atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages); - else - atomic64_add(1, (atomic64_t*)&kvm->stat.lpages); - } - /* * The only times a SPTE should be changed from a non-present to * non-present state is when an MMIO entry is installed/modified/ @@ -477,6 +471,18 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, return; } + /* + * Update large page stats if a large page is being zapped, created, or + * is replacing an existing shadow page. + */ + was_large = was_leaf && is_large_pte(old_spte); + is_large = is_leaf && is_large_pte(new_spte); + if (was_large != is_large) { + if (was_large) - atomic64_sub(1, (atomic64_t *)&kvm->stat.lpages); + else + atomic64_add(1, (atomic64_t *)&kvm->stat.lpages); + } if (was_leaf && is_dirty_spte(old_spte) && (!is_present || !is_dirty_spte(new_spte) || pfn_changed)) -- cgit From 71f51d2c3253645ccff69d6fa3a870f47005f0b3 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 2 Aug 2021 21:46:07 -0700 Subject: KVM: x86/mmu: Add detailed page size stats Existing KVM code tracks the number of large pages regardless of their sizes. Therefore, when a large page of 1GB (or larger) is adopted, the information becomes less useful because lpages counts a mix of 1G and 2M pages. So remove lpages, since it is easy for user space to aggregate the info. Instead, provide comprehensive page stats for all sizes from 4K to 512G. 
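Aggregating the new per-size counters back into the old mixed view, or into total mapped bytes, is indeed a few lines of userspace arithmetic; a sketch with made-up sample values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical snapshot of the new per-size stats. */
	uint64_t pages_4k = 123456, pages_2m = 789, pages_1g = 2;

	uint64_t lpages = pages_2m + pages_1g;	/* the old mixed counter */
	uint64_t bytes = pages_4k * (4ULL << 10) +
			 pages_2m * (2ULL << 20) +
			 pages_1g * (1ULL << 30);

	printf("lpages=%llu mapped=%llu bytes\n",
	       (unsigned long long)lpages, (unsigned long long)bytes);
	return 0;
}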
Suggested-by: Ben Gardon Reviewed-by: David Matlack Reviewed-by: Ben Gardon Signed-off-by: Mingwei Zhang Cc: Jing Zhang Cc: David Matlack Cc: Sean Christopherson Message-Id: <20210803044607.599629-4-mizhang@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 9 ++++++++- arch/x86/kvm/mmu.h | 4 ++++ arch/x86/kvm/mmu/mmu.c | 32 ++++++++++++++++---------------- arch/x86/kvm/mmu/tdp_mmu.c | 15 ++------------- arch/x86/kvm/x86.c | 4 +++- 5 files changed, 33 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 28c85c80e831..4c4983a9378c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1214,7 +1214,14 @@ struct kvm_vm_stat { u64 mmu_recycled; u64 mmu_cache_miss; u64 mmu_unsync; - u64 lpages; + union { + struct { + atomic64_t pages_4k; + atomic64_t pages_2m; + atomic64_t pages_1g; + }; + atomic64_t pages[KVM_NR_PAGE_SIZES]; + }; u64 nx_lpage_splits; u64 max_mmu_page_hash_collisions; u64 max_mmu_rmap_size; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 59e831a8ab9d..e9688a9f7b57 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -261,4 +261,8 @@ kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, int level) return __kvm_mmu_slot_lpages(slot, slot->npages, level); } +static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count) +{ + atomic64_add(count, &kvm->stat.pages[level - 1]); +} #endif diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 2f6458bca65d..54cb15e4b550 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -604,10 +604,11 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) * state bits, it is used to clear the last level sptep. * Returns the old PTE. 
*/ -static u64 mmu_spte_clear_track_bits(u64 *sptep) +static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep) { kvm_pfn_t pfn; u64 old_spte = *sptep; + int level = sptep_to_sp(sptep)->role.level; if (!spte_has_volatile_bits(old_spte)) __update_clear_spte_fast(sptep, 0ull); @@ -617,6 +618,8 @@ static u64 mmu_spte_clear_track_bits(u64 *sptep) if (!is_shadow_present_pte(old_spte)) return old_spte; + kvm_update_page_stats(kvm, level, -1); + pfn = spte_to_pfn(old_spte); /* @@ -1001,14 +1004,15 @@ static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) } } -static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep) +static void pte_list_remove(struct kvm *kvm, struct kvm_rmap_head *rmap_head, + u64 *sptep) { - mmu_spte_clear_track_bits(sptep); + mmu_spte_clear_track_bits(kvm, sptep); __pte_list_remove(sptep, rmap_head); } /* Return true if rmap existed, false otherwise */ -static bool pte_list_destroy(struct kvm_rmap_head *rmap_head) +static bool pte_list_destroy(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc, *next; int i; @@ -1017,7 +1021,7 @@ static bool pte_list_destroy(struct kvm_rmap_head *rmap_head) return false; if (!(rmap_head->val & 1)) { - mmu_spte_clear_track_bits((u64 *)rmap_head->val); + mmu_spte_clear_track_bits(kvm, (u64 *)rmap_head->val); goto out; } @@ -1025,7 +1029,7 @@ static bool pte_list_destroy(struct kvm_rmap_head *rmap_head) for (; desc; desc = next) { for (i = 0; i < desc->spte_count; i++) - mmu_spte_clear_track_bits(desc->sptes[i]); + mmu_spte_clear_track_bits(kvm, desc->sptes[i]); next = desc->more; mmu_free_pte_list_desc(desc); } @@ -1188,7 +1192,7 @@ out: static void drop_spte(struct kvm *kvm, u64 *sptep) { - u64 old_spte = mmu_spte_clear_track_bits(sptep); + u64 old_spte = mmu_spte_clear_track_bits(kvm, sptep); if (is_shadow_present_pte(old_spte)) rmap_remove(kvm, sptep); @@ -1200,7 +1204,6 @@ static bool __drop_large_spte(struct kvm *kvm, u64 *sptep) if (is_large_pte(*sptep)) { WARN_ON(sptep_to_sp(sptep)->role.level == PG_LEVEL_4K); drop_spte(kvm, sptep); - --kvm->stat.lpages; return true; } @@ -1450,7 +1453,7 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { - return pte_list_destroy(rmap_head); + return pte_list_destroy(kvm, rmap_head); } static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, @@ -1481,13 +1484,13 @@ restart: need_flush = 1; if (pte_write(pte)) { - pte_list_remove(rmap_head, sptep); + pte_list_remove(kvm, rmap_head, sptep); goto restart; } else { new_spte = kvm_mmu_changed_pte_notifier_make_spte( *sptep, new_pfn); - mmu_spte_clear_track_bits(sptep); + mmu_spte_clear_track_bits(kvm, sptep); mmu_spte_set(sptep, new_spte); } } @@ -2292,8 +2295,6 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, if (is_shadow_present_pte(pte)) { if (is_last_spte(pte, sp->role.level)) { drop_spte(kvm, spte); - if (is_large_pte(pte)) - --kvm->stat.lpages; } else { child = to_shadow_page(pte & PT64_BASE_ADDR_MASK); drop_parent_pte(child, spte); @@ -2778,8 +2779,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, trace_kvm_mmu_set_spte(level, gfn, sptep); if (!was_rmapped) { - if (is_large_pte(*sptep)) - ++vcpu->kvm->stat.lpages; + kvm_update_page_stats(vcpu->kvm, level, 1); rmap_count = rmap_add(vcpu, sptep, gfn); if (rmap_count > RMAP_RECYCLE_THRESHOLD) rmap_recycle(vcpu, sptep, gfn); @@ -5809,7 +5809,7 @@ 
restart: if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn, pfn, PG_LEVEL_NUM)) { - pte_list_remove(rmap_head, sptep); + pte_list_remove(kvm, rmap_head, sptep); if (kvm_available_flush_tlb_with_range()) kvm_flush_remote_tlbs_with_address(kvm, sp->gfn, diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 85f69558b490..db636250972a 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -412,7 +412,6 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, bool was_leaf = was_present && is_last_spte(old_spte, level); bool is_leaf = is_present && is_last_spte(new_spte, level); bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); - bool was_large, is_large; WARN_ON(level > PT64_ROOT_MAX_LEVEL); WARN_ON(level < PG_LEVEL_4K); @@ -471,18 +470,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, return; } - /* - * Update large page stats if a large page is being zapped, created, or - * is replacing an existing shadow page. - */ - was_large = was_leaf && is_large_pte(old_spte); - is_large = is_leaf && is_large_pte(new_spte); - if (was_large != is_large) { - if (was_large) - atomic64_sub(1, (atomic64_t *)&kvm->stat.lpages); - else - atomic64_add(1, (atomic64_t *)&kvm->stat.lpages); - } + if (is_leaf != was_leaf) + kvm_update_page_stats(kvm, level, is_leaf ? 1 : -1); if (was_leaf && is_dirty_spte(old_spte) && (!is_present || !is_dirty_spte(new_spte) || pfn_changed)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9425589f34ca..4e07cae56636 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -233,7 +233,9 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_COUNTER(VM, mmu_recycled), STATS_DESC_COUNTER(VM, mmu_cache_miss), STATS_DESC_ICOUNTER(VM, mmu_unsync), - STATS_DESC_ICOUNTER(VM, lpages), + STATS_DESC_ICOUNTER(VM, pages_4k), + STATS_DESC_ICOUNTER(VM, pages_2m), + STATS_DESC_ICOUNTER(VM, pages_1g), STATS_DESC_ICOUNTER(VM, nx_lpage_splits), STATS_DESC_PCOUNTER(VM, max_mmu_rmap_size), STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions) -- cgit From 9653f2da7522c5e762e2edd2beb53170669d0a2b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 15:45:54 -0700 Subject: KVM: x86/mmu: Drop 'shared' param from tdp_mmu_link_page() Drop @shared from tdp_mmu_link_page() and hardcode it to work for mmu_lock being held for read. The helper has exactly one caller and in all likelihood will only ever have exactly one caller. Even if KVM adds a path to install translations without an initiating page fault, odds are very, very good that the path will just be a wrapper to the "page fault" handler (both SNP and TDX RFCs propose patches to do exactly that). No functional change intended. 
Cc: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20210810224554.2978735-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index db636250972a..64ccfc1fa553 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -255,26 +255,17 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, * * @kvm: kvm instance * @sp: the new page - * @shared: This operation may not be running under the exclusive use of - * the MMU lock and the operation must synchronize with other - * threads that might be adding or removing pages. * @account_nx: This page replaces a NX large page and should be marked for * eventual reclaim. */ static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp, - bool shared, bool account_nx) + bool account_nx) { - if (shared) - spin_lock(&kvm->arch.tdp_mmu_pages_lock); - else - lockdep_assert_held_write(&kvm->mmu_lock); - + spin_lock(&kvm->arch.tdp_mmu_pages_lock); list_add(&sp->link, &kvm->arch.tdp_mmu_pages); if (account_nx) account_huge_nx_page(kvm, sp); - - if (shared) - spin_unlock(&kvm->arch.tdp_mmu_pages_lock); + spin_unlock(&kvm->arch.tdp_mmu_pages_lock); } /** @@ -1062,7 +1053,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, !shadow_accessed_mask); if (tdp_mmu_set_spte_atomic_no_dirty_log(vcpu->kvm, &iter, new_spte)) { - tdp_mmu_link_page(vcpu->kvm, sp, true, + tdp_mmu_link_page(vcpu->kvm, sp, huge_page_disallowed && req_level >= iter.level); -- cgit From 7a4bca85b23f7a573da61f161dfbf8b00e9e2955 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Wed, 11 Aug 2021 15:29:22 +0300 Subject: KVM: SVM: split svm_handle_invalid_exit Split the check for having a vmexit handler to svm_check_exit_valid, and make svm_handle_invalid_exit only handle a vmexit that is already not valid. 
Signed-off-by: Maxim Levitsky Message-Id: <20210811122927.900604-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7b58e445a967..e45259177009 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3236,12 +3236,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) "excp_to:", save->last_excp_to); } -static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) +static bool svm_check_exit_valid(struct kvm_vcpu *vcpu, u64 exit_code) { - if (exit_code < ARRAY_SIZE(svm_exit_handlers) && - svm_exit_handlers[exit_code]) - return 0; + return (exit_code < ARRAY_SIZE(svm_exit_handlers) && + svm_exit_handlers[exit_code]); +} +static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) +{ vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code); dump_vmcb(vcpu); vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -3249,14 +3251,13 @@ static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) vcpu->run->internal.ndata = 2; vcpu->run->internal.data[0] = exit_code; vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; - - return -EINVAL; + return 0; } int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code) { - if (svm_handle_invalid_exit(vcpu, exit_code)) - return 0; + if (!svm_check_exit_valid(vcpu, exit_code)) + return svm_handle_invalid_exit(vcpu, exit_code); #ifdef CONFIG_RETPOLINE if (exit_code == SVM_EXIT_MSR) -- cgit From 61e5f69ef08379cdc74e8f15d3770976ed48480a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Wed, 11 Aug 2021 15:29:26 +0300 Subject: KVM: x86: implement KVM_GUESTDBG_BLOCKIRQ KVM_GUESTDBG_BLOCKIRQ will allow KVM to block all interrupts while running. This change is mostly intended for more robust single stepping of the guest and it has the following benefits when enabled: * Resuming from a breakpoint is much more reliable. When resuming execution from a breakpoint, with interrupts enabled, more often than not, KVM would inject an interrupt and make the CPU jump immediately to the interrupt handler and eventually return to the breakpoint, to trigger it again. From the user point of view it looks like the CPU never executed a single instruction and in some cases that can even prevent forward progress, for example, when the breakpoint is placed by an automated script (e.g lx-symbols), which does something in response to the breakpoint and then continues the guest automatically. If the script execution takes enough time for another interrupt to arrive, the guest will be stuck on the same breakpoint RIP forever. * Normal single stepping is much more predictable, since it won't land the debugger into an interrupt handler. * RFLAGS.TF has less chance to be leaked to the guest: We set that flag behind the guest's back to do single stepping but if single step lands us into an interrupt/exception handler it will be leaked to the guest in the form of being pushed to the stack. This doesn't completely eliminate this problem as exceptions can still happen, but at least this reduces the chances of this happening. 
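From userspace, the new flag is simply ORed into the KVM_SET_GUEST_DEBUG control word; a minimal usage sketch (assumes vcpu_fd is an already-created vCPU file descriptor; error handling elided):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Single-step a vCPU without interrupt injection perturbing the flow. */
static int enable_quiet_single_step(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE |
		      KVM_GUESTDBG_SINGLESTEP |
		      KVM_GUESTDBG_BLOCKIRQ;	/* the new flag */

	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}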
Signed-off-by: Maxim Levitsky Message-Id: <20210811122927.900604-6-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/x86.c | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4c4983a9378c..7723865077fd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -222,7 +222,8 @@ enum x86_intercept_stage; KVM_GUESTDBG_USE_HW_BP | \ KVM_GUESTDBG_USE_SW_BP | \ KVM_GUESTDBG_INJECT_BP | \ - KVM_GUESTDBG_INJECT_DB) + KVM_GUESTDBG_INJECT_DB | \ + KVM_GUESTDBG_BLOCKIRQ) #define PFERR_PRESENT_BIT 0 diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index a6c327f8ad9e..2ef1f6513c68 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -295,6 +295,7 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW_BP 0x00020000 #define KVM_GUESTDBG_INJECT_DB 0x00040000 #define KVM_GUESTDBG_INJECT_BP 0x00080000 +#define KVM_GUESTDBG_BLOCKIRQ 0x00100000 /* for KVM_SET_GUEST_DEBUG */ struct kvm_guest_debug_arch { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4e07cae56636..1a00af1b076b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8892,6 +8892,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) can_inject = false; } + /* Don't inject interrupts if the user asked to avoid doing so */ + if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) + return 0; + /* * Finally, inject interrupt events. If an event cannot be injected * due to architectural conditions (e.g. IF=0) a window-open exit -- cgit From ec607a564f70519b340f7eb4cfc0f4a6b55285ac Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Aug 2021 07:05:58 -0400 Subject: KVM: x86: clamp host mapping level to max_level in kvm_mmu_max_mapping_level This change started as a way to make kvm_mmu_hugepage_adjust a bit simpler, but it does fix two bugs as well. One bug is in zapping collapsible PTEs. If one large page size is disallowed but not all of them, kvm_mmu_max_mapping_level will return the host mapping level and the small PTEs will be zapped up to that level. However, if e.g. 1GB pages are prohibited, we can still zap 4KB mappings and preserve the 2MB ones. This can happen for example when NX huge pages are in use. The second would happen when userspace backs guest memory with a 1GB hugepage but only assigns a subset of the page to the guest. 1GB pages would be disallowed by the memslot, but not 2MB pages. kvm_mmu_max_mapping_level() would fall through to the host_pfn_mapping_level() logic, see the 1GB hugepage, and map the whole thing into the guest. 
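Concretely, the fix clamps the host level against the memslot-derived maximum; a sketch of the scenario with KVM's level numbering (1 = 4KB, 2 = 2MB, 3 = 1GB):

/* Editor's sketch of the fixed logic, not the exact kernel code. */
static int max_mapping_level(int memslot_max_level, int host_level)
{
	/* memslot disallows 1GB:              memslot_max_level == 2 (2MB) */
	/* host backs the gfn with a 1GB page: host_level == 3        (1GB) */
	return host_level < memslot_max_level ? host_level
					      : memslot_max_level;	/* -> 2 (2MB) */
}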
Fixes: 2f57b7051fe8 ("KVM: x86/mmu: Persist gfn_lpage_is_disallowed() to max_level") Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 54cb15e4b550..bfd2705a7291 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2910,6 +2910,7 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, int max_level) { struct kvm_lpage_info *linfo; + int host_level; max_level = min(max_level, max_huge_page_level); for ( ; max_level > PG_LEVEL_4K; max_level--) { @@ -2921,7 +2922,8 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm, if (max_level == PG_LEVEL_4K) return PG_LEVEL_4K; - return host_pfn_mapping_level(kvm, gfn, pfn, slot); + host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot); + return min(host_level, max_level); } int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn, @@ -2945,17 +2947,12 @@ int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn, if (!slot) return PG_LEVEL_4K; - level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level); - if (level == PG_LEVEL_4K) - return level; - - *req_level = level = min(level, max_level); - /* * Enforce the iTLB multihit workaround after capturing the requested * level, which will be used to do precise, accurate accounting. */ - if (huge_page_disallowed) + *req_level = level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level); + if (level == PG_LEVEL_4K || huge_page_disallowed) return PG_LEVEL_4K; /* -- cgit From 746700d21fd52399c97aeb7791584bbf5426983c Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 18 Aug 2021 11:55:47 -0500 Subject: KVM: x86: Allow CPU to force vendor-specific TDP level AMD future CPUs will require a 5-level NPT if host CR4.LA57 is set. To prevent kvm_mmu_get_tdp_level() from incorrectly changing NPT level on behalf of CPUs, add a new parameter in kvm_configure_mmu() to force a fixed TDP level. 
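The two backends then differ only in the new second argument, where 0 means "no forced level" (consolidated here from the SVM and VMX hunks below for comparison):

/* SVM: host CR4.LA57 dictates the NPT level, so force it. */
kvm_configure_mmu(npt_enabled, get_max_npt_level(),
		  get_max_npt_level(), PG_LEVEL_1G);

/* VMX: no such constraint; 0 lets KVM pick the TDP level itself. */
kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
		  ept_lpage_level);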
Signed-off-by: Wei Huang Message-Id: <20210818165549.3771014-2-wei.huang2@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 ++--- arch/x86/kvm/mmu/mmu.c | 10 ++++++++-- arch/x86/kvm/svm/svm.c | 4 +++- arch/x86/kvm/vmx/vmx.c | 3 ++- 4 files changed, 15 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7723865077fd..24dac52b7bde 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -717,7 +717,6 @@ struct kvm_vcpu_arch { u64 reserved_gpa_bits; int maxphyaddr; - int max_tdp_level; /* emulate context */ @@ -1766,8 +1765,8 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); -void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level, - int tdp_huge_page_level); +void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, + int tdp_max_root_level, int tdp_huge_page_level); static inline u16 kvm_read_ldt(void) { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index bfd2705a7291..b2e68ce8b722 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -97,6 +97,7 @@ module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644); bool tdp_enabled = false; static int max_huge_page_level __read_mostly; +static int tdp_root_level __read_mostly; static int max_tdp_level __read_mostly; enum { @@ -4684,6 +4685,10 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu, static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu) { + /* tdp_root_level is architecture forced level, use it if nonzero */ + if (tdp_root_level) + return tdp_root_level; + /* Use 5-level TDP if and only if it's useful/necessary. */ if (max_tdp_level == 5 && cpuid_maxphyaddr(vcpu) <= 48) return 4; @@ -5375,10 +5380,11 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) */ } -void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level, - int tdp_huge_page_level) +void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, + int tdp_max_root_level, int tdp_huge_page_level) { tdp_enabled = enable_tdp; + tdp_root_level = tdp_forced_root_level; max_tdp_level = tdp_max_root_level; /* diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e45259177009..bcffae2e36d2 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1013,7 +1013,9 @@ static __init int svm_hardware_setup(void) if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; - kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G); + /* Force VM NPT level equal to the host's max NPT level */ + kvm_configure_mmu(npt_enabled, get_max_npt_level(), + get_max_npt_level(), PG_LEVEL_1G); pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); /* Note, SEV setup consumes npt_enabled. 
*/ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cd913100b300..fada1055f325 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7820,7 +7820,8 @@ static __init int hardware_setup(void) ept_lpage_level = PG_LEVEL_2M; else ept_lpage_level = PG_LEVEL_4K; - kvm_configure_mmu(enable_ept, vmx_get_max_tdp_level(), ept_lpage_level); + kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(), + ept_lpage_level); /* * Only enable PML when hardware supports PML feature, and both EPT -- cgit From cb0f722aff6e9ba970a9fee9263c7821bbe811de Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 18 Aug 2021 11:55:48 -0500 Subject: KVM: x86/mmu: Support shadowing NPT when 5-level paging is enabled in host When the 5-level page table CPU flag is set in the host, but the guest has CR4.LA57=0 (including the case of a 32-bit guest), the top level of the shadow NPT page tables will be fixed, consisting of one pointer to a lower-level table and 511 non-present entries. Extend the existing code that creates the fixed PML4 or PDP table, to provide a fixed PML5 table if needed. This is not needed on EPT because the number of layers in the tables is specified in the EPTP instead of depending on the host CR4. Suggested-by: Paolo Bonzini Signed-off-by: Wei Huang Message-Id: <20210818165549.3771014-3-wei.huang2@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu/mmu.c | 53 ++++++++++++++++++++++++++++------------- 2 files changed, 38 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 24dac52b7bde..0532f4e84308 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -441,6 +441,7 @@ struct kvm_mmu { u64 *pae_root; u64 *pml4_root; + u64 *pml5_root; /* * check zero bits on shadow page table entries, these diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index b2e68ce8b722..c04e30f6e0db 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3536,15 +3536,22 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * the shadow page table may be a PAE or a long mode page table. 
*/ pm_mask = PT_PRESENT_MASK | shadow_me_mask; - if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) { + if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL) { pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; if (WARN_ON_ONCE(!mmu->pml4_root)) { r = -EIO; goto out_unlock; } - mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask; + + if (mmu->shadow_root_level == PT64_ROOT_5LEVEL) { + if (WARN_ON_ONCE(!mmu->pml5_root)) { + r = -EIO; + goto out_unlock; + } + mmu->pml5_root[0] = __pa(mmu->pml4_root) | pm_mask; + } } for (i = 0; i < 4; ++i) { @@ -3563,7 +3570,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) mmu->pae_root[i] = root | pm_mask; } - if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) + if (mmu->shadow_root_level == PT64_ROOT_5LEVEL) + mmu->root_hpa = __pa(mmu->pml5_root); + else if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) mmu->root_hpa = __pa(mmu->pml4_root); else mmu->root_hpa = __pa(mmu->pae_root); @@ -3579,7 +3588,9 @@ out_unlock: static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; - u64 *pml4_root, *pae_root; + u64 *pml5_root = NULL; + u64 *pml4_root = NULL; + u64 *pae_root; /* * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP @@ -3591,21 +3602,15 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) mmu->shadow_root_level < PT64_ROOT_4LEVEL) return 0; - /* - * This mess only works with 4-level paging and needs to be updated to - * work with 5-level paging. - */ - if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL)) - return -EIO; - - if (mmu->pae_root && mmu->pml4_root) + if (mmu->pae_root && mmu->pml4_root && mmu->pml5_root) return 0; /* * The special roots should always be allocated in concert. Yell and * bail if KVM ends up in a state where only one of the roots is valid. */ - if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root)) + if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root || + mmu->pml5_root)) return -EIO; /* @@ -3616,16 +3621,31 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) if (!pae_root) return -ENOMEM; +#ifdef CONFIG_X86_64 pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); - if (!pml4_root) { - free_page((unsigned long)pae_root); - return -ENOMEM; + if (!pml4_root) + goto err_pml4; + + if (mmu->shadow_root_level > PT64_ROOT_4LEVEL) { + pml5_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); + if (!pml5_root) + goto err_pml5; } +#endif mmu->pae_root = pae_root; mmu->pml4_root = pml4_root; + mmu->pml5_root = pml5_root; return 0; + +#ifdef CONFIG_X86_64 +err_pml5: + free_page((unsigned long)pml4_root); +err_pml4: + free_page((unsigned long)pae_root); + return -ENOMEM; +#endif } void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) @@ -5461,6 +5481,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu) set_memory_encrypted((unsigned long)mmu->pae_root, 1); free_page((unsigned long)mmu->pae_root); free_page((unsigned long)mmu->pml4_root); + free_page((unsigned long)mmu->pml5_root); } static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) -- cgit From 43e540cc9f2ca12a2364ddf64e5ef929a546550d Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 18 Aug 2021 11:55:49 -0500 Subject: KVM: SVM: Add 5-level page table support for SVM When the 5-level page table is enabled on host OS, the nested page table for guest VMs must use 5-level as well. Update get_npt_level() function to reflect this requirement. In the meanwhile, remove the code that prevents kvm-amd driver from being loaded when 5-level page table is detected. 
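Together with the fixed PML5 table added by the previous patch, this keeps guests with CR4.LA57=0 runnable under 5-level NPT. A toy userspace model of that fixed top level follows (illustrative only; a real PML5 is page-aligned and holds physical addresses, neither of which this sketch attempts):

  #include <stdint.h>
  #include <stdio.h>

  #define PT_PRESENT 0x1ULL

  int main(void)
  {
          static uint64_t pml4[512];
          static uint64_t pml5[512];
          int i, present = 0;

          /* one pointer to the lower-level table, 511 non-present entries */
          pml5[0] = (uint64_t)(uintptr_t)pml4 | PT_PRESENT;

          for (i = 0; i < 512; i++)
                  present += !!(pml5[i] & PT_PRESENT);

          printf("present PML5 entries: %d\n", present);  /* prints 1 */
          return 0;
  }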
Signed-off-by: Wei Huang Message-Id: <20210818165549.3771014-4-wei.huang2@amd.com> [Tweak condition as suggested by Sean. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index bcffae2e36d2..1a70e11f0487 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -259,7 +259,7 @@ u32 svm_msrpm_offset(u32 msr) static int get_max_npt_level(void) { #ifdef CONFIG_X86_64 - return PT64_ROOT_4LEVEL; + return pgtable_l5_enabled() ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; #else return PT32E_ROOT_LEVEL; #endif @@ -460,11 +460,6 @@ static int has_svm(void) return 0; } - if (pgtable_l5_enabled()) { - pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n"); - return 0; - } - return 1; } -- cgit From 126b39368604dbf155ce6dad8781a0454477022a Mon Sep 17 00:00:00 2001 From: Huilong Deng Date: Tue, 17 Aug 2021 10:53:38 +0800 Subject: MIPS: Return true/false (not 1/0) from bool functions ./arch/mips/kernel/uprobes.c:261:8-9: WARNING: return of 0/1 in function 'arch_uprobe_skip_sstep' with return type bool ./arch/mips/kernel/uprobes.c:78:10-11: WARNING: return of 0/1 in function 'is_trap_insn' with return type bool ./arch/mips/kvm/mmu.c:489:9-10: WARNING: return of 0/1 in function 'kvm_test_age_gfn' with return type bool ./arch/mips/kvm/mmu.c:445:8-9: WARNING: return of 0/1 in function 'kvm_unmap_gfn_range' with return type bool Signed-off-by: Huilong Deng Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/uprobes.c | 10 +++++----- arch/mips/kvm/mmu.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index 6dbe4eab0a0e..9db2a6db5f62 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -75,7 +75,7 @@ bool is_trap_insn(uprobe_opcode_t *insn) case tlt_op: case tltu_op: case tne_op: - return 1; + return true; } break; @@ -87,12 +87,12 @@ bool is_trap_insn(uprobe_opcode_t *insn) case tlti_op: case tltiu_op: case tnei_op: - return 1; + return true; } break; } - return 0; + return false; } #define UPROBE_TRAP_NR ULONG_MAX @@ -254,9 +254,9 @@ unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) * See if the instruction can be emulated. * Returns true if instruction was emulated, false otherwise. * - * For now we always emulate so this function just returns 0. + * For now we always emulate so this function just returns false. 
*/ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { - return 0; + return false; } diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 6d1f68cf4edf..1bfd1b501d82 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -442,7 +442,7 @@ static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn, bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_mips_flush_gpa_pt(kvm, range->start, range->end); - return 1; + return true; } bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) @@ -486,7 +486,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); if (!gpa_pte) - return 0; + return false; return pte_young(*gpa_pte); } -- cgit From 0181f6f19c6c35b24f1516d8db22f3bbce762633 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Aug 2021 20:04:15 +0300 Subject: MIPS: mscc: ocelot: disable all switch ports by default The ocelot switch driver used to ignore ports which do not have a phy-handle property and not probe those, but this is not quite ok since it is valid to not have a phy-handle property if there is a fixed-link. It seems that checking for a phy-handle was a proxy for the proper check which is for the status, but that doesn't make a lot of sense, since the ocelot driver already iterates using for_each_available_child_of_node which skips the disabled ports, so I have no idea. Anyway, a widespread pattern in device trees is for a SoC dtsi to disable by default all hardware, and let board dts files enable what is used. So let's do that and enable only the ports with a phy-handle in the pcb120 and pcb123 device tree files. Signed-off-by: Vladimir Oltean Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/mscc/ocelot.dtsi | 11 +++++++++++ arch/mips/boot/dts/mscc/ocelot_pcb120.dts | 8 ++++++++ arch/mips/boot/dts/mscc/ocelot_pcb123.dts | 4 ++++ 3 files changed, 23 insertions(+) (limited to 'arch') diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi index 535a98284dcb..e51db651af13 100644 --- a/arch/mips/boot/dts/mscc/ocelot.dtsi +++ b/arch/mips/boot/dts/mscc/ocelot.dtsi @@ -150,36 +150,47 @@ port0: port@0 { reg = <0>; + status = "disabled"; }; port1: port@1 { reg = <1>; + status = "disabled"; }; port2: port@2 { reg = <2>; + status = "disabled"; }; port3: port@3 { reg = <3>; + status = "disabled"; }; port4: port@4 { reg = <4>; + status = "disabled"; }; port5: port@5 { reg = <5>; + status = "disabled"; }; port6: port@6 { reg = <6>; + status = "disabled"; }; port7: port@7 { reg = <7>; + status = "disabled"; }; port8: port@8 { reg = <8>; + status = "disabled"; }; port9: port@9 { reg = <9>; + status = "disabled"; }; port10: port@10 { reg = <10>; + status = "disabled"; }; }; }; diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts index 897de5025d7f..d2dc6b3d923c 100644 --- a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts +++ b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts @@ -69,40 +69,48 @@ }; &port0 { + status = "okay"; phy-handle = <&phy0>; }; &port1 { + status = "okay"; phy-handle = <&phy1>; }; &port2 { + status = "okay"; phy-handle = <&phy2>; }; &port3 { + status = "okay"; phy-handle = <&phy3>; }; &port4 { + status = "okay"; phy-handle = <&phy7>; phy-mode = "sgmii"; phys = <&serdes 4 SERDES1G(2)>; }; &port5 { + status = "okay"; phy-handle = <&phy4>; phy-mode = "sgmii"; phys = <&serdes 5 SERDES1G(5)>; }; &port6 { + status = "okay"; phy-handle = <&phy6>; 
phy-mode = "sgmii"; phys = <&serdes 6 SERDES1G(3)>; }; &port9 { + status = "okay"; phy-handle = <&phy5>; phy-mode = "sgmii"; phys = <&serdes 9 SERDES1G(4)>; diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts index ef852f382da8..7d7e638791dd 100644 --- a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts +++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts @@ -47,17 +47,21 @@ }; &port0 { + status = "okay"; phy-handle = <&phy0>; }; &port1 { + status = "okay"; phy-handle = <&phy1>; }; &port2 { + status = "okay"; phy-handle = <&phy2>; }; &port3 { + status = "okay"; phy-handle = <&phy3>; }; -- cgit From eba54cbb92d28b4f6dc1ed5f73f5187b09d82c08 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Aug 2021 20:04:16 +0300 Subject: MIPS: mscc: ocelot: mark the phy-mode for internal PHY ports The ocelot driver was converted to phylink, and that expects a valid phy_interface_t. Without a phy-mode, of_get_phy_mode returns PHY_INTERFACE_MODE_NA, which is not ideal because phylink rejects that. The ocelot driver was patched to treat PHY_INTERFACE_MODE_NA as PHY_INTERFACE_MODE_INTERNAL to work with the broken DT blobs, but we should fix the device trees and specify the phy-mode too. Signed-off-by: Vladimir Oltean Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/mscc/ocelot_pcb120.dts | 4 ++++ arch/mips/boot/dts/mscc/ocelot_pcb123.dts | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts index d2dc6b3d923c..bd240690cb37 100644 --- a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts +++ b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts @@ -71,21 +71,25 @@ &port0 { status = "okay"; phy-handle = <&phy0>; + phy-mode = "internal"; }; &port1 { status = "okay"; phy-handle = <&phy1>; + phy-mode = "internal"; }; &port2 { status = "okay"; phy-handle = <&phy2>; + phy-mode = "internal"; }; &port3 { status = "okay"; phy-handle = <&phy3>; + phy-mode = "internal"; }; &port4 { diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts index 7d7e638791dd..0185045c7630 100644 --- a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts +++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts @@ -49,19 +49,23 @@ &port0 { status = "okay"; phy-handle = <&phy0>; + phy-mode = "internal"; }; &port1 { status = "okay"; phy-handle = <&phy1>; + phy-mode = "internal"; }; &port2 { status = "okay"; phy-handle = <&phy2>; + phy-mode = "internal"; }; &port3 { status = "okay"; phy-handle = <&phy3>; + phy-mode = "internal"; }; -- cgit From d00aa8061e04da5c570a54283462e47fab01bd3c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 20 Jul 2021 08:41:19 -0600 Subject: ARM: dts: omap: Drop references to opp.txt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit opp.txt is getting removed with the OPP binding converted to DT schema. As it is unusual to reference a binding doc from a dts file, let's just remove the reference. 
Cc: "Benoît Cousson" Cc: Tony Lindgren Signed-off-by: Rob Herring Signed-off-by: Viresh Kumar --- arch/arm/boot/dts/omap34xx.dtsi | 1 - arch/arm/boot/dts/omap36xx.dtsi | 1 - 2 files changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap34xx.dtsi b/arch/arm/boot/dts/omap34xx.dtsi index feaa43b78535..8b8451399784 100644 --- a/arch/arm/boot/dts/omap34xx.dtsi +++ b/arch/arm/boot/dts/omap34xx.dtsi @@ -24,7 +24,6 @@ }; }; - /* see Documentation/devicetree/bindings/opp/opp.txt */ cpu0_opp_table: opp-table { compatible = "operating-points-v2-ti-cpu"; syscon = <&scm_conf>; diff --git a/arch/arm/boot/dts/omap36xx.dtsi b/arch/arm/boot/dts/omap36xx.dtsi index 20844dbc002e..22b33098b1a2 100644 --- a/arch/arm/boot/dts/omap36xx.dtsi +++ b/arch/arm/boot/dts/omap36xx.dtsi @@ -29,7 +29,6 @@ }; }; - /* see Documentation/devicetree/bindings/opp/opp.txt */ cpu0_opp_table: opp-table { compatible = "operating-points-v2-ti-cpu"; syscon = <&scm_conf>; -- cgit From a00ea5b6f2bbef8b004b0b7228c61680a50c7c3f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 23 Aug 2021 06:45:20 +0000 Subject: powerpc/syscalls: Remove __NR__exit __NR__exit is nowhere used. On most architectures it was removed by commit 135ab6ec8fda ("[PATCH] remove remaining errno and __KERNEL_SYSCALLS__ references") but not on powerpc. powerpc removed __KERNEL_SYSCALLS__ in commit 3db03b4afb3e ("[PATCH] rename the provided execve functions to kernel_execve"), but __NR__exit was left over. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6457eb4f327313323ed1f70e540bbb4ddc9178fa.1629701106.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/unistd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index b541c690a31c..5eb462af6766 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -9,8 +9,6 @@ #define NR_syscalls __NR_syscalls -#define __NR__exit __NR_exit - #ifndef __ASSEMBLY__ #include -- cgit From 3accc0faef081b6813967b34f7d05a3edb855cbd Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 23 Aug 2021 11:00:38 +0200 Subject: powerpc/prom: Fix unused variable ‘reserve_map’ when CONFIG_PPC32 is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. 
arch/powerpc/kernel/prom.c: In function ‘early_reserve_mem’: arch/powerpc/kernel/prom.c:625:10: error: variable ‘reserve_map’ set but not used [-Werror=unused-but-set-variable] __be64 *reserve_map; ^~~~~~~~~~~ cc1: all warnings being treated as errors Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210823090039.166120-2-clg@kaod.org --- arch/powerpc/kernel/prom.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f620e04dc9bf..44b2cdc0aae3 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -640,7 +640,9 @@ static void __init early_reserve_mem(void) } #endif /* CONFIG_BLK_DEV_INITRD */ -#ifdef CONFIG_PPC32 + if (!IS_ENABLED(CONFIG_PPC32)) + return; + /* * Handle the case where we might be booting from an old kexec * image that setup the mem_rsvmap as pairs of 32-bit values @@ -661,7 +663,6 @@ static void __init early_reserve_mem(void) } return; } -#endif } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -- cgit From cc47ad409ba9cc950e9c492c8ba653dabd392148 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 23 Aug 2021 11:00:39 +0200 Subject: powerpc/compat_sys: Declare syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210823090039.166120-3-clg@kaod.org --- arch/powerpc/include/asm/syscalls.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h index 398171fdcd9f..7ee66ae5444d 100644 --- a/arch/powerpc/include/asm/syscalls.h +++ b/arch/powerpc/include/asm/syscalls.h @@ -6,6 +6,7 @@ #include #include #include +#include struct rtas_args; @@ -18,5 +19,34 @@ asmlinkage long sys_mmap2(unsigned long addr, size_t len, asmlinkage long ppc64_personality(unsigned long personality); asmlinkage long sys_rtas(struct rtas_args __user *uargs); +#ifdef CONFIG_COMPAT +unsigned long compat_sys_mmap2(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); + +compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, + u32 reg6, u32 pos1, u32 pos2); + +compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, + u32 reg6, u32 pos1, u32 pos2); + +compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count); + +int compat_sys_truncate64(const char __user *path, u32 reg4, + unsigned long len1, unsigned long len2); + +long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, u32 len1, u32 len2); + +int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1, + unsigned long len2); + +long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2, + size_t len, int advice); + +long compat_sys_sync_file_range2(int fd, unsigned int flags, + unsigned int offset1, unsigned int offset2, + unsigned int nbytes1, unsigned int nbytes2); +#endif + #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_SYSCALLS_H */ -- cgit From b424d4d4263200459615c87ad8dddaf4bb571a9d Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 27 Jul 2021 02:06:52 +0800 Subject: arm64: PCI: Restructure pcibios_root_bridge_prepare() Restructure the pcibios_root_bridge_prepare() as the preparation for 
supporting cases when no real ACPI device is related to the PCI host bridge. No functional change. Link: https://lore.kernel.org/r/20210726180657.142727-4-boqun.feng@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Lorenzo Pieralisi Acked-by: Catalin Marinas --- arch/arm64/kernel/pci.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 1006ed2d7c60..5148ae242780 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -82,14 +82,19 @@ int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - if (!acpi_disabled) { - struct pci_config_window *cfg = bridge->bus->sysdata; - struct acpi_device *adev = to_acpi_device(cfg->parent); - struct device *bus_dev = &bridge->bus->dev; + struct pci_config_window *cfg; + struct acpi_device *adev; + struct device *bus_dev; - ACPI_COMPANION_SET(&bridge->dev, adev); - set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); - } + if (acpi_disabled) + return 0; + + cfg = bridge->bus->sysdata; + adev = to_acpi_device(cfg->parent); + bus_dev = &bridge->bus->dev; + + ACPI_COMPANION_SET(&bridge->dev, adev); + set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); return 0; } -- cgit From 7d40c0f70d92291605c4498b8ee4b3a3c3ba07b1 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 27 Jul 2021 02:06:53 +0800 Subject: arm64: PCI: Support root bridge preparation for Hyper-V Currently at root bridge preparation, the corresponding ACPI device will be set as the companion, however for a Hyper-V virtual PCI root bridge, there is no corresponding ACPI device, because a Hyper-V virtual PCI root bridge is discovered via VMBus rather than ACPI table. In order to support this, we need to make pcibios_root_bridge_prepare() work with cfg->parent being NULL. Use a NULL pointer as the ACPI device if there is no corresponding ACPI device, and this is fine because: 1) ACPI_COMPANION_SET() can work with the second parameter being NULL, 2) semantically, if a NULL pointer is set via ACPI_COMPANION_SET(), ACPI_COMPANION() (the read API for this field) will return NULL, and since ACPI_COMPANION() may return NULL, so users must have handled the cases where it returns NULL, and 3) since there is no corresponding ACPI device, it would be wrong to use any other value here. Link: https://lore.kernel.org/r/20210726180657.142727-5-boqun.feng@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Lorenzo Pieralisi Acked-by: Catalin Marinas --- arch/arm64/kernel/pci.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 5148ae242780..2276689b5411 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -90,7 +90,17 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) return 0; cfg = bridge->bus->sysdata; - adev = to_acpi_device(cfg->parent); + + /* + * On Hyper-V there is no corresponding ACPI device for a root bridge, + * therefore ->parent is set as NULL by the driver. And set 'adev' as + * NULL in this case because there is no proper ACPI device. 
+ */ + if (!cfg->parent) + adev = NULL; + else + adev = to_acpi_device(cfg->parent); + bus_dev = &bridge->bus->dev; ACPI_COMPANION_SET(&bridge->dev, adev); -- cgit From 5d7d6dac8fe99ed59eee2300e4a03370f94d5222 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 5 Aug 2021 18:26:14 -0300 Subject: KVM: PPC: Book3S HV: Fix copy_tofrom_guest routines The __kvmhv_copy_tofrom_guest_radix function was introduced along with nested HV guest support. It uses the platform's Radix MMU quadrants to provide a nested hypervisor with fast access to its nested guests memory (H_COPY_TOFROM_GUEST hypercall). It has also since been added as a fast path for the kvmppc_ld/st routines which are used during instruction emulation. The commit def0bfdbd603 ("powerpc: use probe_user_read() and probe_user_write()") changed the low level copy function from raw_copy_from_user to probe_user_read, which adds a check to access_ok. In powerpc that is: static inline bool __access_ok(unsigned long addr, unsigned long size) { return addr < TASK_SIZE_MAX && size <= TASK_SIZE_MAX - addr; } and TASK_SIZE_MAX is 0x0010000000000000UL for 64-bit, which means that setting the two MSBs of the effective address (which correspond to the quadrant) now cause access_ok to reject the access. This was not caught earlier because the most common code path via kvmppc_ld/st contains a fallback (kvm_read_guest) that is likely to succeed for L1 guests. For nested guests there is no fallback. Another issue is that probe_user_read (now __copy_from_user_nofault) does not return the number of bytes not copied in case of failure, so the destination memory is not being cleared anymore in kvmhv_copy_from_guest_radix: ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n); if (ret > 0) <-- always false! memset(to + (n - ret), 0, ret); This patch fixes both issues by skipping access_ok and open-coding the low level __copy_to/from_user_inatomic. Fixes: def0bfdbd603 ("powerpc: use probe_user_read() and probe_user_write()") Signed-off-by: Fabiano Rosas Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210805212616.2641017-2-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index b5905ae4377c..44eb7b1ef289 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -65,10 +65,12 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, } isync(); + pagefault_disable(); if (is_load) - ret = copy_from_user_nofault(to, (const void __user *)from, n); + ret = __copy_from_user_inatomic(to, (const void __user *)from, n); else - ret = copy_to_user_nofault((void __user *)to, from, n); + ret = __copy_to_user_inatomic((void __user *)to, from, n); + pagefault_enable(); /* switch the pid first to avoid running host with unallocated pid */ if (quadrant == 1 && pid != old_pid) -- cgit From c232461c0c3b0aca637f0d7658a7f5d0bb393a4e Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 5 Aug 2021 18:26:15 -0300 Subject: KVM: PPC: Book3S HV: Add sanity check to copy_tofrom_guest Both paths into __kvmhv_copy_tofrom_guest_radix ensure that we arrive with an effective address that is smaller than our total addressable space and addresses quadrant 0. 
- The H_COPY_TOFROM_GUEST hypercall path rejects the call with H_PARAMETER if the effective address has any of the twelve most significant bits set. - The kvmhv_copy_tofrom_guest_radix path clears the top twelve bits before calling the internal function. Although the callers make sure that the effective address is sane, any future use of the function is exposed to a programming error, so add a sanity check. Suggested-by: Nicholas Piggin Signed-off-by: Fabiano Rosas Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210805212616.2641017-3-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 44eb7b1ef289..1b1c9e9e539b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -44,6 +44,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, (to != NULL) ? __pa(to): 0, (from != NULL) ? __pa(from): 0, n); + if (eaddr & (0xFFFUL << 52)) + return ret; + quadrant = 1; if (!pid) quadrant = 2; -- cgit From 0eb596f1e6103ebe122792a425b88c5dc21c4087 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 5 Aug 2021 18:26:16 -0300 Subject: KVM: PPC: Book3S HV: Stop exporting symbols from book3s_64_mmu_radix The book3s_64_mmu_radix.o object is not part of the KVM builtins and all the callers of the exported symbols are in the same kvm-hv.ko module so we should not need to export any symbols. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210805212616.2641017-4-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 1b1c9e9e539b..16359525a40f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -86,7 +86,6 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, return ret; } -EXPORT_SYMBOL_GPL(__kvmhv_copy_tofrom_guest_radix); static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, void *from, unsigned long n) @@ -122,14 +121,12 @@ long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, return ret; } -EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix); long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from, unsigned long n) { return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n); } -EXPORT_SYMBOL_GPL(kvmhv_copy_to_guest_radix); int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, u64 root, -- cgit From 1e753732bda6dcf888ea0b90b2a91ac1c1a0bae9 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Mon, 23 Aug 2021 17:20:46 -0400 Subject: s390/vfio-ap: r/w lock for PQAP interception handler function pointer The function pointer to the interception handler for the PQAP instruction can get changed during the interception process. Let's add a semaphore to struct kvm_s390_crypto to control read/write access to the function pointer contained therein. The semaphore must be locked for write access by the vfio_ap device driver when notified that the KVM pointer has been set or cleared. It must be locked for read access by the interception framework when the PQAP instruction is intercepted. 
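The resulting read-side pattern looks roughly like the sketch below, a userspace model in which a pthreads rwlock stands in for the kernel rw_semaphore; call_hook() is a hypothetical analogue of handle_pqap(), and the error value is only a stand-in for -EOPNOTSUPP:

  #include <pthread.h>
  #include <stdio.h>

  typedef int (*crypto_hook)(void);

  static pthread_rwlock_t hook_rwsem = PTHREAD_RWLOCK_INITIALIZER;
  static crypto_hook pqap_hook;  /* set/cleared only under the write lock */

  static int call_hook(void)
  {
          int ret = -95;  /* stand-in for -EOPNOTSUPP */

          pthread_rwlock_rdlock(&hook_rwsem);
          if (pqap_hook)          /* re-check under the lock */
                  ret = pqap_hook();
          pthread_rwlock_unlock(&hook_rwsem);
          return ret;
  }

  static int demo_hook(void) { return 0; }

  int main(void)
  {
          printf("%d\n", call_hook());         /* -95: no hook registered */

          pthread_rwlock_wrlock(&hook_rwsem);  /* driver registers its hook */
          pqap_hook = demo_hook;
          pthread_rwlock_unlock(&hook_rwsem);

          printf("%d\n", call_hook());         /* 0: hook ran under read lock */
          return 0;
  }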
Signed-off-by: Tony Krowiak Reviewed-by: Jason Gunthorpe Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20210823212047.1476436-2-akrowiak@linux.ibm.com Signed-off-by: Alex Williamson --- arch/s390/include/asm/kvm_host.h | 8 +++----- arch/s390/kvm/kvm-s390.c | 1 + arch/s390/kvm/priv.c | 15 +++++++++------ 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 161a9e12bfb8..d681ae462350 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -798,14 +798,12 @@ struct kvm_s390_cpu_model { unsigned short ibc; }; -struct kvm_s390_module_hook { - int (*hook)(struct kvm_vcpu *vcpu); - struct module *owner; -}; +typedef int (*crypto_hook)(struct kvm_vcpu *vcpu); struct kvm_s390_crypto { struct kvm_s390_crypto_cb *crycb; - struct kvm_s390_module_hook *pqap_hook; + struct rw_semaphore pqap_hook_rwsem; + crypto_hook *pqap_hook; __u32 crycbd; __u8 aes_kw; __u8 dea_kw; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4527ac7b5961..5c4f559bcd60 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2630,6 +2630,7 @@ static void kvm_s390_crypto_init(struct kvm *kvm) { kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; kvm_s390_set_crycb_format(kvm); + init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); if (!test_kvm_facility(kvm, 76)) return; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 9928f785c677..53da4ceb16a3 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -610,6 +610,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) static int handle_pqap(struct kvm_vcpu *vcpu) { struct ap_queue_status status = {}; + crypto_hook pqap_hook; unsigned long reg0; int ret; uint8_t fc; @@ -654,18 +655,20 @@ static int handle_pqap(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* - * Verify that the hook callback is registered, lock the owner - * and call the hook. + * If the hook callback is registered, there will be a pointer to the + * hook function pointer in the kvm_s390_crypto structure. Lock the + * owner, retrieve the hook function pointer and call the hook. */ + down_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem); if (vcpu->kvm->arch.crypto.pqap_hook) { - if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner)) - return -EOPNOTSUPP; - ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu); - module_put(vcpu->kvm->arch.crypto.pqap_hook->owner); + pqap_hook = *vcpu->kvm->arch.crypto.pqap_hook; + ret = pqap_hook(vcpu); if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000) kvm_s390_set_psw_cc(vcpu, 3); + up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem); return ret; } + up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem); /* * A vfio_driver must register a hook. * No hook means no driver to enable the SIE CRYCB and no queues. -- cgit From 86956e70761b3292156d668e87126844334dd71b Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Mon, 23 Aug 2021 17:20:47 -0400 Subject: s390/vfio-ap: replace open coded locks for VFIO_GROUP_NOTIFY_SET_KVM notification It was pointed out during an unrelated patch review that locks should not be open coded - i.e., writing the algorithm of a standard lock in a function instead of using a lock from the standard library. The setting and testing of a busy flag and sleeping on a wait_event is the same thing a lock does. The open coded locks are invisible to lockdep, so potential locking problems are not detected. 
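In pthreads terms, the open-coded pattern amounts to the following sketch (illustrative only; the condition variable plays the role of the kernel wait queue, and the comments map each step back to the busy-flag idiom):

  #include <pthread.h>
  #include <stdbool.h>

  static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
  static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
  static bool busy;

  /* "lock": test the busy flag, sleep until it clears, then claim it */
  static void open_coded_lock(void)
  {
          pthread_mutex_lock(&m);
          while (busy)                  /* wait_event(wq, !busy) */
                  pthread_cond_wait(&cv, &m);
          busy = true;                  /* set the busy flag */
          pthread_mutex_unlock(&m);
  }

  /* "unlock": clear the flag and wake any waiters */
  static void open_coded_unlock(void)
  {
          pthread_mutex_lock(&m);
          busy = false;
          pthread_cond_broadcast(&cv);  /* wake_up(&wq) */
          pthread_mutex_unlock(&m);
  }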
This patch removes the open coded locks used during VFIO_GROUP_NOTIFY_SET_KVM notification. The busy flag and wait queue were introduced to resolve a possible circular locking dependency reported by lockdep when starting a secure execution guest configured with AP adapters and domains. Reversing the order in which the kvm->lock mutex and matrix_dev->lock mutex are locked resolves the issue reported by lockdep, thus enabling the removal of the open coded locks. Signed-off-by: Tony Krowiak Acked-by: Halil Pasic Link: https://lore.kernel.org/r/20210823212047.1476436-3-akrowiak@linux.ibm.com Signed-off-by: Alex Williamson --- arch/s390/kvm/kvm-s390.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5c4f559bcd60..efda0615741f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2559,12 +2559,26 @@ static void kvm_s390_set_crycb_format(struct kvm *kvm) kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; } +/* + * kvm_arch_crypto_set_masks + * + * @kvm: pointer to the target guest's KVM struct containing the crypto masks + * to be set. + * @apm: the mask identifying the accessible AP adapters + * @aqm: the mask identifying the accessible AP domains + * @adm: the mask identifying the accessible AP control domains + * + * Set the masks that identify the adapters, domains and control domains to + * which the KVM guest is granted access. + * + * Note: The kvm->lock mutex must be locked by the caller before invoking this + * function. + */ void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm) { struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; - mutex_lock(&kvm->lock); kvm_s390_vcpu_block_all(kvm); switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { @@ -2595,13 +2609,23 @@ void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, /* recreate the shadow crycb for each vcpu */ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); kvm_s390_vcpu_unblock_all(kvm); - mutex_unlock(&kvm->lock); } EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); +/* + * kvm_arch_crypto_clear_masks + * + * @kvm: pointer to the target guest's KVM struct containing the crypto masks + * to be cleared. + * + * Clear the masks that identify the adapters, domains and control domains to + * which the KVM guest is granted access. + * + * Note: The kvm->lock mutex must be locked by the caller before invoking this + * function. + */ void kvm_arch_crypto_clear_masks(struct kvm *kvm) { - mutex_lock(&kvm->lock); kvm_s390_vcpu_block_all(kvm); memset(&kvm->arch.crypto.crycb->apcb0, 0, @@ -2613,7 +2637,6 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm) /* recreate the shadow crycb for each vcpu */ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); kvm_s390_vcpu_unblock_all(kvm); - mutex_unlock(&kvm->lock); } EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); -- cgit From 82a423053eb3cf27209c78ce1852ea18d173722a Mon Sep 17 00:00:00 2001 From: Changcheng Deng Date: Sat, 14 Aug 2021 00:30:38 +0800 Subject: arch/arc/kernel/: fix misspellings using codespell tool Some typos are found out by codespell tool: ./intc-compact.c:145: prioity ==> priority ./smp.c:286: recevier ==> receiver ./stacktrace.c:152 prelogue ==> prologue Fix typos found by codespell. 
Reported-by: Zeal Robot Signed-off-by: Changcheng Deng Signed-off-by: Yi Wang Signed-off-by: Vineet Gupta --- arch/arc/kernel/intc-compact.c | 2 +- arch/arc/kernel/smp.c | 2 +- arch/arc/kernel/stacktrace.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index a86641b91e65..6885e422870e 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -142,7 +142,7 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ); * Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times. * Here local_irq_enable( ) shd not re-enable lower priority interrupts * -If called from soft-ISR, it must re-enable all interrupts - * soft ISR are low prioity jobs which can be very slow, thus all IRQs + * soft ISR are low priority jobs which can be very slow, thus all IRQs * must be enabled while they run. * Now hardware context wise we may still be in L2 ISR (not done rtie) * still we must re-enable both L1 and L2 IRQs diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index db0e104d6835..b5b5fe2c6e1e 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -283,7 +283,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg) /* * Call the platform specific IPI kick function, but avoid if possible: * Only do so if there's no pending msg from other concurrent sender(s). - * Otherwise, recevier will see this msg as well when it takes the + * Otherwise, receiver will see this msg as well when it takes the * IPI corresponding to that msg. This is true, even if it is already in * IPI handler, because !@old means it has not yet dequeued the msg(s) * so @new msg can be a free-loader diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 1b9576d21e24..c376ff3147e7 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -149,7 +149,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, #else /* On ARC, only Dward based unwinder works. fp based backtracing is * not possible (-fno-omit-frame-pointer) because of the way function - * prelogue is setup (callee regs saved and then fp set and not other + * prologue is setup (callee regs saved and then fp set and not other * way around */ pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); -- cgit From 6b5ff0405e4190f23780362ea324b250bc495683 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Aug 2021 14:05:33 -0700 Subject: ARC: export clear_user_page() for modules 0day bot reports a build error: ERROR: modpost: "clear_user_page" [drivers/media/v4l2-core/videobuf-dma-sg.ko] undefined! so export it in arch/arc/ to fix the build error. In most ARCHes, clear_user_page() is a macro. OTOH, in a few ARCHes it is a function and needs to be exported. PowerPC exported it in 2004. It looks like nds32 and nios2 still need to have it exported. 
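The general rule, sketched with a made-up name (my_arch_helper() is hypothetical, not tied to this driver): any function that is built into the kernel but called from modular code needs an explicit export, otherwise modpost fails at module link time with exactly the kind of error quoted above.

  #include <linux/export.h>

  /* Without the EXPORT_SYMBOL line, a module calling my_arch_helper()
   * fails at modpost with "undefined!". */
  int my_arch_helper(int x)
  {
          return x + 1;
  }
  EXPORT_SYMBOL(my_arch_helper);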
Fixes: 4102b53392d63 ("ARC: [mm] Aliasing VIPT dcache support 2/4") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Guenter Roeck Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index a2fbea3ee07c..102418ac5ff4 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1123,7 +1123,7 @@ void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) clear_page(to); clear_bit(PG_dc_clean, &page->flags); } - +EXPORT_SYMBOL(clear_user_page); /********************************************************************** * Explicit Cache flush request from user space via syscall -- cgit From b0f839b4b91588c24bac8d320f7fc9fe2dea4517 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 3 Aug 2021 20:26:09 -0700 Subject: ARC: atomics: disintegrate header Non functional change, to ease future addition/removal Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic-llsc.h | 103 ++++++++ arch/arc/include/asm/atomic-spinlock.h | 111 +++++++++ arch/arc/include/asm/atomic.h | 429 +-------------------------------- arch/arc/include/asm/atomic64-arcv2.h | 242 +++++++++++++++++++ 4 files changed, 461 insertions(+), 424 deletions(-) create mode 100644 arch/arc/include/asm/atomic-llsc.h create mode 100644 arch/arc/include/asm/atomic-spinlock.h create mode 100644 arch/arc/include/asm/atomic64-arcv2.h (limited to 'arch') diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h new file mode 100644 index 000000000000..aab4f2855457 --- /dev/null +++ b/arch/arc/include/asm/atomic-llsc.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_ARC_ATOMIC_LLSC_H +#define _ASM_ARC_ATOMIC_LLSC_H + +#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) + +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void arch_atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned int val; \ + \ + __asm__ __volatile__( \ + "1: llock %[val], [%[ctr]] \n" \ + " " #asm_op " %[val], %[val], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " bnz 1b \n" \ + : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ + : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ + [i] "ir" (i) \ + : "cc"); \ +} \ + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned int val; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND themselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %[val], [%[ctr]] \n" \ + " " #asm_op " %[val], %[val], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " bnz 1b \n" \ + : [val] "=&r" (val) \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return val; \ +} + +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int val, orig; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND themselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %[orig], [%[ctr]] \n" \ + " " #asm_op " %[val], %[orig], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " bnz 1b \n" \ + : [val] "=&r" (val), \ + [orig] "=&r" (orig) \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); 
\ + \ + smp_mb(); \ + \ + return orig; \ +} + +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(add, +=, add) +ATOMIC_OPS(sub, -=, sub) + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) + +#define arch_atomic_andnot arch_atomic_andnot +#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#endif diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h new file mode 100644 index 000000000000..bdf87610b2d7 --- /dev/null +++ b/arch/arc/include/asm/atomic-spinlock.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_ARC_ATOMIC_SPLOCK_H +#define _ASM_ARC_ATOMIC_SPLOCK_H + +#ifndef CONFIG_SMP + + /* violating atomic_xxx API locking protocol in UP for optimization sake */ +#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) + +#else + +static inline void arch_atomic_set(atomic_t *v, int i) +{ + /* + * Independent of hardware support, all of the atomic_xxx() APIs need + * to follow the same locking rules to make sure that a "hardware" + * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn + * sequence + * + * Thus atomic_set() despite being 1 insn (and seemingly atomic) + * requires the locking. + */ + unsigned long flags; + + atomic_ops_lock(flags); + WRITE_ONCE(v->counter, i); + atomic_ops_unlock(flags); +} + +#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) + +#endif + +/* + * Non hardware assisted Atomic-R-M-W + * Locking would change to irq-disabling only (UP) and spinlocks (SMP) + */ + +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void arch_atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + \ + atomic_ops_lock(flags); \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ +} + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned long temp; \ + \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(flags); \ + temp = v->counter; \ + temp c_op i; \ + v->counter = temp; \ + atomic_ops_unlock(flags); \ + \ + return temp; \ +} + +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned long orig; \ + \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(flags); \ + orig = v->counter; \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ + \ + return orig; \ +} + +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(add, +=, add) +ATOMIC_OPS(sub, -=, sub) + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) + +#define arch_atomic_andnot arch_atomic_andnot +#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#endif diff --git 
a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 7a36d79b5b2f..ee88e1dbaab5 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -17,435 +17,16 @@ #define arch_atomic_read(v) READ_ONCE((v)->counter) #ifdef CONFIG_ARC_HAS_LLSC - -#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) - -#define ATOMIC_OP(op, c_op, asm_op) \ -static inline void arch_atomic_##op(int i, atomic_t *v) \ -{ \ - unsigned int val; \ - \ - __asm__ __volatile__( \ - "1: llock %[val], [%[ctr]] \n" \ - " " #asm_op " %[val], %[val], %[i] \n" \ - " scond %[val], [%[ctr]] \n" \ - " bnz 1b \n" \ - : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ - : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ - [i] "ir" (i) \ - : "cc"); \ -} \ - -#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ -{ \ - unsigned int val; \ - \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND themselves don't provide any such semantics \ - */ \ - smp_mb(); \ - \ - __asm__ __volatile__( \ - "1: llock %[val], [%[ctr]] \n" \ - " " #asm_op " %[val], %[val], %[i] \n" \ - " scond %[val], [%[ctr]] \n" \ - " bnz 1b \n" \ - : [val] "=&r" (val) \ - : [ctr] "r" (&v->counter), \ - [i] "ir" (i) \ - : "cc"); \ - \ - smp_mb(); \ - \ - return val; \ -} - -#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ -{ \ - unsigned int val, orig; \ - \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND themselves don't provide any such semantics \ - */ \ - smp_mb(); \ - \ - __asm__ __volatile__( \ - "1: llock %[orig], [%[ctr]] \n" \ - " " #asm_op " %[val], %[orig], %[i] \n" \ - " scond %[val], [%[ctr]] \n" \ - " bnz 1b \n" \ - : [val] "=&r" (val), \ - [orig] "=&r" (orig) \ - : [ctr] "r" (&v->counter), \ - [i] "ir" (i) \ - : "cc"); \ - \ - smp_mb(); \ - \ - return orig; \ -} - -#else /* !CONFIG_ARC_HAS_LLSC */ - -#ifndef CONFIG_SMP - - /* violating atomic_xxx API locking protocol in UP for optimization sake */ -#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) - +#include #else - -static inline void arch_atomic_set(atomic_t *v, int i) -{ - /* - * Independent of hardware support, all of the atomic_xxx() APIs need - * to follow the same locking rules to make sure that a "hardware" - * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn - * sequence - * - * Thus atomic_set() despite being 1 insn (and seemingly atomic) - * requires the locking. 
- */ - unsigned long flags; - - atomic_ops_lock(flags); - WRITE_ONCE(v->counter, i); - atomic_ops_unlock(flags); -} - -#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) - +#include #endif -/* - * Non hardware assisted Atomic-R-M-W - * Locking would change to irq-disabling only (UP) and spinlocks (SMP) - */ - -#define ATOMIC_OP(op, c_op, asm_op) \ -static inline void arch_atomic_##op(int i, atomic_t *v) \ -{ \ - unsigned long flags; \ - \ - atomic_ops_lock(flags); \ - v->counter c_op i; \ - atomic_ops_unlock(flags); \ -} - -#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ -{ \ - unsigned long flags; \ - unsigned long temp; \ - \ - /* \ - * spin lock/unlock provides the needed smp_mb() before/after \ - */ \ - atomic_ops_lock(flags); \ - temp = v->counter; \ - temp c_op i; \ - v->counter = temp; \ - atomic_ops_unlock(flags); \ - \ - return temp; \ -} - -#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ -{ \ - unsigned long flags; \ - unsigned long orig; \ - \ - /* \ - * spin lock/unlock provides the needed smp_mb() before/after \ - */ \ - atomic_ops_lock(flags); \ - orig = v->counter; \ - v->counter c_op i; \ - atomic_ops_unlock(flags); \ - \ - return orig; \ -} - -#endif /* !CONFIG_ARC_HAS_LLSC */ - -#define ATOMIC_OPS(op, c_op, asm_op) \ - ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) \ - ATOMIC_FETCH_OP(op, c_op, asm_op) - -ATOMIC_OPS(add, +=, add) -ATOMIC_OPS(sub, -=, sub) - -#undef ATOMIC_OPS -#define ATOMIC_OPS(op, c_op, asm_op) \ - ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_FETCH_OP(op, c_op, asm_op) - -ATOMIC_OPS(and, &=, and) -ATOMIC_OPS(andnot, &= ~, bic) -ATOMIC_OPS(or, |=, or) -ATOMIC_OPS(xor, ^=, xor) - -#define arch_atomic_andnot arch_atomic_andnot -#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot - -#undef ATOMIC_OPS -#undef ATOMIC_FETCH_OP -#undef ATOMIC_OP_RETURN -#undef ATOMIC_OP - #ifdef CONFIG_GENERIC_ATOMIC64 - #include - -#else /* Kconfig ensures this is only enabled with needed h/w assist */ - -/* - * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD) - * - The address HAS to be 64-bit aligned - * - There are 2 semantics involved here: - * = exclusive implies no interim update between load/store to same addr - * = both words are observed/updated together: this is guaranteed even - * for regular 64-bit load (LDD) / store (STD). Thus atomic64_set() - * is NOT required to use LLOCKD+SCONDD, STD suffices - */ - -typedef struct { - s64 __aligned(8) counter; -} atomic64_t; - -#define ATOMIC64_INIT(a) { (a) } - -static inline s64 arch_atomic64_read(const atomic64_t *v) -{ - s64 val; - - __asm__ __volatile__( - " ldd %0, [%1] \n" - : "=r"(val) - : "r"(&v->counter)); - - return val; -} - -static inline void arch_atomic64_set(atomic64_t *v, s64 a) -{ - /* - * This could have been a simple assignment in "C" but would need - * explicit volatile. Otherwise gcc optimizers could elide the store - * which borked atomic64 self-test - * In the inline asm version, memory clobber needed for exact same - * reason, to tell gcc about the store. - * - * This however is not needed for sibling atomic64_add() etc since both - * load/store are explicitly done in inline asm. 
As long as API is used - * for each access, gcc has no way to optimize away any load/store - */ - __asm__ __volatile__( - " std %0, [%1] \n" - : - : "r"(a), "r"(&v->counter) - : "memory"); -} - -#define ATOMIC64_OP(op, op1, op2) \ -static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \ -{ \ - s64 val; \ - \ - __asm__ __volatile__( \ - "1: \n" \ - " llockd %0, [%1] \n" \ - " " #op1 " %L0, %L0, %L2 \n" \ - " " #op2 " %H0, %H0, %H2 \n" \ - " scondd %0, [%1] \n" \ - " bnz 1b \n" \ - : "=&r"(val) \ - : "r"(&v->counter), "ir"(a) \ - : "cc"); \ -} \ - -#define ATOMIC64_OP_RETURN(op, op1, op2) \ -static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \ -{ \ - s64 val; \ - \ - smp_mb(); \ - \ - __asm__ __volatile__( \ - "1: \n" \ - " llockd %0, [%1] \n" \ - " " #op1 " %L0, %L0, %L2 \n" \ - " " #op2 " %H0, %H0, %H2 \n" \ - " scondd %0, [%1] \n" \ - " bnz 1b \n" \ - : [val] "=&r"(val) \ - : "r"(&v->counter), "ir"(a) \ - : "cc"); /* memory clobber comes from smp_mb() */ \ - \ - smp_mb(); \ - \ - return val; \ -} - -#define ATOMIC64_FETCH_OP(op, op1, op2) \ -static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ -{ \ - s64 val, orig; \ - \ - smp_mb(); \ - \ - __asm__ __volatile__( \ - "1: \n" \ - " llockd %0, [%2] \n" \ - " " #op1 " %L1, %L0, %L3 \n" \ - " " #op2 " %H1, %H0, %H3 \n" \ - " scondd %1, [%2] \n" \ - " bnz 1b \n" \ - : "=&r"(orig), "=&r"(val) \ - : "r"(&v->counter), "ir"(a) \ - : "cc"); /* memory clobber comes from smp_mb() */ \ - \ - smp_mb(); \ - \ - return orig; \ -} - -#define ATOMIC64_OPS(op, op1, op2) \ - ATOMIC64_OP(op, op1, op2) \ - ATOMIC64_OP_RETURN(op, op1, op2) \ - ATOMIC64_FETCH_OP(op, op1, op2) - -ATOMIC64_OPS(add, add.f, adc) -ATOMIC64_OPS(sub, sub.f, sbc) -ATOMIC64_OPS(and, and, and) -ATOMIC64_OPS(andnot, bic, bic) -ATOMIC64_OPS(or, or, or) -ATOMIC64_OPS(xor, xor, xor) - -#define arch_atomic64_andnot arch_atomic64_andnot -#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot - -#undef ATOMIC64_OPS -#undef ATOMIC64_FETCH_OP -#undef ATOMIC64_OP_RETURN -#undef ATOMIC64_OP - -static inline s64 -arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) -{ - s64 prev; - - smp_mb(); - - __asm__ __volatile__( - "1: llockd %0, [%1] \n" - " brne %L0, %L2, 2f \n" - " brne %H0, %H2, 2f \n" - " scondd %3, [%1] \n" - " bnz 1b \n" - "2: \n" - : "=&r"(prev) - : "r"(ptr), "ir"(expected), "r"(new) - : "cc"); /* memory clobber comes from smp_mb() */ - - smp_mb(); - - return prev; -} - -static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) -{ - s64 prev; - - smp_mb(); - - __asm__ __volatile__( - "1: llockd %0, [%1] \n" - " scondd %2, [%1] \n" - " bnz 1b \n" - "2: \n" - : "=&r"(prev) - : "r"(ptr), "r"(new) - : "cc"); /* memory clobber comes from smp_mb() */ - - smp_mb(); - - return prev; -} - -/** - * arch_atomic64_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic64_t - * - * The function returns the old value of *v minus 1, even if - * the atomic variable, v, was not decremented. 
- */ - -static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) -{ - s64 val; - - smp_mb(); - - __asm__ __volatile__( - "1: llockd %0, [%1] \n" - " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n" - " sub.c %H0, %H0, 1 # if C set, w1 - 1\n" - " brlt %H0, 0, 2f \n" - " scondd %0, [%1] \n" - " bnz 1b \n" - "2: \n" - : "=&r"(val) - : "r"(&v->counter) - : "cc"); /* memory clobber comes from smp_mb() */ - - smp_mb(); - - return val; -} -#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive - -/** - * arch_atomic64_fetch_add_unless - add unless the number is a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if it was not @u. - * Returns the old value of @v - */ -static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) -{ - s64 old, temp; - - smp_mb(); - - __asm__ __volatile__( - "1: llockd %0, [%2] \n" - " brne %L0, %L4, 2f # continue to add since v != u \n" - " breq.d %H0, %H4, 3f # return since v == u \n" - "2: \n" - " add.f %L1, %L0, %L3 \n" - " adc %H1, %H0, %H3 \n" - " scondd %1, [%2] \n" - " bnz 1b \n" - "3: \n" - : "=&r"(old), "=&r" (temp) - : "r"(&v->counter), "r"(a), "r"(u) - : "cc"); /* memory clobber comes from smp_mb() */ - - smp_mb(); - - return old; -} -#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless - -#endif /* !CONFIG_GENERIC_ATOMIC64 */ +#else +#include +#endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h new file mode 100644 index 000000000000..53996b11b551 --- /dev/null +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -0,0 +1,242 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD) + * - The address HAS to be 64-bit aligned + */ + +#ifndef _ASM_ARC_ATOMIC64_ARCV2_H +#define _ASM_ARC_ATOMIC64_ARCV2_H + +typedef struct { + s64 __aligned(8) counter; +} atomic64_t; + +#define ATOMIC64_INIT(a) { (a) } + +static inline s64 arch_atomic64_read(const atomic64_t *v) +{ + s64 val; + + __asm__ __volatile__( + " ldd %0, [%1] \n" + : "=r"(val) + : "r"(&v->counter)); + + return val; +} + +static inline void arch_atomic64_set(atomic64_t *v, s64 a) +{ + /* + * This could have been a simple assignment in "C" but would need + * explicit volatile. Otherwise gcc optimizers could elide the store + * which borked atomic64 self-test + * In the inline asm version, memory clobber needed for exact same + * reason, to tell gcc about the store. + * + * This however is not needed for sibling atomic64_add() etc since both + * load/store are explicitly done in inline asm. 
As long as API is used + * for each access, gcc has no way to optimize away any load/store + */ + __asm__ __volatile__( + " std %0, [%1] \n" + : + : "r"(a), "r"(&v->counter) + : "memory"); +} + +#define ATOMIC64_OP(op, op1, op2) \ +static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \ +{ \ + s64 val; \ + \ + __asm__ __volatile__( \ + "1: \n" \ + " llockd %0, [%1] \n" \ + " " #op1 " %L0, %L0, %L2 \n" \ + " " #op2 " %H0, %H0, %H2 \n" \ + " scondd %0, [%1] \n" \ + " bnz 1b \n" \ + : "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ + : "cc"); \ +} \ + +#define ATOMIC64_OP_RETURN(op, op1, op2) \ +static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \ +{ \ + s64 val; \ + \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: \n" \ + " llockd %0, [%1] \n" \ + " " #op1 " %L0, %L0, %L2 \n" \ + " " #op2 " %H0, %H0, %H2 \n" \ + " scondd %0, [%1] \n" \ + " bnz 1b \n" \ + : [val] "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ + : "cc"); /* memory clobber comes from smp_mb() */ \ + \ + smp_mb(); \ + \ + return val; \ +} + +#define ATOMIC64_FETCH_OP(op, op1, op2) \ +static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ +{ \ + s64 val, orig; \ + \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: \n" \ + " llockd %0, [%2] \n" \ + " " #op1 " %L1, %L0, %L3 \n" \ + " " #op2 " %H1, %H0, %H3 \n" \ + " scondd %1, [%2] \n" \ + " bnz 1b \n" \ + : "=&r"(orig), "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ + : "cc"); /* memory clobber comes from smp_mb() */ \ + \ + smp_mb(); \ + \ + return orig; \ +} + +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_OP_RETURN(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) + +ATOMIC64_OPS(add, add.f, adc) +ATOMIC64_OPS(sub, sub.f, sbc) +ATOMIC64_OPS(and, and, and) +ATOMIC64_OPS(andnot, bic, bic) +ATOMIC64_OPS(or, or, or) +ATOMIC64_OPS(xor, xor, xor) + +#define arch_atomic64_andnot arch_atomic64_andnot +#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP + +static inline s64 +arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) +{ + s64 prev; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%1] \n" + " brne %L0, %L2, 2f \n" + " brne %H0, %H2, 2f \n" + " scondd %3, [%1] \n" + " bnz 1b \n" + "2: \n" + : "=&r"(prev) + : "r"(ptr), "ir"(expected), "r"(new) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return prev; +} + +static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) +{ + s64 prev; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%1] \n" + " scondd %2, [%1] \n" + " bnz 1b \n" + "2: \n" + : "=&r"(prev) + : "r"(ptr), "r"(new) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return prev; +} + +/** + * arch_atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic64_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. 
+ */ + +static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) +{ + s64 val; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%1] \n" + " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n" + " sub.c %H0, %H0, 1 # if C set, w1 - 1\n" + " brlt %H0, 0, 2f \n" + " scondd %0, [%1] \n" + " bnz 1b \n" + "2: \n" + : "=&r"(val) + : "r"(&v->counter) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return val; +} +#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive + +/** + * arch_atomic64_fetch_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if it was not @u. + * Returns the old value of @v + */ +static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) +{ + s64 old, temp; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%2] \n" + " brne %L0, %L4, 2f # continue to add since v != u \n" + " breq.d %H0, %H4, 3f # return since v == u \n" + "2: \n" + " add.f %L1, %L0, %L3 \n" + " adc %H1, %H0, %H3 \n" + " scondd %1, [%2] \n" + " bnz 1b \n" + "3: \n" + : "=&r"(old), "=&r" (temp) + : "r"(&v->counter), "r"(a), "r"(u) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return old; +} +#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless + +#endif -- cgit From b1040148b2ea4ecbdb60c0d20393f4d15eee13ac Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 21 Jan 2020 09:56:33 -0800 Subject: ARC: atomic: !LLSC: remove hack in atomic_set() for UP !LLSC atomics use spinlock (SMP) or irq-disable (UP) to implement critical regions. UP atomic_set() however was "cheating" by not doing any of that and still being functional. Remove this anomaly (primarily as cleanup for future code improvements) given that this config is not worth the hassle of special-case code.
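For context, the unified !LLSC arch_atomic_set() that this converges on looks roughly like the sketch below, reusing the existing atomic_ops_lock()/atomic_ops_unlock() helpers from asm/smp.h (a sketch of the resulting shape, not the verbatim committed code):

	static inline void arch_atomic_set(atomic_t *v, int i)
	{
		unsigned long flags;

		/* irq-disable on UP, spinlock + irq-disable on SMP */
		atomic_ops_lock(flags);
		WRITE_ONCE(v->counter, i);
		atomic_ops_unlock(flags);
	}

UP now pays the (cheap) irq save/restore cost like the SMP case, in exchange for one uniform code path.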
Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic-spinlock.h | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h index bdf87610b2d7..8c6fd0e651e5 100644 --- a/arch/arc/include/asm/atomic-spinlock.h +++ b/arch/arc/include/asm/atomic-spinlock.h @@ -3,12 +3,10 @@ #ifndef _ASM_ARC_ATOMIC_SPLOCK_H #define _ASM_ARC_ATOMIC_SPLOCK_H -#ifndef CONFIG_SMP - - /* violating atomic_xxx API locking protocol in UP for optimization sake */ -#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) - -#else +/* + * Non hardware assisted Atomic-R-M-W + * Locking would change to irq-disabling only (UP) and spinlocks (SMP) + */ static inline void arch_atomic_set(atomic_t *v, int i) { @@ -30,13 +28,6 @@ static inline void arch_atomic_set(atomic_t *v, int i) #define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) -#endif - -/* - * Non hardware assisted Atomic-R-M-W - * Locking would change to irq-disabling only (UP) and spinlocks (SMP) - */ - #define ATOMIC_OP(op, c_op, asm_op) \ static inline void arch_atomic_##op(int i, atomic_t *v) \ { \ -- cgit From ca766f04ad1dc4527e3e388fe3736e6b1bf0e6b2 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 5 May 2020 22:59:57 -0700 Subject: ARC: atomic: !LLSC: use int data type consistently Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic-spinlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h index 8c6fd0e651e5..2c830347bfb4 100644 --- a/arch/arc/include/asm/atomic-spinlock.h +++ b/arch/arc/include/asm/atomic-spinlock.h @@ -42,7 +42,7 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \ static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ { \ unsigned long flags; \ - unsigned long temp; \ + unsigned int temp; \ \ /* \ * spin lock/unlock provides the needed smp_mb() before/after \ @@ -60,7 +60,7 @@ static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ { \ unsigned long flags; \ - unsigned long orig; \ + unsigned int orig; \ \ /* \ * spin lock/unlock provides the needed smp_mb() before/after \ -- cgit From 7e8f8cbb43990861e5881594e14d491f81931f8d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 13 Apr 2020 17:41:24 -0700 Subject: ARC: atomic64: LLSC: elide unused atomic_{and,or,xor,andnot}_return This is a non-functional change since those wrappers are not used in kernel sources at all. 
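The wrappers are guaranteed-dead because the kernel-wide atomic API defines value-returning "_return" forms only for the arithmetic ops; the bitwise ops are exposed in void and fetch_* flavours only. A rough sketch of that API surface (stated from the generic atomic conventions, not quoted from this patch):

	arch_atomic64_add_return(a, v);	/* arithmetic: _return is part of the API, kept */
	arch_atomic64_fetch_and(a, v);	/* bitwise: fetch form is part of the API, kept */
	arch_atomic64_and_return(a, v);	/* no such API exists -> generated wrapper was dead code */

The diff below drops the dead variants by re-instantiating ATOMIC64_OPS() without the ATOMIC64_OP_RETURN() leg for and/andnot/or/xor.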
Link: http://lists.infradead.org/pipermail/linux-snps-arc/2018-August/004246.html Suggested-by: Peter Zijlstra (Intel) Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic64-arcv2.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h index 53996b11b551..22ef1cbb94e2 100644 --- a/arch/arc/include/asm/atomic64-arcv2.h +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -116,6 +116,12 @@ static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ ATOMIC64_OPS(add, add.f, adc) ATOMIC64_OPS(sub, sub.f, sbc) + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) + ATOMIC64_OPS(and, and, and) ATOMIC64_OPS(andnot, bic, bic) ATOMIC64_OPS(or, or, or) -- cgit From b64be6836993c431e54fad239fcba0543854ee35 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 13 Apr 2020 18:07:49 -0700 Subject: ARC: atomics: implement relaxed variants The current ARC fetch/return atomics provide fully ordered semantics only with 2 full barriers around the operation. Instead implement them as relaxed variants without any barriers and rely on generic code to generate the fully-ordered, acquire and release variants by adding the appropriate full barriers. This helps elide some extra barriers in case of acquire/release/relaxed calls. bloat-o-meter for hsdk defconfig shows codegen improvements, although the numbers below are inflated due to unrelated inlining heuristic changes | bloat-o-meter vmlinux-643babe34fd7-non-relaxed vmlinux-45aa05cb44d7-relaxed | add/remove: 2/5 grow/shrink: 42/1222 up/down: 4158/-14312 (-10154) | Function old new delta | .. | sys_renameat 462 476 +14 | ip_mc_inc_group 424 436 +12 | do_read_cache_page 1882 1894 +12 | .. | refcount_dec_and_mutex_lock 254 250 -4 | refcount_dec_and_lock_irqsave 258 254 -4 | refcount_dec_and_lock 254 250 -4 | ..
| tcp_v6_route_req 246 238 -8 | tcp_v4_destroy_sock 286 278 -8 | tcp_twsk_unique 352 344 -8 Link: https://lore.kernel.org/r/20180830144344.GW24142@hirez.programming.kicks-ass.net Suggested-by: Peter Zijlstra (Intel) Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic-llsc.h | 32 +++++++++++++------------------- arch/arc/include/asm/atomic64-arcv2.h | 24 +++++++++++++----------- 2 files changed, 26 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h index aab4f2855457..088d348781c1 100644 --- a/arch/arc/include/asm/atomic-llsc.h +++ b/arch/arc/include/asm/atomic-llsc.h @@ -22,16 +22,10 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \ } \ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ +static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ { \ unsigned int val; \ \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND themselves don't provide any such semantics \ - */ \ - smp_mb(); \ - \ __asm__ __volatile__( \ "1: llock %[val], [%[ctr]] \n" \ " " #asm_op " %[val], %[val], %[i] \n" \ @@ -42,22 +36,17 @@ static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ [i] "ir" (i) \ : "cc"); \ \ - smp_mb(); \ - \ return val; \ } +#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed +#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed + #define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ { \ unsigned int val, orig; \ \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND themselves don't provide any such semantics \ - */ \ - smp_mb(); \ - \ __asm__ __volatile__( \ "1: llock %[orig], [%[ctr]] \n" \ " " #asm_op " %[val], %[orig], %[i] \n" \ @@ -69,11 +58,17 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ [i] "ir" (i) \ : "cc"); \ \ - smp_mb(); \ - \ return orig; \ } +#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed +#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed + +#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed +#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed +#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed +#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed + #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ ATOMIC_OP_RETURN(op, c_op, asm_op) \ @@ -93,7 +88,6 @@ ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) #define arch_atomic_andnot arch_atomic_andnot -#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h index 22ef1cbb94e2..c5a8010fdc97 100644 --- a/arch/arc/include/asm/atomic64-arcv2.h +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -64,12 +64,10 @@ static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \ } \ #define ATOMIC64_OP_RETURN(op, op1, op2) \ -static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \ +static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \ { \ s64 val; \ \ - smp_mb(); \ - \ __asm__ __volatile__( \ "1: \n" \ " llockd %0, [%1] \n" \ @@ -81,18 +79,17 @@ static inline s64 arch_atomic64_##op##_return(s64 a, 
atomic64_t *v) \ : "r"(&v->counter), "ir"(a) \ : "cc"); /* memory clobber comes from smp_mb() */ \ \ - smp_mb(); \ - \ return val; \ } +#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed +#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed + #define ATOMIC64_FETCH_OP(op, op1, op2) \ -static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ +static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \ { \ s64 val, orig; \ \ - smp_mb(); \ - \ __asm__ __volatile__( \ "1: \n" \ " llockd %0, [%2] \n" \ " " #op1 " %L1, %L0, %L3 \n" \ " " #op2 " %H1, %H0, %H3 \n" \ " scondd %1, [%2] \n" \ " bnz 1b \n" \ @@ -104,11 +101,17 @@ static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ : "r"(&v->counter), "ir"(a) \ : "cc"); /* memory clobber comes from smp_mb() */ \ \ - smp_mb(); \ - \ return orig; \ } +#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed +#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed + +#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed +#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed +#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed +#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed + #define ATOMIC64_OPS(op, op1, op2) \ ATOMIC64_OP(op, op1, op2) \ ATOMIC64_OP_RETURN(op, op1, op2) \ @@ -128,7 +131,6 @@ ATOMIC64_OPS(or, or, or) ATOMIC64_OPS(xor, xor, xor) #define arch_atomic64_andnot arch_atomic64_andnot -#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot #undef ATOMIC64_OPS #undef ATOMIC64_FETCH_OP -- cgit From cea43147905f1c2b7b48104a5304cf5229f45bec Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 4 Sep 2018 16:11:29 -0700 Subject: ARC: switch to generic bitops - !LLSC now only needs a single spinlock for atomics and bitops - Some codegen changes (slight bloat) with generic bitops 1. code increase due to LD-check-atomic paradigm vs. unconditional atomic (but dirty'ing the cache line even if set already). So despite the increase, generic is the right thing to do. 2. code decrease (but use of costlier instructions such as DIV vs. shifts based math) due to signed arithmetic. This needs to be revisited separately.
arc: static inline int test_bit(unsigned int nr, const volatile unsigned long *addr) ^^^^^^^^^^^^ generic: static inline int test_bit(int nr, const volatile unsigned long *addr) ^^^ Link: https://lore.kernel.org/r/20180830135749.GA13005@arm.com Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) [vgupta: wrote patch based on Will's poc, analysed codegen diffs] Signed-off-by: Vineet Gupta --- arch/arc/include/asm/bitops.h | 184 +----------------------------------------- arch/arc/include/asm/smp.h | 14 ---- arch/arc/kernel/smp.c | 2 - 3 files changed, 2 insertions(+), 198 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index fb98440c0bd4..4f35130f5ba3 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -14,188 +14,6 @@ #include #include -#include -#ifndef CONFIG_ARC_HAS_LLSC -#include -#endif - -#ifdef CONFIG_ARC_HAS_LLSC - -/* - * Hardware assisted Atomic-R-M-W - */ - -#define BIT_OP(op, c_op, asm_op) \ -static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ -{ \ - unsigned int temp; \ - \ - m += nr >> 5; \ - \ - nr &= 0x1f; \ - \ - __asm__ __volatile__( \ - "1: llock %0, [%1] \n" \ - " " #asm_op " %0, %0, %2 \n" \ - " scond %0, [%1] \n" \ - " bnz 1b \n" \ - : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \ - : "r"(m), /* Not "m": llock only supports reg direct addr mode */ \ - "ir"(nr) \ - : "cc"); \ -} - -/* - * Semantically: - * Test the bit - * if clear - * set it and return 0 (old value) - * else - * return 1 (old value). - * - * Since ARC lacks a equivalent h/w primitive, the bit is set unconditionally - * and the old value of bit is returned - */ -#define TEST_N_BIT_OP(op, c_op, asm_op) \ -static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ -{ \ - unsigned long old, temp; \ - \ - m += nr >> 5; \ - \ - nr &= 0x1f; \ - \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND themselves don't provide any such smenatic \ - */ \ - smp_mb(); \ - \ - __asm__ __volatile__( \ - "1: llock %0, [%2] \n" \ - " " #asm_op " %1, %0, %3 \n" \ - " scond %1, [%2] \n" \ - " bnz 1b \n" \ - : "=&r"(old), "=&r"(temp) \ - : "r"(m), "ir"(nr) \ - : "cc"); \ - \ - smp_mb(); \ - \ - return (old & (1 << nr)) != 0; \ -} - -#else /* !CONFIG_ARC_HAS_LLSC */ - -/* - * Non hardware assisted Atomic-R-M-W - * Locking would change to irq-disabling only (UP) and spinlocks (SMP) - * - * There's "significant" micro-optimization in writing our own variants of - * bitops (over generic variants) - * - * (1) The generic APIs have "signed" @nr while we have it "unsigned" - * This avoids extra code to be generated for pointer arithmatic, since - * is "not sure" that index is NOT -ve - * (2) Utilize the fact that ARCompact bit fidding insn (BSET/BCLR/ASL) etc - * only consider bottom 5 bits of @nr, so NO need to mask them off. 
- * (GCC Quirk: however for constant @nr we still need to do the masking - * at compile time) - */ - -#define BIT_OP(op, c_op, asm_op) \ -static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ -{ \ - unsigned long temp, flags; \ - m += nr >> 5; \ - \ - /* \ - * spin lock/unlock provide the needed smp_mb() before/after \ - */ \ - bitops_lock(flags); \ - \ - temp = *m; \ - *m = temp c_op (1UL << (nr & 0x1f)); \ - \ - bitops_unlock(flags); \ -} - -#define TEST_N_BIT_OP(op, c_op, asm_op) \ -static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ -{ \ - unsigned long old, flags; \ - m += nr >> 5; \ - \ - bitops_lock(flags); \ - \ - old = *m; \ - *m = old c_op (1UL << (nr & 0x1f)); \ - \ - bitops_unlock(flags); \ - \ - return (old & (1UL << (nr & 0x1f))) != 0; \ -} - -#endif - -/*************************************** - * Non atomic variants - **************************************/ - -#define __BIT_OP(op, c_op, asm_op) \ -static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \ -{ \ - unsigned long temp; \ - m += nr >> 5; \ - \ - temp = *m; \ - *m = temp c_op (1UL << (nr & 0x1f)); \ -} - -#define __TEST_N_BIT_OP(op, c_op, asm_op) \ -static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ -{ \ - unsigned long old; \ - m += nr >> 5; \ - \ - old = *m; \ - *m = old c_op (1UL << (nr & 0x1f)); \ - \ - return (old & (1UL << (nr & 0x1f))) != 0; \ -} - -#define BIT_OPS(op, c_op, asm_op) \ - \ - /* set_bit(), clear_bit(), change_bit() */ \ - BIT_OP(op, c_op, asm_op) \ - \ - /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\ - TEST_N_BIT_OP(op, c_op, asm_op) \ - \ - /* __set_bit(), __clear_bit(), __change_bit() */ \ - __BIT_OP(op, c_op, asm_op) \ - \ - /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\ - __TEST_N_BIT_OP(op, c_op, asm_op) - -BIT_OPS(set, |, bset) -BIT_OPS(clear, & ~, bclr) -BIT_OPS(change, ^, bxor) - -/* - * This routine doesn't need to be atomic. 
- */ -static inline int -test_bit(unsigned int nr, const volatile unsigned long *addr) -{ - unsigned long mask; - - addr += nr >> 5; - - mask = 1UL << (nr & 0x1f); - - return ((mask & *addr) != 0); -} #ifdef CONFIG_ISA_ARCOMPACT @@ -368,6 +186,8 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x) #include #include #include +#include +#include #include #include diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index c5de4008d19f..d856491606ac 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -105,7 +105,6 @@ static inline const char *arc_platform_smp_cpuinfo(void) #include extern arch_spinlock_t smp_atomic_ops_lock; -extern arch_spinlock_t smp_bitops_lock; #define atomic_ops_lock(flags) do { \ local_irq_save(flags); \ @@ -117,24 +116,11 @@ extern arch_spinlock_t smp_bitops_lock; local_irq_restore(flags); \ } while (0) -#define bitops_lock(flags) do { \ - local_irq_save(flags); \ - arch_spin_lock(&smp_bitops_lock); \ -} while (0) - -#define bitops_unlock(flags) do { \ - arch_spin_unlock(&smp_bitops_lock); \ - local_irq_restore(flags); \ -} while (0) - #else /* !CONFIG_SMP */ #define atomic_ops_lock(flags) local_irq_save(flags) #define atomic_ops_unlock(flags) local_irq_restore(flags) -#define bitops_lock(flags) local_irq_save(flags) -#define bitops_unlock(flags) local_irq_restore(flags) - #endif /* !CONFIG_SMP */ #endif /* !CONFIG_ARC_HAS_LLSC */ diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index b5b5fe2c6e1e..78e6d069b1c1 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -29,10 +29,8 @@ #ifndef CONFIG_ARC_HAS_LLSC arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED; -arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED; EXPORT_SYMBOL_GPL(smp_atomic_ops_lock); -EXPORT_SYMBOL_GPL(smp_bitops_lock); #endif struct plat_smp_ops __weak plat_smp_ops; -- cgit From 9d011e12075dc51fb57f8203a08cc5229fbcb2ef Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 5 May 2020 13:54:24 -0700 Subject: ARC: bitops: fls/ffs to take int (vs long) per asm-generic defines Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/bitops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 4f35130f5ba3..a7daaf64ae34 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -114,7 +114,7 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word) * @result: [1-32] * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0 */ -static inline __attribute__ ((const)) int fls(unsigned long x) +static inline __attribute__ ((const)) int fls(unsigned int x) { int n; @@ -141,7 +141,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x) * ffs = Find First Set in word (LSB to MSB) * @result: [1-32], 0 if all 0's */ -static inline __attribute__ ((const)) int ffs(unsigned long x) +static inline __attribute__ ((const)) int ffs(unsigned int x) { int n; -- cgit From ecf51c9fa0960fd25cd66f2280fb1980b0d2e300 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 6 May 2020 14:41:12 -0700 Subject: ARC: xchg: !LLSC: remove UP micro-optimization/hack It gets in the way of cleaning things up and is a maintenance pain in the neck!
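Concretely, the guard on the lock-taking arch_xchg() changes from "!LLSC && SMP" to just "!LLSC", so UP now goes through the same path. A rough sketch of the resulting shape (not the verbatim macro):

	#ifndef CONFIG_ARC_HAS_LLSC	/* was: !LLSC && CONFIG_SMP */
	#define arch_xchg(ptr, with)					\
	({								\
		unsigned long flags;					\
		typeof(*(ptr)) old_val;					\
									\
		atomic_ops_lock(flags);	/* now taken on UP too */	\
		old_val = _xchg(ptr, with);				\
		atomic_ops_unlock(flags);				\
		old_val;						\
	})
	#endif

This keeps xchg() under the same atomic_ops_lock that cmpxchg() uses, since llist.h-style users mix the two on the same data.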
Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cmpxchg.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index d42917e803e1..f9564dbe39b7 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -113,15 +113,9 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, * - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot * of kernel code which calls xchg()/cmpxchg() on same data (see llist.h) * Hence xchg() needs to follow same locking rules. - * - * Technically the lock is also needed for UP (boils down to irq save/restore) - * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to - * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg() - * Other way around, xchg is one instruction anyways, so can't be interrupted - * as such */ -#if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP) +#ifndef CONFIG_ARC_HAS_LLSC #define arch_xchg(ptr, with) \ ({ \ -- cgit From e188f3330a13df904d77003846eafd3edf99009d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 6 May 2020 15:13:25 -0700 Subject: ARC: cmpxchg/xchg: rewrite as macros to make type safe Existing code forces/assumes args to type "long" which won't work in LP64 regime, so prepare code for that. Interestingly this should be a non-functional change but I do see some codegen changes | bloat-o-meter vmlinux-cmpxchg-A vmlinux-cmpxchg-B | add/remove: 0/0 grow/shrink: 17/12 up/down: 218/-150 (68) | | Function old new delta | rwsem_optimistic_spin 518 550 +32 | rwsem_down_write_slowpath 1244 1274 +30 | __do_sys_perf_event_open 2576 2600 +24 | down_read 192 200 +8 | __down_read 192 200 +8 ... | task_work_run 168 148 -20 | dma_fence_chain_walk.part 760 736 -24 | __genradix_ptr_alloc 674 646 -28 Total: Before=6187409, After=6187477, chg +0.00% Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cmpxchg.h | 213 ++++++++++++++++++++++------------- 1 file changed, 117 insertions(+), 96 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index f9564dbe39b7..00deb076d6f6 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -6,6 +6,7 @@ #ifndef __ASM_ARC_CMPXCHG_H #define __ASM_ARC_CMPXCHG_H +#include #include #include @@ -13,62 +14,77 @@ #ifdef CONFIG_ARC_HAS_LLSC -static inline unsigned long -__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) -{ - unsigned long prev; - - /* - * Explicit full memory barrier needed before/after as - * LLOCK/SCOND themselves don't provide any such semantics - */ - smp_mb(); - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " brne %0, %2, 2f \n" - " scond %3, [%1] \n" - " bnz 1b \n" - "2: \n" - : "=&r"(prev) /* Early clobber, to prevent reg reuse */ - : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */ - "ir"(expected), - "r"(new) /* can't be "ir".
scond can't take LIMM for "b" */ - : "cc", "memory"); /* so that gcc knows memory is being written here */ - - smp_mb(); - - return prev; -} - -#else /* !CONFIG_ARC_HAS_LLSC */ - -static inline unsigned long -__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) -{ - unsigned long flags; - int prev; - volatile unsigned long *p = ptr; - - /* - * spin lock/unlock provide the needed smp_mb() before/after - */ - atomic_ops_lock(flags); - prev = *p; - if (prev == expected) - *p = new; - atomic_ops_unlock(flags); - return prev; -} +/* + * if (*ptr == @old) + * *ptr = @new + */ +#define __cmpxchg(ptr, old, new) \ +({ \ + __typeof__(*(ptr)) _prev; \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%1] \n" \ + " brne %0, %2, 2f \n" \ + " scond %3, [%1] \n" \ + " bnz 1b \n" \ + "2: \n" \ + : "=&r"(_prev) /* Early clobber prevent reg reuse */ \ + : "r"(ptr), /* Not "m": llock only supports reg */ \ + "ir"(old), \ + "r"(new) /* Not "ir": scond can't take LIMM */ \ + : "cc", \ + "memory"); /* gcc knows memory is clobbered */ \ + \ + _prev; \ +}) -#endif +#define arch_cmpxchg(ptr, old, new) \ +({ \ + __typeof__(ptr) _p_ = (ptr); \ + __typeof__(*(ptr)) _o_ = (old); \ + __typeof__(*(ptr)) _n_ = (new); \ + __typeof__(*(ptr)) _prev_; \ + \ + switch(sizeof((_p_))) { \ + case 4: \ + /* \ + * Explicit full memory barrier needed before/after \ + */ \ + smp_mb(); \ + _prev_ = __cmpxchg(_p_, _o_, _n_); \ + smp_mb(); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + _prev_; \ +}) -#define arch_cmpxchg(ptr, o, n) ({ \ - (typeof(*(ptr)))__cmpxchg((ptr), \ - (unsigned long)(o), \ - (unsigned long)(n)); \ +#else + +#define arch_cmpxchg(ptr, old, new) \ +({ \ + volatile __typeof__(ptr) _p_ = (ptr); \ + __typeof__(*(ptr)) _o_ = (old); \ + __typeof__(*(ptr)) _n_ = (new); \ + __typeof__(*(ptr)) _prev_; \ + unsigned long __flags; \ + \ + BUILD_BUG_ON(sizeof(_p_) != 4); \ + \ + /* \ + * spin lock/unlock provide the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(__flags); \ + _prev_ = *_p_; \ + if (_prev_ == _o_) \ + *_p_ = _n_; \ + atomic_ops_unlock(__flags); \ + _prev_; \ }) +#endif + /* * atomic_cmpxchg is same as cmpxchg * LLSC: only different in data-type, semantics are exactly same @@ -77,61 +93,66 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) */ #define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n))) - /* - * xchg (reg with memory) based on "Native atomic" EX insn + * xchg */ -static inline unsigned long __xchg(unsigned long val, volatile void *ptr, - int size) -{ - extern unsigned long __xchg_bad_pointer(void); - - switch (size) { - case 4: - smp_mb(); - - __asm__ __volatile__( - " ex %0, [%1] \n" - : "+r"(val) - : "r"(ptr) - : "memory"); +#ifdef CONFIG_ARC_HAS_LLSC - smp_mb(); +#define __xchg(ptr, val) \ +({ \ + __asm__ __volatile__( \ + " ex %0, [%1] \n" /* set new value */ \ + : "+r"(val) \ + : "r"(ptr) \ + : "memory"); \ + _val_; /* get old value */ \ +}) - return val; - } - return __xchg_bad_pointer(); -} +#define arch_xchg(ptr, val) \ +({ \ + __typeof__(ptr) _p_ = (ptr); \ + __typeof__(*(ptr)) _val_ = (val); \ + \ + switch(sizeof(*(_p_))) { \ + case 4: \ + smp_mb(); \ + _val_ = __xchg(_p_, _val_); \ + smp_mb(); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + _val_; \ +}) -#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \ - sizeof(*(ptr)))) +#else /* !CONFIG_ARC_HAS_LLSC */ /* - * xchg() maps directly to ARC EX instruction which guarantees atomicity. 
- * However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock - * due to a subtle reason: - * - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot - * of kernel code which calls xchg()/cmpxchg() on same data (see llist.h) - * Hence xchg() needs to follow same locking rules. + * EX instructions is baseline and present in !LLSC too. But in this + * regime it still needs use @atomic_ops_lock spinlock to allow interop + * with cmpxchg() which uses spinlock in !LLSC + * (llist.h use xchg and cmpxchg on sama data) */ -#ifndef CONFIG_ARC_HAS_LLSC - -#define arch_xchg(ptr, with) \ -({ \ - unsigned long flags; \ - typeof(*(ptr)) old_val; \ - \ - atomic_ops_lock(flags); \ - old_val = _xchg(ptr, with); \ - atomic_ops_unlock(flags); \ - old_val; \ +#define arch_xchg(ptr, val) \ +({ \ + __typeof__(ptr) _p_ = (ptr); \ + __typeof__(*(ptr)) _val_ = (val); \ + \ + unsigned long __flags; \ + \ + atomic_ops_lock(__flags); \ + \ + __asm__ __volatile__( \ + " ex %0, [%1] \n" \ + : "+r"(_val_) \ + : "r"(_p_) \ + : "memory"); \ + \ + atomic_ops_unlock(__flags); \ + _val_; \ }) -#else - -#define arch_xchg(ptr, with) _xchg(ptr, with) - #endif /* -- cgit From ddc348c44d82d0b55236210d5606ac868e6cc364 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sat, 9 May 2020 00:15:36 -0700 Subject: ARC: cmpxchg/xchg: implement relaxed variants (LLSC config only) It only makes sense to do this for the LLSC config Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cmpxchg.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 00deb076d6f6..e2ae0eb1ca07 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -38,7 +38,7 @@ _prev; \ }) -#define arch_cmpxchg(ptr, old, new) \ +#define arch_cmpxchg_relaxed(ptr, old, new) \ ({ \ __typeof__(ptr) _p_ = (ptr); \ __typeof__(*(ptr)) _o_ = (old); \ @@ -47,12 +47,7 @@ \ switch(sizeof((_p_))) { \ case 4: \ - /* \ - * Explicit full memory barrier needed before/after \ - */ \ - smp_mb(); \ _prev_ = __cmpxchg(_p_, _o_, _n_); \ - smp_mb(); \ break; \ default: \ BUILD_BUG(); \ @@ -108,16 +103,14 @@ _val_; /* get old value */ \ }) -#define arch_xchg(ptr, val) \ +#define arch_xchg_relaxed(ptr, val) \ ({ \ __typeof__(ptr) _p_ = (ptr); \ __typeof__(*(ptr)) _val_ = (val); \ \ switch(sizeof(*(_p_))) { \ case 4: \ - smp_mb(); \ _val_ = __xchg(_p_, _val_); \ - smp_mb(); \ break; \ default: \ BUILD_BUG(); \ -- cgit From 301014cf6d72836dd5fd5b3e9c92633f35b298c0 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 11 May 2020 13:27:23 -0700 Subject: ARC: atomic_cmpxchg/atomic_xchg: implement relaxed variants And move them out of cmpxchg.h to canonical atomic.h Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic.h | 27 +++++++++++++++++++++++++++ arch/arc/include/asm/cmpxchg.h | 23 ----------------------- 2 files changed, 27 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index ee88e1dbaab5..52ee51e1ff7c 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -22,6 +22,33 @@ #include #endif +#define arch_atomic_cmpxchg(v, o, n) \ +({ \ + arch_cmpxchg(&((v)->counter), (o), (n)); \ +}) + +#ifdef arch_cmpxchg_relaxed +#define arch_atomic_cmpxchg_relaxed(v, o, n) \ +({ \ + arch_cmpxchg_relaxed(&((v)->counter), (o), (n)); \ +}) +#endif + +#define 
arch_atomic_xchg(v, n) \ +({ \ + arch_xchg(&((v)->counter), (n)); \ +}) + +#ifdef arch_xchg_relaxed +#define arch_atomic_xchg_relaxed(v, n) \ +({ \ + arch_xchg_relaxed(&((v)->counter), (n)); \ +}) +#endif + +/* + * 64-bit atomics + */ #ifdef CONFIG_GENERIC_ATOMIC64 #include #else diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index e2ae0eb1ca07..c5b544a5fe81 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -80,14 +80,6 @@ #endif -/* - * atomic_cmpxchg is same as cmpxchg - * LLSC: only different in data-type, semantics are exactly same - * !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee - * semantics, and this lock also happens to be used by atomic_*() - */ -#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n))) - /* * xchg */ @@ -148,19 +140,4 @@ #endif -/* - * "atomic" variant of xchg() - * REQ: It needs to follow the same serialization rules as other atomic_xxx() - * Since xchg() doesn't always do that, it would seem that following definition - * is incorrect. But here's the rationale: - * SMP : Even xchg() takes the atomic_ops_lock, so OK. - * LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC - * is natively "SMP safe", no serialization required). - * UP : other atomics disable IRQ, so no way a difft ctxt atomic_xchg() - * could clobber them. atomic_xchg() itself would be 1 insn, so it - * can't be clobbered by others. Thus no serialization required when - * atomic_xchg is involved. - */ -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) - #endif -- cgit From 767a697e75769fdae092a15d92bf562d5631a490 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 5 Aug 2021 23:38:44 -0700 Subject: ARC: retire ARC750 support There's no known/active customer using them with latest kernels anyways. Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index b5bf68e74732..fb3c319084c7 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -116,16 +116,9 @@ choice default ARC_CPU_770 if ISA_ARCOMPACT default ARC_CPU_HS if ISA_ARCV2 -if ISA_ARCOMPACT - -config ARC_CPU_750D - bool "ARC750D" - select ARC_CANT_LLSC - help - Support for ARC750 core - config ARC_CPU_770 bool "ARC770" + depends on ISA_ARCOMPACT select ARC_HAS_SWAPE help Support for ARC770 core introduced with Rel 4.10 (Summer 2011) @@ -135,8 +128,6 @@ config ARC_CPU_770 -Caches: New Prog Model, Region Flush -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr -endif #ISA_ARCOMPACT - config ARC_CPU_HS bool "ARC-HS" depends on ISA_ARCV2 @@ -275,7 +266,6 @@ config ARC_DCCM_BASE choice prompt "MMU Version" default ARC_MMU_V3 if ARC_CPU_770 - default ARC_MMU_V2 if ARC_CPU_750D default ARC_MMU_V4 if ARC_CPU_HS if ISA_ARCOMPACT -- cgit From 288ff7de62af0936353c9394de9d0b2c6dd22c80 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 9 Sep 2019 17:36:34 -0700 Subject: ARC: retire MMUv1 and MMUv2 support There's no known/active customer using them with latest kernels anyways. 
Removal helps cleanup code and remove the hack for MMU_VER to MMU_V[3-4] conversion Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 22 +----- arch/arc/include/asm/mmu.h | 22 +----- arch/arc/include/asm/pgtable.h | 26 +------ arch/arc/include/asm/tlb-mmu1.h | 101 ------------------------- arch/arc/mm/cache.c | 110 ++++----------------------- arch/arc/mm/tlb.c | 163 +++++----------------------------------- arch/arc/mm/tlbex.S | 10 +-- 7 files changed, 42 insertions(+), 412 deletions(-) delete mode 100644 arch/arc/include/asm/tlb-mmu1.h (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index fb3c319084c7..53d143fc42fe 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -265,32 +265,17 @@ config ARC_DCCM_BASE choice prompt "MMU Version" - default ARC_MMU_V3 if ARC_CPU_770 - default ARC_MMU_V4 if ARC_CPU_HS - -if ISA_ARCOMPACT - -config ARC_MMU_V1 - bool "MMU v1" - help - Orig ARC700 MMU - -config ARC_MMU_V2 - bool "MMU v2" - help - Fixed the deficiency of v1 - possible thrashing in memcpy scenario - when 2 D-TLB and 1 I-TLB entries index into same 2way set. + default ARC_MMU_V3 if ISA_ARCOMPACT + default ARC_MMU_V4 if ISA_ARCV2 config ARC_MMU_V3 bool "MMU v3" - depends on ARC_CPU_770 + depends on ISA_ARCOMPACT help Introduced with ARC700 4.10: New Features Variable Page size (1k-16k), var JTLB size 128 x (2 or 4) Shared Address Spaces (SASID) -endif - config ARC_MMU_V4 bool "MMU v4" depends on ISA_ARCV2 @@ -309,7 +294,6 @@ config ARC_PAGE_SIZE_8K config ARC_PAGE_SIZE_16K bool "16KB" - depends on ARC_MMU_V3 || ARC_MMU_V4 config ARC_PAGE_SIZE_4K bool "4KB" diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 26b731d32a2b..a81d1975866a 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -10,19 +10,10 @@ #include /* NR_CPUS */ #endif -#if defined(CONFIG_ARC_MMU_V1) -#define CONFIG_ARC_MMU_VER 1 -#elif defined(CONFIG_ARC_MMU_V2) -#define CONFIG_ARC_MMU_VER 2 -#elif defined(CONFIG_ARC_MMU_V3) -#define CONFIG_ARC_MMU_VER 3 -#elif defined(CONFIG_ARC_MMU_V4) -#define CONFIG_ARC_MMU_VER 4 -#endif - /* MMU Management regs */ #define ARC_REG_MMU_BCR 0x06f -#if (CONFIG_ARC_MMU_VER < 4) + +#ifdef CONFIG_ARC_MMU_V3 #define ARC_REG_TLBPD0 0x405 #define ARC_REG_TLBPD1 0x406 #define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */ @@ -52,7 +43,7 @@ /* Error code if probe fails */ #define TLB_LKUP_ERR 0x80000000 -#if (CONFIG_ARC_MMU_VER < 4) +#ifdef CONFIG_ARC_MMU_V3 #define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) #else #define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000) @@ -63,15 +54,10 @@ #define TLBRead 0x2 #define TLBGetIndex 0x3 #define TLBProbe 0x4 - -#if (CONFIG_ARC_MMU_VER >= 2) #define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ #define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ -#else -#define TLBWriteNI TLBWrite /* Not present in hardware, fallback */ -#endif -#if (CONFIG_ARC_MMU_VER >= 4) +#ifdef CONFIG_ARC_MMU_V4 #define TLBInsertEntry 0x7 #define TLBDeleteEntry 0x8 #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 320cc0ae8a08..0c3e220bd2b4 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -35,7 +35,7 @@ #include #include #include -#include /* to propagate CONFIG_ARC_MMU_VER */ +#include /************************************************************************** * Page Table Flags @@ -51,20 +51,6 @@ * (saves some bit shift ops in TLB Miss hdlrs) */ -#if (CONFIG_ARC_MMU_VER <= 2) - -#define _PAGE_ACCESSED (1<<1) /* Page is accessed (S) */ 
-#define _PAGE_CACHEABLE (1<<2) /* Page is cached (H) */ -#define _PAGE_EXECUTE (1<<3) /* Page has user execute perm (H) */ -#define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */ -#define _PAGE_READ (1<<5) /* Page has user read perm (H) */ -#define _PAGE_DIRTY (1<<6) /* Page modified (dirty) (S) */ -#define _PAGE_SPECIAL (1<<7) -#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ -#define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */ - -#else /* MMU v3 onwards */ - #define _PAGE_CACHEABLE (1<<0) /* Page is cached (H) */ #define _PAGE_EXECUTE (1<<1) /* Page has user execute perm (H) */ #define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */ @@ -73,23 +59,15 @@ #define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */ #define _PAGE_SPECIAL (1<<6) -#if (CONFIG_ARC_MMU_VER >= 4) -#define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */ -#endif - #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ -#if (CONFIG_ARC_MMU_VER >= 4) +#ifdef CONFIG_ARC_MMU_V4 #define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */ #endif #define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr usable for shared TLB entries (H) */ - -#define _PAGE_UNUSED_BIT (1<<12) -#endif - /* vmalloc permissions */ #define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \ _PAGE_GLOBAL | _PAGE_PRESENT) diff --git a/arch/arc/include/asm/tlb-mmu1.h b/arch/arc/include/asm/tlb-mmu1.h deleted file mode 100644 index a3083b36f5f4..000000000000 --- a/arch/arc/include/asm/tlb-mmu1.h +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -#ifndef __ASM_TLB_MMU_V1_H__ -#define __ASM_TLB_MMU_V1_H__ - -#include - -#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1) - -.macro TLB_WRITE_HEURISTICS - -#define JH_HACK1 -#undef JH_HACK2 -#undef JH_HACK3 - -#ifdef JH_HACK3 -; Calculate set index for 2-way MMU -; -avoiding use of GetIndex from MMU -; and its unpleasant LFSR pseudo-random sequence -; -; r1 = TLBPD0 from TLB_RELOAD above -; -; -- jh_ex_way_set not cleared on startup -; didn't want to change setup.c -; hence extra instruction to clean -; -; -- should be in cache since in same line -; as r0/r1 saves above -; -ld r0,[jh_ex_way_sel] ; victim pointer -and r0,r0,1 ; clean -xor.f r0,r0,1 ; flip -st r0,[jh_ex_way_sel] ; store back -asr r0,r1,12 ; get set # <<1, note bit 12=R=0 -or.nz r0,r0,1 ; set way bit -and r0,r0,0xff ; clean -sr r0,[ARC_REG_TLBINDEX] -#endif - -#ifdef JH_HACK2 -; JH hack #2 -; Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU -; Slower in thrash case (where it matters) because more code is executed -; Inefficient due to two-register paradigm of this miss handler -; -/* r1 = data TLBPD0 at this point */ -lr r0,[eret] /* instruction address */ -xor r0,r0,r1 /* compare set # */ -and.f r0,r0,0x000fe000 /* 2-way MMU mask */ -bne 88f /* not in same set - no need to probe */ - -lr r0,[eret] /* instruction address */ -and r0,r0,PAGE_MASK /* VPN of instruction address */ -; lr r1,[ARC_REG_TLBPD0] /* Data VPN+ASID - already in r1 from TLB_RELOAD*/ -and r1,r1,0xff /* Data ASID */ -or r0,r0,r1 /* Instruction address + Data ASID */ - -lr r1,[ARC_REG_TLBPD0] /* save TLBPD0 containing data TLB*/ -sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */ -sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */ -lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction 
is, if at all */ -sr r1,[ARC_REG_TLBPD0] /* restore TLBPD0 */ - -xor r0,r0,1 /* flip bottom bit of data index */ -b.d 89f -sr r0,[ARC_REG_TLBINDEX] /* and put it back */ -88: -sr TLBGetIndex, [ARC_REG_TLBCOMMAND] -89: -#endif - -#ifdef JH_HACK1 -; -; Always checks whether instruction will be kicked out by dtlb miss -; -mov_s r3, r1 ; save PD0 prepared by TLB_RELOAD in r3 -lr r0,[eret] /* instruction address */ -and r0,r0,PAGE_MASK /* VPN of instruction address */ -bmsk r1,r3,7 /* Data ASID, bits 7-0 */ -or_s r0,r0,r1 /* Instruction address + Data ASID */ - -sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */ -sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */ -lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */ -sr r3,[ARC_REG_TLBPD0] /* restore TLBPD0 */ - -sr TLBGetIndex, [ARC_REG_TLBCOMMAND] -lr r1,[ARC_REG_TLBINDEX] /* r1 = index where MMU wants to put data */ -cmp r0,r1 /* if no match on indices, go around */ -xor.eq r1,r1,1 /* flip bottom bit of data index */ -sr r1,[ARC_REG_TLBINDEX] /* and put it back */ -#endif - -.endm - -#endif - -#endif diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 102418ac5ff4..8aa1231865d1 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -205,93 +205,24 @@ slc_chk: #define OP_INV_IC 0x4 /* - * I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3) + * Cache Flush programming model * - * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. - * The orig Cache Management Module "CDU" only required paddr to invalidate a - * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. - * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching - * the exact same line. + * ARC700 MMUv3 I$ and D$ are both VIPT and can potentially alias. + * Programming model requires both paddr and vaddr irrespecive of aliasing + * considerations: + * - vaddr in {I,D}C_IV?L + * - paddr in {I,D}C_PTAG * - * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, - * paddr alone could not be used to correctly index the cache. + * In HS38x (MMUv4), D$ is PIPT, I$ is VIPT and can still alias. + * Programming model is different for aliasing vs. non-aliasing I$ + * - D$ / Non-aliasing I$: only paddr in {I,D}C_IV?L + * - Aliasing I$: same as ARC700 above (so MMUv3 routine used for MMUv4 I$) * - * ------------------ - * MMU v1/v2 (Fixed Page Size 8k) - * ------------------ - * The solution was to provide CDU with these additonal vaddr bits. These - * would be bits [x:13], x would depend on cache-geometry, 13 comes from - * standard page size of 8k. - * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits - * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the - * orig 5 bits of paddr were anyways ignored by CDU line ops, as they - * represent the offset within cache-line. The adv of using this "clumsy" - * interface for additional info was no new reg was needed in CDU programming - * model. - * - * 17:13 represented the max num of bits passable, actual bits needed were - * fewer, based on the num-of-aliases possible. - * -for 2 alias possibility, only bit 13 needed (32K cache) - * -for 4 alias possibility, bits 14:13 needed (64K cache) - * - * ------------------ - * MMU v3 - * ------------------ - * This ver of MMU supports variable page sizes (1k-16k): although Linux will - * only support 8k (default), 16k and 4k. 
- * However from hardware perspective, smaller page sizes aggravate aliasing - * meaning more vaddr bits needed to disambiguate the cache-line-op ; - * the existing scheme of piggybacking won't work for certain configurations. - * Two new registers IC_PTAG and DC_PTAG inttoduced. - * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs + * - If PAE40 is enabled, independent of aliasing considerations, the higher + * bits needs to be written into PTAG_HI */ static inline -void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr, - unsigned long sz, const int op, const int full_page) -{ - unsigned int aux_cmd; - int num_lines; - - if (op == OP_INV_IC) { - aux_cmd = ARC_REG_IC_IVIL; - } else { - /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ - aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; - } - - /* Ensure we properly floor/ceil the non-line aligned/sized requests - * and have @paddr - aligned to cache line and integral @num_lines. - * This however can be avoided for page sized since: - * -@paddr will be cache-line aligned already (being page aligned) - * -@sz will be integral multiple of line size (being page sized). - */ - if (!full_page) { - sz += paddr & ~CACHE_LINE_MASK; - paddr &= CACHE_LINE_MASK; - vaddr &= CACHE_LINE_MASK; - } - - num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); - - /* MMUv2 and before: paddr contains stuffed vaddrs bits */ - paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; - - while (num_lines-- > 0) { - write_aux_reg(aux_cmd, paddr); - paddr += L1_CACHE_BYTES; - } -} - -/* - * For ARC700 MMUv3 I-cache and D-cache flushes - * - ARC700 programming model requires paddr and vaddr be passed in seperate - * AUX registers (*_IV*L and *_PTAG respectively) irrespective of whether the - * caches actually alias or not. 
- * - For HS38, only the aliasing I-cache configuration uses the PTAG reg - * (non aliasing I-cache version doesn't; while D-cache can't possibly alias) - */ -static inline void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op, const int full_page) { @@ -350,17 +281,6 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, #ifndef USE_RGN_FLSH /* - * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT - * Here's how cache ops are implemented - * - * - D-cache: only paddr needed (in DC_IVDL/DC_FLDL) - * - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL) - * - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG - * respectively, similar to MMU v3 programming model, hence - * __cache_line_loop_v3() is used) - * - * If PAE40 is enabled, independent of aliasing considerations, the higher bits - * needs to be written into PTAG_HI */ static inline void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, @@ -460,11 +380,9 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, #endif -#if (CONFIG_ARC_MMU_VER < 3) -#define __cache_line_loop __cache_line_loop_v2 -#elif (CONFIG_ARC_MMU_VER == 3) +#ifdef CONFIG_ARC_MMU_V3 #define __cache_line_loop __cache_line_loop_v3 -#elif (CONFIG_ARC_MMU_VER > 3) +#else #define __cache_line_loop __cache_line_loop_v4 #endif diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 9c7c68247289..8696829d37c0 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -1,51 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * TLB Management (flush/create/diagnostics) for ARC700 + * TLB Management (flush/create/diagnostics) for MMUv3 and MMUv4 * * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * - * vineetg: Aug 2011 - * -Reintroduce duplicate PD fixup - some customer chips still have the issue - * - * vineetg: May 2011 - * -No need to flush_cache_page( ) for each call to update_mmu_cache() - * some of the LMBench tests improved amazingly - * = page-fault thrice as fast (75 usec to 28 usec) - * = mmap twice as fast (9.6 msec to 4.6 msec), - * = fork (5.3 msec to 3.7 msec) - * - * vineetg: April 2011 : - * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, - * helps avoid a shift when preparing PD0 from PTE - * - * vineetg: April 2011 : Preparing for MMU V3 - * -MMU v2/v3 BCRs decoded differently - * -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512 - * -tlb_entry_erase( ) can be void - * -local_flush_tlb_range( ): - * = need not "ceil" @end - * = walks MMU only if range spans < 32 entries, as opposed to 256 - * - * Vineetg: Sept 10th 2008 - * -Changes related to MMU v2 (Rel 4.8) - * - * Vineetg: Aug 29th 2008 - * -In TLB Flush operations (Metal Fix MMU) there is a explicit command to - * flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd, - * it fails. Thus need to load it with ANY valid value before invoking - * TLBIVUTLB cmd - * - * Vineetg: Aug 21th 2008: - * -Reduced the duration of IRQ lockouts in TLB Flush routines - * -Multiple copies of TLB erase code separated into a "single" function - * -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID - * in interrupt-safe region. - * - * Vineetg: April 23rd Bug #93131 - * Problem: tlb_flush_kernel_range() doesn't do anything if the range to - * flush is more than the size of TLB itself. 
- * - * Rahul Trivedi : Codito Technologies 2004 */ #include @@ -57,47 +15,6 @@ #include #include -/* Need for ARC MMU v2 - * - * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc. - * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages - * map into same set, there would be contention for the 2 ways causing severe - * Thrashing. - * - * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has - * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways. - * Given this, the thrashing problem should never happen because once the 3 - * J-TLB entries are created (even though 3rd will knock out one of the prev - * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy - * - * Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs. - * This is a simple design for keeping them in sync. So what do we do? - * The solution which James came up was pretty neat. It utilised the assoc - * of uTLBs by not invalidating always but only when absolutely necessary. - * - * - Existing TLB commands work as before - * - New command (TLBWriteNI) for TLB write without clearing uTLBs - * - New command (TLBIVUTLB) to invalidate uTLBs. - * - * The uTLBs need only be invalidated when pages are being removed from the - * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB - * as a result of a miss, the removed entry is still allowed to exist in the - * uTLBs as it is still valid and present in the OS page table. This allows the - * full associativity of the uTLBs to hide the limited associativity of the main - * TLB. - * - * During a miss handler, the new "TLBWriteNI" command is used to load - * entries without clearing the uTLBs. - * - * When the OS page table is updated, TLB entries that may be associated with a - * removed page are removed (flushed) from the TLB using TLBWrite. In this - * circumstance, the uTLBs must also be cleared. This is done by using the - * existing TLBWrite command. An explicit IVUTLB is also required for those - * corner cases when TLBWrite was not executed at all because the corresp - * J-TLB entry got evicted/replaced. - */ - - /* A copy of the ASID from the PID reg is kept in asid_cache */ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; @@ -120,32 +37,10 @@ static inline void __tlb_entry_erase(void) static void utlb_invalidate(void) { -#if (CONFIG_ARC_MMU_VER >= 2) - -#if (CONFIG_ARC_MMU_VER == 2) - /* MMU v2 introduced the uTLB Flush command. - * There was however an obscure hardware bug, where uTLB flush would - * fail when a prior probe for J-TLB (both totally unrelated) would - * return lkup err - because the entry didn't exist in MMU. - * The Workaround was to set Index reg with some valid value, prior to - * flush. 
This was fixed in MMU v3 - */ - unsigned int idx; - - /* make sure INDEX Reg is valid */ - idx = read_aux_reg(ARC_REG_TLBINDEX); - - /* If not write some dummy val */ - if (unlikely(idx & TLB_LKUP_ERR)) - write_aux_reg(ARC_REG_TLBINDEX, 0xa); -#endif - write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB); -#endif - } -#if (CONFIG_ARC_MMU_VER < 4) +#ifdef CONFIG_ARC_MMU_V3 static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) { @@ -206,7 +101,7 @@ static void tlb_entry_insert(unsigned int pd0, pte_t pd1) write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } -#else /* CONFIG_ARC_MMU_VER >= 4) */ +#else /* MMUv4 */ static void tlb_entry_erase(unsigned int vaddr_n_asid) { @@ -706,14 +601,6 @@ void read_decode_mmu_bcr(void) { struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; unsigned int tmp; - struct bcr_mmu_1_2 { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8; -#else - unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8; -#endif - } *mmu2; - struct bcr_mmu_3 { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4, @@ -738,23 +625,14 @@ void read_decode_mmu_bcr(void) tmp = read_aux_reg(ARC_REG_MMU_BCR); mmu->ver = (tmp >> 24); - if (is_isa_arcompact()) { - if (mmu->ver <= 2) { - mmu2 = (struct bcr_mmu_1_2 *)&tmp; - mmu->pg_sz_k = TO_KB(0x2000); - mmu->sets = 1 << mmu2->sets; - mmu->ways = 1 << mmu2->ways; - mmu->u_dtlb = mmu2->u_dtlb; - mmu->u_itlb = mmu2->u_itlb; - } else { - mmu3 = (struct bcr_mmu_3 *)&tmp; - mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); - mmu->sets = 1 << mmu3->sets; - mmu->ways = 1 << mmu3->ways; - mmu->u_dtlb = mmu3->u_dtlb; - mmu->u_itlb = mmu3->u_itlb; - mmu->sasid = mmu3->sasid; - } + if (is_isa_arcompact() && mmu->ver == 3) { + mmu3 = (struct bcr_mmu_3 *)&tmp; + mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); + mmu->sets = 1 << mmu3->sets; + mmu->ways = 1 << mmu3->ways; + mmu->u_dtlb = mmu3->u_dtlb; + mmu->u_itlb = mmu3->u_itlb; + mmu->sasid = mmu3->sasid; } else { mmu4 = (struct bcr_mmu_4 *)&tmp; mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); @@ -815,22 +693,17 @@ void arc_mmu_init(void) /* * Ensure that MMU features assumed by kernel exist in hardware. - * For older ARC700 cpus, it has to be exact match, since the MMU - * revisions were not backwards compatible (MMUv3 TLB layout changed - * so even if kernel for v2 didn't use any new cmds of v3, it would - * still not work. - * For HS cpus, MMUv4 was baseline and v5 is backwards compatible - * (will run older software). + * - For older ARC700 cpus, only v3 supported + * - For HS cpus, v4 was baseline and v5 is backwards compatible + * (will run older software). 
*/ - if (is_isa_arcompact() && mmu->ver == CONFIG_ARC_MMU_VER) + if (is_isa_arcompact() && mmu->ver == 3) compat = 1; - else if (is_isa_arcv2() && mmu->ver >= CONFIG_ARC_MMU_VER) + else if (is_isa_arcv2() && mmu->ver >= 4) compat = 1; - if (!compat) { - panic("MMU ver %d doesn't match kernel built for %d...\n", - mmu->ver, CONFIG_ARC_MMU_VER); - } + if (!compat) + panic("MMU ver %d doesn't match kernel built for\n", mmu->ver); if (mmu->pg_sz_k != TO_KB(PAGE_SIZE)) panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 062fae46c3f8..96c3a5de9dd4 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -39,7 +39,6 @@ #include #include #include -#include #ifdef CONFIG_ISA_ARCOMPACT ;----------------------------------------------------------------- @@ -279,7 +278,7 @@ ex_saved_reg1: ; Commit the TLB entry into MMU .macro COMMIT_ENTRY_TO_MMU -#if (CONFIG_ARC_MMU_VER < 4) +#ifdef CONFIG_ARC_MMU_V3 /* Get free TLB slot: Set = computed from vaddr, way = random */ sr TLBGetIndex, [ARC_REG_TLBCOMMAND] @@ -375,13 +374,6 @@ ENTRY(EV_TLBMissD) CONV_PTE_TO_TLB -#if (CONFIG_ARC_MMU_VER == 1) - ; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of - ; memcpy where 3 parties contend for 2 ways, ensuing a livelock. - ; But only for old MMU or one with Metal Fix - TLB_WRITE_HEURISTICS -#endif - COMMIT_ENTRY_TO_MMU TLBMISS_RESTORE_REGS EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation -- cgit From 6128df5be48f48d63efdc7c52022dd163f612373 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 13 Jan 2020 09:16:06 -0800 Subject: ARC: mm: use SCRATCH_DATA0 register for caching pgdir in ARCv2 only MMU SCRATCH_DATA0 register is intended to cache task pgd. However in ARC700 SMP port, it has to be repurposed for re-entrant interrupt handling, while UP port doesn't. We currently handle these use-cases using a fabricated #define which has usual issues of dependency nesting and obvious ugliness. So clean this up: for ARC700 don't use to cache pgd (even in UP) and do the opposite for ARCv2. And while here, switch to canonical pgd_offset(). Acked-by: Mike Rapoport Signed-off-by: Vineet Gupta --- arch/arc/include/asm/entry-compact.h | 8 -------- arch/arc/include/asm/mmu.h | 4 ---- arch/arc/include/asm/mmu_context.h | 2 +- arch/arc/include/asm/pgtable.h | 23 ----------------------- arch/arc/mm/fault.c | 2 +- arch/arc/mm/tlb.c | 4 ++-- arch/arc/mm/tlbex.S | 2 +- 7 files changed, 5 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 6dbf5cecc8cc..5aab4f93ab8a 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -126,19 +126,11 @@ * to be saved again on kernel mode stack, as part of pt_regs. 
*-------------------------------------------------------------*/ .macro PROLOG_FREEUP_REG reg, mem -#ifndef ARC_USE_SCRATCH_REG - sr \reg, [ARC_REG_SCRATCH_DATA0] -#else st \reg, [\mem] -#endif .endm .macro PROLOG_RESTORE_REG reg, mem -#ifndef ARC_USE_SCRATCH_REG - lr \reg, [ARC_REG_SCRATCH_DATA0] -#else ld \reg, [\mem] -#endif .endm /*-------------------------------------------------------------- diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index a81d1975866a..4065335a7922 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -31,10 +31,6 @@ #define ARC_REG_SCRATCH_DATA0 0x46c #endif -#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP) -#define ARC_USE_SCRATCH_REG -#endif - /* Bits in MMU PID register */ #define __TLB_ENABLE (1 << 31) #define __PROG_ENABLE (1 << 30) diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index df164066e172..49318a126879 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -146,7 +146,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ cpumask_set_cpu(cpu, mm_cpumask(next)); -#ifdef ARC_USE_SCRATCH_REG +#ifdef CONFIG_ISA_ARCV2 /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 0c3e220bd2b4..80b57c14b430 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -284,29 +284,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pteval); } -/* - * Macro to quickly access the PGD entry, utlising the fact that some - * arch may cache the pointer to Page Directory of "current" task - * in a MMU register - * - * Thus task->mm->pgd (3 pointer dereferences, cache misses etc simply - * becomes read a register - * - * ********CAUTION*******: - * Kernel code might be dealing with some mm_struct of NON "current" - * Thus use this macro only when you are certain that "current" is current - * e.g. 
when dealing with signal frame setup code etc - */ -#ifdef ARC_USE_SCRATCH_REG -#define pgd_offset_fast(mm, addr) \ -({ \ - pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0); \ - pgd_base + pgd_index(addr); \ -}) -#else -#define pgd_offset_fast(mm, addr) pgd_offset(mm, addr) -#endif - extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index f5657cb68e4f..41f154320964 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -33,7 +33,7 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address) pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; - pgd = pgd_offset_fast(current->active_mm, address); + pgd = pgd_offset(current->active_mm, address); pgd_k = pgd_offset_k(address); if (!pgd_present(*pgd_k)) diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 8696829d37c0..349fb7a75d1d 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -719,8 +719,8 @@ void arc_mmu_init(void) /* Enable the MMU */ write_aux_reg(ARC_REG_PID, MMU_ENABLE); - /* In smp we use this reg for interrupt 1 scratch */ -#ifdef ARC_USE_SCRATCH_REG + /* In arc700/smp needed for re-entrant interrupt handling */ +#ifdef CONFIG_ISA_ARCV2 /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */ write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir); #endif diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 96c3a5de9dd4..bcd2909c691f 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -202,7 +202,7 @@ ex_saved_reg1: lr r2, [efa] -#ifdef ARC_USE_SCRATCH_REG +#ifdef CONFIG_ISA_ARCV2 lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd #else GET_CURR_TASK_ON_CPU r1 -- cgit From 12e7804c264143c63d1cae7e3a62d21fbed06f59 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 9 Sep 2019 16:59:59 -0700 Subject: ARC: mm: remove tlb paranoid code This was used back in arc700 days when ASID allocator was fragile. 
Not needed in last 5 years Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 3 --- arch/arc/include/asm/mmu.h | 6 ------ arch/arc/mm/tlb.c | 40 ------------------------------------- arch/arc/mm/tlbex.S | 50 ---------------------------------------------- 4 files changed, 99 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 53d143fc42fe..56aad105ad13 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -537,9 +537,6 @@ config ARC_DW2_UNWIND If you don't debug the kernel, you can say N, but we may not be able to solve problems without frame unwind information -config ARC_DBG_TLB_PARANOIA - bool "Paranoia Checks in Low Level TLB Handlers" - config ARC_DBG_JUMP_LABEL bool "Paranoid checks in Static Keys (jump labels) code" depends on JUMP_LABEL diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 4065335a7922..38a036508699 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -64,12 +64,6 @@ typedef struct { unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; -#ifdef CONFIG_ARC_DBG_TLB_PARANOIA -void tlb_paranoid_check(unsigned int mm_asid, unsigned long address); -#else -#define tlb_paranoid_check(a, b) -#endif - void arc_mmu_init(void); extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); void read_decode_mmu_bcr(void); diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 349fb7a75d1d..6079dfd129b9 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -400,7 +400,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) * * Removing the assumption involves * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg. - * -Fix the TLB paranoid debug code to not trigger false negatives. * -More importantly it makes this handler inconsistent with fast-path * TLB Refill handler which always deals with "current" * @@ -423,8 +422,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) local_irq_save(flags); - tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr); - vaddr &= PAGE_MASK; /* update this PTE credentials */ @@ -818,40 +815,3 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, local_irq_restore(flags); } - -/*********************************************************************** - * Diagnostic Routines - * -Called from Low Level TLB Handlers if things don;t look good - **********************************************************************/ - -#ifdef CONFIG_ARC_DBG_TLB_PARANOIA - -/* - * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS - * don't match - */ -void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path) -{ - pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n", - is_fast_path ? 
"Fast" : "Slow", mm_asid, mmu_asid); - - __asm__ __volatile__("flag 1"); -} - -void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr) -{ - unsigned int mmu_asid; - - mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff; - - /* - * At the time of a TLB miss/installation - * - HW version needs to match SW version - * - SW needs to have a valid ASID - */ - if (addr < 0x70000000 && - ((mm_asid == MM_CTXT_NO_ASID) || - (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK)))) - print_asid_mismatch(mm_asid, mmu_asid, 0); -} -#endif diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index bcd2909c691f..0b4bb62fa0ab 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -93,11 +93,6 @@ ex_saved_reg1: st_s r1, [r0, 4] st_s r2, [r0, 8] st_s r3, [r0, 12] - - ; VERIFY if the ASID in MMU-PID Reg is same as - ; one in Linux data structures - - tlb_paranoid_check_asm .endm .macro TLBMISS_RESTORE_REGS @@ -146,51 +141,6 @@ ex_saved_reg1: #endif -;============================================================================ -; Troubleshooting Stuff -;============================================================================ - -; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid -; When Creating TLB Entries, instead of doing 3 dependent loads from memory, -; we use the MMU PID Reg to get current ASID. -; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble. -; So we try to detect this in TLB Mis shandler - -.macro tlb_paranoid_check_asm - -#ifdef CONFIG_ARC_DBG_TLB_PARANOIA - - GET_CURR_TASK_ON_CPU r3 - ld r0, [r3, TASK_ACT_MM] - ld r0, [r0, MM_CTXT+MM_CTXT_ASID] - breq r0, 0, 55f ; Error if no ASID allocated - - lr r1, [ARC_REG_PID] - and r1, r1, 0xFF - - and r2, r0, 0xFF ; MMU PID bits only for comparison - breq r1, r2, 5f - -55: - ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode - lr r2, [erstatus] - bbit0 r2, STATUS_U_BIT, 5f - - ; We sure are in troubled waters, Flag the error, but to do so - ; need to switch to kernel mode stack to call error routine - GET_TSK_STACK_BASE r3, sp - - ; Call printk to shoutout aloud - mov r2, 1 - j print_asid_mismatch - -5: ; ASIDs match so proceed normally - nop - -#endif - -.endm - ;============================================================================ ;TLB Miss handling Code ;============================================================================ -- cgit From 47910ca3ce946662f950d700f26e51c563a9821f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 13 Sep 2019 13:19:44 -0700 Subject: ARC: mm: move mmu/cache externs out to setup.h Don't pollute mmu.h and cache.h with ARC internal bootlog/setup related functions. 
Move them aside to setup.h Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cache.h | 4 ---- arch/arc/include/asm/mmu.h | 4 ---- arch/arc/include/asm/setup.h | 12 ++++++++++-- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index d8ece4292388..f0f1fc5d62b6 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -62,10 +62,6 @@ #define ARCH_SLAB_MINALIGN 8 #endif -extern void arc_cache_init(void); -extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); -extern void read_decode_cache_bcr(void); - extern int ioc_enable; extern unsigned long perip_base, perip_end; diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 38a036508699..762cfe66e16b 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -64,10 +64,6 @@ typedef struct { unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; -void arc_mmu_init(void); -extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); -void read_decode_mmu_bcr(void); - static inline int is_pae40_enabled(void) { return IS_ENABLED(CONFIG_ARC_HAS_PAE40); diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index 01f85478170d..028a8cf76206 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -2,8 +2,8 @@ /* * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) */ -#ifndef __ASMARC_SETUP_H -#define __ASMARC_SETUP_H +#ifndef __ASM_ARC_SETUP_H +#define __ASM_ARC_SETUP_H #include @@ -34,4 +34,12 @@ long __init arc_get_mem_sz(void); #define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) #define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2)) +extern void arc_mmu_init(void); +extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); +extern void read_decode_mmu_bcr(void); + +extern void arc_cache_init(void); +extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); +extern void read_decode_cache_bcr(void); + #endif /* __ASMARC_SETUP_H */ -- cgit From 366440eec855dad6527adaf2f1dcc305fb5eef99 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 28 Oct 2019 12:02:47 -0700 Subject: ARC: mm: Fixes to allow STRICT_MM_TYPECHECKS Signed-off-by: Vineet Gupta --- arch/arc/mm/tlb.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 6079dfd129b9..15cbc285b0de 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -71,7 +71,7 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid) } } -static void tlb_entry_insert(unsigned int pd0, pte_t pd1) +static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1) { unsigned int idx; @@ -109,13 +109,16 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid) write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); } -static void tlb_entry_insert(unsigned int pd0, pte_t pd1) +static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1) { write_aux_reg(ARC_REG_TLBPD0, pd0); - write_aux_reg(ARC_REG_TLBPD1, pd1); - if (is_pae40_enabled()) + if (!is_pae40_enabled()) { + write_aux_reg(ARC_REG_TLBPD1, pd1); + } else { + write_aux_reg(ARC_REG_TLBPD1, pd1 & 0xFFFFFFFF); write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32); + } write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); } @@ -391,7 +394,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) unsigned long flags; unsigned int 
asid_or_sasid, rwx; unsigned long pd0; - pte_t pd1; + phys_addr_t pd1; /* * create_tlb() assumes that current->mm == vma->mm, since -- cgit From 1b4013b9aebca87636dc9edc8903ffa87267704f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 28 Oct 2019 13:49:41 -0700 Subject: ARC: mm: Enable STRICT_MM_TYPECHECKS In the past I've refrained from doing this (at least 2 times) due to the slight code bloat due to ABI implications of pte_t etc becoming struct Per ARC ABI, functions return struct via memory and not through register r0, even if the struct would fit in register(s) - caller allocates space on stack and passes the address as first arg (r0), shifting rest of args by one - callee creates return struct in memory (referenced via r0) This time around the code actually shrunk slightly (due to subtle inlining heuristic effects), but still slightly inefficient due to return values passed through memory. That however seems like a small cost compared to maintenance burden given the impending new mmu support for page walk etc Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 26 -------------------------- arch/arc/mm/ioremap.c | 2 +- 2 files changed, 1 insertion(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 4a9d33372fe2..c4ac827379cd 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -34,12 +34,6 @@ void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma); void clear_user_page(void *to, unsigned long u_vaddr, struct page *page); -#undef STRICT_MM_TYPECHECKS - -#ifdef STRICT_MM_TYPECHECKS -/* - * These are used to make use of C type-checking.. - */ typedef struct { #ifdef CONFIG_ARC_HAS_PAE40 unsigned long long pte; @@ -64,26 +58,6 @@ typedef struct { #define pte_pgprot(x) __pgprot(pte_val(x)) -#else /* !STRICT_MM_TYPECHECKS */ - -#ifdef CONFIG_ARC_HAS_PAE40 -typedef unsigned long long pte_t; -#else -typedef unsigned long pte_t; -#endif -typedef unsigned long pgd_t; -typedef unsigned long pgprot_t; - -#define pte_val(x) (x) -#define pgd_val(x) (x) -#define pgprot_val(x) (x) -#define __pte(x) (x) -#define __pgd(x) (x) -#define __pgprot(x) (x) -#define pte_pgprot(x) (x) - -#endif - typedef pte_t * pgtable_t; /* diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index 95c649fbc95a..052bbd8b1e5f 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -39,7 +39,7 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size) if (arc_uncached_addr_space(paddr)) return (void __iomem *)(u32)paddr; - return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE); + return ioremap_prot(paddr, size, pgprot_val(PAGE_KERNEL_NO_CACHE)); } EXPORT_SYMBOL(ioremap); -- cgit From da773cf20eb3745e18be995e00be0d57aa862564 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 12 Sep 2019 12:01:04 -0700 Subject: ARC: ioremap: use more commonly used PAGE_KERNEL based uncached flag and remove the one off uncached definition for ARC Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgtable.h | 3 --- arch/arc/mm/ioremap.c | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 80b57c14b430..b054c14f8bf6 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -103,9 +103,6 @@ */ #define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE) -/* ioremap */ -#define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS) - /* Masks 
for actual TLB "PD"s */ #define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index 052bbd8b1e5f..0ee75aca6e10 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -39,7 +39,8 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size) if (arc_uncached_addr_space(paddr)) return (void __iomem *)(u32)paddr; - return ioremap_prot(paddr, size, pgprot_val(PAGE_KERNEL_NO_CACHE)); + return ioremap_prot(paddr, size, + pgprot_val(pgprot_noncached(PAGE_KERNEL))); } EXPORT_SYMBOL(ioremap); -- cgit From e93e59ac1e699d07a2bfd0bb03f36b0d2f931834 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 30 Oct 2019 16:54:32 -0700 Subject: ARC: mm: pmd_populate* to use the canonical set_pmd (and drop pmd_set) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgalloc.h | 14 ++++++++++---- arch/arc/include/asm/pgtable.h | 6 ------ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index a32ca3104ced..408bc4b0842d 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -35,13 +35,19 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { - pmd_set(pmd, pte); + /* + * The cast to long below is OK in 32-bit PAE40 regime with long long pte + * Despite "wider" pte, the pte table needs to be in non-PAE low memory + * as all higher levels can only hold long pointers. + * + * The cast itself is needed given simplistic definition of set_pmd() + */ + set_pmd(pmd, __pmd((unsigned long)pte)); } -static inline void -pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep) +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { - pmd_set(pmd, (pte_t *) ptep); + set_pmd(pmd, __pmd((unsigned long)pte)); } static inline int __get_order_pgd(void) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index b054c14f8bf6..f762bacb2358 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -222,12 +222,6 @@ extern char empty_zero_page[PAGE_SIZE]; /* find the logical addr (phy for ARC) of the Page Tbl ref by PMD entry */ #define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK) -/* In a 2 level sys, setup the PGD entry with PTE value */ -static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) -{ - pmd_val(*pmdp) = (unsigned long)ptep; -} - #define pte_none(x) (!pte_val(x)) #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) #define pte_clear(mm, addr, ptep) set_pte_at(mm, addr, ptep, __pte(0)) -- cgit From be43b096ed787bad9e1a74f79486159c6cd6e648 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 30 Sep 2020 18:58:16 -0700 Subject: ARC: mm: non-functional code movement/cleanup Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index c4ac827379cd..313e6f543d2d 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -34,6 +34,13 @@ void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma); void clear_user_page(void *to, unsigned long u_vaddr, struct page *page); +typedef struct { + unsigned long pgd; +} pgd_t; + +#define pgd_val(x) ((x).pgd) +#define __pgd(x) ((pgd_t) { (x) }) + 
typedef struct { #ifdef CONFIG_ARC_HAS_PAE40 unsigned long long pte; @@ -41,22 +48,17 @@ typedef struct { unsigned long pte; #endif } pte_t; -typedef struct { - unsigned long pgd; -} pgd_t; + +#define pte_val(x) ((x).pte) +#define __pte(x) ((pte_t) { (x) }) + typedef struct { unsigned long pgprot; } pgprot_t; -#define pte_val(x) ((x).pte) -#define pgd_val(x) ((x).pgd) -#define pgprot_val(x) ((x).pgprot) - -#define __pte(x) ((pte_t) { (x) }) -#define __pgd(x) ((pgd_t) { (x) }) -#define __pgprot(x) ((pgprot_t) { (x) }) - -#define pte_pgprot(x) __pgprot(pte_val(x)) +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) }) +#define pte_pgprot(x) __pgprot(pte_val(x)) typedef pte_t * pgtable_t; @@ -96,8 +98,8 @@ extern int pfn_valid(unsigned long pfn); * virt here means link-address/program-address as embedded in object code. * And for ARC, link-addr = physical address */ -#define __pa(vaddr) ((unsigned long)(vaddr)) -#define __va(paddr) ((void *)((unsigned long)(paddr))) +#define __pa(vaddr) ((unsigned long)(vaddr)) +#define __va(paddr) ((void *)((unsigned long)(paddr))) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) -- cgit From 89d0d42412a116563c28c763d9c1bdee83b5b6af Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 13 Sep 2019 15:20:01 -0700 Subject: ARC: mm: move MMU specific bits out of ASID allocator And while at it, rewrite commentary on ASID allocator Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mmu.h | 13 +++++++++++++ arch/arc/include/asm/mmu_context.h | 28 +++++++++++++--------------- arch/arc/mm/tlb.c | 11 ++++------- 3 files changed, 30 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 762cfe66e16b..c8b490175eae 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -64,6 +64,19 @@ typedef struct { unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; +static inline void mmu_setup_asid(struct mm_struct *mm, unsigned int asid) +{ + write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE); +} + +static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd) +{ + /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ +#ifdef CONFIG_ISA_ARCV2 + write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd); +#endif +} + static inline int is_pae40_enabled(void) { return IS_ENABLED(CONFIG_ARC_HAS_PAE40); diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index 49318a126879..dda471f5f05b 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -15,22 +15,23 @@ #ifndef _ASM_ARC_MMU_CONTEXT_H #define _ASM_ARC_MMU_CONTEXT_H -#include -#include #include +#include #include -/* ARC700 ASID Management +/* ARC ASID Management + * + * MMU tags TLBs with an 8-bit ASID, avoiding need to flush the TLB on + * context-switch. * - * ARC MMU provides 8-bit ASID (0..255) to TAG TLB entries, allowing entries - * with same vaddr (different tasks) to co-exit. This provides for - * "Fast Context Switch" i.e. no TLB flush on ctxt-switch + * ASID is managed per cpu, so task threads across CPUs can have different + * ASID. Global ASID management is needed if hardware supports TLB shootdown + * and/or shared TLB across cores, which ARC doesn't. * - * Linux assigns each task a unique ASID. A simple round-robin allocation - * of H/w ASID is done using software tracker @asid_cpu. 
- * When it reaches max 255, the allocation cycle starts afresh by flushing - * the entire TLB and wrapping ASID back to zero. + * Each task is assigned unique ASID, with a simple round-robin allocator + * tracked in @asid_cpu. When 8-bit value rolls over,a new cycle is started + * over from 0, and TLB is flushed * * A new allocation cycle, post rollover, could potentially reassign an ASID * to a different task. Thus the rule is to refresh the ASID in a new cycle. @@ -93,7 +94,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm) asid_mm(mm, cpu) = asid_cpu(cpu); set_hw: - write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE); + mmu_setup_asid(mm, hw_pid(mm, cpu)); local_irq_restore(flags); } @@ -146,10 +147,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ cpumask_set_cpu(cpu, mm_cpumask(next)); -#ifdef CONFIG_ISA_ARCV2 - /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ - write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); -#endif + mmu_setup_pgd(next, next->pgd); get_new_mmu_context(next); } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 15cbc285b0de..b68d5798327b 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -716,14 +716,11 @@ void arc_mmu_init(void) if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae) panic("Hardware doesn't support PAE40\n"); - /* Enable the MMU */ - write_aux_reg(ARC_REG_PID, MMU_ENABLE); + /* Enable the MMU with ASID 0 */ + mmu_setup_asid(NULL, 0); - /* In arc700/smp needed for re-entrant interrupt handling */ -#ifdef CONFIG_ISA_ARCV2 - /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */ - write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir); -#endif + /* cache the pgd pointer in MMU SCRATCH reg (ARCv2 only) */ + mmu_setup_pgd(NULL, swapper_pg_dir); if (pae40_exist_but_not_enab()) write_aux_reg(ARC_REG_TLBPD1HI, 0); -- cgit From a79a9c765f95a73e087f11f0994297cd69987bda Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 16 Sep 2019 10:54:34 -0700 Subject: ARC: mm: move MMU specific bits out of entry code ... ... to avoid polluting shared entry code (across three ISA variants) with ISA/MMU specific code. Cc: Jose Abreu Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mmu.h | 8 ++++++++ arch/arc/kernel/entry-arcv2.S | 1 + arch/arc/kernel/entry.S | 7 ++----- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index c8b490175eae..e66e1e417694 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -84,6 +84,14 @@ static inline int is_pae40_enabled(void) extern int pae40_exist_but_not_enab(void); +#else + +.macro ARC_MMU_REENABLE reg + lr \reg, [ARC_REG_PID] + or \reg, \reg, MMU_ENABLE + sr \reg, [ARC_REG_PID] +.endm + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index 12d5f12d10d2..a7e6a2174187 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -10,6 +10,7 @@ #include #include #include +#include ; A maximum number of supported interrupts in the core interrupt controller. 
; This number is not equal to the maximum interrupt number (256) because diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 2cb8dfe866b6..dd77a0c8f740 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -101,11 +101,8 @@ ENTRY(EV_MachineCheck) lr r0, [efa] mov r1, sp - ; hardware auto-disables MMU, re-enable it to allow kernel vaddr - ; access for say stack unwinding of modules for crash dumps - lr r3, [ARC_REG_PID] - or r3, r3, MMU_ENABLE - sr r3, [ARC_REG_PID] + ; MC excpetions disable MMU + ARC_MMU_REENABLE r3 lsr r3, r2, 8 bmsk r3, r3, 7 -- cgit From 113ec9ccc8049c3772f0eab46b62c5d6654c09f7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 20 Aug 2021 05:16:05 +0000 Subject: powerpc/32: indirect function call use bctrl rather than blrl in ret_from_kernel_thread Copied from commit 89bbe4c798bc ("powerpc/64: indirect function call use bctrl rather than blrl in ret_from_kernel_thread") blrl is not recommended to use as an indirect function call, as it may corrupt the link stack predictor. This is not a performance critical path but this should be fixed for consistency. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/91b1d242525307ceceec7ef6e832bfbacdd4501b.1629436472.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0273a1349006..61fdd53cdd9a 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -161,10 +161,10 @@ ret_from_fork: ret_from_kernel_thread: REST_NVGPRS(r1) bl schedule_tail - mtlr r14 + mtctr r14 mr r3,r15 PPC440EP_ERR42 - blrl + bctrl li r3,0 b ret_from_syscall -- cgit From f5007dbf4da729baa850b33a64dc3cc53757bdf8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Aug 2021 07:56:26 +0000 Subject: powerpc/booke: Avoid link stack corruption in several places Use bcl 20,31,+4 instead of bl in order to preserve link stack. See commit c974809a26a1 ("powerpc/vdso: Avoid link stack corruption in __get_datapage()") for details. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e9fbc285eceb720e6c0e032ef47fe8b05f669b48.1629791751.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 2 +- arch/powerpc/kernel/exceptions-64e.S | 6 +++--- arch/powerpc/kernel/fsl_booke_entry_mapping.S | 8 ++++---- arch/powerpc/kernel/head_44x.S | 6 +++--- arch/powerpc/kernel/head_fsl_booke.S | 6 +++--- arch/powerpc/mm/nohash/tlb_low.S | 4 ++-- 6 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index ffe712307e11..1c538a9a11e0 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -260,7 +260,7 @@ n: /* Be careful, this will clobber the lr register. 
*/ #define LOAD_REG_ADDR_PIC(reg, name) \ - bl 0f; \ + bcl 20,31,$+4; \ 0: mflr reg; \ addis reg,reg,(name - 0b)@ha; \ addi reg,reg,(name - 0b)@l; diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 1401787b0b93..7e0943d9f9b0 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1127,7 +1127,7 @@ found_iprot: * r3 = MAS0_TLBSEL (for the iprot array) * r4 = SPRN_TLBnCFG */ - bl invstr /* Find our address */ + bcl 20,31,$+4 /* Find our address */ invstr: mflr r6 /* Make it accessible */ mfmsr r7 rlwinm r5,r7,27,31,31 /* extract MSR[IS] */ @@ -1196,7 +1196,7 @@ skpinv: addi r6,r6,1 /* Increment */ mfmsr r6 xori r6,r6,MSR_IS mtspr SPRN_SRR1,r6 - bl 1f /* Find our address */ + bcl 20,31,$+4 /* Find our address */ 1: mflr r6 addi r6,r6,(2f - 1b) mtspr SPRN_SRR0,r6 @@ -1256,7 +1256,7 @@ skpinv: addi r6,r6,1 /* Increment */ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping */ /* Now we branch the new virtual address mapped by this entry */ - bl 1f /* Find our address */ + bcl 20,31,$+4 /* Find our address */ 1: mflr r6 addi r6,r6,(2f - 1b) tovirt(r6,r6) diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S index 8bccce6544b5..dedc17fac8f8 100644 --- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S +++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* 1. Find the index of the entry we're executing in */ - bl invstr /* Find our address */ + bcl 20,31,$+4 /* Find our address */ invstr: mflr r6 /* Make it accessible */ mfmsr r7 rlwinm r4,r7,27,31,31 /* extract MSR[IS] */ @@ -85,7 +85,7 @@ skpinv: addi r6,r6,1 /* Increment */ addi r6,r6,10 slw r6,r8,r6 /* convert to mask */ - bl 1f /* Find our address */ + bcl 20,31,$+4 /* Find our address */ 1: mflr r7 mfspr r8,SPRN_MAS3 @@ -117,7 +117,7 @@ skpinv: addi r6,r6,1 /* Increment */ xori r6,r4,1 slwi r6,r6,5 /* setup new context with other address space */ - bl 1f /* Find our address */ + bcl 20,31,$+4 /* Find our address */ 1: mflr r9 rlwimi r7,r9,0,20,31 addi r7,r7,(2f - 1b) @@ -207,7 +207,7 @@ next_tlb_setup: lis r7,MSR_KERNEL@h ori r7,r7,MSR_KERNEL@l - bl 1f /* Find our address */ + bcl 20,31,$+4 /* Find our address */ 1: mflr r9 rlwimi r6,r9,0,20,31 addi r6,r6,(2f - 1b) diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index ddc978a2d381..02d2928d1e01 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -70,7 +70,7 @@ _ENTRY(_start); * address. 
* r21 will be loaded with the physical runtime address of _stext */ - bl 0f /* Get our runtime address */ + bcl 20,31,$+4 /* Get our runtime address */ 0: mflr r21 /* Make it accessible */ addis r21,r21,(_stext - 0b)@ha addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */ @@ -853,7 +853,7 @@ _GLOBAL(init_cpu_state) wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ sync - bl invstr /* Find our address */ + bcl 20,31,$+4 /* Find our address */ invstr: mflr r5 /* Make it accessible */ tlbsx r23,0,r5 /* Find entry we are in */ li r4,0 /* Start at TLB entry 0 */ @@ -1045,7 +1045,7 @@ head_start_47x: sync /* Find the entry we are running from */ - bl 1f + bcl 20,31,$+4 1: mflr r23 tlbsx r23,0,r23 tlbre r24,r23,0 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 0f9642f36b49..dbf3b89e543c 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -79,7 +79,7 @@ _ENTRY(_start); mr r23,r3 mr r25,r4 - bl 0f + bcl 20,31,$+4 0: mflr r8 addis r3,r8,(is_second_reloc - 0b)@ha lwz r19,(is_second_reloc - 0b)@l(r3) @@ -1132,7 +1132,7 @@ _GLOBAL(switch_to_as1) bne 1b /* Get the tlb entry used by the current running code */ - bl 0f + bcl 20,31,$+4 0: mflr r4 tlbsx 0,r4 @@ -1166,7 +1166,7 @@ _GLOBAL(switch_to_as1) _GLOBAL(restore_to_as0) mflr r0 - bl 0f + bcl 20,31,$+4 0: mflr r9 addi r9,r9,1f - 0b diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index 4613bf8e9aae..5add4a51e51f 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_476_DD2) * Touch enough instruction cache lines to ensure cache hits */ 1: mflr r9 - bl 2f + bcl 20,31,$+4 2: mflr r6 li r7,32 PPC_ICBT(0,R6,R7) /* touch next cache line */ @@ -414,7 +414,7 @@ _GLOBAL(loadcam_multi) * Set up temporary TLB entry that is the same as what we're * running from, but in AS=1. */ - bl 1f + bcl 20,31,$+4 1: mflr r6 tlbsx 0,r8 mfspr r6,SPRN_MAS1 -- cgit From 33e1402435cb9f3021439a15935ea2dc69ec1844 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Aug 2021 07:56:35 +0000 Subject: powerpc: Avoid link stack corruption in misc asm functions bl;mflr is used at several places to get code position. Use bcl 20,31,+4 instead of bl in order to preserve link stack. See commit c974809a26a1 ("powerpc/vdso: Avoid link stack corruption in __get_datapage()") for details. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c6eabb4fb6c156f75d56dcbcc6f243e5ac0fba42.1629791763.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/misc.S | 2 +- arch/powerpc/kernel/misc_32.S | 2 +- arch/powerpc/kernel/misc_64.S | 2 +- arch/powerpc/kernel/reloc_32.S | 2 +- arch/powerpc/kexec/relocate_32.S | 12 ++++++------ 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 5be96feccb55..fb7de3543c03 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -29,7 +29,7 @@ _GLOBAL(reloc_offset) li r3, 0 _GLOBAL(add_reloc_offset) mflr r0 - bl 1f + bcl 20,31,$+4 1: mflr r5 PPC_LL r4,(2f-1b)(r5) subf r5,r4,r5 diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index d8645efff902..e5127b19fec2 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -67,7 +67,7 @@ _GLOBAL(reloc_got2) srwi. 
r8,r8,2 beqlr mtctr r8 - bl 1f + bcl 20,31,$+4 1: mflr r0 lis r4,1b@ha addi r4,r4,1b@l diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 4b761a18a74d..d38a019b38e1 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -255,7 +255,7 @@ _GLOBAL(scom970_write) * Physical (hardware) cpu id should be in r3. */ _GLOBAL(kexec_wait) - bl 1f + bcl 20,31,$+4 1: mflr r5 addi r5,r5,kexec_flag-1b diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S index 10e96f3e22fe..0508c14b4c28 100644 --- a/arch/powerpc/kernel/reloc_32.S +++ b/arch/powerpc/kernel/reloc_32.S @@ -30,7 +30,7 @@ R_PPC_RELATIVE = 22 _GLOBAL(relocate) mflr r0 /* Save our LR */ - bl 0f /* Find our current runtime address */ + bcl 20,31,$+4 /* Find our current runtime address */ 0: mflr r12 /* Make it accessible */ mtlr r0 diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S index 61946c19e07c..cf6e52bdf8d8 100644 --- a/arch/powerpc/kexec/relocate_32.S +++ b/arch/powerpc/kexec/relocate_32.S @@ -93,7 +93,7 @@ wmmucr: * Invalidate all the TLB entries except the current entry * where we are running from */ - bl 0f /* Find our address */ + bcl 20,31,$+4 /* Find our address */ 0: mflr r5 /* Make it accessible */ tlbsx r23,0,r5 /* Find entry we are in */ li r4,0 /* Start at TLB entry 0 */ @@ -158,7 +158,7 @@ write_out: /* Switch to other address space in MSR */ insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ - bl 1f + bcl 20,31,$+4 1: mflr r8 addi r8, r8, (2f-1b) /* Find the target offset */ @@ -202,7 +202,7 @@ next_tlb: li r9,0 insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ - bl 1f + bcl 20,31,$+4 1: mflr r8 and r8, r8, r11 /* Get our offset within page */ addi r8, r8, (2f-1b) @@ -240,7 +240,7 @@ setup_map_47x: sync /* Find the entry we are running from */ - bl 2f + bcl 20,31,$+4 2: mflr r23 tlbsx r23, 0, r23 tlbre r24, r23, 0 /* TLB Word 0 */ @@ -296,7 +296,7 @@ clear_utlb_entry: /* Update the msr to the new TS */ insrwi r5, r7, 1, 26 - bl 1f + bcl 20,31,$+4 1: mflr r6 addi r6, r6, (2f-1b) @@ -355,7 +355,7 @@ write_utlb: /* Defaults to 256M */ lis r10, 0x1000 - bl 1f + bcl 20,31,$+4 1: mflr r4 addi r4, r4, (2f-1b) /* virtual address of 2f */ -- cgit From 11f27a7fa4ca27935de74e3eb052bdc430d5f8d8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Jul 2021 16:49:40 +0000 Subject: powerpc/ptdump: Use DEFINE_SHOW_ATTRIBUTE() Use DEFINE_SHOW_ATTRIBUTE() instead of open coding open() and fops. 
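For reference, DEFINE_SHOW_ATTRIBUTE(name) from <linux/seq_file.h> builds the
single_open() boilerplate out of a name##_show() helper. A rough sketch of what
the macro expands to for a hypothetical "foo" attribute (illustrative only, not
part of this patch; the authoritative expansion lives in seq_file.h):

static int foo_open(struct inode *inode, struct file *file)
{
	/* hand the show callback and the inode's private data to seq_file */
	return single_open(file, foo_show, inode->i_private);
}

static const struct file_operations foo_fops = {
	.owner		= THIS_MODULE,
	.open		= foo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

Each converted site below therefore keeps only its foo_show() plus a one-line
DEFINE_SHOW_ATTRIBUTE(foo), instead of open coding the open() helper and the
fops table.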
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b864a92693ca8413ef0b19f0c12065c212899b6e.1625762905.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/bats.c | 14 ++------------ arch/powerpc/mm/ptdump/hashpagetable.c | 12 +----------- arch/powerpc/mm/ptdump/ptdump.c | 13 +------------ arch/powerpc/mm/ptdump/segment_regs.c | 12 +----------- 4 files changed, 5 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index 8bf7383fb26c..820c119013e4 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -57,7 +57,7 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool #define BAT_SHOW_603(_m, _n, _l, _u, _d) bat_show_603(_m, _n, mfspr(_l), mfspr(_u), _d) -static int bats_show_603(struct seq_file *m, void *v) +static int bats_show(struct seq_file *m, void *v) { seq_puts(m, "---[ Instruction Block Address Translation ]---\n"); @@ -88,17 +88,7 @@ static int bats_show_603(struct seq_file *m, void *v) return 0; } -static int bats_open(struct inode *inode, struct file *file) -{ - return single_open(file, bats_show_603, NULL); -} - -static const struct file_operations bats_fops = { - .open = bats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(bats); static int __init bats_init(void) { diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index ad6df9a2e7c8..c7f824d294b2 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -526,17 +526,7 @@ static int ptdump_show(struct seq_file *m, void *v) return 0; } -static int ptdump_open(struct inode *inode, struct file *file) -{ - return single_open(file, ptdump_show, NULL); -} - -static const struct file_operations ptdump_fops = { - .open = ptdump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(ptdump); static int ptdump_init(void) { diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 5062c58b1e5b..349fd8fe173f 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -397,18 +397,7 @@ static int ptdump_show(struct seq_file *m, void *v) return 0; } - -static int ptdump_open(struct inode *inode, struct file *file) -{ - return single_open(file, ptdump_show, NULL); -} - -static const struct file_operations ptdump_fops = { - .open = ptdump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(ptdump); static void build_pgtable_complete_mask(void) { diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c index 9223dfb85c51..9df3af8d481f 100644 --- a/arch/powerpc/mm/ptdump/segment_regs.c +++ b/arch/powerpc/mm/ptdump/segment_regs.c @@ -41,17 +41,7 @@ static int sr_show(struct seq_file *m, void *v) return 0; } -static int sr_open(struct inode *inode, struct file *file) -{ - return single_open(file, sr_show, NULL); -} - -static const struct file_operations sr_fops = { - .open = sr_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(sr); static int __init sr_init(void) { -- cgit From 64b87b0c70e0fd28352895cba3c0a9631e0072dd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Jul 2021 16:49:41 +0000 Subject: powerpc/ptdump: Remove unused 'page_size' parameter note_page_update_state() 
doesn't use page_size. Remove it. Could also be removed to note_page() but as a following patch will remove all current users of note_page(), just leave it as is for now. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e2f80d052001155251bfe009c360d0c5d9242c6b.1625762906.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/ptdump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 349fd8fe173f..3eb8732641da 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -189,7 +189,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) } static void note_page_update_state(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val, unsigned long page_size) + unsigned int level, u64 val) { u64 flag = val & pg_level[level].mask; u64 pa = val & PTE_RPN_MASK; @@ -213,7 +213,7 @@ static void note_page(struct pg_state *st, unsigned long addr, /* At first no level is set */ if (!st->level) { pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); - note_page_update_state(st, addr, level, val, page_size); + note_page_update_state(st, addr, level, val); /* * Dump the section of virtual memory when: * - the PTE flags from one entry to the next differs. @@ -242,7 +242,7 @@ static void note_page(struct pg_state *st, unsigned long addr, * Address indicates we have passed the end of the * current section of virtual memory */ - note_page_update_state(st, addr, level, val, page_size); + note_page_update_state(st, addr, level, val); } } -- cgit From cf98d2b6eea6a1b2c43f85680ad58fcc3ea9496b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Jul 2021 16:49:42 +0000 Subject: powerpc/ptdump: Reduce level numbers by 1 in note_page() and add p4d level Do the same as commit f8f0d0b6fa20 ("mm: ptdump: reduce level numbers by 1 in note_page()") and add missing p4d level. This will align powerpc to the users of generic ptdump. 
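After this change the powerpc walker numbers page-table levels the same way as
the generic ptdump users. A sketch of the resulting convention (the macro names
are hypothetical, added here only for illustration; the code itself just passes
plain int levels that index pg_level[]):

#define PG_LEVEL_NONE	(-1)	/* st->level before the first entry is seen */
#define PG_LEVEL_PGD	0	/* pg_level[0] */
#define PG_LEVEL_P4D	1	/* pg_level[1], the level this patch adds */
#define PG_LEVEL_PUD	2
#define PG_LEVEL_PMD	3
#define PG_LEVEL_PTE	4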
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d76495c574132b197b445a1f133755cca4b912a4.1625762906.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/8xx.c | 6 ++++-- arch/powerpc/mm/ptdump/book3s64.c | 6 ++++-- arch/powerpc/mm/ptdump/ptdump.c | 17 +++++++++-------- arch/powerpc/mm/ptdump/shared.c | 6 ++++-- 4 files changed, 21 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c index 86da2a669680..fac932eb8f9a 100644 --- a/arch/powerpc/mm/ptdump/8xx.c +++ b/arch/powerpc/mm/ptdump/8xx.c @@ -75,8 +75,10 @@ static const struct flag_info flag_array[] = { }; struct pgtable_level pg_level[5] = { - { - }, { /* pgd */ + { /* pgd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* p4d */ .flag = flag_array, .num = ARRAY_SIZE(flag_array), }, { /* pud */ diff --git a/arch/powerpc/mm/ptdump/book3s64.c b/arch/powerpc/mm/ptdump/book3s64.c index 14f73868db66..5ad92d9dc5d1 100644 --- a/arch/powerpc/mm/ptdump/book3s64.c +++ b/arch/powerpc/mm/ptdump/book3s64.c @@ -103,8 +103,10 @@ static const struct flag_info flag_array[] = { }; struct pgtable_level pg_level[5] = { - { - }, { /* pgd */ + { /* pgd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* p4d */ .flag = flag_array, .num = ARRAY_SIZE(flag_array), }, { /* pud */ diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 3eb8732641da..fb531bc64fc5 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -58,7 +58,7 @@ struct pg_state { const struct addr_marker *marker; unsigned long start_address; unsigned long start_pa; - unsigned int level; + int level; u64 current_flags; bool check_wx; unsigned long wx_pages; @@ -188,10 +188,9 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } -static void note_page_update_state(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val) +static void note_page_update_state(struct pg_state *st, unsigned long addr, int level, u64 val) { - u64 flag = val & pg_level[level].mask; + u64 flag = level >= 0 ? val & pg_level[level].mask : 0; u64 pa = val & PTE_RPN_MASK; st->level = level; @@ -206,12 +205,12 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr, } static void note_page(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val, unsigned long page_size) + int level, u64 val, unsigned long page_size) { - u64 flag = val & pg_level[level].mask; + u64 flag = level >= 0 ? val & pg_level[level].mask : 0; /* At first no level is set */ - if (!st->level) { + if (st->level == -1) { pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); note_page_update_state(st, addr, level, val); /* @@ -383,6 +382,7 @@ static int ptdump_show(struct seq_file *m, void *v) struct pg_state st = { .seq = m, .marker = address_markers, + .level = -1, .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE, }; @@ -393,7 +393,7 @@ static int ptdump_show(struct seq_file *m, void *v) /* Traverse kernel page tables */ walk_pagetables(&st); - note_page(&st, 0, 0, 0, 0); + note_page(&st, 0, -1, 0, 0); return 0; } @@ -415,6 +415,7 @@ void ptdump_check_wx(void) struct pg_state st = { .seq = NULL, .marker = address_markers, + .level = -1, .check_wx = true, .start_address = IS_ENABLED(CONFIG_PPC64) ? 
PAGE_OFFSET : TASK_SIZE, }; diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index c005fe041c18..03607ab90c66 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -68,8 +68,10 @@ static const struct flag_info flag_array[] = { }; struct pgtable_level pg_level[5] = { - { - }, { /* pgd */ + { /* pgd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* p4d */ .flag = flag_array, .num = ARRAY_SIZE(flag_array), }, { /* pud */ -- cgit From e084728393a58e7fdafeee2e6b20e0faff09b557 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Jul 2021 16:49:43 +0000 Subject: powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP This patch converts powerpc to the generic PTDUMP implementation. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/03166d569526be70214fe9370a7bad219d2f41c8.1625762907.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 2 + arch/powerpc/Kconfig.debug | 30 --------- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/mmu_decl.h | 2 +- arch/powerpc/mm/ptdump/Makefile | 9 ++- arch/powerpc/mm/ptdump/ptdump.c | 146 ++++++++++------------------------------ 6 files changed, 47 insertions(+), 144 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d01e3401581d..ec866085ff8b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -123,6 +123,7 @@ config PPC select ARCH_HAS_COPY_MC if PPC64 select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE + select ARCH_HAS_DEBUG_WX if STRICT_KERNEL_RWX select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_MAP_DIRECT if PPC_PSERIES select ARCH_HAS_ELF_RANDOMIZE @@ -182,6 +183,7 @@ config PPC select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP if PCI + select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 205cd77f321f..192f0ed0097f 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -365,36 +365,6 @@ config FAIL_IOMMU If you are unsure, say N. -config PPC_PTDUMP - bool "Export kernel pagetable layout to userspace via debugfs" - depends on DEBUG_KERNEL && DEBUG_FS - help - This option exports the state of the kernel pagetables to a - debugfs file. This is only useful for kernel developers who are - working in architecture specific areas of the kernel - probably - not a good idea to enable this feature in a production kernel. - - If you are unsure, say N. - -config PPC_DEBUG_WX - bool "Warn on W+X mappings at boot" - depends on PPC_PTDUMP && STRICT_KERNEL_RWX - help - Generate a warning if any W+X mappings are found at boot. - - This is useful for discovering cases where the kernel is leaving - W+X mappings after applying NX, as such mappings are a security risk. - - Note that even if the check fails, your kernel is possibly - still fine, as W+X mappings are not a security hole in - themselves, what they do is that they make the exploitation - of other unfixed kernel bugs easier. - - There is no runtime or memory usage effect of this option - once the kernel has booted up - it's a one time check. - - If in doubt, say "Y". 
- config PPC_FAST_ENDIAN_SWITCH bool "Deprecated fast endian-switch syscall" depends on DEBUG_KERNEL && PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index eae4ec2988fc..df8172da2301 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -18,5 +18,5 @@ obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o -obj-$(CONFIG_PPC_PTDUMP) += ptdump/ +obj-$(CONFIG_PTDUMP_CORE) += ptdump/ obj-$(CONFIG_KASAN) += kasan/ diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 7dac910c0b21..dd1cabc2ea0f 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -180,7 +180,7 @@ static inline void mmu_mark_rodata_ro(void) { } void __init mmu_mapin_immr(void); #endif -#ifdef CONFIG_PPC_DEBUG_WX +#ifdef CONFIG_DEBUG_WX void ptdump_check_wx(void); #else static inline void ptdump_check_wx(void) { } diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile index 712762be3cb1..4050cbb55acf 100644 --- a/arch/powerpc/mm/ptdump/Makefile +++ b/arch/powerpc/mm/ptdump/Makefile @@ -5,5 +5,10 @@ obj-y += ptdump.o obj-$(CONFIG_4xx) += shared.o obj-$(CONFIG_PPC_8xx) += 8xx.o obj-$(CONFIG_PPC_BOOK3E_MMU) += shared.o -obj-$(CONFIG_PPC_BOOK3S_32) += shared.o bats.o segment_regs.o -obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o hashpagetable.o +obj-$(CONFIG_PPC_BOOK3S_32) += shared.o +obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o + +ifdef CONFIG_PTDUMP_DEBUGFS +obj-$(CONFIG_PPC_BOOK3S_32) += bats.o segment_regs.o +obj-$(CONFIG_PPC_BOOK3S_64) += hashpagetable.o +endif diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index fb531bc64fc5..2d80d775d15e 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -54,6 +55,7 @@ * */ struct pg_state { + struct ptdump_state ptdump; struct seq_file *seq; const struct addr_marker *marker; unsigned long start_address; @@ -102,6 +104,11 @@ static struct addr_marker address_markers[] = { { -1, NULL }, }; +static struct ptdump_range ptdump_range[] __ro_after_init = { + {TASK_SIZE_MAX, ~0UL}, + {0, 0} +}; + #define pt_dump_seq_printf(m, fmt, args...) \ ({ \ if (m) \ @@ -204,10 +211,10 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr, int } } -static void note_page(struct pg_state *st, unsigned long addr, - int level, u64 val, unsigned long page_size) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { u64 flag = level >= 0 ? val & pg_level[level].mask : 0; + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); /* At first no level is set */ if (st->level == -1) { @@ -245,94 +252,6 @@ static void note_page(struct pg_state *st, unsigned long addr, } } -static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) -{ - pte_t *pte = pte_offset_kernel(pmd, 0); - unsigned long addr; - unsigned int i; - - for (i = 0; i < PTRS_PER_PTE; i++, pte++) { - addr = start + i * PAGE_SIZE; - note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE); - - } -} - -static void walk_hugepd(struct pg_state *st, hugepd_t *phpd, unsigned long start, - int pdshift, int level) -{ -#ifdef CONFIG_ARCH_HAS_HUGEPD - unsigned int i; - int shift = hugepd_shift(*phpd); - int ptrs_per_hpd = pdshift - shift > 0 ? 
1 << (pdshift - shift) : 1; - - if (start & ((1 << shift) - 1)) - return; - - for (i = 0; i < ptrs_per_hpd; i++) { - unsigned long addr = start + (i << shift); - pte_t *pte = hugepte_offset(*phpd, addr, pdshift); - - note_page(st, addr, level + 1, pte_val(*pte), 1 << shift); - } -#endif -} - -static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) -{ - pmd_t *pmd = pmd_offset(pud, 0); - unsigned long addr; - unsigned int i; - - for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { - addr = start + i * PMD_SIZE; - if (!pmd_none(*pmd) && !pmd_is_leaf(*pmd)) - /* pmd exists */ - walk_pte(st, pmd, addr); - else - note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE); - } -} - -static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) -{ - pud_t *pud = pud_offset(p4d, 0); - unsigned long addr; - unsigned int i; - - for (i = 0; i < PTRS_PER_PUD; i++, pud++) { - addr = start + i * PUD_SIZE; - if (!pud_none(*pud) && !pud_is_leaf(*pud)) - /* pud exists */ - walk_pmd(st, pud, addr); - else - note_page(st, addr, 2, pud_val(*pud), PUD_SIZE); - } -} - -static void walk_pagetables(struct pg_state *st) -{ - unsigned int i; - unsigned long addr = st->start_address & PGDIR_MASK; - pgd_t *pgd = pgd_offset_k(addr); - - /* - * Traverse the linux pagetable structure and dump pages that are in - * the hash pagetable. - */ - for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { - p4d_t *p4d = p4d_offset(pgd, 0); - - if (p4d_none(*p4d) || p4d_is_leaf(*p4d)) - note_page(st, addr, 1, p4d_val(*p4d), PGDIR_SIZE); - else if (is_hugepd(__hugepd(p4d_val(*p4d)))) - walk_hugepd(st, (hugepd_t *)p4d, addr, PGDIR_SHIFT, 1); - else - /* p4d exists */ - walk_pud(st, p4d, addr); - } -} - static void populate_markers(void) { int i = 0; @@ -383,17 +302,14 @@ static int ptdump_show(struct seq_file *m, void *v) .seq = m, .marker = address_markers, .level = -1, - .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE, + .ptdump = { + .note_page = note_page, + .range = ptdump_range, + } }; -#ifdef CONFIG_PPC64 - if (!radix_enabled()) - st.start_address = KERN_VIRT_START; -#endif - /* Traverse kernel page tables */ - walk_pagetables(&st); - note_page(&st, 0, -1, 0, 0); + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); return 0; } @@ -409,23 +325,24 @@ static void build_pgtable_complete_mask(void) pg_level[i].mask |= pg_level[i].flag[j].mask; } -#ifdef CONFIG_PPC_DEBUG_WX +#ifdef CONFIG_DEBUG_WX void ptdump_check_wx(void) { struct pg_state st = { .seq = NULL, - .marker = address_markers, + .marker = (struct addr_marker[]) { + { 0, NULL}, + { -1, NULL}, + }, .level = -1, .check_wx = true, - .start_address = IS_ENABLED(CONFIG_PPC64) ? 
PAGE_OFFSET : TASK_SIZE, + .ptdump = { + .note_page = note_page, + .range = ptdump_range, + } }; -#ifdef CONFIG_PPC64 - if (!radix_enabled()) - st.start_address = KERN_VIRT_START; -#endif - - walk_pagetables(&st); + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); if (st.wx_pages) pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", @@ -435,12 +352,21 @@ void ptdump_check_wx(void) } #endif -static int ptdump_init(void) +static int __init ptdump_init(void) { +#ifdef CONFIG_PPC64 + if (!radix_enabled()) + ptdump_range[0].start = KERN_VIRT_START; + else + ptdump_range[0].start = PAGE_OFFSET; +#endif + populate_markers(); build_pgtable_complete_mask(); - debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, - &ptdump_fops); + + if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS)) + debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); + return 0; } device_initcall(ptdump_init); -- cgit From 316389e904f968d24d44cd96a6d171ee1ef269cf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 25 Jun 2021 10:58:33 +0000 Subject: powerpc/syscalls: Simplify do_mmap2() When shift is zero, the operations remain valid, so no test is needed, and the 'ret' variable is unnecessary. Use IS_ALIGNED() to check the alignment; it is clearer. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/373ec500f386374bc5735007df3d3869eac47be1.1624618701.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/syscalls.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index bf4ae0f0e36c..825931e400df 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -41,20 +41,13 @@ static inline long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) { - long ret = -EINVAL; - if (!arch_validate_prot(prot, addr)) - goto out; + return -EINVAL; - if (shift) { - if (off & ((1 << shift) - 1)) - goto out; - off >>= shift; - } + if (!IS_ALIGNED(off, 1 << shift)) + return -EINVAL; - ret = ksys_mmap_pgoff(addr, len, prot, flags, fd, off); -out: - return ret; + return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> shift); } SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len, -- cgit From 19e932eb6ea47f4f37513eb2ae0daee19117765c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 Aug 2021 16:00:14 +0000 Subject: powerpc/ptrace: Make user_mode() common to PPC32 and PPC64 Today we have: #ifdef __powerpc64__ #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) #else #define user_mode(regs) (((regs)->msr & MSR_PR) != 0) #endif With ppc64_defconfig, we get: if (!user_mode(regs)) 14b4: e9 3e 01 08 ld r9,264(r30) 14b8: 71 29 40 00 andi. r9,r9,16384 14bc: 41 82 07 a4 beq 1c60 <.emulate_instruction+0x7d0> Taking the ppc32 definition of user_mode(), the exact same code is generated for ppc64_defconfig. So, only keep one version of user_mode(), preferably the one not using MSR_PR_LG, which should be kept internal to reg.h.
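The equivalence is easy to sanity-check outside the kernel. Below is a minimal user-space sketch, not kernel code; it assumes MSR_PR_LG is 14 (consistent with the andi. mask 16384 in the disassembly above) and shows that the shift-and-mask form and the direct AND test always agree:

#include <assert.h>
#include <stdint.h>

#define MSR_PR_LG 14                 /* bit position of MSR[PR] */
#define MSR_PR (1UL << MSR_PR_LG)    /* 0x4000 == 16384 */

int main(void)
{
	uint64_t msr = 0x9000000000004033ULL;      /* arbitrary MSR value with PR set */
	int ppc64_form = (msr >> MSR_PR_LG) & 0x1; /* former ppc64 user_mode() */
	int ppc32_form = (msr & MSR_PR) != 0;      /* common user_mode() */

	assert(ppc64_form == ppc32_form);
	return 0;
}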
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/000a28c51808bbd802b505af42d2cb316c2be7d3.1629216000.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ptrace.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 3e5d470a6155..333979cf5d1e 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -181,11 +181,7 @@ static inline unsigned long frame_pointer(struct pt_regs *regs) return 0; } -#ifdef __powerpc64__ -#define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) -#else #define user_mode(regs) (((regs)->msr & MSR_PR) != 0) -#endif #define force_successful_syscall_return() \ do { \ -- cgit From 9401f4e46cf6965e23738f70e149172344a01eef Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 2 Mar 2021 08:48:11 +0000 Subject: powerpc: Use lwarx/ldarx directly instead of PPC_LWARX/LDARX macros Force the eh flag at 0 on PPC32. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1fc81f07cabebb875b963e295408cc3dd38c8d85.1614674882.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-compat.h | 4 ++-- arch/powerpc/include/asm/atomic.h | 4 ++-- arch/powerpc/include/asm/bitops.h | 8 ++++---- arch/powerpc/include/asm/ppc-opcode.h | 2 -- arch/powerpc/include/asm/simple_spinlock.h | 6 +++--- 5 files changed, 11 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 19b70c5b5f18..2b736d9fbb1b 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -17,7 +17,7 @@ #define PPC_LONG stringify_in_c(.8byte) #define PPC_LONG_ALIGN stringify_in_c(.balign 8) #define PPC_TLNEI stringify_in_c(tdnei) -#define PPC_LLARX(t, a, b, eh) PPC_LDARX(t, a, b, eh) +#define PPC_LLARX stringify_in_c(ldarx) #define PPC_STLCX stringify_in_c(stdcx.) #define PPC_CNTLZL stringify_in_c(cntlzd) #define PPC_MTOCRF(FXM, RS) MTOCRF((FXM), RS) @@ -50,7 +50,7 @@ #define PPC_LONG stringify_in_c(.long) #define PPC_LONG_ALIGN stringify_in_c(.balign 4) #define PPC_TLNEI stringify_in_c(twnei) -#define PPC_LLARX(t, a, b, eh) PPC_LWARX(t, a, b, eh) +#define PPC_LLARX stringify_in_c(lwarx) #define PPC_STLCX stringify_in_c(stwcx.) #define PPC_CNTLZL stringify_in_c(cntlzw) #define PPC_MTOCRF stringify_in_c(mtcrf) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index a1732a79e92a..6a53ef178bfd 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -207,7 +207,7 @@ arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new) int r, o = *old; __asm__ __volatile__ ( -"1:\t" PPC_LWARX(%0,0,%2,1) " # atomic_try_cmpxchg_acquire \n" +"1: lwarx %0,0,%2,%5 # atomic_try_cmpxchg_acquire \n" " cmpw 0,%0,%3 \n" " bne- 2f \n" " stwcx. %4,0,%2 \n" @@ -215,7 +215,7 @@ arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new) "\t" PPC_ACQUIRE_BARRIER " \n" "2: \n" : "=&r" (r), "+m" (v->counter) - : "r" (&v->counter), "r" (o), "r" (new) + : "r" (&v->counter), "r" (o), "r" (new), "i" (IS_ENABLED(CONFIG_PPC64) ? 
1 : 0) : "cr0", "memory"); if (unlikely(r != o)) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 299ab33505a6..11847b6a244e 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -70,7 +70,7 @@ static inline void fn(unsigned long mask, \ unsigned long *p = (unsigned long *)_p; \ __asm__ __volatile__ ( \ prefix \ -"1:" PPC_LLARX(%0,0,%3,0) "\n" \ +"1:" PPC_LLARX "%0,0,%3,0\n" \ stringify_in_c(op) "%0,%0,%2\n" \ PPC_STLCX "%0,0,%3\n" \ "bne- 1b\n" \ @@ -115,13 +115,13 @@ static inline unsigned long fn( \ unsigned long *p = (unsigned long *)_p; \ __asm__ __volatile__ ( \ prefix \ -"1:" PPC_LLARX(%0,0,%3,eh) "\n" \ +"1:" PPC_LLARX "%0,0,%3,%4\n" \ stringify_in_c(op) "%1,%0,%2\n" \ PPC_STLCX "%1,0,%3\n" \ "bne- 1b\n" \ postfix \ : "=&r" (old), "=&r" (t) \ - : "r" (mask), "r" (p) \ + : "r" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0) \ : "cc", "memory"); \ return (old & mask); \ } @@ -170,7 +170,7 @@ clear_bit_unlock_return_word(int nr, volatile unsigned long *addr) __asm__ __volatile__ ( PPC_RELEASE_BARRIER -"1:" PPC_LLARX(%0,0,%3,0) "\n" +"1:" PPC_LLARX "%0,0,%3,0\n" "andc %1,%0,%2\n" PPC_STLCX "%1,0,%3\n" "bne- 1b\n" diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index bede76dd3db7..baea657bc868 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -576,8 +576,6 @@ #define PPC_DIVDE(t, a, b) stringify_in_c(.long PPC_RAW_DIVDE(t, a, b)) #define PPC_DIVDEU(t, a, b) stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b)) #define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh)) -#define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LDARX(t, a, b, eh)) -#define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LWARX(t, a, b, eh)) #define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_RAW_STQCX(t, a, b)) #define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c)) #define PPC_MADDHDU(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHDU(t, a, b, c)) diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h index 552f325412cc..8985791a2ba5 100644 --- a/arch/powerpc/include/asm/simple_spinlock.h +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -51,7 +51,7 @@ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock) token = LOCK_TOKEN; __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%2,1) "\n\ +"1: lwarx %0,0,%2,1\n\ cmpwi 0,%0,0\n\ bne- 2f\n\ stwcx. %1,0,%2\n\ @@ -179,7 +179,7 @@ static inline long __arch_read_trylock(arch_rwlock_t *rw) long tmp; __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%1,1) "\n" +"1: lwarx %0,0,%1,1\n" __DO_SIGN_EXTEND " addic. %0,%0,1\n\ ble- 2f\n" @@ -203,7 +203,7 @@ static inline long __arch_write_trylock(arch_rwlock_t *rw) token = WRLOCK_TOKEN; __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%2,1) "\n\ +"1: lwarx %0,0,%2,1\n\ cmpwi 0,%0,0\n\ bne- 2f\n" " stwcx. %1,0,%2\n\ -- cgit From 7f85b04b08ca264923358e2c4e93422bad59a6e0 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 5 Aug 2021 00:29:08 +0800 Subject: riscv: Keep the riscv Kconfig selects sorted Move three Kconfig selects: ARCH_STACKWALK, ARCH_SUPPORTS_ATOMIC_RMW and ARCH_SUPPORTS_DEBUG_PAGEALLOC to the right place. 
Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 387cf68a5d52..a0caf1d4211c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -13,9 +13,6 @@ config 32BIT config RISCV def_bool y select ARCH_CLOCKSOURCE_INIT - select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU - select ARCH_STACKWALK select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_VIRTUAL if MMU @@ -33,6 +30,9 @@ config RISCV select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT + select ARCH_STACKWALK + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGETLBFS if MMU select ARCH_USE_MEMTEST select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU -- cgit From 8341dcfbd8dda98a3b2836a421016f7d88e35b1c Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 5 Aug 2021 00:30:59 +0800 Subject: riscv: Enable Undefined Behavior Sanitizer UBSAN Select ARCH_HAS_UBSAN_SANITIZE_ALL in order to allow the user to enable CONFIG_UBSAN_SANITIZE_ALL and instrument the entire kernel for ubsan checks. VDSO is excluded because its build doesn't include the __ubsan_handle_*() functions from lib/ubsan.c, and the VDSO has no sane way to report errors even if it has definitions of these functions. Passed lib/test_ubsan.c test. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/kernel/vdso/Makefile | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a0caf1d4211c..e26e255dce3c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -28,6 +28,7 @@ config RISCV select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_STACKWALK diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 24d936c147cd..30a5335bd317 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -36,6 +36,7 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os GCOV_PROFILE := n KCOV_INSTRUMENT := n KASAN_SANITIZE := n +UBSAN_SANITIZE := n # Force dependency $(obj)/vdso.o: $(obj)/vdso.so -- cgit From fde9c59aebafb91caeed816cc510b56f14aa63ae Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 4 Aug 2021 17:32:14 +0000 Subject: riscv: explicitly use symbol offsets for VDSO The current implementation of the `__rt_sigaction` reference computed an absolute offset relative to the mapped base of the VDSO. While this can be handled in the medlow model, the medany model cannot handle this as it is meant to be position independent. The current implementation relied on the BFD linker relaxing the PC-relative relocation into an absolute relocation as it was a near-zero address allowing it to be referenced relative to `zero`. We now extract the offsets and create a generated header allowing the build with LLVM and lld to succeed as we no longer depend on the linker rewriting address references near zero. 
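As a rough illustration of the new mechanism (the symbol name and offset below are hypothetical, not taken from a real build), the generated include/generated/vdso-offsets.h reduces each exported VDSO symbol to a compile-time constant, so resolving a symbol becomes plain pointer arithmetic on the mapped base:

#include <stdint.h>

/* hypothetical line from the generated include/generated/vdso-offsets.h: */
#define __vdso_gettimeofday_offset 0x850UL

#define VDSO_SYMBOL(base, name) \
	((void *)((uintptr_t)(base) + __vdso_##name##_offset))

void *lookup_gtod(void *vdso_base)   /* vdso_base: where the VDSO got mapped */
{
	/* no link-time address tricks needed, so medany and lld are happy */
	return VDSO_SYMBOL(vdso_base, gettimeofday);
}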
This change was largely modelled after the ARM64 target which does something similar. Signed-off-by: Saleem Abdulrasool Tested-by: Nathan Chancellor Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 4 ++++ arch/riscv/include/asm/vdso.h | 14 ++------------ arch/riscv/kernel/vdso/Makefile | 25 +++++++++++-------------- arch/riscv/kernel/vdso/gen_vdso_offsets.sh | 5 +++++ arch/riscv/kernel/vdso/so2s.sh | 6 ------ 5 files changed, 22 insertions(+), 32 deletions(-) create mode 100755 arch/riscv/kernel/vdso/gen_vdso_offsets.sh delete mode 100755 arch/riscv/kernel/vdso/so2s.sh (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index bc74afdbf31e..e026b2d0a5a4 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -108,6 +108,10 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ +prepare: vdso_prepare +vdso_prepare: prepare0 + $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h + ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy) KBUILD_IMAGE := $(boot)/loader.bin diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 1453a2f563bc..d8d003c2b5a3 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -9,25 +9,15 @@ #define _ASM_RISCV_VDSO_H #include +#include #ifndef CONFIG_GENERIC_TIME_VSYSCALL struct vdso_data { }; #endif -/* - * The VDSO symbols are mapped into Linux so we can just use regular symbol - * addressing to get their offsets in userspace. The symbols are mapped at an - * offset of 0, but since the linker must support setting weak undefined - * symbols to the absolute address 0 it also happens to support other low - * addresses even when the code model suggests those low addresses would not - * otherwise be availiable. - */ #define VDSO_SYMBOL(base, name) \ -({ \ - extern const char __vdso_##name[]; \ - (void __user *)((unsigned long)(base) + __vdso_##name); \ -}) + (void __user *)((unsigned long)(base) + __vdso_##name##_offset) asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 30a5335bd317..f2e065671e4d 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -23,10 +23,10 @@ ifneq ($(c-gettimeofday-y),) endif # Build rules -targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-syms.S +targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) -obj-y += vdso.o vdso-syms.o +obj-y += vdso.o CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Disable -pg to prevent insert call site @@ -44,20 +44,22 @@ $(obj)/vdso.o: $(obj)/vdso.so # link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) -LDFLAGS_vdso.so.dbg = -shared -s -soname=linux-vdso.so.1 \ +LDFLAGS_vdso.so.dbg = -shared -S -soname=linux-vdso.so.1 \ --build-id=sha1 --hash-style=both --eh-frame-hdr -# We also create a special relocatable object that should mirror the symbol -# table and layout of the linked DSO. With ld --just-symbols we can then -# refer to these symbols in the kernel code rather than hand-coded addresses. 
-$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE - $(call if_changed,so2s) - # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) + # actual build commands # The DSO images are built using a special linker script # Make sure only to export the intended __vdso_xxx symbol offsets. @@ -66,11 +68,6 @@ quiet_cmd_vdsold = VDSOLD $@ $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ rm $@.tmp -# Extracts symbol offsets from the VDSO, converting them into an assembly file -# that contains the same symbols at the same offsets. -quiet_cmd_so2s = SO2S $@ - cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@ - # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ diff --git a/arch/riscv/kernel/vdso/gen_vdso_offsets.sh b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh new file mode 100755 index 000000000000..c2e5613f3495 --- /dev/null +++ b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +LC_ALL=C +sed -n -e 's/^[0]\+\(0[0-9a-fA-F]*\) . \(__vdso_[a-zA-Z0-9_]*\)$/\#define \2_offset\t0x\1/p' diff --git a/arch/riscv/kernel/vdso/so2s.sh b/arch/riscv/kernel/vdso/so2s.sh deleted file mode 100755 index e64cb6d9440e..000000000000 --- a/arch/riscv/kernel/vdso/so2s.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0+ -# Copyright 2020 Palmer Dabbelt - -sed 's!\([0-9a-f]*\) T \([a-z0-9_]*\)\(@@LINUX_4.15\)*!.global \2\n.set \2,0x\1!' \ -| grep '^\.' -- cgit From fd42b7b09c602c904452c0c3e5955ca21d8e387a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:35 +1000 Subject: KVM: PPC: Book3S HV: Initialise vcpu MSR with MSR_ME It is possible to create a VCPU without setting the MSR before running it, which results in a warning in kvmhv_vcpu_entry_p9() that MSR_ME is not set. This is pretty harmless because the MSR_ME bit is added to HSRR1 before HRFID to guest, and a normal qemu guest doesn't hit it. Initialise the vcpu MSR with MSR_ME set. Reported-by: Alexey Kardashevskiy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-2-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 085fb8ecbf68..c402eb0276b0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2684,6 +2684,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) spin_lock_init(&vcpu->arch.vpa_update_lock); spin_lock_init(&vcpu->arch.tbacct_lock); vcpu->arch.busy_preempt = TB_NIL; + vcpu->arch.shregs.msr = MSR_ME; vcpu->arch.intr_msr = MSR_SF | MSR_ME; /* -- cgit From daac40e8d7a63ab8608132a7cfce411986feac32 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:36 +1000 Subject: KVM: PPC: Book3S HV: Remove TM emulation from POWER7/8 path TM fake-suspend emulation is only used by POWER9. Remove it from the old code path. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-3-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 42 --------------------------------- 1 file changed, 42 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 8dd437d7a2c6..75079397c2a5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1088,12 +1088,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE beq kvmppc_hisi -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* For softpatch interrupt, go off and do TM instruction emulation */ - cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH - beq kvmppc_tm_emul -#endif - /* See if this is a leftover HDEC interrupt */ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER bne 2f @@ -1599,42 +1593,6 @@ maybe_reenter_guest: blt deliver_guest_interrupt b guest_exit_cont -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* - * Softpatch interrupt for transactional memory emulation cases - * on POWER9 DD2.2. This is early in the guest exit path - we - * haven't saved registers or done a treclaim yet. - */ -kvmppc_tm_emul: - /* Save instruction image in HEIR */ - mfspr r3, SPRN_HEIR - stw r3, VCPU_HEIR(r9) - - /* - * The cases we want to handle here are those where the guest - * is in real suspend mode and is trying to transition to - * transactional mode. - */ - lbz r0, HSTATE_FAKE_SUSPEND(r13) - cmpwi r0, 0 /* keep exiting guest if in fake suspend */ - bne guest_exit_cont - rldicl r3, r11, 64 - MSR_TS_S_LG, 62 - cmpwi r3, 1 /* or if not in suspend state */ - bne guest_exit_cont - - /* Call C code to do the emulation */ - mr r3, r9 - bl kvmhv_p9_tm_emulation_early - nop - ld r9, HSTATE_KVM_VCPU(r13) - li r12, BOOK3S_INTERRUPT_HV_SOFTPATCH - cmpwi r3, 0 - beq guest_exit_cont /* continue exiting if not handled */ - ld r10, VCPU_PC(r9) - ld r11, VCPU_MSR(r9) - b fast_interrupt_c_return /* go back to guest if handled */ -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - /* * Check whether an HDSI is an HPTE not found fault or something else. * If it is an HPTE not found fault that is due to the guest accessing -- cgit From 4782e0cd0d184d727ad3b0cfe20d1d44d9f98239 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:37 +1000 Subject: KVM: PPC: Book3S HV P9: Fixes for TM softpatch interrupt NIP The softpatch interrupt sets HSRR0 to the faulting instruction +4, so it should subtract 4 for the faulting instruction address in the case it is a TM softpatch interrupt (the instruction was not executed) and it was not emulated. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-4-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_tm.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c index cc90b8b82329..e7c36f8bf205 100644 --- a/arch/powerpc/kvm/book3s_hv_tm.c +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -46,6 +46,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) u64 newmsr, bescr; int ra, rs; + /* + * The TM softpatch interrupt sets NIP to the instruction following + * the faulting instruction, which is not executed. Rewind nip to the + * faulting instruction so it looks like a normal synchronous + * interrupt, then update nip in the places where the instruction is + * emulated. 
+ */ + vcpu->arch.regs.nip -= 4; + /* * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit * in these instructions, so masking bit 31 out doesn't change these @@ -67,7 +76,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) (newmsr & MSR_TM))); newmsr = sanitize_msr(newmsr); vcpu->arch.shregs.msr = newmsr; - vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.cfar = vcpu->arch.regs.nip; vcpu->arch.regs.nip = vcpu->arch.shregs.srr0; return RESUME_GUEST; @@ -100,7 +109,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.bescr = bescr; msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; vcpu->arch.shregs.msr = msr; - vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.cfar = vcpu->arch.regs.nip; vcpu->arch.regs.nip = vcpu->arch.ebbrr; return RESUME_GUEST; @@ -116,6 +125,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE); newmsr = sanitize_msr(newmsr); vcpu->arch.shregs.msr = newmsr; + vcpu->arch.regs.nip += 4; return RESUME_GUEST; /* ignore bit 31, see comment above */ @@ -152,6 +162,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) msr = (msr & ~MSR_TS_MASK) | MSR_TS_S; } vcpu->arch.shregs.msr = msr; + vcpu->arch.regs.nip += 4; return RESUME_GUEST; /* ignore bit 31, see comment above */ @@ -189,6 +200,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29); vcpu->arch.shregs.msr &= ~MSR_TS_MASK; + vcpu->arch.regs.nip += 4; return RESUME_GUEST; /* ignore bit 31, see comment above */ @@ -220,6 +232,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29); vcpu->arch.shregs.msr = msr | MSR_TS_S; + vcpu->arch.regs.nip += 4; return RESUME_GUEST; } -- cgit From d82b392d9b3556b63e3f9916cf057ea847e173a9 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:38 +1000 Subject: KVM: PPC: Book3S HV Nested: Fix TM softpatch HFAC interrupt emulation Have the TM softpatch emulation code set up the HFAC interrupt and return -1 in case an instruction was executed with HFSCR bits clear, and have the interrupt exit handler fall through to the HFAC handler. When the L0 is running a nested guest, this ensures the HFAC interrupt is correctly passed up to the L1. The "direct guest" exit handler will turn these into PROGILL program interrupts so functionality in practice will be unchanged. But it's possible an L1 would want to handle these in a different way. Also rearrange the FAC interrupt emulation code to match the HFAC format while here (mainly, adding the FSCR_INTR_CAUSE mask). 
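The pattern the emulation code switches to can be sketched stand-alone as follows; struct vcpu_sketch is a stub standing in for the relevant kvm_vcpu fields, and the constants are assumed to mirror the kernel's definitions (see the book3s_hv_tm.c hunks below for the real code):

#include <stdint.h>

#define HFSCR_INTR_CAUSE (0xFFULL << 56)        /* top byte: interrupt cause */
#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80

struct vcpu_sketch {      /* stand-in for the relevant kvm_vcpu fields */
	uint64_t hfscr;
	int trap;
};

/* Record the cause (a facility bit number such as FSCR_TM_LG) in the top
 * byte of the shadow HFSCR, flag an HFAC interrupt, and return -1 so the
 * exit handler falls through to the facility unavailable handler (and, for
 * a nested guest, passes the HFAC up to the L1). */
long hfu_unavailable(struct vcpu_sketch *vcpu, unsigned int cause_lg)
{
	vcpu->hfscr &= ~HFSCR_INTR_CAUSE;
	vcpu->hfscr |= (uint64_t)cause_lg << 56;
	vcpu->trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
	return -1;      /* rerun host interrupt handler */
}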
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-5-npiggin@gmail.com --- arch/powerpc/include/asm/reg.h | 3 ++- arch/powerpc/kvm/book3s_hv.c | 35 ++++++++++++++++++++------------ arch/powerpc/kvm/book3s_hv_tm.c | 44 ++++++++++++++++++++++------------------- 3 files changed, 48 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index be85cf156a1f..e9d27265253b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -415,6 +415,7 @@ #define FSCR_TAR __MASK(FSCR_TAR_LG) #define FSCR_EBB __MASK(FSCR_EBB_LG) #define FSCR_DSCR __MASK(FSCR_DSCR_LG) +#define FSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56) /* interrupt cause */ #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ #define HFSCR_PREFIX __MASK(FSCR_PREFIX_LG) #define HFSCR_MSGP __MASK(FSCR_MSGP_LG) @@ -426,7 +427,7 @@ #define HFSCR_DSCR __MASK(FSCR_DSCR_LG) #define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG) #define HFSCR_FP __MASK(FSCR_FP_LG) -#define HFSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56) /* interrupt cause */ +#define HFSCR_INTR_CAUSE FSCR_INTR_CAUSE #define SPRN_TAR 0x32f /* Target Address Register */ #define SPRN_LPCR 0x13E /* LPAR Control Register */ #define LPCR_VPM0 ASM_CONST(0x8000000000000000) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c402eb0276b0..e7df8a3ca62c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1679,6 +1679,21 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, r = RESUME_GUEST; } break; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case BOOK3S_INTERRUPT_HV_SOFTPATCH: + /* + * This occurs for various TM-related instructions that + * we need to emulate on POWER9 DD2.2. We have already + * handled the cases where the guest was in real-suspend + * mode and was transitioning to transactional state. + */ + r = kvmhv_p9_tm_emulation(vcpu); + if (r != -1) + break; + fallthrough; /* go to facility unavailable handler */ +#endif + /* * This occurs if the guest (kernel or userspace), does something that * is prohibited by HFSCR. @@ -1697,18 +1712,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, } break; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - case BOOK3S_INTERRUPT_HV_SOFTPATCH: - /* - * This occurs for various TM-related instructions that - * we need to emulate on POWER9 DD2.2. We have already - * handled the cases where the guest was in real-suspend - * mode and was transitioning to transactional state. - */ - r = kvmhv_p9_tm_emulation(vcpu); - break; -#endif - case BOOK3S_INTERRUPT_HV_RM_HARD: r = RESUME_PASSTHROUGH; break; @@ -1811,9 +1814,15 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) * mode and was transitioning to transactional state. 
*/ r = kvmhv_p9_tm_emulation(vcpu); - break; + if (r != -1) + break; + fallthrough; /* go to facility unavailable handler */ #endif + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: + r = RESUME_HOST; + break; + case BOOK3S_INTERRUPT_HV_RM_HARD: vcpu->arch.trap = 0; r = RESUME_GUEST; diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c index e7c36f8bf205..866cadd70094 100644 --- a/arch/powerpc/kvm/book3s_hv_tm.c +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -88,14 +88,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) } /* check EBB facility is available */ if (!(vcpu->arch.hfscr & HFSCR_EBB)) { - /* generate an illegal instruction interrupt */ - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; + vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE; + vcpu->arch.hfscr |= (u64)FSCR_EBB_LG << 56; + vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL; + return -1; /* rerun host interrupt handler */ } if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) { /* generate a facility unavailable interrupt */ - vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | - ((u64)FSCR_EBB_LG << 56); + vcpu->arch.fscr &= ~FSCR_INTR_CAUSE; + vcpu->arch.fscr |= (u64)FSCR_EBB_LG << 56; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); return RESUME_GUEST; } @@ -138,14 +139,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) } /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { - /* generate an illegal instruction interrupt */ - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; + vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE; + vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56; + vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL; + return -1; /* rerun host interrupt handler */ } if (!(msr & MSR_TM)) { /* generate a facility unavailable interrupt */ - vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | - ((u64)FSCR_TM_LG << 56); + vcpu->arch.fscr &= ~FSCR_INTR_CAUSE; + vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); return RESUME_GUEST; @@ -169,14 +171,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK): /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { - /* generate an illegal instruction interrupt */ - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; + vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE; + vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56; + vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL; + return -1; /* rerun host interrupt handler */ } if (!(msr & MSR_TM)) { /* generate a facility unavailable interrupt */ - vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | - ((u64)FSCR_TM_LG << 56); + vcpu->arch.fscr &= ~FSCR_INTR_CAUSE; + vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); return RESUME_GUEST; @@ -208,14 +211,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) /* XXX do we need to check for PR=0 here? 
*/ /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { - /* generate an illegal instruction interrupt */ - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; + vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE; + vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56; + vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL; + return -1; /* rerun host interrupt handler */ } if (!(msr & MSR_TM)) { /* generate a facility unavailable interrupt */ - vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | - ((u64)FSCR_TM_LG << 56); + vcpu->arch.fscr &= ~FSCR_INTR_CAUSE; + vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); return RESUME_GUEST; -- cgit From 7487cabc7ed2f7716bf304e4e97c57fd995cf70e Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 12 Aug 2021 02:00:39 +1000 Subject: KVM: PPC: Book3S HV Nested: Sanitise vcpu registers As one of the arguments of the H_ENTER_NESTED hypercall, the nested hypervisor (L1) prepares a structure containing the values of various hypervisor-privileged registers with which it wants the nested guest (L2) to run. Since the nested HV runs in supervisor mode it needs the host to write to these registers. To stop a nested HV manipulating this mechanism and using a nested guest as a proxy to access a facility that has been made unavailable to it, we have a routine that sanitises the values of the HV registers before copying them into the nested guest's vcpu struct. However, when coming out of the guest the values are copied as they were back into L1 memory, which means that any sanitisation we did during guest entry will be exposed to L1 after H_ENTER_NESTED returns. This patch alters this sanitisation to have effect on the vcpu->arch registers directly before entering and after exiting the guest, leaving the structure that is copied back into L1 unchanged (except when we really want L1 to access the value, e.g the Cause bits of HFSCR). Signed-off-by: Fabiano Rosas Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin Link: https://lore.kernel.org/r/20210811160134.904987-6-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_nested.c | 94 ++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 898f942eb198..1eb4e989edc7 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -105,7 +105,6 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, struct kvmppc_vcore *vc = vcpu->arch.vcore; hr->dpdes = vc->dpdes; - hr->hfscr = vcpu->arch.hfscr; hr->purr = vcpu->arch.purr; hr->spurr = vcpu->arch.spurr; hr->ic = vcpu->arch.ic; @@ -128,55 +127,17 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, case BOOK3S_INTERRUPT_H_INST_STORAGE: hr->asdr = vcpu->arch.fault_gpa; break; + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: + hr->hfscr = ((~HFSCR_INTR_CAUSE & hr->hfscr) | + (HFSCR_INTR_CAUSE & vcpu->arch.hfscr)); + break; case BOOK3S_INTERRUPT_H_EMUL_ASSIST: hr->heir = vcpu->arch.emul_inst; break; } } -/* - * This can result in some L0 HV register state being leaked to an L1 - * hypervisor when the hv_guest_state is copied back to the guest after - * being modified here. 
- * - * There is no known problem with such a leak, and in many cases these - * register settings could be derived by the guest by observing behaviour - * and timing, interrupts, etc., but it is an issue to consider. - */ -static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) -{ - struct kvmppc_vcore *vc = vcpu->arch.vcore; - u64 mask; - - /* - * Don't let L1 change LPCR bits for the L2 except these: - */ - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | - LPCR_LPES | LPCR_MER; - - /* - * Additional filtering is required depending on hardware - * and configuration. - */ - hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm, - (vc->lpcr & ~mask) | (hr->lpcr & mask)); - - /* - * Don't let L1 enable features for L2 which we've disabled for L1, - * but preserve the interrupt cause field. - */ - hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr); - - /* Don't let data address watchpoint match in hypervisor state */ - hr->dawrx0 &= ~DAWRX_HYP; - hr->dawrx1 &= ~DAWRX_HYP; - - /* Don't let completed instruction address breakpt match in HV state */ - if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) - hr->ciabr &= ~CIABR_PRIV; -} - -static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) +static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr) { struct kvmppc_vcore *vc = vcpu->arch.vcore; @@ -288,6 +249,43 @@ static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu, sizeof(struct pt_regs)); } +static void load_l2_hv_regs(struct kvm_vcpu *vcpu, + const struct hv_guest_state *l2_hv, + const struct hv_guest_state *l1_hv, u64 *lpcr) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + u64 mask; + + restore_hv_regs(vcpu, l2_hv); + + /* + * Don't let L1 change LPCR bits for the L2 except these: + */ + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | + LPCR_LPES | LPCR_MER; + + /* + * Additional filtering is required depending on hardware + * and configuration. + */ + *lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm, + (vc->lpcr & ~mask) | (*lpcr & mask)); + + /* + * Don't let L1 enable features for L2 which we've disabled for L1, + * but preserve the interrupt cause field. + */ + vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr); + + /* Don't let data address watchpoint match in hypervisor state */ + vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP; + vcpu->arch.dawrx1 = l2_hv->dawrx1 & ~DAWRX_HYP; + + /* Don't let completed instruction address breakpt match in HV state */ + if ((l2_hv->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) + vcpu->arch.ciabr = l2_hv->ciabr & ~CIABR_PRIV; +} + long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) { long int err, r; @@ -296,7 +294,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) struct hv_guest_state l2_hv = {0}, saved_l1_hv; struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 hv_ptr, regs_ptr; - u64 hdec_exp; + u64 hdec_exp, lpcr; s64 delta_purr, delta_spurr, delta_ic, delta_vtb; if (vcpu->kvm->arch.l1_ptcr == 0) @@ -369,8 +367,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* Guest must always run with ME enabled, HV disabled. 
*/ vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV; - sanitise_hv_regs(vcpu, &l2_hv); - restore_hv_regs(vcpu, &l2_hv); + lpcr = l2_hv.lpcr; + load_l2_hv_regs(vcpu, &l2_hv, &saved_l1_hv, &lpcr); vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; @@ -380,7 +378,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) r = RESUME_HOST; break; } - r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr); + r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr); } while (is_kvmppc_resume_guest(r)); /* save L2 state for return */ -- cgit From 8b210a880b35ba75eb42b79dfd65e369c1feb119 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:40 +1000 Subject: KVM: PPC: Book3S HV Nested: Make nested HFSCR state accessible When the L0 runs a nested L2, there are several permutations of HFSCR that can be relevant. The HFSCR that the L1 requested, the HFSCR that the L1 vcpu may use, and the HFSCR that is actually being used to run the L2. The L1 requested HFSCR is not accessible outside the nested hcall handler, so copy that into a new kvm_nested_guest.hfscr field. The permitted HFSCR is taken from the HFSCR that the L1 runs with, which is also not accessible while the hcall is being made. Move this into a new kvm_vcpu_arch.hfscr_permitted field. These will be used by the next patch to improve facility handling for nested guests, and later by facility demand faulting patches. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-7-npiggin@gmail.com --- arch/powerpc/include/asm/kvm_book3s_64.h | 1 + arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/kvm/book3s_hv.c | 2 ++ arch/powerpc/kvm/book3s_hv_nested.c | 5 +++-- 4 files changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index eaf3a562bf1e..19b6942c6969 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -39,6 +39,7 @@ struct kvm_nested_guest { pgd_t *shadow_pgtable; /* our page table for this guest */ u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */ u64 process_table; /* process table entry for this guest */ + u64 hfscr; /* HFSCR that the L1 requested for this nested guest */ long refcnt; /* number of pointers to this struct */ struct mutex tlb_lock; /* serialize page faults and tlbies */ struct kvm_nested_guest *next; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9f52f282b1aa..a779f7849cfb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -811,6 +811,8 @@ struct kvm_vcpu_arch { u32 online; + u64 hfscr_permitted; /* A mask of permitted HFSCR facilities */ + /* For support of nested guests */ struct kvm_nested_guest *nested; u32 nested_vcpu_id; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e7df8a3ca62c..9d31267d26b3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2715,6 +2715,8 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) if (cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr |= HFSCR_TM; + vcpu->arch.hfscr_permitted = vcpu->arch.hfscr; + kvmppc_mmu_book3s_hv_init(vcpu); vcpu->arch.state = KVMPPC_VCPU_NOTREADY; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 1eb4e989edc7..5ad5014c6f68 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++
b/arch/powerpc/kvm/book3s_hv_nested.c @@ -272,10 +272,10 @@ static void load_l2_hv_regs(struct kvm_vcpu *vcpu, (vc->lpcr & ~mask) | (*lpcr & mask)); /* - * Don't let L1 enable features for L2 which we've disabled for L1, + * Don't let L1 enable features for L2 which we don't allow for L1, * but preserve the interrupt cause field. */ - vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr); + vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr_permitted); /* Don't let data address watchpoint match in hypervisor state */ vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP; @@ -362,6 +362,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* set L1 state to L2 state */ vcpu->arch.nested = l2; vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; + l2->hfscr = l2_hv.hfscr; vcpu->arch.regs = l2_regs; /* Guest must always run with ME enabled, HV disabled. */ -- cgit From 7c3ded5735141ff4d049747c9f76672a8b737c49 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 12 Aug 2021 02:00:41 +1000 Subject: KVM: PPC: Book3S HV Nested: Stop forwarding all HFUs to L1 If the nested hypervisor has no access to a facility because it has been disabled by the host, it should also not be able to see the Hypervisor Facility Unavailable that arises from one of its guests trying to access the facility. This patch turns a HFU that happened in L2 into a Hypervisor Emulation Assistance interrupt and forwards it to L1 for handling. The ones that happened because L1 explicitly disabled the facility for L2 are still let through, along with the corresponding Cause bits in the HFSCR. Signed-off-by: Fabiano Rosas [np: move handling into kvmppc_handle_nested_exit] Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-8-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9d31267d26b3..15e2ba93678e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1730,6 +1730,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) { + struct kvm_nested_guest *nested = vcpu->arch.nested; int r; int srcu_idx; @@ -1819,9 +1820,35 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) fallthrough; /* go to facility unavailable handler */ #endif - case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: - r = RESUME_HOST; + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: { + u64 cause = vcpu->arch.hfscr >> 56; + + /* + * Only pass HFU interrupts to the L1 if the facility is + * permitted but disabled by the L1's HFSCR, otherwise + * the interrupt does not make sense to the L1 so turn + * it into a HEAI. + */ + if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) || + (nested->hfscr & (1UL << cause))) { + vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST; + + /* + * If the fetch failed, return to guest and + * try executing it again. 
+ */ + r = kvmppc_get_last_inst(vcpu, INST_GENERIC, + &vcpu->arch.emul_inst); + if (r != EMULATE_DONE) + r = RESUME_GUEST; + else + r = RESUME_HOST; + } else { + r = RESUME_HOST; + } + break; + } case BOOK3S_INTERRUPT_HV_RM_HARD: vcpu->arch.trap = 0; -- cgit From f2e29db156523bf08a0524e0f4306a641912c6d9 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 12 Aug 2021 02:00:42 +1000 Subject: KVM: PPC: Book3S HV Nested: save_hv_return_state does not require trap argument vcpu is already an argument, so vcpu->arch.trap can be used directly. Signed-off-by: Fabiano Rosas Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-9-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_nested.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 5ad5014c6f68..ed8a2c9f5629 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -99,7 +99,7 @@ static void byteswap_hv_regs(struct hv_guest_state *hr) hr->dawrx1 = swab64(hr->dawrx1); } -static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, +static void save_hv_return_state(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) { struct kvmppc_vcore *vc = vcpu->arch.vcore; @@ -118,7 +118,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, hr->pidr = vcpu->arch.pid; hr->cfar = vcpu->arch.cfar; hr->ppr = vcpu->arch.ppr; - switch (trap) { + switch (vcpu->arch.trap) { case BOOK3S_INTERRUPT_H_DATA_STORAGE: hr->hdar = vcpu->arch.fault_dar; hr->hdsisr = vcpu->arch.fault_dsisr; @@ -389,7 +389,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) delta_spurr = vcpu->arch.spurr - l2_hv.spurr; delta_ic = vcpu->arch.ic - l2_hv.ic; delta_vtb = vc->vtb - l2_hv.vtb; save_hv_return_state(vcpu, &l2_hv); /* restore L1 state */ vcpu->arch.nested = NULL; -- cgit From 1782663897945a5cf28e564ba5eed730098e9aa4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:43 +1000 Subject: KVM: PPC: Book3S HV Nested: Reflect guest PMU in-use to L0 when guest SPRs are live After the L1 saves its PMU SPRs but before loading the L2's PMU SPRs, switch the pmcregs_in_use field in the L1 lppaca to the value advertised by the L2 in its VPA. On the way out of the L2, set it back after saving the L2 PMU registers (if they were in use). This transfers the PMU liveness indication between the L1 and L2 at the points where the registers are not live. This fixes the nested HV bug for which a workaround was added to the L0 HV by commit 63279eeb7f93a ("KVM: PPC: Book3S HV: Always save guest pmu for guest capable of nesting"), which explains the problem in detail. That workaround is no longer required for guests that include this bug fix.
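The handoff can be sketched stand-alone as follows; the struct and function names here are illustrative stubs, while the real code in the book3s_hv.c hunks below operates on the pinned VPA and the paca's lppaca, with barriers around the stores:

#include <stdint.h>

struct lppaca_sketch {          /* stand-in for the lppaca shared with the L0 */
	uint8_t pmcregs_in_use;
};

/* Guest entry: the L1's PMU SPRs are saved and the L2's are about to be
 * loaded, so advertise the L2's PMU liveness (from its VPA) to the L0.
 * Without a pinned VPA, the PMU must be assumed in use. */
void pmu_handoff_entry(struct lppaca_sketch *l1_lppaca,
		       const struct lppaca_sketch *l2_vpa)
{
	l1_lppaca->pmcregs_in_use = l2_vpa ? l2_vpa->pmcregs_in_use : 1;
}

/* Guest exit: the L2's PMU SPRs have been saved again, so restore the L1's
 * own liveness indication. */
void pmu_handoff_exit(struct lppaca_sketch *l1_lppaca, uint8_t l1_pmu_inuse)
{
	l1_lppaca->pmcregs_in_use = l1_pmu_inuse;
}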
Fixes: 360cae313702 ("KVM: PPC: Book3S HV: Nested guest entry via hypercall") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20210811160134.904987-10-npiggin@gmail.com --- arch/powerpc/include/asm/pmc.h | 7 +++++++ arch/powerpc/kvm/book3s_hv.c | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h index c6bbe9778d3c..3c09109e708e 100644 --- a/arch/powerpc/include/asm/pmc.h +++ b/arch/powerpc/include/asm/pmc.h @@ -34,6 +34,13 @@ static inline void ppc_set_pmu_inuse(int inuse) #endif } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +static inline int ppc_get_pmu_inuse(void) +{ + return get_paca()->pmcregs_in_use; +} +#endif + extern void power4_enable_pmcs(void); #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 15e2ba93678e..938b0948478f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -3891,6 +3892,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true); +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + if (vcpu->arch.vpa.pinned_addr) { + struct lppaca *lp = vcpu->arch.vpa.pinned_addr; + get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use; + } else { + get_lppaca()->pmcregs_in_use = 1; + } + barrier(); + } +#endif kvmhv_load_guest_pmu(vcpu); msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); @@ -4025,6 +4038,13 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, save_pmu |= nesting_enabled(vcpu->kvm); kvmhv_save_guest_pmu(vcpu, save_pmu); +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); + barrier(); + } +#endif vc->entry_exit_map = 0x101; vc->in_guest = 0; -- cgit From 0c8fb653d487d2873f5eefdcaf4cecff4e103828 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:44 +1000 Subject: powerpc/64s: Remove WORT SPR from POWER9/10 This register is not architected and not implemented in POWER9 or 10, it just reads back zeroes for compatibility. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20210811160134.904987-11-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 3 --- arch/powerpc/platforms/powernv/idle.c | 2 -- 2 files changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 938b0948478f..dc8f18b0aa75 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3767,7 +3767,6 @@ static void load_spr_state(struct kvm_vcpu *vcpu) mtspr(SPRN_EBBHR, vcpu->arch.ebbhr); mtspr(SPRN_EBBRR, vcpu->arch.ebbrr); mtspr(SPRN_BESCR, vcpu->arch.bescr); - mtspr(SPRN_WORT, vcpu->arch.wort); mtspr(SPRN_TIDR, vcpu->arch.tid); mtspr(SPRN_AMR, vcpu->arch.amr); mtspr(SPRN_UAMOR, vcpu->arch.uamor); @@ -3794,7 +3793,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu) vcpu->arch.ebbhr = mfspr(SPRN_EBBHR); vcpu->arch.ebbrr = mfspr(SPRN_EBBRR); vcpu->arch.bescr = mfspr(SPRN_BESCR); - vcpu->arch.wort = mfspr(SPRN_WORT); vcpu->arch.tid = mfspr(SPRN_TIDR); vcpu->arch.amr = mfspr(SPRN_AMR); vcpu->arch.uamor = mfspr(SPRN_UAMOR); @@ -3826,7 +3824,6 @@ static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, struct p9_host_os_sprs *host_os_sprs) { mtspr(SPRN_PSPB, 0); - mtspr(SPRN_WORT, 0); mtspr(SPRN_UAMOR, 0); mtspr(SPRN_DSCR, host_os_sprs->dscr); diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 528a7e0cf83a..180baecad914 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -667,7 +667,6 @@ static unsigned long power9_idle_stop(unsigned long psscr) sprs.purr = mfspr(SPRN_PURR); sprs.spurr = mfspr(SPRN_SPURR); sprs.dscr = mfspr(SPRN_DSCR); - sprs.wort = mfspr(SPRN_WORT); sprs.ciabr = mfspr(SPRN_CIABR); sprs.mmcra = mfspr(SPRN_MMCRA); @@ -785,7 +784,6 @@ core_woken: mtspr(SPRN_PURR, sprs.purr); mtspr(SPRN_SPURR, sprs.spurr); mtspr(SPRN_DSCR, sprs.dscr); - mtspr(SPRN_WORT, sprs.wort); mtspr(SPRN_CIABR, sprs.ciabr); mtspr(SPRN_MMCRA, sprs.mmcra); -- cgit From 315511166469a641c1b838eaca6bdd3af5c362c4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:45:18 +0900 Subject: microblaze: move core-y in arch/microblaze/Makefile to arch/microblaze/Kbuild Use obj-y to clean up Makefile. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20210811164518.187497-1-masahiroy@kernel.org Signed-off-by: Michal Simek --- arch/microblaze/Kbuild | 4 ++++ arch/microblaze/Makefile | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/Kbuild b/arch/microblaze/Kbuild index a4e40e534e6a..a1c597889319 100644 --- a/arch/microblaze/Kbuild +++ b/arch/microblaze/Kbuild @@ -1 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-y += kernel/ +obj-y += mm/ +obj-$(CONFIG_PCI) += pci/ +obj-y += boot/dts/ diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 6d4af39e3890..9adc6b6434df 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -50,17 +50,12 @@ KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-y) $(CPUFLAGS-1) $(CPUFLAGS-2) head-y := arch/microblaze/kernel/head.o libs-y += arch/microblaze/lib/ -core-y += arch/microblaze/kernel/ -core-y += arch/microblaze/mm/ -core-$(CONFIG_PCI) += arch/microblaze/pci/ boot := arch/microblaze/boot # Are we making a simpleImage. target? 
If so, crack out the boardname DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS))) -core-y += $(boot)/dts/ - export DTB all: linux.bin -- cgit From b1643084d164cea0c107a39bcdf0119fc52619af Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 18 Aug 2021 22:45:54 +0530 Subject: powerpc/perf: Use stack siar instead of mfspr Minor optimization in the 'perf_instruction_pointer' function code by making use of stack siar instead of mfspr. Fixes: 75382aa72f06 ("powerpc/perf: Move code to select SIAR or pt_regs into perf_read_regs") Signed-off-by: Kajol Jain Tested-by: Nageswara R Sastry Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210818171556.36912-1-kjain@linux.ibm.com --- arch/powerpc/perf/core-book3s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 91203ed9d0ff..3a782a35100d 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2269,7 +2269,7 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) else return regs->nip; } else if (use_siar && siar_valid(regs)) - return mfspr(SPRN_SIAR) + perf_ip_adjust(regs); + return siar + perf_ip_adjust(regs); else if (use_siar) return 0; // no valid instruction pointer else -- cgit From cc90c6742ef5b438f4cb86029d7a794bd0a44a06 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 18 Aug 2021 22:45:55 +0530 Subject: powerpc/perf: Drop the case of returning 0 as instruction pointer Drop the case of returning 0 as instruction pointer since kernel never executes at 0 and userspace almost never does either. Fixes: e6878835ac47 ("powerpc/perf: Sample only if SIAR-Valid bit is set in P7+") Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210818171556.36912-2-kjain@linux.ibm.com --- arch/powerpc/perf/core-book3s.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3a782a35100d..9bb466d2d99e 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2270,8 +2270,6 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) return regs->nip; } else if (use_siar && siar_valid(regs)) return siar + perf_ip_adjust(regs); - else if (use_siar) - return 0; // no valid instruction pointer else return regs->nip; } -- cgit From 3c69a5f22223fa3e312689ec218b5059784d49d7 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 18 Aug 2021 22:45:56 +0530 Subject: powerpc/perf: Fix the check for SIAR value Incase of random sampling, there can be scenarios where Sample Instruction Address Register(SIAR) may not latch to the sampled instruction and could result in the value of 0. In these scenarios it is preferred to return regs->nip. These corner cases are seen in the previous generation (p9) also. Patch adds the check for SIAR value along with regs_use_siar and siar_valid checks so that the function will return regs->nip incase SIAR is zero. Patch drops the code under PPMU_P10_DD1 flag check which handles SIAR 0 case only for Power10 DD1. 
Fixes: 2ca13a4cc56c9 ("powerpc/perf: Use regs->nip when SIAR is zero") Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210818171556.36912-3-kjain@linux.ibm.com --- arch/powerpc/perf/core-book3s.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 9bb466d2d99e..73e62e9b179b 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2260,15 +2260,9 @@ unsigned long perf_misc_flags(struct pt_regs *regs) */ unsigned long perf_instruction_pointer(struct pt_regs *regs) { - bool use_siar = regs_use_siar(regs); unsigned long siar = mfspr(SPRN_SIAR); - if (ppmu && (ppmu->flags & PPMU_P10_DD1)) { - if (siar) - return siar; - else - return regs->nip; - } else if (use_siar && siar_valid(regs)) + if (regs_use_siar(regs) && siar_valid(regs) && siar) return siar + perf_ip_adjust(regs); else return regs->nip; -- cgit From 2cc1121bc993ca3090cc4267bc38d3da61b68602 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 1 Oct 2020 19:39:15 -0700 Subject: ARC: mm: disintegrate mmu.h (arcv2 bits out) non functional change Tested-by: kernel test robot Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mmu-arcv2.h | 103 +++++++++++++++++++++++++++++++++++++++ arch/arc/include/asm/mmu.h | 80 +----------------------------- arch/arc/include/asm/pgtable.h | 6 --- 3 files changed, 105 insertions(+), 84 deletions(-) create mode 100644 arch/arc/include/asm/mmu-arcv2.h (limited to 'arch') diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h new file mode 100644 index 000000000000..ed9036d4ede3 --- /dev/null +++ b/arch/arc/include/asm/mmu-arcv2.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012, 2019-20 Synopsys, Inc. (www.synopsys.com) + * + * MMUv3 (arc700) / MMUv4 (archs) are software page walked and software managed. 
+ * This file contains the TLB access registers and commands + */ + +#ifndef _ASM_ARC_MMU_ARCV2_H +#define _ASM_ARC_MMU_ARCV2_H + +/* + * TLB Management regs + */ +#define ARC_REG_MMU_BCR 0x06f + +#ifdef CONFIG_ARC_MMU_V3 +#define ARC_REG_TLBPD0 0x405 +#define ARC_REG_TLBPD1 0x406 +#define ARC_REG_TLBPD1HI 0 /* Dummy: allows common code */ +#define ARC_REG_TLBINDEX 0x407 +#define ARC_REG_TLBCOMMAND 0x408 +#define ARC_REG_PID 0x409 +#define ARC_REG_SCRATCH_DATA0 0x418 +#else +#define ARC_REG_TLBPD0 0x460 +#define ARC_REG_TLBPD1 0x461 +#define ARC_REG_TLBPD1HI 0x463 +#define ARC_REG_TLBINDEX 0x464 +#define ARC_REG_TLBCOMMAND 0x465 +#define ARC_REG_PID 0x468 +#define ARC_REG_SCRATCH_DATA0 0x46c +#endif + +/* Bits in MMU PID reg */ +#define __TLB_ENABLE (1 << 31) +#define __PROG_ENABLE (1 << 30) +#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE) + +/* Bits in TLB Index reg */ +#define TLB_LKUP_ERR 0x80000000 + +#ifdef CONFIG_ARC_MMU_V3 +#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) +#else +#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000) +#endif + +/* + * TLB Commands + */ +#define TLBWrite 0x1 +#define TLBRead 0x2 +#define TLBGetIndex 0x3 +#define TLBProbe 0x4 +#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ +#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ + +#ifdef CONFIG_ARC_MMU_V4 +#define TLBInsertEntry 0x7 +#define TLBDeleteEntry 0x8 +#endif + +/* Masks for actual TLB "PD"s */ +#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) +#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) + +#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) + +#ifndef __ASSEMBLY__ + +struct mm_struct; +extern int pae40_exist_but_not_enab(void); + +static inline int is_pae40_enabled(void) +{ + return IS_ENABLED(CONFIG_ARC_HAS_PAE40); +} + +static inline void mmu_setup_asid(struct mm_struct *mm, unsigned long asid) +{ + write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE); +} + +static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd) +{ + /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ +#ifdef CONFIG_ISA_ARCV2 + write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd); +#endif +} + +#else + +.macro ARC_MMU_REENABLE reg + lr \reg, [ARC_REG_PID] + or \reg, \reg, MMU_ENABLE + sr \reg, [ARC_REG_PID] +.endm + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index e66e1e417694..ca427c30f70e 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -7,91 +7,15 @@ #define _ASM_ARC_MMU_H #ifndef __ASSEMBLY__ -#include /* NR_CPUS */ -#endif - -/* MMU Management regs */ -#define ARC_REG_MMU_BCR 0x06f - -#ifdef CONFIG_ARC_MMU_V3 -#define ARC_REG_TLBPD0 0x405 -#define ARC_REG_TLBPD1 0x406 -#define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */ -#define ARC_REG_TLBINDEX 0x407 -#define ARC_REG_TLBCOMMAND 0x408 -#define ARC_REG_PID 0x409 -#define ARC_REG_SCRATCH_DATA0 0x418 -#else -#define ARC_REG_TLBPD0 0x460 -#define ARC_REG_TLBPD1 0x461 -#define ARC_REG_TLBPD1HI 0x463 -#define ARC_REG_TLBINDEX 0x464 -#define ARC_REG_TLBCOMMAND 0x465 -#define ARC_REG_PID 0x468 -#define ARC_REG_SCRATCH_DATA0 0x46c -#endif - -/* Bits in MMU PID register */ -#define __TLB_ENABLE (1 << 31) -#define __PROG_ENABLE (1 << 30) -#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE) - -/* Error code if probe fails */ -#define TLB_LKUP_ERR 0x80000000 - -#ifdef CONFIG_ARC_MMU_V3 -#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) -#else -#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000) -#endif - 
-/* TLB Commands */ -#define TLBWrite 0x1 -#define TLBRead 0x2 -#define TLBGetIndex 0x3 -#define TLBProbe 0x4 -#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ -#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ -#ifdef CONFIG_ARC_MMU_V4 -#define TLBInsertEntry 0x7 -#define TLBDeleteEntry 0x8 -#endif - -#ifndef __ASSEMBLY__ +#include /* NR_CPUS */ typedef struct { unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; -static inline void mmu_setup_asid(struct mm_struct *mm, unsigned int asid) -{ - write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE); -} - -static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd) -{ - /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ -#ifdef CONFIG_ISA_ARCV2 - write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd); #endif -} - -static inline int is_pae40_enabled(void) -{ - return IS_ENABLED(CONFIG_ARC_HAS_PAE40); -} - -extern int pae40_exist_but_not_enab(void); - -#else - -.macro ARC_MMU_REENABLE reg - lr \reg, [ARC_REG_PID] - or \reg, \reg, MMU_ENABLE - sr \reg, [ARC_REG_PID] -.endm -#endif /* !__ASSEMBLY__ */ +#include #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index f762bacb2358..de4576e8d17a 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -103,12 +103,6 @@ */ #define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE) -/* Masks for actual TLB "PD"s */ -#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) -#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) - -#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) - /************************************************************************** * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) * -- cgit From fe6cb7b043b69cd9498616592bb9e28648fb4f7a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 13 Sep 2019 15:25:52 -0700 Subject: ARC: mm: disintegrate pgtable.h into levels and flags - pgtable-bits-arcv2.h (MMU specific page table flags) - pgtable-levels.h (paging levels) No functional changes, but paves way for easy addition of new MMU code with different bits and levels etc Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgtable-bits-arcv2.h | 149 ++++++++++++++++ arch/arc/include/asm/pgtable-levels.h | 95 ++++++++++ arch/arc/include/asm/pgtable.h | 277 +----------------------------- 3 files changed, 248 insertions(+), 273 deletions(-) create mode 100644 arch/arc/include/asm/pgtable-bits-arcv2.h create mode 100644 arch/arc/include/asm/pgtable-levels.h (limited to 'arch') diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h new file mode 100644 index 000000000000..183d23bc1e00 --- /dev/null +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +/* + * page table flags for software walked/managed MMUv3 (ARC700) and MMUv4 (HS) + * There correspond to the corresponding bits in the TLB + */ + +#ifndef _ASM_ARC_PGTABLE_BITS_ARCV2_H +#define _ASM_ARC_PGTABLE_BITS_ARCV2_H + +#ifdef CONFIG_ARC_CACHE_PAGES +#define _PAGE_CACHEABLE (1 << 0) /* Cached (H) */ +#else +#define _PAGE_CACHEABLE 0 +#endif + +#define _PAGE_EXECUTE (1 << 1) /* User Execute (H) */ +#define _PAGE_WRITE (1 << 2) /* User Write (H) */ +#define _PAGE_READ (1 << 3) /* User Read (H) */ +#define _PAGE_ACCESSED (1 << 4) /* Accessed (s) */ +#define _PAGE_DIRTY (1 << 5) /* Modified (s) */ +#define _PAGE_SPECIAL (1 << 6) +#define _PAGE_GLOBAL (1 << 8) /* ASID agnostic (H) */ +#define _PAGE_PRESENT (1 << 9) /* PTE/TLB Valid (H) */ + +#ifdef CONFIG_ARC_MMU_V4 +#define _PAGE_HW_SZ (1 << 10) /* Normal/super (H) */ +#else +#define _PAGE_HW_SZ 0 +#endif + +/* Defaults for every user page */ +#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) + +/* Set of bits not changed in pte_modify */ +#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \ + _PAGE_SPECIAL) + +/* More Abbrevaited helpers */ +#define PAGE_U_NONE __pgprot(___DEF) +#define PAGE_U_R __pgprot(___DEF | _PAGE_READ) +#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE) +#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE) +#define PAGE_U_X_W_R __pgprot(___DEF \ + | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE) +#define PAGE_KERNEL __pgprot(___DEF | _PAGE_GLOBAL \ + | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE) + +#define PAGE_SHARED PAGE_U_W_R + +#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE)) + +/* + * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) + * + * Certain cases have 1:1 mapping + * e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED + * which directly corresponds to PAGE_U_X_R + * + * Other rules which cause the divergence from 1:1 mapping + * + * 1. Although ARC700 can do exclusive execute/write protection (meaning R + * can be tracked independet of X/W unlike some other CPUs), still to + * keep things consistent with other archs: + * -Write implies Read: W => R + * -Execute implies Read: X => R + * + * 2. 
Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W + * This is to enable COW mechanism + */ + /* xwr */ +#define __P000 PAGE_U_NONE +#define __P001 PAGE_U_R +#define __P010 PAGE_U_R /* Pvt-W => !W */ +#define __P011 PAGE_U_R /* Pvt-W => !W */ +#define __P100 PAGE_U_X_R /* X => R */ +#define __P101 PAGE_U_X_R +#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */ +#define __P111 PAGE_U_X_R /* Pvt-W => !W */ + +#define __S000 PAGE_U_NONE +#define __S001 PAGE_U_R +#define __S010 PAGE_U_W_R /* W => R */ +#define __S011 PAGE_U_W_R +#define __S100 PAGE_U_X_R /* X => R */ +#define __S101 PAGE_U_X_R +#define __S110 PAGE_U_X_W_R /* X => R */ +#define __S111 PAGE_U_X_W_R + +#ifndef __ASSEMBLY__ + +#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) +#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) +#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) +#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL) + +#define PTE_BIT_FUNC(fn, op) \ + static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } + +PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT)); +PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE)); +PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE)); +PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY)); +PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY)); +PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED)); +PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); +PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); +PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + set_pte(ptep, pteval); +} + +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep); + +/* Encode swap {type,off} tuple into PTE + * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that + * PAGE_PRESENT is zero in a PTE holding swap "identifier" + */ +#define __swp_entry(type, off) ((swp_entry_t) \ + { ((type) & 0x1f) | ((off) << 13) }) + +/* Decode a PTE containing swap "identifier "into constituents */ +#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f) +#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13) + +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#define kern_addr_valid(addr) (1) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#include +#endif + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h new file mode 100644 index 000000000000..e65fb8c9da12 --- /dev/null +++ b/arch/arc/include/asm/pgtable-levels.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Synopsys, Inc. 
(www.synopsys.com) + */ + +/* + * Helpers for implemenintg paging levels + */ + +#ifndef _ASM_ARC_PGTABLE_LEVELS_H +#define _ASM_ARC_PGTABLE_LEVELS_H + +/* + * 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS) + * + * [31] 32 bit virtual address [0] + * ------------------------------------------------------- + * | | <---------- PGDIR_SHIFT ----------> | + * | | | <-- PAGE_SHIFT --> | + * ------------------------------------------------------- + * | | | + * | | --> off in page frame + * | ---> index into Page Table + * ----> index into Page Directory + * + * Given software walk, the vaddr split is arbitrary set to 11:8:13 + * However enabling of super page in a 2 level regime pegs PGDIR_SHIFT to + * super page size. + */ + +#if defined(CONFIG_ARC_HUGEPAGE_16M) +#define PGDIR_SHIFT 24 +#elif defined(CONFIG_ARC_HUGEPAGE_2M) +#define PGDIR_SHIFT 21 +#else +/* + * No Super page case + * Default value provides 11:8:13 (8K), 11:9:12 (4K) + */ +#define PGDIR_SHIFT 21 + +#endif + +#define PGDIR_SIZE BIT(PGDIR_SHIFT) /* vaddr span, not PDG sz */ +#define PGDIR_MASK (~(PGDIR_SIZE - 1)) + +#define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT) + +#define PTRS_PER_PTE BIT(PGDIR_SHIFT - PAGE_SHIFT) + +#ifndef __ASSEMBLY__ + +#include + +/* + * 1st level paging: pgd + */ +#define pgd_index(addr) ((addr) >> PGDIR_SHIFT) +#define pgd_offset(mm, addr) (((mm)->pgd) + pgd_index(addr)) +#define pgd_offset_k(addr) pgd_offset(&init_mm, addr) +#define pgd_ERROR(e) \ + pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * Due to the strange way generic pgtable level folding works, in a 2 level + * setup, pmd_val() returns pgd, so these pmd_* macros actually work on pgd + */ +#define pmd_none(x) (!pmd_val(x)) +#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK)) +#define pmd_present(x) (pmd_val(x)) +#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) +#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK) +#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd)) +#define set_pmd(pmdp, pmd) (*(pmdp) = pmd) +#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) + +#define pte_ERROR(e) \ + pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) + +#define pte_none(x) (!pte_val(x)) +#define pte_present(x) (pte_val(x) & _PAGE_PRESENT) +#define pte_clear(mm,addr,ptep) set_pte_at(mm, addr, ptep, __pte(0)) +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) +#define set_pte(ptep, pte) ((*(ptep)) = (pte)) +#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) +#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot)) +#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) + +#ifdef CONFIG_ISA_ARCV2 +#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index de4576e8d17a..9320b04c04bf 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -1,304 +1,35 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * vineetg: May 2011 - * -Folded PAGE_PRESENT (used by VM) and PAGE_VALID (used by MMU) into 1. 
- * They are semantically the same although in different contexts - * VALID marks a TLB entry exists and it will only happen if PRESENT - * - Utilise some unused free bits to confine PTE flags to 12 bits - * This is a must for 4k pg-sz - * - * vineetg: Mar 2011 - changes to accommodate MMU TLB Page Descriptor mods - * -TLB Locking never really existed, except for initial specs - * -SILENT_xxx not needed for our port - * -Per my request, MMU V3 changes the layout of some of the bits - * to avoid a few shifts in TLB Miss handlers. - * - * vineetg: April 2010 - * -PGD entry no longer contains any flags. If empty it is 0, otherwise has - * Pg-Tbl ptr. Thus pmd_present(), pmd_valid(), pmd_set( ) become simpler - * - * vineetg: April 2010 - * -Switched form 8:11:13 split for page table lookup to 11:8:13 - * -this speeds up page table allocation itself as we now have to memset 1K - * instead of 8k per page table. - * -TODO: Right now page table alloc is 8K and rest 7K is unused - * need to optimise it - * - * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 */ #ifndef _ASM_ARC_PGTABLE_H #define _ASM_ARC_PGTABLE_H #include -#include + +#include +#include #include #include -/************************************************************************** - * Page Table Flags - * - * ARC700 MMU only deals with softare managed TLB entries. - * Page Tables are purely for Linux VM's consumption and the bits below are - * suited to that (uniqueness). Hence some are not implemented in the TLB and - * some have different value in TLB. - * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in - * seperate PD0 and PD1, which combined forms a translation entry) - * while for PTE perspective, they are 8 and 9 respectively - * with MMU v3: Most bits (except SHARED) represent the exact hardware pos - * (saves some bit shift ops in TLB Miss hdlrs) - */ - -#define _PAGE_CACHEABLE (1<<0) /* Page is cached (H) */ -#define _PAGE_EXECUTE (1<<1) /* Page has user execute perm (H) */ -#define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */ -#define _PAGE_READ (1<<3) /* Page has user read perm (H) */ -#define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */ -#define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */ -#define _PAGE_SPECIAL (1<<6) - -#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ -#define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ - -#ifdef CONFIG_ARC_MMU_V4 -#define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */ -#endif - -#define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr - usable for shared TLB entries (H) */ -/* vmalloc permissions */ -#define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \ - _PAGE_GLOBAL | _PAGE_PRESENT) - -#ifndef CONFIG_ARC_CACHE_PAGES -#undef _PAGE_CACHEABLE -#define _PAGE_CACHEABLE 0 -#endif - -#ifndef _PAGE_HW_SZ -#define _PAGE_HW_SZ 0 -#endif - -/* Defaults for every user page */ -#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) - -/* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SPECIAL) -/* More Abbrevaited helpers */ -#define PAGE_U_NONE __pgprot(___DEF) -#define PAGE_U_R __pgprot(___DEF | _PAGE_READ) -#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE) -#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE) -#define PAGE_U_X_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE | \ - _PAGE_EXECUTE) - -#define PAGE_SHARED PAGE_U_W_R - -/* While kernel runs out of unstranslated space, 
vmalloc/modules use a chunk of - * user vaddr space - visible in all addr spaces, but kernel mode only - * Thus Global, all-kernel-access, no-user-access, cached - */ -#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE) - -/************************************************************************** - * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) - * - * Certain cases have 1:1 mapping - * e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED - * which directly corresponds to PAGE_U_X_R - * - * Other rules which cause the divergence from 1:1 mapping - * - * 1. Although ARC700 can do exclusive execute/write protection (meaning R - * can be tracked independet of X/W unlike some other CPUs), still to - * keep things consistent with other archs: - * -Write implies Read: W => R - * -Execute implies Read: X => R - * - * 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W - * This is to enable COW mechanism - */ - /* xwr */ -#define __P000 PAGE_U_NONE -#define __P001 PAGE_U_R -#define __P010 PAGE_U_R /* Pvt-W => !W */ -#define __P011 PAGE_U_R /* Pvt-W => !W */ -#define __P100 PAGE_U_X_R /* X => R */ -#define __P101 PAGE_U_X_R -#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */ -#define __P111 PAGE_U_X_R /* Pvt-W => !W */ - -#define __S000 PAGE_U_NONE -#define __S001 PAGE_U_R -#define __S010 PAGE_U_W_R /* W => R */ -#define __S011 PAGE_U_W_R -#define __S100 PAGE_U_X_R /* X => R */ -#define __S101 PAGE_U_X_R -#define __S110 PAGE_U_X_W_R /* X => R */ -#define __S111 PAGE_U_X_W_R - -/**************************************************************** - * 2 tier (PGD:PTE) software page walker - * - * [31] 32 bit virtual address [0] - * ------------------------------------------------------- - * | | <------------ PGDIR_SHIFT ----------> | - * | | | - * | BITS_FOR_PGD | BITS_FOR_PTE | <-- PAGE_SHIFT --> | - * ------------------------------------------------------- - * | | | - * | | --> off in page frame - * | ---> index into Page Table - * ----> index into Page Directory - * - * In a single page size configuration, only PAGE_SHIFT is fixed - * So both PGD and PTE sizing can be tweaked - * e.g. 8K page (PAGE_SHIFT 13) can have - * - PGDIR_SHIFT 21 -> 11:8:13 address split - * - PGDIR_SHIFT 24 -> 8:11:13 address split - * - * If Super Page is configured, PGDIR_SHIFT becomes fixed too, - * so the sizing flexibility is gone. - */ - -#if defined(CONFIG_ARC_HUGEPAGE_16M) -#define PGDIR_SHIFT 24 -#elif defined(CONFIG_ARC_HUGEPAGE_2M) -#define PGDIR_SHIFT 21 -#else -/* - * Only Normal page support so "hackable" (see comment above) - * Default value provides 11:8:13 (8K), 11:9:12 (4K) - */ -#define PGDIR_SHIFT 21 -#endif - -#define BITS_FOR_PTE (PGDIR_SHIFT - PAGE_SHIFT) -#define BITS_FOR_PGD (32 - PGDIR_SHIFT) - -#define PGDIR_SIZE BIT(PGDIR_SHIFT) /* vaddr span, not PDG sz */ -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -#define PTRS_PER_PTE BIT(BITS_FOR_PTE) -#define PTRS_PER_PGD BIT(BITS_FOR_PGD) - /* * Number of entries a user land program use. * TASK_SIZE is the maximum vaddr that can be used by a userland program. 
*/ #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) - -/**************************************************************** - * Bucket load of VM Helpers - */ - #ifndef __ASSEMBLY__ -#define pte_ERROR(e) \ - pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) -#define pgd_ERROR(e) \ - pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) - -/* the zero page used for uninitialized and anonymous pages */ extern char empty_zero_page[PAGE_SIZE]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) -#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval)) -#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) - -/* find the page descriptor of the Page Tbl ref by PMD entry */ -#define pmd_page(pmd) virt_to_page(pmd_val(pmd) & PAGE_MASK) - -/* find the logical addr (phy for ARC) of the Page Tbl ref by PMD entry */ -#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK) - -#define pte_none(x) (!pte_val(x)) -#define pte_present(x) (pte_val(x) & _PAGE_PRESENT) -#define pte_clear(mm, addr, ptep) set_pte_at(mm, addr, ptep, __pte(0)) - -#define pmd_none(x) (!pmd_val(x)) -#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK)) -#define pmd_present(x) (pmd_val(x)) -#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) -#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) - -#define pte_page(pte) pfn_to_page(pte_pfn(pte)) -#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) -#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot)) - -/* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/ -#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) - -/* Zoo of pte_xxx function */ -#define pte_read(pte) (pte_val(pte) & _PAGE_READ) -#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) -#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) -#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) -#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL) - -#define PTE_BIT_FUNC(fn, op) \ - static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } - -PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT)); -PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE)); -PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE)); -PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY)); -PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY)); -PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED)); -PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); -PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE)); -PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE)); -PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); -PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); - -static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -{ - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); -} +extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); /* Macro to mark a page protection as uncacheable */ #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE)) -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) -{ - set_pte(ptep, pteval); -} - extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, - pte_t *ptep); - -/* Encode swap {type,off} tuple into PTE - * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that - * PAGE_PRESENT is zero in a PTE holding swap "identifier" - */ -#define __swp_entry(type, off) ((swp_entry_t) { \ - ((type) & 0x1f) | ((off) << 13) }) - -/* Decode a PTE containing swap "identifier "into constituents */ -#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f) -#define 
__swp_offset(pte_lookalike) ((pte_lookalike).val >> 13) - -/* NOPs, to keep generic kernel happy */ -#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) - -#define kern_addr_valid(addr) (1) - -#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) - -/* - * remap a physical page `pfn' of size `size' with page protection `prot' - * into virtual address `from' - */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#include -#endif /* to cope with aliasing VIPT cache */ #define HAVE_ARCH_UNMAPPED_AREA -- cgit From f35534a2bcc7fd614a11aa7e3d91a0b1d6c962fb Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 1 Oct 2020 16:42:15 -0700 Subject: ARC: mm: hack to allow 2 level build with 4 level code PMD_SHIFT is mapped to PUD_SHIFT or PGD_SHIFT by asm-generic/pgtable-* but only for !__ASSEMBLY__ tlbex.S asm code has PTRS_PER_PTE which uses PMD_SHIFT hence barfs for CONFIG_PGTABLE_LEVEL={2,3} and works for 4. So add a workaround local to tlbex.S - the proper fix is to change asm-generic/pgtable-* headers to expose the defines for __ASSEMBLY__ too Signed-off-by: Vineet Gupta --- arch/arc/mm/tlbex.S | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 0b4bb62fa0ab..c4a5f16444ce 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -145,6 +145,14 @@ ex_saved_reg1: ;TLB Miss handling Code ;============================================================================ +#ifndef PMD_SHIFT +#define PMD_SHIFT PUD_SHIFT +#endif + +#ifndef PUD_SHIFT +#define PUD_SHIFT PGDIR_SHIFT +#endif + ;----------------------------------------------------------------------------- ; This macro does the page-table lookup for the faulting address. ; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address -- cgit From 803930ee35fafd005fd978d0c0a0d8db5bcba654 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 7 Aug 2021 15:14:27 +0800 Subject: riscv: use strscpy to replace strlcpy The strlcpy should not be used because it doesn't limit the source length. As linus says, it's a completely useless function if you can't implicitly trust the source string - but that is almost always why people think they should use it! All in all the BSD function will lead some potential bugs. But the strscpy doesn't require reading memory from the src string beyond the specified "count" bytes, and since the return value is easier to error-check than strlcpy()'s. In addition, the implementation is robust to the string changing out from underneath it, unlike the current strlcpy() implementation. Thus, We prefer using strscpy instead of strlcpy. 
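
As a hedged illustration of the return-value difference (kernel-style sketch; copy_cmdline() and its parameters are made up for the example):

#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/string.h>
#include <linux/types.h>

/* copy_cmdline() is a made-up caller, shown only for the return check. */
static void copy_cmdline(char *dst, size_t dst_sz, const char *src)
{
	ssize_t ret;

	/*
	 * strlcpy() would return strlen(src), so it must traverse the whole
	 * source even when only dst_sz bytes can be copied; strscpy() stops
	 * at dst_sz and signals truncation in its return value.
	 */
	ret = strscpy(dst, src, dst_sz);
	if (ret == -E2BIG)
		pr_warn("command line truncated\n");
}

strscpy() returns the number of bytes copied or -E2BIG on truncation, so the error check does not depend on computing strlen(src) first.
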
Signed-off-by: Jason Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 18bd0e4bc36c..2dc62ebc0001 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -255,7 +255,7 @@ static void __init parse_dtb(void) pr_err("No DTB passed to the kernel\n"); #ifdef CONFIG_CMDLINE_FORCE - strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); + strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); pr_info("Forcing kernel command line to: %s\n", boot_command_line); #endif } -- cgit From a290f510a178830a01bfc06e66a54bbe4ece5d2a Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Wed, 25 Aug 2021 22:52:45 -0700 Subject: RISC-V: Fix VDSO build for !MMU We don't have a VDSO for the !MMU configurations, so don't try to build one. Fixes: fde9c59aebaf ("riscv: explicitly use symbol offsets for VDSO") Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 2 ++ arch/riscv/include/asm/vdso.h | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index e026b2d0a5a4..83ee0e71204c 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -108,9 +108,11 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ +ifeq ($(CONFIG_MMU),y) prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h +endif ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy) diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index d8d003c2b5a3..893e47195e30 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -8,6 +8,13 @@ #ifndef _ASM_RISCV_VDSO_H #define _ASM_RISCV_VDSO_H + +/* + * All systems with an MMU have a VDSO, but systems without an MMU don't + * support shared libraries and therefor don't have one. + */ +#ifdef CONFIG_MMU + #include #include @@ -19,6 +26,8 @@ struct vdso_data { #define VDSO_SYMBOL(base, name) \ (void __user *)((unsigned long)(base) + __vdso_##name##_offset) +#endif /* CONFIG_MMU */ + asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); #endif /* _ASM_RISCV_VDSO_H */ -- cgit From 8ce8a6fce9bfd3fcabe230ad104e2caf08b2e58d Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Mon, 23 Aug 2021 22:39:40 +0000 Subject: KVM: arm64: Trim guest debug exception handling The switch-case for handling guest debug exception covers all the debug exception classes, but functionally, doesn't do anything with them other than ESR_ELx_EC_WATCHPT_LOW. Moreover, even though handled well, the 'default' case could be confusing from a security point of view, stating that the guests' actions can potentially flood the syslog. But in reality, the code is unreachable. Hence, trim down the function to only handle the case with ESR_ELx_EC_WATCHPT_LOW with a simple 'if' check. 
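
For context, a minimal sketch of the userspace half of this contract, assuming an arm64 host where struct kvm_debug_exit_arch exposes the hsr and far fields used below:

#include <stdio.h>
#include <linux/kvm.h>

/* "run" is the vcpu's mmap()ed struct kvm_run, inspected after KVM_RUN. */
static void handle_debug_exit(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_DEBUG)
		return;

	/* debug.arch.far is only filled in for watchpoint exits */
	printf("guest debug: esr=%#x far=%#llx\n",
	       run->debug.arch.hsr,
	       (unsigned long long)run->debug.arch.far);
}
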
Suggested-by: Marc Zyngier
Signed-off-by: Raghavendra Rao Ananta
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210823223940.1878930-1-rananta@google.com
---
 arch/arm64/kvm/handle_exit.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 6f48336b1d86..b143f1ab6520 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -113,34 +113,20 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
  * guest and host are using the same debug facilities it will be up to
  * userspace to re-inject the correct exception for guest delivery.
  *
- * @return: 0 (while setting vcpu->run->exit_reason), -1 for error
+ * @return: 0 (while setting vcpu->run->exit_reason)
  */
 static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	u32 esr = kvm_vcpu_get_esr(vcpu);
-	int ret = 0;
 
 	run->exit_reason = KVM_EXIT_DEBUG;
 	run->debug.arch.hsr = esr;
 
-	switch (ESR_ELx_EC(esr)) {
-	case ESR_ELx_EC_WATCHPT_LOW:
+	if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW)
 		run->debug.arch.far = vcpu->arch.fault.far_el2;
-		fallthrough;
-	case ESR_ELx_EC_SOFTSTP_LOW:
-	case ESR_ELx_EC_BREAKPT_LOW:
-	case ESR_ELx_EC_BKPT32:
-	case ESR_ELx_EC_BRK64:
-		break;
-	default:
-		kvm_err("%s: un-handled case esr: %#08x\n",
-			__func__, (unsigned int) esr);
-		ret = -1;
-		break;
-	}
 
-	return ret;
+	return 0;
 }
 
 static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
--
cgit

From 4f8e78c0757e3c5a65d9d8ac76e2434c71a78f5a Mon Sep 17 00:00:00 2001
From: Xiongwei Song
Date: Sat, 7 Aug 2021 09:02:36 +0800
Subject: powerpc: Add esr as a synonym for pt_regs.dsisr

Create an anonymous union for the dsisr and esr registers, so we can
reference esr to get the exception detail when CONFIG_4xx=y or
CONFIG_BOOKE=y, and reference dsisr otherwise. This makes the code
clearer.
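
The construct relied on here is a C11 anonymous union: both member names alias the same storage, so existing dsisr users compile unchanged. A minimal standalone sketch (regs_demo is a made-up pt_regs-like type):

#include <assert.h>
#include <stddef.h>

struct regs_demo {			/* made-up pt_regs-like type */
	unsigned long dar;
	union {				/* anonymous: names join the struct */
		unsigned long dsisr;
		unsigned long esr;
	};
};

int main(void)
{
	struct regs_demo r = { .dsisr = 0x42 };

	/* Both names must refer to the same word at the same offset. */
	static_assert(offsetof(struct regs_demo, dsisr) ==
		      offsetof(struct regs_demo, esr), "aliases diverged");
	return r.esr == 0x42 ? 0 : 1;	/* same storage, two names */
}
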
Signed-off-by: Xiongwei Song [mpe: Reword commit title] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210807010239.416055-2-sxwjean@me.com --- arch/powerpc/include/asm/ptrace.h | 5 ++++- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/ptrace/ptrace.c | 2 ++ arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/platforms/44x/machine_check.c | 4 ++-- arch/powerpc/platforms/4xx/machine_check.c | 2 +- 6 files changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 333979cf5d1e..ab58939653db 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -44,7 +44,10 @@ struct pt_regs #endif unsigned long trap; unsigned long dar; - unsigned long dsisr; + union { + unsigned long dsisr; + unsigned long esr; + }; unsigned long result; }; }; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 185beb290580..f74af8f9133c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1499,7 +1499,7 @@ static void __show_regs(struct pt_regs *regs) trap == INTERRUPT_DATA_STORAGE || trap == INTERRUPT_ALIGNMENT) { if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) - pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); + pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->esr); else pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 0a0a33eb0d28..a222fd4d6334 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -375,6 +375,8 @@ void __init pt_regs_check(void) offsetof(struct user_pt_regs, dar)); BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) != offsetof(struct user_pt_regs, dsisr)); + BUILD_BUG_ON(offsetof(struct pt_regs, esr) != + offsetof(struct user_pt_regs, dsisr)); BUILD_BUG_ON(offsetof(struct pt_regs, result) != offsetof(struct user_pt_regs, result)); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 51d4f5faf425..5463d201d465 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -562,7 +562,7 @@ static inline int check_io_access(struct pt_regs *regs) #ifdef CONFIG_PPC_ADV_DEBUG_REGS /* On 4xx, the reason for the machine check or program exception is in the ESR. 
*/ -#define get_reason(regs) ((regs)->dsisr) +#define get_reason(regs) ((regs)->esr) #define REASON_FP ESR_FP #define REASON_ILLEGAL (ESR_PIL | ESR_PUO) #define REASON_PRIVILEGED ESR_PPR diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c index a5c898bb9bab..5d19daacd78a 100644 --- a/arch/powerpc/platforms/44x/machine_check.c +++ b/arch/powerpc/platforms/44x/machine_check.c @@ -11,7 +11,7 @@ int machine_check_440A(struct pt_regs *regs) { - unsigned long reason = regs->dsisr; + unsigned long reason = regs->esr; printk("Machine check in kernel mode.\n"); if (reason & ESR_IMCP){ @@ -48,7 +48,7 @@ int machine_check_440A(struct pt_regs *regs) #ifdef CONFIG_PPC_47x int machine_check_47x(struct pt_regs *regs) { - unsigned long reason = regs->dsisr; + unsigned long reason = regs->esr; u32 mcsr; printk(KERN_ERR "Machine check in kernel mode.\n"); diff --git a/arch/powerpc/platforms/4xx/machine_check.c b/arch/powerpc/platforms/4xx/machine_check.c index a71c29892a91..a905da1d6f41 100644 --- a/arch/powerpc/platforms/4xx/machine_check.c +++ b/arch/powerpc/platforms/4xx/machine_check.c @@ -10,7 +10,7 @@ int machine_check_4xx(struct pt_regs *regs) { - unsigned long reason = regs->dsisr; + unsigned long reason = regs->esr; if (reason & ESR_IMCP) { printk("Instruction"); -- cgit From cfa47772ca8d53d7a6c9b331a7f6e7c4c9827214 Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Sat, 7 Aug 2021 09:02:37 +0800 Subject: powerpc/64e: Get esr offset with _ESR macro Use _ESR to get the offset of esr register in pr_regs for 64e cpus. Signed-off-by: Xiongwei Song Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210807010239.416055-3-sxwjean@me.com --- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/exceptions-64e.S | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a47eefa09bcb..f4ebc435fd78 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -287,6 +287,7 @@ int main(void) STACK_PT_REGS_OFFSET(_XER, xer); STACK_PT_REGS_OFFSET(_DAR, dar); STACK_PT_REGS_OFFSET(_DSISR, dsisr); + STACK_PT_REGS_OFFSET(_ESR, esr); STACK_PT_REGS_OFFSET(ORIG_GPR3, orig_gpr3); STACK_PT_REGS_OFFSET(RESULT, result); STACK_PT_REGS_OFFSET(_TRAP, trap); @@ -298,7 +299,6 @@ int main(void) * we use them to hold SRR0 and SRR1. 
*/ STACK_PT_REGS_OFFSET(_DEAR, dar); - STACK_PT_REGS_OFFSET(_ESR, dsisr); #else /* CONFIG_PPC64 */ STACK_PT_REGS_OFFSET(SOFTE, softe); STACK_PT_REGS_OFFSET(_PPR, ppr); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 7e0943d9f9b0..bf8822a5ae0e 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -546,7 +546,7 @@ __end_interrupts: mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR std r14,_DAR(r1) - std r15,_DSISR(r1) + std r15,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x300) @@ -559,7 +559,7 @@ __end_interrupts: li r15,0 mr r14,r10 std r14,_DAR(r1) - std r15,_DSISR(r1) + std r15,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x400) @@ -576,7 +576,7 @@ __end_interrupts: mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR std r14,_DAR(r1) - std r15,_DSISR(r1) + std r15,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x600) @@ -587,7 +587,7 @@ __end_interrupts: NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM, PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR - std r14,_DSISR(r1) + std r14,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) EXCEPTION_COMMON(0x700) addi r3,r1,STACK_FRAME_OVERHEAD @@ -1058,7 +1058,7 @@ bad_stack_book3e: mfspr r10,SPRN_DEAR mfspr r11,SPRN_ESR std r10,_DAR(r1) - std r11,_DSISR(r1) + std r11,_ESR(r1) std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ -- cgit From 4872cbd0ca35ca5b20d52e2539e7e1950f126e7b Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Sat, 7 Aug 2021 09:02:38 +0800 Subject: powerpc: Add dear as a synonym for pt_regs.dar register Create an anonymous union for dar and dear regsiters, we can reference dear to get the effective address when CONFIG_4xx=y or CONFIG_BOOKE=y. Otherwise, reference dar. This makes code more clear. 
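
The BUILD_BUG_ON() lines added to pt_regs_check() in these patches are compile-time layout guards. The same idea in plain C, with hypothetical mirror structs standing in for user_pt_regs and the kernel's pt_regs:

#include <stddef.h>

/* Hypothetical mirrors of user_pt_regs (UAPI) and the kernel pt_regs. */
struct user_regs { unsigned long trap, dar, dsisr; };
struct kern_regs {
	unsigned long trap;
	union { unsigned long dar; unsigned long dear; };
	union { unsigned long dsisr; unsigned long esr; };
};

/* The plain-C core of BUILD_BUG_ON(): refuse to compile on layout drift. */
_Static_assert(offsetof(struct kern_regs, dear) ==
	       offsetof(struct user_regs, dar), "dear must alias dar");
_Static_assert(offsetof(struct kern_regs, esr) ==
	       offsetof(struct user_regs, dsisr), "esr must alias dsisr");

int main(void) { return 0; }
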
Signed-off-by: Xiongwei Song [mpe: Reword commit title] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210807010239.416055-4-sxwjean@me.com --- arch/powerpc/include/asm/ptrace.h | 5 ++++- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/ptrace/ptrace.c | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index ab58939653db..5114ddba6f16 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -43,7 +43,10 @@ struct pt_regs unsigned long mq; #endif unsigned long trap; - unsigned long dar; + union { + unsigned long dar; + unsigned long dear; + }; union { unsigned long dsisr; unsigned long esr; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f74af8f9133c..50436b52c213 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1499,7 +1499,7 @@ static void __show_regs(struct pt_regs *regs) trap == INTERRUPT_DATA_STORAGE || trap == INTERRUPT_ALIGNMENT) { if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) - pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->esr); + pr_cont("DEAR: "REG" ESR: "REG" ", regs->dear, regs->esr); else pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index a222fd4d6334..7c7093c17c45 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -373,6 +373,8 @@ void __init pt_regs_check(void) offsetof(struct user_pt_regs, trap)); BUILD_BUG_ON(offsetof(struct pt_regs, dar) != offsetof(struct user_pt_regs, dar)); + BUILD_BUG_ON(offsetof(struct pt_regs, dear) != + offsetof(struct user_pt_regs, dar)); BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) != offsetof(struct user_pt_regs, dsisr)); BUILD_BUG_ON(offsetof(struct pt_regs, esr) != -- cgit From d9db6e420268b2d561731468a31f0b15e2e9a145 Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Sat, 7 Aug 2021 09:02:39 +0800 Subject: powerpc/64e: Get dear offset with _DEAR macro Use _DEAR to get the offset of dear register in pr_regs for 64e cpus. Signed-off-by: Xiongwei Song Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210807010239.416055-5-sxwjean@me.com --- arch/powerpc/kernel/asm-offsets.c | 13 +++---------- arch/powerpc/kernel/exceptions-64e.S | 8 ++++---- 2 files changed, 7 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f4ebc435fd78..8357d5fcd09e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -286,23 +286,16 @@ int main(void) STACK_PT_REGS_OFFSET(_CCR, ccr); STACK_PT_REGS_OFFSET(_XER, xer); STACK_PT_REGS_OFFSET(_DAR, dar); + STACK_PT_REGS_OFFSET(_DEAR, dear); STACK_PT_REGS_OFFSET(_DSISR, dsisr); STACK_PT_REGS_OFFSET(_ESR, esr); STACK_PT_REGS_OFFSET(ORIG_GPR3, orig_gpr3); STACK_PT_REGS_OFFSET(RESULT, result); STACK_PT_REGS_OFFSET(_TRAP, trap); -#ifndef CONFIG_PPC64 - /* - * The PowerPC 400-class & Book-E processors have neither the DAR - * nor the DSISR SPRs. Hence, we overload them to hold the similar - * DEAR and ESR SPRs for such processors. For critical interrupts - * we use them to hold SRR0 and SRR1. 
- */ - STACK_PT_REGS_OFFSET(_DEAR, dar); -#else /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC64 STACK_PT_REGS_OFFSET(SOFTE, softe); STACK_PT_REGS_OFFSET(_PPR, ppr); -#endif /* CONFIG_PPC64 */ +#endif #ifdef CONFIG_PPC_PKEY STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index bf8822a5ae0e..711c66b76df1 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -545,7 +545,7 @@ __end_interrupts: PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR - std r14,_DAR(r1) + std r14,_DEAR(r1) std r15,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) @@ -558,7 +558,7 @@ __end_interrupts: PROLOG_ADDITION_2REGS) li r15,0 mr r14,r10 - std r14,_DAR(r1) + std r14,_DEAR(r1) std r15,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) @@ -575,7 +575,7 @@ __end_interrupts: PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR - std r14,_DAR(r1) + std r14,_DEAR(r1) std r15,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) @@ -1057,7 +1057,7 @@ bad_stack_book3e: std r11,_CCR(r1) mfspr r10,SPRN_DEAR mfspr r11,SPRN_ESR - std r10,_DAR(r1) + std r10,_DEAR(r1) std r11,_ESR(r1) std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ -- cgit From 133c17a1788d68c9fff59d5f724a4ba14647a16d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 23 Aug 2021 08:24:20 +0000 Subject: powerpc: Remove MSR_PR check in interrupt_exit_{user/kernel}_prepare() In those hot functions that are called at every interrupt, any saved cycle is worth it. interrupt_exit_user_prepare() and interrupt_exit_kernel_prepare() are called from three places: - From entry_32.S - From interrupt_64.S - From interrupt_exit_user_restart() and interrupt_exit_kernel_restart() In entry_32.S, there are inambiguously called based on MSR_PR: interrupt_return: lwz r4,_MSR(r1) addi r3,r1,STACK_FRAME_OVERHEAD andi. r0,r4,MSR_PR beq .Lkernel_interrupt_return bl interrupt_exit_user_prepare ... .Lkernel_interrupt_return: bl interrupt_exit_kernel_prepare In interrupt_64.S, that's similar: interrupt_return_\srr\(): ld r4,_MSR(r1) andi. r0,r4,MSR_PR beq interrupt_return_\srr\()_kernel interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */ addi r3,r1,STACK_FRAME_OVERHEAD bl interrupt_exit_user_prepare ... interrupt_return_\srr\()_kernel: addi r3,r1,STACK_FRAME_OVERHEAD bl interrupt_exit_kernel_prepare In interrupt_exit_user_restart() and interrupt_exit_kernel_restart(), MSR_PR is verified respectively by BUG_ON(!user_mode(regs)) and BUG_ON(user_mode(regs)) prior to calling interrupt_exit_user_prepare() and interrupt_exit_kernel_prepare(). The verification in interrupt_exit_user_prepare() and interrupt_exit_kernel_prepare() are therefore useless and can be removed. 
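
For reference, the test the removed BUG_ON()s duplicated is a single MSR bit; powerpc's user_mode() boils down to roughly the following (a sketch, not the literal kernel definition):

#include <linux/types.h>
#include <asm/ptrace.h>	/* struct pt_regs */
#include <asm/reg.h>	/* MSR_PR */

/* MSR_PR set means the interrupted context ran in problem (user) state;
 * the asm above (andi. r0,r4,MSR_PR; beq ...) tests exactly this bit,
 * which is why re-testing it in interrupt_exit_*_prepare() was redundant. */
static inline bool interrupted_from_user(const struct pt_regs *regs)
{
	return (regs->msr & MSR_PR) != 0;
}
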
Signed-off-by: Christophe Leroy Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/385ead49ccb66a259b25fee3eebf0bd4094068f3.1629707037.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/interrupt.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index abc2b3dc3f20..531f0a1dbabc 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -464,7 +464,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); - BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(arch_irq_disabled_regs(regs)); CT_WARN_ON(ct_state() == CONTEXT_USER); @@ -498,7 +497,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && unlikely(!(regs->msr & MSR_RI))) unrecoverable_exception(regs); - BUG_ON(regs->msr & MSR_PR); /* * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. -- cgit From 806c0e6e7e97adc17389c8dc1f52d4736f49299b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 23 Aug 2021 08:24:21 +0000 Subject: powerpc: Refactor verification of MSR_RI 40x and BOOKE don't have MSR_RI therefore all tests involving MSR_RI may be problematic on those plateforms. Create helpers to check or set MSR_RI in regs, and use them in common code. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c2fb93708196734f4176dda334aaa3055f213b89.1629707037.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ptrace.h | 23 +++++++++++++++++++++++ arch/powerpc/kernel/interrupt.c | 9 +++------ arch/powerpc/kernel/traps.c | 8 ++++---- arch/powerpc/mm/book3s64/slb.c | 2 +- arch/powerpc/platforms/embedded6xx/holly.c | 2 +- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 2 +- arch/powerpc/platforms/pasemi/idle.c | 2 +- arch/powerpc/platforms/powernv/opal.c | 2 +- arch/powerpc/platforms/pseries/ras.c | 2 +- arch/powerpc/sysdev/fsl_rio.c | 2 +- arch/powerpc/xmon/xmon.c | 16 +++------------- 11 files changed, 40 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 5114ddba6f16..f9c235c1d6ce 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -22,6 +22,7 @@ #include #include #include +#include #ifndef __ASSEMBLY__ struct pt_regs @@ -272,6 +273,28 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) regs->gpr[3] = rc; } +static inline bool cpu_has_msr_ri(void) +{ + return !IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x); +} + +static inline bool regs_is_unrecoverable(struct pt_regs *regs) +{ + return unlikely(cpu_has_msr_ri() && !(regs->msr & MSR_RI)); +} + +static inline void regs_set_recoverable(struct pt_regs *regs) +{ + if (cpu_has_msr_ri()) + regs_set_return_msr(regs, regs->msr | MSR_RI); +} + +static inline void regs_set_unrecoverable(struct pt_regs *regs) +{ + if (cpu_has_msr_ri()) + regs_set_return_msr(regs, regs->msr & ~MSR_RI); +} + #define arch_has_single_step() (1) #define arch_has_block_step() (true) #define ARCH_HAS_USER_SINGLE_STEP_REPORT diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 531f0a1dbabc..a73f3f70a657 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -92,8 +92,7 @@ notrace long 
system_call_exception(long r3, long r4, long r5, CT_WARN_ON(ct_state() == CONTEXT_KERNEL); user_exit_irqoff(); - if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) - BUG_ON(!(regs->msr & MSR_RI)); + BUG_ON(regs_is_unrecoverable(regs)); BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(arch_irq_disabled_regs(regs)); @@ -462,8 +461,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) { unsigned long ret; - if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) - BUG_ON(!(regs->msr & MSR_RI)); + BUG_ON(regs_is_unrecoverable(regs)); BUG_ON(arch_irq_disabled_regs(regs)); CT_WARN_ON(ct_state() == CONTEXT_USER); @@ -494,8 +492,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) bool stack_store = current_thread_info()->flags & _TIF_EMULATE_STACK_STORE; - if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && - unlikely(!(regs->msr & MSR_RI))) + if (regs_is_unrecoverable(regs)) unrecoverable_exception(regs); /* * CT_WARN_ON comes here via program_check_exception, diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5463d201d465..3a344f5265c2 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -428,7 +428,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs) return; nonrecoverable: - regs_set_return_msr(regs, regs->msr & ~MSR_RI); + regs_set_unrecoverable(regs); #endif } DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception) @@ -498,7 +498,7 @@ out: die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) { + if (regs_is_unrecoverable(regs)) { /* For the reason explained in die_mce, nmi_exit before die */ nmi_exit(); die("Unrecoverable System Reset", regs, SIGABRT); @@ -550,7 +550,7 @@ static inline int check_io_access(struct pt_regs *regs) printk(KERN_DEBUG "%s bad port %lx at %p\n", (*nip & 0x100)? 
"OUT to": "IN from", regs->gpr[rb] - _IO_BASE, nip); - regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_recoverable(regs); regs_set_return_ip(regs, extable_fixup(entry)); return 1; } @@ -840,7 +840,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception) bail: /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) + if (regs_is_unrecoverable(regs)) die_mce("Unrecoverable Machine check", regs, SIGBUS); #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index c91bd85eb90e..f0037bcc47a0 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -822,7 +822,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault) /* IRQs are not reconciled here, so can't check irqs_disabled */ VM_WARN_ON(mfmsr() & MSR_EE); - if (unlikely(!(regs->msr & MSR_RI))) + if (regs_is_unrecoverable(regs)) return -EINVAL; /* diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 85521b3e7098..7a85b117f7a4 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -251,7 +251,7 @@ static int ppc750_machine_check_exception(struct pt_regs *regs) /* Are we prepared to handle this fault */ if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); - regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_recoverable(regs); regs_set_return_ip(regs, extable_fixup(entry)); return 1; } diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index d8da6a483e59..9eb9abb5bce2 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -173,7 +173,7 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs) /* Are we prepared to handle this fault */ if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); - regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_recoverable(regs); regs_set_return_ip(regs, extable_fixup(entry)); return 1; } diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c index 9b88e3cded7d..2a5b84268b77 100644 --- a/arch/powerpc/platforms/pasemi/idle.c +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -58,7 +58,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) restore_astate(hard_smp_processor_id()); /* everything handled */ - regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_recoverable(regs); return 1; } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2835376e61a4..e9d18519e650 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -588,7 +588,7 @@ static int opal_recover_mce(struct pt_regs *regs, { int recovered = 0; - if (!(regs->msr & MSR_RI)) { + if (regs_is_unrecoverable(regs)) { /* If MSR_RI isn't set, we cannot recover */ pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); recovered = 0; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 167f2e1b8d39..56092dccfdb8 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -783,7 +783,7 @@ static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) { int recovered = 0; - if (!(regs->msr & MSR_RI)) { + if (regs_is_unrecoverable(regs)) { /* If MSR_RI isn't set, we cannot recover */ pr_err("Machine check 
interrupt unrecoverable: MSR(RI=0)\n"); recovered = 0; diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 5a95b8ea23d8..ff7906b48ca1 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -108,7 +108,7 @@ int fsl_rio_mcheck_exception(struct pt_regs *regs) __func__); out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR), 0); - regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_recoverable(regs); regs_set_return_ip(regs, extable_fixup(entry)); return 1; } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index ead460b80905..dd8241c009e5 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -482,16 +482,6 @@ static inline void get_output_lock(void) {} static inline void release_output_lock(void) {} #endif -static inline int unrecoverable_excp(struct pt_regs *regs) -{ -#if defined(CONFIG_4xx) || defined(CONFIG_PPC_BOOK3E) - /* We have no MSR_RI bit on 4xx or Book3e, so we simply return false */ - return 0; -#else - return ((regs->msr & MSR_RI) == 0); -#endif -} - static void xmon_touch_watchdogs(void) { touch_softlockup_watchdog_sync(); @@ -565,7 +555,7 @@ static int xmon_core(struct pt_regs *regs, volatile int fromipi) bp = NULL; if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) bp = at_breakpoint(regs->nip); - if (bp || unrecoverable_excp(regs)) + if (bp || regs_is_unrecoverable(regs)) fromipi = 0; if (!fromipi) { @@ -577,7 +567,7 @@ static int xmon_core(struct pt_regs *regs, volatile int fromipi) cpu, BP_NUM(bp)); xmon_print_symbol(regs->nip, " ", ")\n"); } - if (unrecoverable_excp(regs)) + if (regs_is_unrecoverable(regs)) printf("WARNING: exception is not recoverable, " "can't continue\n"); release_output_lock(); @@ -693,7 +683,7 @@ static int xmon_core(struct pt_regs *regs, volatile int fromipi) printf("Stopped at breakpoint %tx (", BP_NUM(bp)); xmon_print_symbol(regs->nip, " ", ")\n"); } - if (unrecoverable_excp(regs)) + if (regs_is_unrecoverable(regs)) printf("WARNING: exception is not recoverable, " "can't continue\n"); remove_bpts(); -- cgit From c12adb0678446b3284a6135dc5fa7aa3b6a968a5 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 7 Jan 2021 08:40:38 -0500 Subject: powerpc: retire sbc8548 board support The support was for this was mainlined 13 years ago, in v2.6.25 [0e0fffe88767] just around the ppc --> powerpc migration. I believe the board was introduced a year or two before that, so it is roughly a 15 year old platform - with the CPU speed and memory size that was typical for that era. I haven't had one of these boards for several years, and availability was discontinued several years before that. Given that, there is no point in adding a burden to testing coverage that builds all possible defconfigs, so it makes sense to remove it. Of course it will remain in the git history forever, for anyone who happens to find a functional board and wants to tinker with it. 
Acked-by: Scott Wood Signed-off-by: Paul Gortmaker Signed-off-by: Michael Ellerman --- arch/powerpc/boot/Makefile | 1 - arch/powerpc/boot/dts/sbc8548-altflash.dts | 111 ----------- arch/powerpc/boot/dts/sbc8548-post.dtsi | 289 ---------------------------- arch/powerpc/boot/dts/sbc8548-pre.dtsi | 48 ----- arch/powerpc/boot/dts/sbc8548.dts | 106 ---------- arch/powerpc/boot/wrapper | 2 +- arch/powerpc/configs/85xx/sbc8548_defconfig | 50 ----- arch/powerpc/configs/mpc85xx_base.config | 1 - arch/powerpc/platforms/85xx/Kconfig | 6 - arch/powerpc/platforms/85xx/Makefile | 1 - arch/powerpc/platforms/85xx/sbc8548.c | 134 ------------- 11 files changed, 1 insertion(+), 748 deletions(-) delete mode 100644 arch/powerpc/boot/dts/sbc8548-altflash.dts delete mode 100644 arch/powerpc/boot/dts/sbc8548-post.dtsi delete mode 100644 arch/powerpc/boot/dts/sbc8548-pre.dtsi delete mode 100644 arch/powerpc/boot/dts/sbc8548.dts delete mode 100644 arch/powerpc/configs/85xx/sbc8548_defconfig delete mode 100644 arch/powerpc/platforms/85xx/sbc8548.c (limited to 'arch') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index e312ea802aa6..3e30ea6f2187 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -341,7 +341,6 @@ image-$(CONFIG_TQM8541) += cuImage.tqm8541 image-$(CONFIG_TQM8548) += cuImage.tqm8548 image-$(CONFIG_TQM8555) += cuImage.tqm8555 image-$(CONFIG_TQM8560) += cuImage.tqm8560 -image-$(CONFIG_SBC8548) += cuImage.sbc8548 image-$(CONFIG_KSI8560) += cuImage.ksi8560 # Board ports in arch/powerpc/platform/86xx/Kconfig diff --git a/arch/powerpc/boot/dts/sbc8548-altflash.dts b/arch/powerpc/boot/dts/sbc8548-altflash.dts deleted file mode 100644 index bb7a1e712bb7..000000000000 --- a/arch/powerpc/boot/dts/sbc8548-altflash.dts +++ /dev/null @@ -1,111 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SBC8548 Device Tree Source - * - * Configured for booting off the alternate (64MB SODIMM) flash. - * Requires switching JP12 jumpers and changing SW2.8 setting. - * - * Copyright 2013 Wind River Systems Inc. - * - * Paul Gortmaker (see MAINTAINERS for contact information) - */ - - -/dts-v1/; - -/include/ "sbc8548-pre.dtsi" - -/{ - localbus@e0000000 { - #address-cells = <2>; - #size-cells = <1>; - compatible = "simple-bus"; - reg = <0xe0000000 0x5000>; - interrupt-parent = <&mpic>; - - ranges = <0x0 0x0 0xfc000000 0x04000000 /*64MB Flash*/ - 0x3 0x0 0xf0000000 0x04000000 /*64MB SDRAM*/ - 0x4 0x0 0xf4000000 0x04000000 /*64MB SDRAM*/ - 0x5 0x0 0xf8000000 0x00b10000 /* EPLD */ - 0x6 0x0 0xef800000 0x00800000>; /*8MB Flash*/ - - flash@0,0 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0x0 0x0 0x04000000>; - compatible = "intel,JS28F128", "cfi-flash"; - bank-width = <4>; - device-width = <1>; - partition@0 { - label = "space"; - /* FC000000 -> FFEFFFFF */ - reg = <0x00000000 0x03f00000>; - }; - partition@3f00000 { - label = "bootloader"; - /* FFF00000 -> FFFFFFFF */ - reg = <0x03f00000 0x00100000>; - read-only; - }; - }; - - - epld@5,0 { - compatible = "wrs,epld-localbus"; - #address-cells = <2>; - #size-cells = <1>; - reg = <0x5 0x0 0x00b10000>; - ranges = < - 0x0 0x0 0x5 0x000000 0x1fff /* LED */ - 0x1 0x0 0x5 0x100000 0x1fff /* Switches */ - 0x3 0x0 0x5 0x300000 0x1fff /* HW Rev. 
*/ - 0xb 0x0 0x5 0xb00000 0x1fff /* EEPROM */ - >; - - led@0,0 { - compatible = "led"; - reg = <0x0 0x0 0x1fff>; - }; - - switches@1,0 { - compatible = "switches"; - reg = <0x1 0x0 0x1fff>; - }; - - hw-rev@3,0 { - compatible = "hw-rev"; - reg = <0x3 0x0 0x1fff>; - }; - - eeprom@b,0 { - compatible = "eeprom"; - reg = <0xb 0 0x1fff>; - }; - - }; - - alt-flash@6,0 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "intel,JS28F640", "cfi-flash"; - reg = <0x6 0x0 0x800000>; - bank-width = <1>; - device-width = <1>; - partition@0 { - label = "space"; - /* EF800000 -> EFF9FFFF */ - reg = <0x00000000 0x007a0000>; - }; - partition@7a0000 { - label = "bootloader"; - /* EFFA0000 -> EFFFFFFF */ - reg = <0x007a0000 0x00060000>; - read-only; - }; - }; - - - }; -}; - -/include/ "sbc8548-post.dtsi" diff --git a/arch/powerpc/boot/dts/sbc8548-post.dtsi b/arch/powerpc/boot/dts/sbc8548-post.dtsi deleted file mode 100644 index 9d848d409408..000000000000 --- a/arch/powerpc/boot/dts/sbc8548-post.dtsi +++ /dev/null @@ -1,289 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SBC8548 Device Tree Source - * - * Copyright 2007 Wind River Systems Inc. - * - * Paul Gortmaker (see MAINTAINERS for contact information) - */ - -/{ - soc8548@e0000000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - ranges = <0x00000000 0xe0000000 0x00100000>; - bus-frequency = <0>; - compatible = "simple-bus"; - - ecm-law@0 { - compatible = "fsl,ecm-law"; - reg = <0x0 0x1000>; - fsl,num-laws = <10>; - }; - - ecm@1000 { - compatible = "fsl,mpc8548-ecm", "fsl,ecm"; - reg = <0x1000 0x1000>; - interrupts = <17 2>; - interrupt-parent = <&mpic>; - }; - - memory-controller@2000 { - compatible = "fsl,mpc8548-memory-controller"; - reg = <0x2000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <0x12 0x2>; - }; - - L2: l2-cache-controller@20000 { - compatible = "fsl,mpc8548-l2-cache-controller"; - reg = <0x20000 0x1000>; - cache-line-size = <0x20>; // 32 bytes - cache-size = <0x80000>; // L2, 512K - interrupt-parent = <&mpic>; - interrupts = <0x10 0x2>; - }; - - i2c@3000 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <0>; - compatible = "fsl-i2c"; - reg = <0x3000 0x100>; - interrupts = <0x2b 0x2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - - i2c@3100 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <1>; - compatible = "fsl-i2c"; - reg = <0x3100 0x100>; - interrupts = <0x2b 0x2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - - dma@21300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,mpc8548-dma", "fsl,eloplus-dma"; - reg = <0x21300 0x4>; - ranges = <0x0 0x21100 0x200>; - cell-index = <0>; - dma-channel@0 { - compatible = "fsl,mpc8548-dma-channel", - "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; - interrupt-parent = <&mpic>; - interrupts = <20 2>; - }; - dma-channel@80 { - compatible = "fsl,mpc8548-dma-channel", - "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <21 2>; - }; - dma-channel@100 { - compatible = "fsl,mpc8548-dma-channel", - "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <22 2>; - }; - dma-channel@180 { - compatible = "fsl,mpc8548-dma-channel", - "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <23 2>; - }; - }; - - enet0: ethernet@24000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <0>; - device_type = 
"network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x24000 0x1000>; - ranges = <0x0 0x24000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <0x1d 0x2 0x1e 0x2 0x22 0x2>; - interrupt-parent = <&mpic>; - tbi-handle = <&tbi0>; - phy-handle = <&phy0>; - - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-mdio"; - reg = <0x520 0x20>; - - phy0: ethernet-phy@19 { - interrupt-parent = <&mpic>; - interrupts = <0x6 0x1>; - reg = <0x19>; - }; - phy1: ethernet-phy@1a { - interrupt-parent = <&mpic>; - interrupts = <0x7 0x1>; - reg = <0x1a>; - }; - tbi0: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - }; - - enet1: ethernet@25000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x25000 0x1000>; - ranges = <0x0 0x25000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <0x23 0x2 0x24 0x2 0x28 0x2>; - interrupt-parent = <&mpic>; - tbi-handle = <&tbi1>; - phy-handle = <&phy1>; - - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-tbi"; - reg = <0x520 0x20>; - - tbi1: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - }; - - serial0: serial@4500 { - cell-index = <0>; - device_type = "serial"; - compatible = "fsl,ns16550", "ns16550"; - reg = <0x4500 0x100>; // reg base, size - clock-frequency = <0>; // should we fill in in uboot? - interrupts = <0x2a 0x2>; - interrupt-parent = <&mpic>; - }; - - serial1: serial@4600 { - cell-index = <1>; - device_type = "serial"; - compatible = "fsl,ns16550", "ns16550"; - reg = <0x4600 0x100>; // reg base, size - clock-frequency = <0>; // should we fill in in uboot? - interrupts = <0x2a 0x2>; - interrupt-parent = <&mpic>; - }; - - global-utilities@e0000 { //global utilities reg - compatible = "fsl,mpc8548-guts"; - reg = <0xe0000 0x1000>; - fsl,has-rstcr; - }; - - crypto@30000 { - compatible = "fsl,sec2.1", "fsl,sec2.0"; - reg = <0x30000 0x10000>; - interrupts = <45 2>; - interrupt-parent = <&mpic>; - fsl,num-channels = <4>; - fsl,channel-fifo-len = <24>; - fsl,exec-units-mask = <0xfe>; - fsl,descriptor-types-mask = <0x12b0ebf>; - }; - - mpic: pic@40000 { - interrupt-controller; - #address-cells = <0>; - #interrupt-cells = <2>; - reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; - device_type = "open-pic"; - }; - }; - - pci0: pci@e0008000 { - interrupt-map-mask = <0xf800 0x0 0x0 0x7>; - interrupt-map = < - /* IDSEL 0x01 (PCI-X slot) @66MHz */ - 0x0800 0x0 0x0 0x1 &mpic 0x2 0x1 - 0x0800 0x0 0x0 0x2 &mpic 0x3 0x1 - 0x0800 0x0 0x0 0x3 &mpic 0x4 0x1 - 0x0800 0x0 0x0 0x4 &mpic 0x1 0x1 - - /* IDSEL 0x11 (PCI, 3.3V 32bit) @33MHz */ - 0x8800 0x0 0x0 0x1 &mpic 0x2 0x1 - 0x8800 0x0 0x0 0x2 &mpic 0x3 0x1 - 0x8800 0x0 0x0 0x3 &mpic 0x4 0x1 - 0x8800 0x0 0x0 0x4 &mpic 0x1 0x1>; - - interrupt-parent = <&mpic>; - interrupts = <0x18 0x2>; - bus-range = <0 0>; - ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x10000000 - 0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00800000>; - clock-frequency = <66000000>; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0xe0008000 0x1000>; - compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci"; - device_type = "pci"; - }; - - pci1: pcie@e000a000 { - interrupt-map-mask = <0xf800 0x0 0x0 0x7>; - interrupt-map = < - - /* IDSEL 0x0 (PEX) */ - 0x0000 0x0 0x0 0x1 &mpic 0x0 0x1 - 0x0000 0x0 0x0 0x2 &mpic 0x1 0x1 - 0x0000 0x0 0x0 0x3 &mpic 0x2 0x1 - 0x0000 0x0 0x0 0x4 &mpic 0x3 
0x1>; - - interrupt-parent = <&mpic>; - interrupts = <0x1a 0x2>; - bus-range = <0x0 0xff>; - ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000 - 0x01000000 0x0 0x00000000 0xe2800000 0x0 0x08000000>; - clock-frequency = <33000000>; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0xe000a000 0x1000>; - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - pcie@0 { - reg = <0x0 0x0 0x0 0x0 0x0>; - #size-cells = <2>; - #address-cells = <3>; - device_type = "pci"; - ranges = <0x02000000 0x0 0xa0000000 - 0x02000000 0x0 0xa0000000 - 0x0 0x10000000 - - 0x01000000 0x0 0x00000000 - 0x01000000 0x0 0x00000000 - 0x0 0x00800000>; - }; - }; -}; diff --git a/arch/powerpc/boot/dts/sbc8548-pre.dtsi b/arch/powerpc/boot/dts/sbc8548-pre.dtsi deleted file mode 100644 index 0e3665fd15d0..000000000000 --- a/arch/powerpc/boot/dts/sbc8548-pre.dtsi +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SBC8548 Device Tree Source - * - * Copyright 2007 Wind River Systems Inc. - * - * Paul Gortmaker (see MAINTAINERS for contact information) - */ - -/{ - model = "SBC8548"; - compatible = "SBC8548"; - #address-cells = <1>; - #size-cells = <1>; - - aliases { - ethernet0 = &enet0; - ethernet1 = &enet1; - serial0 = &serial0; - serial1 = &serial1; - pci0 = &pci0; - pci1 = &pci1; - }; - - cpus { - #address-cells = <1>; - #size-cells = <0>; - - PowerPC,8548@0 { - device_type = "cpu"; - reg = <0>; - d-cache-line-size = <0x20>; // 32 bytes - i-cache-line-size = <0x20>; // 32 bytes - d-cache-size = <0x8000>; // L1, 32K - i-cache-size = <0x8000>; // L1, 32K - timebase-frequency = <0>; // From uboot - bus-frequency = <0>; - clock-frequency = <0>; - next-level-cache = <&L2>; - }; - }; - - memory { - device_type = "memory"; - reg = <0x00000000 0x10000000>; - }; - -}; diff --git a/arch/powerpc/boot/dts/sbc8548.dts b/arch/powerpc/boot/dts/sbc8548.dts deleted file mode 100644 index ce0a119f496e..000000000000 --- a/arch/powerpc/boot/dts/sbc8548.dts +++ /dev/null @@ -1,106 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SBC8548 Device Tree Source - * - * Copyright 2007 Wind River Systems Inc. - * - * Paul Gortmaker (see MAINTAINERS for contact information) - */ - - -/dts-v1/; - -/include/ "sbc8548-pre.dtsi" - -/{ - localbus@e0000000 { - #address-cells = <2>; - #size-cells = <1>; - compatible = "simple-bus"; - reg = <0xe0000000 0x5000>; - interrupt-parent = <&mpic>; - - ranges = <0x0 0x0 0xff800000 0x00800000 /*8MB Flash*/ - 0x3 0x0 0xf0000000 0x04000000 /*64MB SDRAM*/ - 0x4 0x0 0xf4000000 0x04000000 /*64MB SDRAM*/ - 0x5 0x0 0xf8000000 0x00b10000 /* EPLD */ - 0x6 0x0 0xec000000 0x04000000>; /*64MB Flash*/ - - - flash@0,0 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "intel,JS28F640", "cfi-flash"; - reg = <0x0 0x0 0x800000>; - bank-width = <1>; - device-width = <1>; - partition@0 { - label = "space"; - /* FF800000 -> FFF9FFFF */ - reg = <0x00000000 0x007a0000>; - }; - partition@7a0000 { - label = "bootloader"; - /* FFFA0000 -> FFFFFFFF */ - reg = <0x007a0000 0x00060000>; - read-only; - }; - }; - - epld@5,0 { - compatible = "wrs,epld-localbus"; - #address-cells = <2>; - #size-cells = <1>; - reg = <0x5 0x0 0x00b10000>; - ranges = < - 0x0 0x0 0x5 0x000000 0x1fff /* LED */ - 0x1 0x0 0x5 0x100000 0x1fff /* Switches */ - 0x3 0x0 0x5 0x300000 0x1fff /* HW Rev. 
*/ - 0xb 0x0 0x5 0xb00000 0x1fff /* EEPROM */ - >; - - led@0,0 { - compatible = "led"; - reg = <0x0 0x0 0x1fff>; - }; - - switches@1,0 { - compatible = "switches"; - reg = <0x1 0x0 0x1fff>; - }; - - hw-rev@3,0 { - compatible = "hw-rev"; - reg = <0x3 0x0 0x1fff>; - }; - - eeprom@b,0 { - compatible = "eeprom"; - reg = <0xb 0 0x1fff>; - }; - - }; - - alt-flash@6,0 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0x6 0x0 0x04000000>; - compatible = "intel,JS28F128", "cfi-flash"; - bank-width = <4>; - device-width = <1>; - partition@0 { - label = "space"; - /* EC000000 -> EFEFFFFF */ - reg = <0x00000000 0x03f00000>; - }; - partition@3f00000 { - label = "bootloader"; - /* EFF00000 -> EFFFFFFF */ - reg = <0x03f00000 0x00100000>; - read-only; - }; - }; - }; -}; - -/include/ "sbc8548-post.dtsi" diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 1f4676bab786..1cd82564c996 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -298,7 +298,7 @@ cuboot*) *-tqm8541|*-mpc8560*|*-tqm8560|*-tqm8555|*-ksi8560*) platformo=$object/cuboot-85xx-cpm2.o ;; - *-mpc85*|*-tqm85*|*-sbc85*) + *-mpc85*|*-tqm85*) platformo=$object/cuboot-85xx.o ;; *-amigaone) diff --git a/arch/powerpc/configs/85xx/sbc8548_defconfig b/arch/powerpc/configs/85xx/sbc8548_defconfig deleted file mode 100644 index 258881727119..000000000000 --- a/arch/powerpc/configs/85xx/sbc8548_defconfig +++ /dev/null @@ -1,50 +0,0 @@ -CONFIG_PPC_85xx=y -CONFIG_SYSVIPC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -CONFIG_SLAB=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_SBC8548=y -CONFIG_GEN_RTC=y -CONFIG_BINFMT_MISC=y -CONFIG_MATH_EMULATION=y -# CONFIG_SECCOMP is not set -CONFIG_PCI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_SYN_COOKIES=y -# CONFIG_IPV6 is not set -# CONFIG_FW_LOADER is not set -CONFIG_MTD=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_ADV_OPTIONS=y -CONFIG_MTD_CFI_GEOMETRY=y -CONFIG_MTD_CFI_I4=y -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_NETDEVICES=y -CONFIG_GIANFAR=y -CONFIG_BROADCOM_PHY=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_HW_RANDOM is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y diff --git a/arch/powerpc/configs/mpc85xx_base.config b/arch/powerpc/configs/mpc85xx_base.config index b1593fe6f70b..85907b776908 100644 --- a/arch/powerpc/configs/mpc85xx_base.config +++ b/arch/powerpc/configs/mpc85xx_base.config @@ -13,7 +13,6 @@ CONFIG_P1022_DS=y CONFIG_P1022_RDK=y CONFIG_P1023_RDB=y CONFIG_TWR_P102x=y -CONFIG_SBC8548=y CONFIG_SOCRATES=y CONFIG_STX_GP3=y CONFIG_TQM8540=y diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index b77cbb0a50e1..4142ebf01382 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -208,12 +208,6 @@ config TQM8560 select TQM85xx select CPM2 -config SBC8548 - bool "Wind River SBC8548" - select DEFAULT_UIMAGE - help - This option enables support for the Wind River SBC8548 board - config PPA8548 bool "Prodrive PPA8548" help diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index d1dd0dca5ebf..60e4e97a929d 100644 --- 
a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -26,7 +26,6 @@ obj-$(CONFIG_CORENET_GENERIC) += corenet_generic.o obj-$(CONFIG_FB_FSL_DIU) += t1042rdb_diu.o obj-$(CONFIG_STX_GP3) += stx_gp3.o obj-$(CONFIG_TQM85xx) += tqm85xx.o -obj-$(CONFIG_SBC8548) += sbc8548.o obj-$(CONFIG_PPA8548) += ppa8548.o obj-$(CONFIG_SOCRATES) += socrates.o socrates_fpga_pic.o obj-$(CONFIG_KSI8560) += ksi8560.o diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c deleted file mode 100644 index e4acf5ce6b07..000000000000 --- a/arch/powerpc/platforms/85xx/sbc8548.c +++ /dev/null @@ -1,134 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Wind River SBC8548 setup and early boot code. - * - * Copyright 2007 Wind River Systems Inc. - * - * By Paul Gortmaker (see MAINTAINERS for contact information) - * - * Based largely on the MPC8548CDS support - Copyright 2005 Freescale Inc. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "mpc85xx.h" - -static int sbc_rev; - -static void __init sbc8548_pic_init(void) -{ - struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, - 0, 256, " OpenPIC "); - BUG_ON(mpic == NULL); - mpic_init(mpic); -} - -/* Extract the HW Rev from the EPLD on the board */ -static int __init sbc8548_hw_rev(void) -{ - struct device_node *np; - struct resource res; - unsigned int *rev; - int board_rev = 0; - - np = of_find_compatible_node(NULL, NULL, "hw-rev"); - if (np == NULL) { - printk("No HW-REV found in DTB.\n"); - return -ENODEV; - } - - of_address_to_resource(np, 0, &res); - of_node_put(np); - - rev = ioremap(res.start,sizeof(unsigned int)); - board_rev = (*rev) >> 28; - iounmap(rev); - - return board_rev; -} - -/* - * Setup the architecture - */ -static void __init sbc8548_setup_arch(void) -{ - if (ppc_md.progress) - ppc_md.progress("sbc8548_setup_arch()", 0); - - fsl_pci_assign_primary(); - - sbc_rev = sbc8548_hw_rev(); -} - -static void sbc8548_show_cpuinfo(struct seq_file *m) -{ - uint pvid, svid, phid1; - - pvid = mfspr(SPRN_PVR); - svid = mfspr(SPRN_SVR); - - seq_printf(m, "Vendor\t\t: Wind River\n"); - seq_printf(m, "Machine\t\t: SBC8548 v%d\n", sbc_rev); - seq_printf(m, "PVR\t\t: 0x%x\n", pvid); - seq_printf(m, "SVR\t\t: 0x%x\n", svid); - - /* Display cpu Pll setting */ - phid1 = mfspr(SPRN_HID1); - seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); -} - -machine_arch_initcall(sbc8548, mpc85xx_common_publish_devices); - -/* - * Called very early, device-tree isn't unflattened - */ -static int __init sbc8548_probe(void) -{ - return of_machine_is_compatible("SBC8548"); -} - -define_machine(sbc8548) { - .name = "SBC8548", - .probe = sbc8548_probe, - .setup_arch = sbc8548_setup_arch, - .init_IRQ = sbc8548_pic_init, - .show_cpuinfo = sbc8548_show_cpuinfo, - .get_irq = mpic_get_irq, -#ifdef CONFIG_PCI - .pcibios_fixup_bus = fsl_pcibios_fixup_bus, - .pcibios_fixup_phb = fsl_pcibios_fixup_phb, -#endif - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, -}; -- cgit From d7c1814f2f4f812d7a39447f975abdc095dbf7aa Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 7 Jan 2021 13:45:32 -0500 Subject: powerpc: retire sbc8641d board support The support for this was added to mainline over 12 years ago, in 
v2.6.26 [4e8aae89a35d] just around the ppc --> powerpc migration. I believe the board was introduced shortly after the sbc8548 board, making it roughly a 14 year old platform - with the CPU speed and memory size typical for that era. I haven't had one of these boards for several years, and availability was discontinued several years before that. Given that, there is no point in adding a burden to testing coverage that builds all possible defconfigs, so it makes sense to remove it. Of course it will remain in the git history forever, for anyone who happens to find a functional board and wants to tinker with it. Acked-by: Scott Wood Signed-off-by: Paul Gortmaker Signed-off-by: Michael Ellerman --- arch/powerpc/boot/dts/fsl/sbc8641d.dts | 176 ------------------------------- arch/powerpc/configs/mpc86xx_base.config | 1 - arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/powerpc/platforms/86xx/Kconfig | 8 +- arch/powerpc/platforms/86xx/Makefile | 1 - arch/powerpc/platforms/86xx/sbc8641d.c | 87 --------------- 6 files changed, 1 insertion(+), 273 deletions(-) delete mode 100644 arch/powerpc/boot/dts/fsl/sbc8641d.dts delete mode 100644 arch/powerpc/platforms/86xx/sbc8641d.c (limited to 'arch') diff --git a/arch/powerpc/boot/dts/fsl/sbc8641d.dts b/arch/powerpc/boot/dts/fsl/sbc8641d.dts deleted file mode 100644 index 3dca10acc161..000000000000 --- a/arch/powerpc/boot/dts/fsl/sbc8641d.dts +++ /dev/null @@ -1,176 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SBC8641D Device Tree Source - * - * Copyright 2008 Wind River Systems Inc. - * - * Paul Gortmaker (see MAINTAINERS for contact information) - * - * Based largely on the mpc8641_hpcn.dts by Freescale Semiconductor Inc. - */ - -/include/ "mpc8641si-pre.dtsi" - -/ { - model = "SBC8641D"; - compatible = "wind,sbc8641"; - - memory { - device_type = "memory"; - reg = <0x00000000 0x20000000>; // 512M at 0x0 - }; - - lbc: localbus@f8005000 { - reg = <0xf8005000 0x1000>; - - ranges = <0 0 0xff000000 0x01000000 // 16MB Boot flash - 1 0 0xf0000000 0x00010000 // 64KB EEPROM - 2 0 0xf1000000 0x00100000 // EPLD (1MB) - 3 0 0xe0000000 0x04000000 // 64MB LB SDRAM (CS3) - 4 0 0xe4000000 0x04000000 // 64MB LB SDRAM (CS4) - 6 0 0xf4000000 0x00100000 // LCD display (1MB) - 7 0 0xe8000000 0x04000000>; // 64MB OneNAND - - flash@0,0 { - compatible = "cfi-flash"; - reg = <0 0 0x01000000>; - bank-width = <2>; - device-width = <2>; - #address-cells = <1>; - #size-cells = <1>; - partition@0 { - label = "dtb"; - reg = <0x00000000 0x00100000>; - read-only; - }; - partition@300000 { - label = "kernel"; - reg = <0x00100000 0x00400000>; - read-only; - }; - partition@400000 { - label = "fs"; - reg = <0x00500000 0x00a00000>; - }; - partition@700000 { - label = "firmware"; - reg = <0x00f00000 0x00100000>; - read-only; - }; - }; - - epld@2,0 { - compatible = "wrs,epld-localbus"; - #address-cells = <2>; - #size-cells = <1>; - reg = <2 0 0x100000>; - ranges = <0 0 5 0 1 // User switches - 1 0 5 1 1 // Board ID/Rev - 3 0 5 3 1>; // LEDs - }; - }; - - soc: soc@f8000000 { - ranges = <0x00000000 0xf8000000 0x00100000>; - - enet0: ethernet@24000 { - tbi-handle = <&tbi0>; - phy-handle = <&phy0>; - phy-connection-type = "rgmii-id"; - }; - - mdio@24520 { - phy0: ethernet-phy@1f { - reg = <0x1f>; - }; - phy1: ethernet-phy@0 { - reg = <0>; - }; - phy2: ethernet-phy@1 { - reg = <1>; - }; - phy3: ethernet-phy@2 { - reg = <2>; - }; - tbi0: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - - enet1: ethernet@25000 { - tbi-handle = <&tbi1>; - phy-handle = <&phy1>; - 
phy-connection-type = "rgmii-id"; - }; - - mdio@25520 { - tbi1: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - - enet2: ethernet@26000 { - tbi-handle = <&tbi2>; - phy-handle = <&phy2>; - phy-connection-type = "rgmii-id"; - }; - - mdio@26520 { - tbi2: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - - enet3: ethernet@27000 { - tbi-handle = <&tbi3>; - phy-handle = <&phy3>; - phy-connection-type = "rgmii-id"; - }; - - mdio@27520 { - tbi3: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - }; - - pci0: pcie@f8008000 { - reg = <0xf8008000 0x1000>; - ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x20000000 - 0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>; - interrupt-map-mask = <0xff00 0 0 7>; - - pcie@0 { - ranges = <0x02000000 0x0 0x80000000 - 0x02000000 0x0 0x80000000 - 0x0 0x20000000 - - 0x01000000 0x0 0x00000000 - 0x01000000 0x0 0x00000000 - 0x0 0x00100000>; - }; - - }; - - pci1: pcie@f8009000 { - reg = <0xf8009000 0x1000>; - ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000 - 0x01000000 0x0 0x00000000 0xe3000000 0x0 0x00100000>; - - pcie@0 { - ranges = <0x02000000 0x0 0xa0000000 - 0x02000000 0x0 0xa0000000 - 0x0 0x20000000 - - 0x01000000 0x0 0x00000000 - 0x01000000 0x0 0x00000000 - 0x0 0x00100000>; - }; - }; -}; - -/include/ "mpc8641si-post.dtsi" diff --git a/arch/powerpc/configs/mpc86xx_base.config b/arch/powerpc/configs/mpc86xx_base.config index 67bd1fa036ee..588870e6af3b 100644 --- a/arch/powerpc/configs/mpc86xx_base.config +++ b/arch/powerpc/configs/mpc86xx_base.config @@ -1,6 +1,5 @@ CONFIG_PPC_86xx=y CONFIG_MPC8641_HPCN=y -CONFIG_SBC8641D=y CONFIG_MPC8610_HPCD=y CONFIG_GEF_PPC9A=y CONFIG_GEF_SBC310=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index ee09f7bb6ea9..6697c5e6682f 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -55,7 +55,6 @@ CONFIG_MPC837x_RDB=y CONFIG_ASP834x=y CONFIG_PPC_86xx=y CONFIG_MPC8641_HPCN=y -CONFIG_SBC8641D=y CONFIG_MPC8610_HPCD=y CONFIG_GEF_SBC610=y CONFIG_CPU_FREQ=y diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig index 07a9d60c618a..be867abebc83 100644 --- a/arch/powerpc/platforms/86xx/Kconfig +++ b/arch/powerpc/platforms/86xx/Kconfig @@ -20,12 +20,6 @@ config MPC8641_HPCN help This option enables support for the MPC8641 HPCN board. -config SBC8641D - bool "Wind River SBC8641D" - select DEFAULT_UIMAGE - help - This option enables support for the WRS SBC8641D board. 
- config MPC8610_HPCD bool "Freescale MPC8610 HPCD" select DEFAULT_UIMAGE @@ -74,7 +68,7 @@ config MPC8641 select FSL_PCI if PCI select PPC_UDBG_16550 select MPIC - default y if MPC8641_HPCN || SBC8641D || GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \ + default y if MPC8641_HPCN || GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \ || MVME7100 config MPC8610 diff --git a/arch/powerpc/platforms/86xx/Makefile b/arch/powerpc/platforms/86xx/Makefile index 2c04449be107..5bbe1475bf26 100644 --- a/arch/powerpc/platforms/86xx/Makefile +++ b/arch/powerpc/platforms/86xx/Makefile @@ -6,7 +6,6 @@ obj-y := pic.o common.o obj-$(CONFIG_SMP) += mpc86xx_smp.o obj-$(CONFIG_MPC8641_HPCN) += mpc86xx_hpcn.o -obj-$(CONFIG_SBC8641D) += sbc8641d.o obj-$(CONFIG_MPC8610_HPCD) += mpc8610_hpcd.o obj-$(CONFIG_GEF_SBC610) += gef_sbc610.o obj-$(CONFIG_GEF_SBC310) += gef_sbc310.o diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c deleted file mode 100644 index dc23dd383d6e..000000000000 --- a/arch/powerpc/platforms/86xx/sbc8641d.c +++ /dev/null @@ -1,87 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SBC8641D board specific routines - * - * Copyright 2008 Wind River Systems Inc. - * - * By Paul Gortmaker (see MAINTAINERS for contact information) - * - * Based largely on the 8641 HPCN support by Freescale Semiconductor Inc. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#include "mpc86xx.h" - -static void __init -sbc8641_setup_arch(void) -{ - if (ppc_md.progress) - ppc_md.progress("sbc8641_setup_arch()", 0); - - printk("SBC8641 board from Wind River\n"); - -#ifdef CONFIG_SMP - mpc86xx_smp_init(); -#endif - - fsl_pci_assign_primary(); -} - - -static void -sbc8641_show_cpuinfo(struct seq_file *m) -{ - uint svid = mfspr(SPRN_SVR); - - seq_printf(m, "Vendor\t\t: Wind River Systems\n"); - - seq_printf(m, "SVR\t\t: 0x%x\n", svid); -} - - -/* - * Called very early, device-tree isn't unflattened - */ -static int __init sbc8641_probe(void) -{ - if (of_machine_is_compatible("wind,sbc8641")) - return 1; /* Looks good */ - - return 0; -} - -machine_arch_initcall(sbc8641, mpc86xx_common_publish_devices); - -define_machine(sbc8641) { - .name = "SBC8641D", - .probe = sbc8641_probe, - .setup_arch = sbc8641_setup_arch, - .init_IRQ = mpc86xx_init_irq, - .show_cpuinfo = sbc8641_show_cpuinfo, - .get_irq = mpic_get_irq, - .time_init = mpc86xx_time_init, - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, -#ifdef CONFIG_PCI - .pcibios_fixup_bus = fsl_pcibios_fixup_bus, -#endif -}; -- cgit From 8149238ffd210875f5a77e3c654bb59b58da35e3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 25 Aug 2021 13:34:45 +0000 Subject: powerpc: Redefine HMT_xxx macros as empty on PPC32 The HMT_xxx macros for adjusting thread priority (hardware multi-threading) are inherited from PPC64 via commit 5f7c690728ac ("[PATCH] powerpc: Merged ppc_asm.h"). Those instructions are pointless on PPC32, but some common functions like arch_cpu_idle() use them. So make them empty on PPC32 to avoid those instructions. 
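As an illustration (a minimal sketch only, not part of the patch; the mnemonics are taken from the HMT definitions in the diff below), a common caller such as cpu_relax() now expands differently on the two platforms:

	/* PPC64: cpu_relax() keeps the priority dance around the barrier */
	do {
		asm volatile("or 1, 1, 1 # low priority");	/* HMT_low() */
		asm volatile("or 2, 2, 2 # medium priority");	/* HMT_medium() */
		barrier();
	} while (0);

	/* PPC32 after this patch: HMT_low()/HMT_medium() expand to nothing,
	 * so the same body reduces to just the compiler barrier. */
	do {
		barrier();
	} while (0);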
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c5a07fadea33d640ad10cecf0ac8faaec1c524e0.1629898474.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/processor.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/vdso/processor.h b/arch/powerpc/include/asm/vdso/processor.h index e072577bc7c0..8d79f994b4aa 100644 --- a/arch/powerpc/include/asm/vdso/processor.h +++ b/arch/powerpc/include/asm/vdso/processor.h @@ -5,12 +5,21 @@ #ifndef __ASSEMBLY__ /* Macros for adjusting thread priority (hardware multi-threading) */ +#ifdef CONFIG_PPC64 #define HMT_very_low() asm volatile("or 31, 31, 31 # very low priority") #define HMT_low() asm volatile("or 1, 1, 1 # low priority") #define HMT_medium_low() asm volatile("or 6, 6, 6 # medium low priority") #define HMT_medium() asm volatile("or 2, 2, 2 # medium priority") #define HMT_medium_high() asm volatile("or 5, 5, 5 # medium high priority") #define HMT_high() asm volatile("or 3, 3, 3 # high priority") +#else +#define HMT_very_low() +#define HMT_low() +#define HMT_medium_low() +#define HMT_medium() +#define HMT_medium_high() +#define HMT_high() +#endif #ifdef CONFIG_PPC64 #define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0) -- cgit From 602d0f96563c2e0b8e1ddb22ac46bf7f58480d64 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 26 Aug 2021 21:56:51 +0930 Subject: powerpc/microwatt: Add Ethernet to device tree The liteeth network device is used in the Microwatt SoC. Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826122653.3236867-2-joel@jms.id.au --- arch/powerpc/boot/dts/microwatt.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts index 974abbdda249..65b270a90f94 100644 --- a/arch/powerpc/boot/dts/microwatt.dts +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -127,6 +127,18 @@ fifo-size = <16>; interrupts = <0x10 0x1>; }; + + ethernet@8020000 { + compatible = "litex,liteeth"; + reg = <0x8021000 0x100 + 0x8020800 0x100 + 0x8030000 0x2000>; + reg-names = "mac", "mido", "buffer"; + litex,rx-slots = <2>; + litex,tx-slots = <2>; + litex,slot-size = <0x800>; + interrupts = <0x11 0x1>; + }; }; chosen { -- cgit From ef4fcaf99cd27eb48790f2adc4eff456dbe1dec4 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 26 Aug 2021 21:56:52 +0930 Subject: powerpc/configs/microwatt: Enable Liteeth Liteeth is the network device used by Microwatt. Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826122653.3236867-3-joel@jms.id.au --- arch/powerpc/configs/microwatt_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig index ebc90aefbc0c..6b76232094b2 100644 --- a/arch/powerpc/configs/microwatt_defconfig +++ b/arch/powerpc/configs/microwatt_defconfig @@ -53,6 +53,7 @@ CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_NETDEVICES=y +CONFIG_LITEX_LITEETH=y # CONFIG_WLAN is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set -- cgit From 3e18e271182206c996a3a7efbbe70c66307ef137 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 26 Aug 2021 21:56:53 +0930 Subject: powerpc/configs/microwatt: Enable options for systemd When booting with systemd these options are required. 
This increases the image by about 50KB, or 2%. Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826122653.3236867-4-joel@jms.id.au --- arch/powerpc/configs/microwatt_defconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig index 6b76232094b2..9465209b8c5b 100644 --- a/arch/powerpc/configs/microwatt_defconfig +++ b/arch/powerpc/configs/microwatt_defconfig @@ -5,6 +5,7 @@ CONFIG_PREEMPT_VOLUNTARY=y CONFIG_TICK_CPU_ACCOUNTING=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 +CONFIG_CGROUPS=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_KALLSYMS_ALL=y @@ -78,8 +79,10 @@ CONFIG_SPI_SPIDEV=y CONFIG_EXT4_FS=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set -# CONFIG_INOTIFY_USER is not set +CONFIG_AUTOFS_FS=y +CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set +CONFIG_CRYPTO_SHA256=y # CONFIG_CRYPTO_HW is not set # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_IA64 is not set -- cgit From 8efd249babea2fec268cff90b9f5ca723dbb7499 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:33:59 +0530 Subject: powerpc/smp: Fix a crash while booting kvm guest with nr_cpus=2 Aneesh reported a crash with a fairly recent upstream kernel when booting kernel whose commandline was appended with nr_cpus=2 1:mon> e cpu 0x1: Vector: 300 (Data Access) at [c000000008a67bd0] pc: c00000000002557c: cpu_to_chip_id+0x3c/0x100 lr: c000000000058380: start_secondary+0x460/0xb00 sp: c000000008a67e70 msr: 8000000000001033 dar: 10 dsisr: 80000 current = 0xc00000000891bb00 paca = 0xc0000018ff981f80 irqmask: 0x03 irq_happened: 0x01 pid = 0, comm = swapper/1 Linux version 5.13.0-rc3-15704-ga050a6d2b7e8 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #433 SMP Tue May 25 02:38:49 CDT 2021 1:mon> t [link register ] c000000000058380 start_secondary+0x460/0xb00 [c000000008a67e70] c000000008a67eb0 (unreliable) [c000000008a67eb0] c0000000000589d4 start_secondary+0xab4/0xb00 [c000000008a67f90] c00000000000c654 start_secondary_prolog+0x10/0x14 Current code assumes that num_possible_cpus() is always greater than threads_per_core. However this may not be true when using nr_cpus=2 or similar options. Handle the case where num_possible_cpus() is not an exact multiple of threads_per_core. 
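To make the arithmetic concrete, here is a minimal userspace sketch (illustrative only, not from the patch; DIV_ROUND_UP is reproduced with its usual kernel definition, and the two plain ints stand in for num_possible_cpus() and threads_per_core):

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		int num_possible_cpus = 2;	/* booted with nr_cpus=2 */
		int threads_per_core = 8;	/* SMT8, as on POWER9 */

		/* old code: 2 / 8 == 0, so the chip-id lookup table is
		 * sized for zero cores and the boot core gets no slot */
		printf("old idx = %d\n", num_possible_cpus / threads_per_core);

		/* fixed code: rounds up to 1, covering the partial core */
		printf("new idx = %d\n",
		       DIV_ROUND_UP(num_possible_cpus, threads_per_core));
		return 0;
	}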
Fixes: c1e53367dab1 ("powerpc/smp: Cache CPU to chip lookup") Reported-by: Aneesh Kumar K.V Signed-off-by: Srikar Dronamraju Debugged-by: Michael Ellerman Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100401.412519-2-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index f2abd88e0c25..a3eab89ee572 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1109,7 +1109,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } if (cpu_to_chip_id(boot_cpuid) != -1) { - int idx = num_possible_cpus() / threads_per_core; + int idx = DIV_ROUND_UP(num_possible_cpus(), threads_per_core); /* * All threads of a core will all belong to the same core, -- cgit From b8b928030332a0ca16d42433eb2c3085600d8704 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:34:00 +0530 Subject: powerpc/smp: Update cpu_core_map on all PowerPc systems lscpu() uses core_siblings to list the number of sockets in the system. core_siblings is set using topology_core_cpumask. While optimizing the powerpc bootup path in commit 4ca234a9cbd7 ("powerpc/smp: Stop updating cpu_core_mask"), it was found that updating cpu_core_mask() ended up taking a lot of time. It was thought that on Powerpc, cpu_core_mask() would always be the same as cpu_cpu_mask(), i.e. the number of sockets would always be equal to the number of nodes. As an optimization, cpu_core_mask() was made a snapshot of cpu_cpu_mask(). However that was found to be false with PowerPc KVM guests, where each node could have more than one socket. So with commit c47f892d7aa6 ("powerpc/smp: Reintroduce cpu_core_mask"), cpu_core_mask was updated based on chip_id, but in an optimized way using some mask manipulations and chip_id caching. However, non-PowerNV and non-pseries KVM guests (i.e. those not implementing cpu_to_chip_id()) continued to use a copy of cpu_cpu_mask(). Two issues were noticed on such systems: 1. lscpu would report one extra socket. On an IBM,9009-42A (aka zz system) which has only 2 chips/ sockets/ nodes, lscpu would report Architecture: ppc64le Byte Order: Little Endian CPU(s): 160 On-line CPU(s) list: 0-159 Thread(s) per core: 8 Core(s) per socket: 6 Socket(s): 3 <-------------- NUMA node(s): 2 Model: 2.2 (pvr 004e 0202) Model name: POWER9 (architected), altivec supported Hypervisor vendor: pHyp Virtualization type: para L1d cache: 32K L1i cache: 32K L2 cache: 512K L3 cache: 10240K NUMA node0 CPU(s): 0-79 NUMA node1 CPU(s): 80-159 2. Currently cpu_cpu_mask is updated when a core is added/removed. However it is not updated on SMT mode switching or when CPUs are explicitly offlined. All other percpu masks are updated to ensure only active/online CPUs are in the masks. This results in build_sched_domain traces, since there will be CPUs in cpu_cpu_mask() that are not present in the SMT / CACHE / MC / NUMA domains. A loop of threads running SMT mode switching and core add/remove will soon show this trace. Hence cpu_cpu_mask has to be updated at SMT mode switch. This will have an impact on cpu_core_mask(): cpu_core_mask() is a snapshot of cpu_cpu_mask, so different CPUs within the same socket will end up having different cpu_core_masks, since they are snapshots taken at different points of time. This means lscpu will start reporting many more sockets than the actual number of sockets/ nodes / chips. Different ways to handle this problem: A. 
Update the snapshot aka cpu_core_mask for all CPUs whenever cpu_cpu_mask is updated. This would be a non-optimal solution. B. Instead of a cpumask_var_t, make cpu_core_map a cpumask pointer pointing to cpu_cpu_mask. However a percpu cpumask pointer is frowned upon, and we need a clean way to handle the PowerPc KVM guest case, which is not a snapshot. C. Update cpu_core_masks on all PowerPc systems, like in PowerPc KVM guests, using mask manipulations. This approach is relatively simple and unifies with the existing code. D. On top of C, we could also resurrect get_physical_package_id, which could return a nid for the said CPU. However this is not needed at this time. Option C is the preferred approach for now. While this is somewhat a revert of commit 4ca234a9cbd7 ("powerpc/smp: Stop updating cpu_core_mask"): 1. A plain revert has some conflicts. 2. For chip_id == -1, the cpu_core_mask is made identical to cpu_cpu_mask, unlike previously where cpu_core_mask was set to a core if chip_id doesn't exist. This goes by the principle that if chip_id is not exposed, then sockets / chip / node share the same set of CPUs. With the fix, lscpu o/p would be Architecture: ppc64le Byte Order: Little Endian CPU(s): 160 On-line CPU(s) list: 0-159 Thread(s) per core: 8 Core(s) per socket: 6 Socket(s): 2 <-------------- NUMA node(s): 2 Model: 2.2 (pvr 004e 0202) Model name: POWER9 (architected), altivec supported Hypervisor vendor: pHyp Virtualization type: para L1d cache: 32K L1i cache: 32K L2 cache: 512K L3 cache: 10240K NUMA node0 CPU(s): 0-79 NUMA node1 CPU(s): 80-159 Fixes: 4ca234a9cbd7 ("powerpc/smp: Stop updating cpu_core_mask") Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100401.412519-3-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a3eab89ee572..4bdc339fff3c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1527,6 +1527,7 @@ static void add_cpu_to_masks(int cpu) * add it to it's own thread sibling mask. */ cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); + cpumask_set_cpu(cpu, cpu_core_mask(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) if (cpu_online(i)) @@ -1544,11 +1545,6 @@ static void add_cpu_to_masks(int cpu) if (chip_id_lookup_table && ret) chip_id = cpu_to_chip_id(cpu); - if (chip_id == -1) { - cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); - goto out; - } - if (shared_caches) submask_fn = cpu_l2_cache_mask; @@ -1558,6 +1554,10 @@ static void add_cpu_to_masks(int cpu) /* Skip all CPUs already part of current CPU core mask */ cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu)); + /* If chip_id is -1; limit the cpu_core_mask to within DIE*/ + if (chip_id == -1) + cpumask_and(mask, mask, cpu_cpu_mask(cpu)); + for_each_cpu(i, mask) { if (chip_id == cpu_to_chip_id(i)) { or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask); @@ -1567,7 +1567,6 @@ static void add_cpu_to_masks(int cpu) } } -out: free_cpumask_var(mask); } -- cgit From 5bf63497b8ddf53d8973f68076119e482639b2bb Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:34:01 +0530 Subject: powerpc/smp: Enable CACHE domain for shared processor Currently the CACHE domain is not enabled in shared processor mode on PowerVM LPARs. On PowerVM systems, 'ibm,thread-group' device-tree property 2 under the cpu device node indicates which CPUs share the L2-cache. 
However 'ibm,thread-group' device-tree property 2 is a relatively new property. In absence of 'ibm,thread-group' property 2, 'l2-cache' device property under cpu-device-node could help system to identify CPUs sharing L2-cache. However this property is not exposed by PhyP in shared processor mode configurations. In absence of properties that inform OS about which CPUs share L2-cache, fallback on core boundary. Here are some stats from Power9 shared LPAR with the changes. $ lscpu Architecture: ppc64le Byte Order: Little Endian CPU(s): 32 On-line CPU(s) list: 0-31 Thread(s) per core: 8 Core(s) per socket: 1 Socket(s): 3 NUMA node(s): 2 Model: 2.2 (pvr 004e 0202) Model name: POWER9 (architected), altivec supported Hypervisor vendor: pHyp Virtualization type: para L1d cache: 32K L1i cache: 32K NUMA node0 CPU(s): 16-23 NUMA node1 CPU(s): 0-15,24-31 Physical sockets: 2 Physical chips: 1 Physical cores/chip: 10 Before patch $ grep -r . /sys/kernel/debug/sched/domains/cpu0/domain*/name Before /sys/kernel/debug/sched/domains/cpu0/domain0/name:SMT /sys/kernel/debug/sched/domains/cpu0/domain1/name:DIE /sys/kernel/debug/sched/domains/cpu0/domain2/name:NUMA After /sys/kernel/debug/sched/domains/cpu0/domain0/name:SMT /sys/kernel/debug/sched/domains/cpu0/domain1/name:CACHE /sys/kernel/debug/sched/domains/cpu0/domain2/name:DIE /sys/kernel/debug/sched/domains/cpu0/domain3/name:NUMA $ awk '/domain/{print $1, $2}' /proc/schedstat | sort -u | sed -e 's/00000000,//g' Before domain0 00000055 domain0 000000aa domain0 00005500 domain0 0000aa00 domain0 00550000 domain0 00aa0000 domain0 55000000 domain0 aa000000 domain1 00ff0000 domain1 ff00ffff domain2 ffffffff After domain0 00000055 domain0 000000aa domain0 00005500 domain0 0000aa00 domain0 00550000 domain0 00aa0000 domain0 55000000 domain0 aa000000 domain1 000000ff domain1 0000ff00 domain1 00ff0000 domain1 ff000000 domain2 ff00ffff domain2 ffffffff domain3 ffffffff (Lower is better) perf stat -a -r 5 -n perf bench sched pipe | tail -n 2 Before 153.798 +- 0.142 seconds time elapsed ( +- 0.09% ) After 111.545 +- 0.652 seconds time elapsed ( +- 0.58% ) which is an improvement of 27.47% Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100401.412519-4-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 4bdc339fff3c..548aa58d25f9 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1400,7 +1400,7 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask) l2_cache = cpu_to_l2cache(cpu); if (!l2_cache || !*mask) { /* Assume only core siblings share cache with this CPU */ - for_each_cpu(i, submask_fn(cpu)) + for_each_cpu(i, cpu_sibling_mask(cpu)) set_cpus_related(cpu, i, cpu_l2_cache_mask); return false; -- cgit From 544af6429777cefae2f8af9a9866df5e8cb21763 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:35:17 +0530 Subject: powerpc/numa: Drop dbg in favour of pr_debug powerpc supported numa=debug which is not documented. This option was used to print early debug output. However something more flexible can be achieved by using CONFIG_DYNAMIC_DEBUG. 
Hence drop dbg (and numa=debug) in favour of pr_debug Suggested-by: Michael Ellerman Signed-off-by: Srikar Dronamraju [mpe: Rebase on to powerpc/next form2 affinity changes] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100521.412639-2-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index fe9c6ced40b2..73ba6b072274 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -40,9 +40,6 @@ static int numa_enabled = 1; static char *cmdline __initdata; -static int numa_debug; -#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } - int numa_cpu_lookup_table[NR_CPUS]; cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; struct pglist_data *node_data[MAX_NUMNODES]; @@ -87,7 +84,7 @@ static void __init setup_node_to_cpumask_map(void) alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); /* cpumask_of_node() will now work */ - dbg("Node to cpumask map for %u nodes\n", nr_node_ids); + pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids); } static int __init fake_numa_create_new_node(unsigned long end_pfn, @@ -131,7 +128,7 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn, cmdline = p; fake_nid++; *nid = fake_nid; - dbg("created new fake_node with id %d\n", fake_nid); + pr_debug("created new fake_node with id %d\n", fake_nid); return 1; } return 0; @@ -149,7 +146,7 @@ static void map_cpu_to_node(int cpu, int node) { update_numa_cpu_lookup_table(cpu, node); - dbg("adding cpu %d to node %d\n", cpu, node); + pr_debug("adding cpu %d to node %d\n", cpu, node); if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) cpumask_set_cpu(cpu, node_to_cpumask_map[node]); @@ -160,7 +157,7 @@ static void unmap_cpu_from_node(unsigned long cpu) { int node = numa_cpu_lookup_table[cpu]; - dbg("removing cpu %lu from node %d\n", cpu, node); + pr_debug("removing cpu %lu from node %d\n", cpu, node); if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); @@ -450,10 +447,10 @@ static int __init find_primary_domain_index(void) if (firmware_has_feature(FW_FEATURE_OPAL)) { affinity_form = FORM1_AFFINITY; } else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) { - dbg("Using form 2 affinity\n"); + pr_debug("Using form 2 affinity\n"); affinity_form = FORM2_AFFINITY; } else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) { - dbg("Using form 1 affinity\n"); + pr_debug("Using form 1 affinity\n"); affinity_form = FORM1_AFFINITY; } else affinity_form = FORM0_AFFINITY; @@ -482,7 +479,7 @@ static int __init find_primary_domain_index(void) &distance_ref_points_depth); if (!distance_ref_points) { - dbg("NUMA: ibm,associativity-reference-points not found.\n"); + pr_debug("NUMA: ibm,associativity-reference-points not found.\n"); goto err; } @@ -928,7 +925,7 @@ static int __init parse_numa_properties(void) return primary_domain_index; } - dbg("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index); + pr_debug("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index); /* * If it is FORM2 initialize the distance table here. 
@@ -1251,9 +1248,6 @@ static int __init early_numa(char *p) if (strstr(p, "off")) numa_enabled = 0; - if (strstr(p, "debug")) - numa_debug = 1; - p = strstr(p, "fake="); if (p) cmdline = p + strlen("fake="); @@ -1416,7 +1410,7 @@ static long vphn_get_associativity(unsigned long cpu, switch (rc) { case H_SUCCESS: - dbg("VPHN hcall succeeded. Reset polling...\n"); + pr_debug("VPHN hcall succeeded. Reset polling...\n"); goto out; case H_FUNCTION: -- cgit From 506c2075ffd8db352c53201ef166948a272e3bce Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:35:18 +0530 Subject: powerpc/numa: convert printk to pr_xxx Convert the remaining printk to pr_xxx One advantage would be all prints will now have prefix "numa:" from pr_fmt(). [ convert printk(KERN_ERR) to pr_warn : Suggested by Laurent Dufour ] Suggested-by: Michael Ellerman Signed-off-by: Srikar Dronamraju [mpe: Rebase onto powerpc/next, s/WARNING/Warning/] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100521.412639-3-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 73ba6b072274..f4e52581713e 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -162,8 +162,7 @@ static void unmap_cpu_from_node(unsigned long cpu) if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); } else { - printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", - cpu, node); + pr_warn("Warning: cpu %lu not found in node %d\n", cpu, node); } } #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */ @@ -479,15 +478,14 @@ static int __init find_primary_domain_index(void) &distance_ref_points_depth); if (!distance_ref_points) { - pr_debug("NUMA: ibm,associativity-reference-points not found.\n"); + pr_debug("ibm,associativity-reference-points not found.\n"); goto err; } distance_ref_points_depth /= sizeof(int); if (affinity_form == FORM0_AFFINITY) { if (distance_ref_points_depth < 2) { - printk(KERN_WARNING "NUMA: " - "short ibm,associativity-reference-points\n"); + pr_warn("short ibm,associativity-reference-points\n"); goto err; } @@ -504,8 +502,8 @@ static int __init find_primary_domain_index(void) * MAX_DISTANCE_REF_POINTS domains. */ if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) { - printk(KERN_WARNING "NUMA: distance array capped at " - "%d entries\n", MAX_DISTANCE_REF_POINTS); + pr_warn("distance array capped at %d entries\n", + MAX_DISTANCE_REF_POINTS); distance_ref_points_depth = MAX_DISTANCE_REF_POINTS; } @@ -910,7 +908,7 @@ static int __init parse_numa_properties(void) const __be32 *associativity; if (numa_enabled == 0) { - printk(KERN_WARNING "NUMA disabled by user\n"); + pr_warn("disabled by user\n"); return -1; } @@ -925,7 +923,7 @@ static int __init parse_numa_properties(void) return primary_domain_index; } - pr_debug("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index); + pr_debug("associativity depth for CPU/Memory: %d\n", primary_domain_index); /* * If it is FORM2 initialize the distance table here. 
@@ -1038,10 +1036,8 @@ static void __init setup_nonnuma(void) unsigned int nid = 0; int i; - printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", - top_of_ram, total_ram); - printk(KERN_DEBUG "Memory hole size: %ldMB\n", - (top_of_ram - total_ram) >> 20); + pr_debug("Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram); + pr_debug("Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { fake_numa_create_new_node(end_pfn, &nid); -- cgit From 544a09ee7434b949552266d20ece538d35535bd5 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:35:19 +0530 Subject: powerpc/numa: Print debug statements only when required Currently, a debug message gets printed every time an attempt is made to add (remove) a CPU. However this is redundant if the CPU is already added to (removed from) the node. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100521.412639-4-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f4e52581713e..52bc63ee6a6a 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -146,10 +146,10 @@ static void map_cpu_to_node(int cpu, int node) { update_numa_cpu_lookup_table(cpu, node); - pr_debug("adding cpu %d to node %d\n", cpu, node); - - if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) + if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) { + pr_debug("adding cpu %d to node %d\n", cpu, node); cpumask_set_cpu(cpu, node_to_cpumask_map[node]); + } } #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR) @@ -157,10 +157,9 @@ static void unmap_cpu_from_node(unsigned long cpu) { int node = numa_cpu_lookup_table[cpu]; - pr_debug("removing cpu %lu from node %d\n", cpu, node); - if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); + pr_debug("removing cpu %lu from node %d\n", cpu, node); } else { pr_warn("Warning: cpu %lu not found in node %d\n", cpu, node); } -- cgit From 9a245d0e1f006bc7ccf0285d0d520ed304d00c4a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:35:20 +0530 Subject: powerpc/numa: Update cpu_cpu_map on CPU online/offline cpu_cpu_map holds all the CPUs in the DIE. However in PowerPC, when onlining/offlining CPUs, this mask doesn't get updated. This mask is, however, updated when CPUs are added/removed. So when both operations, online/offline of CPUs and adding/removing of CPUs, are done simultaneously, the cpumaps end up broken. 
WARNING: CPU: 13 PID: 1142 at kernel/sched/topology.c:898 build_sched_domains+0xd48/0x1720 Modules linked in: rpadlpar_io rpaphp mptcp_diag xsk_diag tcp_diag udp_diag raw_diag inet_diag unix_diag af_packet_diag netlink_diag bonding tls nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set rfkill nf_tables nfnetlink pseries_rng xts vmx_crypto uio_pdrv_genirq uio binfmt_misc ip_tables xfs libcrc32c dm_service_time sd_mod t10_pi sg ibmvfc scsi_transport_fc ibmveth dm_multipath dm_mirror dm_region_hash dm_log dm_mod fuse CPU: 13 PID: 1142 Comm: kworker/13:2 Not tainted 5.13.0-rc6+ #28 Workqueue: events cpuset_hotplug_workfn NIP: c0000000001caac8 LR: c0000000001caac4 CTR: 00000000007088ec REGS: c00000005596f220 TRAP: 0700 Not tainted (5.13.0-rc6+) MSR: 8000000000029033 CR: 48828222 XER: 00000009 CFAR: c0000000001ea698 IRQMASK: 0 GPR00: c0000000001caac4 c00000005596f4c0 c000000001c4a400 0000000000000036 GPR04: 00000000fffdffff c00000005596f1d0 0000000000000027 c0000018cfd07f90 GPR08: 0000000000000023 0000000000000001 0000000000000027 c0000018fe68ffe8 GPR12: 0000000000008000 c00000001e9d1880 c00000013a047200 0000000000000800 GPR16: c000000001d3c7d0 0000000000000240 0000000000000048 c000000010aacd18 GPR20: 0000000000000001 c000000010aacc18 c00000013a047c00 c000000139ec2400 GPR24: 0000000000000280 c000000139ec2520 c000000136c1b400 c000000001c93060 GPR28: c00000013a047c20 c000000001d3c6c0 c000000001c978a0 000000000000000d NIP [c0000000001caac8] build_sched_domains+0xd48/0x1720 LR [c0000000001caac4] build_sched_domains+0xd44/0x1720 Call Trace: [c00000005596f4c0] [c0000000001caac4] build_sched_domains+0xd44/0x1720 (unreliable) [c00000005596f670] [c0000000001cc5ec] partition_sched_domains_locked+0x3ac/0x4b0 [c00000005596f710] [c0000000002804e4] rebuild_sched_domains_locked+0x404/0x9e0 [c00000005596f810] [c000000000283e60] rebuild_sched_domains+0x40/0x70 [c00000005596f840] [c000000000284124] cpuset_hotplug_workfn+0x294/0xf10 [c00000005596fc60] [c000000000175040] process_one_work+0x290/0x590 [c00000005596fd00] [c0000000001753c8] worker_thread+0x88/0x620 [c00000005596fda0] [c000000000181704] kthread+0x194/0x1a0 [c00000005596fe10] [c00000000000ccec] ret_from_kernel_thread+0x5c/0x70 Instruction dump: 485af049 60000000 2fa30800 409e0028 80fe0000 e89a00f8 e86100e8 38da0120 7f88e378 7ce53b78 4801fb91 60000000 <0fe00000> 39000000 38e00000 38c00000 Fix this by updating cpu_cpu_map aka cpumask_of_node() on every CPU online/offline. 
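In outline (a simplified sketch of the diff below, not a verbatim quote), the two hotplug paths now keep the node mask in sync:

	/* CPU online path: add_cpu_to_masks() re-adds the CPU to its node
	 * before rebuilding the sibling/core masks */
	map_cpu_to_node(cpu, cpu_to_node(cpu));	/* set in node_to_cpumask_map[] */
	cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));

	/* CPU offline path: remove_cpu_from_masks() drops it again */
	unmap_cpu_from_node(cpu);		/* clear from node_to_cpumask_map[] */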
Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100521.412639-5-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/topology.h | 13 +++++++++++++ arch/powerpc/kernel/smp.c | 3 +++ arch/powerpc/mm/numa.c | 7 ++----- 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index a4712ecad3e9..36fcafb1fd6d 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -66,6 +66,11 @@ static inline int early_cpu_to_node(int cpu) int of_drconf_to_nid_single(struct drmem_lmb *lmb); void update_numa_distance(struct device_node *node); +extern void map_cpu_to_node(int cpu, int node); +#ifdef CONFIG_HOTPLUG_CPU +extern void unmap_cpu_from_node(unsigned long cpu); +#endif /* CONFIG_HOTPLUG_CPU */ + #else static inline int early_cpu_to_node(int cpu) { return 0; } @@ -95,6 +100,14 @@ static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb) } static inline void update_numa_distance(struct device_node *node) {} + +#ifdef CONFIG_SMP +static inline void map_cpu_to_node(int cpu, int node) {} +#ifdef CONFIG_HOTPLUG_CPU +static inline void unmap_cpu_from_node(unsigned long cpu) {} +#endif /* CONFIG_HOTPLUG_CPU */ +#endif /* CONFIG_SMP */ + #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 548aa58d25f9..9cc7d3dbf439 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1442,6 +1442,8 @@ static void remove_cpu_from_masks(int cpu) struct cpumask *(*mask_fn)(int) = cpu_sibling_mask; int i; + unmap_cpu_from_node(cpu); + if (shared_caches) mask_fn = cpu_l2_cache_mask; @@ -1526,6 +1528,7 @@ static void add_cpu_to_masks(int cpu) * This CPU will not be in the online mask yet so we need to manually * add it to it's own thread sibling mask. */ + map_cpu_to_node(cpu, cpu_to_node(cpu)); cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); cpumask_set_cpu(cpu, cpu_core_mask(cpu)); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 52bc63ee6a6a..6f14c8fb6359 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -142,7 +142,7 @@ static void reset_numa_cpu_lookup_table(void) numa_cpu_lookup_table[cpu] = -1; } -static void map_cpu_to_node(int cpu, int node) +void map_cpu_to_node(int cpu, int node) { update_numa_cpu_lookup_table(cpu, node); @@ -153,7 +153,7 @@ static void map_cpu_to_node(int cpu, int node) } #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR) -static void unmap_cpu_from_node(unsigned long cpu) +void unmap_cpu_from_node(unsigned long cpu) { int node = numa_cpu_lookup_table[cpu]; @@ -800,9 +800,6 @@ static int ppc_numa_cpu_prepare(unsigned int cpu) static int ppc_numa_cpu_dead(unsigned int cpu) { -#ifdef CONFIG_HOTPLUG_CPU - unmap_cpu_from_node(cpu); -#endif return 0; } -- cgit From 0c634bafe3bbee7a36dca7f1277057e05bf14d91 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:19 -0300 Subject: powerpc/pseries/iommu: Replace hard-coded page shift Some functions assume IOMMU page size can only be 4K (pageshift == 12). Update them to accept any page size passed, so we can use 64K pages. In the process, some defines like TCE_SHIFT were made obsolete, and then removed. 
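The essence of the conversion, as a sketch (the full hunks follow in the diff below):

    /* before: page size hard-coded to 4K */
    rpn = __pa(uaddr) >> TCE_SHIFT;          /* TCE_SHIFT == 12 */
    uaddr += TCE_PAGE_SIZE;

    /* after: honour whatever page size the iommu_table was created with */
    const unsigned long tceshift = tbl->it_page_shift;
    rpn = __pa(uaddr) >> tceshift;
    *tcep = cpu_to_be64(proto_tce | rpn << tceshift);
    uaddr += IOMMU_PAGE_SIZE(tbl);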
Figures 3.4 and 3.5 of IODA3 Revision 3.0_prd1 (OpenPowerFoundation) show a 52-bit RPN and assume a 12-bit pageshift, so there should be no need to use TCE_RPN_MASK, which masks out any bit after 40 in the rpn. Its usage was removed from tce_build_pSeries(), tce_build_pSeriesLP(), and tce_buildmulti_pSeriesLP(). Most places had a tbl struct, so using tbl->it_page_shift was simple. tce_free_pSeriesLP() was a special case: since callers do not always have a tbl struct, adding a tceshift parameter seems the right thing to do. Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-2-leobras.c@gmail.com --- arch/powerpc/include/asm/tce.h | 8 ------- arch/powerpc/platforms/pseries/iommu.c | 39 ++++++++++++++++++-------------- 2 files changed, 23 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/tce.h b/arch/powerpc/include/asm/tce.h index db5fc2f2262d..0c34d2756d92 100644 --- a/arch/powerpc/include/asm/tce.h +++ b/arch/powerpc/include/asm/tce.h @@ -19,15 +19,7 @@ #define TCE_VB 0 #define TCE_PCI 1 -/* TCE page size is 4096 bytes (1 << 12) */ - -#define TCE_SHIFT 12 -#define TCE_PAGE_SIZE (1 << TCE_SHIFT) - #define TCE_ENTRY_SIZE 8 /* each TCE is 64 bits */ - -#define TCE_RPN_MASK 0xfffffffffful /* 40-bit RPN (4K pages) */ -#define TCE_RPN_SHIFT 12 #define TCE_VALID 0x800 /* TCE valid */ #define TCE_ALLIO 0x400 /* TCE valid for all lpars */ #define TCE_PCI_WRITE 0x2 /* write from PCI allowed */ diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 0c55b991f665..b1b8d12bab39 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -107,6 +107,8 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, u64 proto_tce; __be64 *tcep; u64 rpn; + const unsigned long tceshift = tbl->it_page_shift; + const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl); proto_tce = TCE_PCI_READ; // Read allowed @@ -117,10 +119,10 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, while (npages--) { /* can't move this out since we might cross MEMBLOCK boundary */ - rpn = __pa(uaddr) >> TCE_SHIFT; - *tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); + rpn = __pa(uaddr) >> tceshift; + *tcep = cpu_to_be64(proto_tce | rpn << tceshift); - uaddr += TCE_PAGE_SIZE; + uaddr += pagesize; tcep++; } return 0; @@ -146,7 +148,7 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) return be64_to_cpu(*tcep); } -static void tce_free_pSeriesLP(unsigned long liobn, long, long); +static void tce_free_pSeriesLP(unsigned long liobn, long, long, long); static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, @@ -166,12 +168,12 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, proto_tce |= TCE_PCI_WRITE; while (npages--) { - tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift; + tce = proto_tce | rpn << tceshift; rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce); if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; - tce_free_pSeriesLP(liobn, tcenum_start, + tce_free_pSeriesLP(liobn, tcenum_start, tceshift, (npages_start - (npages + 1))); break; } @@ -205,10 +207,11 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long tcenum_start = tcenum, npages_start =
npages; int ret = 0; unsigned long flags; + const unsigned long tceshift = tbl->it_page_shift; if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) { return tce_build_pSeriesLP(tbl->it_index, tcenum, - tbl->it_page_shift, npages, uaddr, + tceshift, npages, uaddr, direction, attrs); } @@ -225,13 +228,13 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, if (!tcep) { local_irq_restore(flags); return tce_build_pSeriesLP(tbl->it_index, tcenum, - tbl->it_page_shift, + tceshift, npages, uaddr, direction, attrs); } __this_cpu_write(tce_page, tcep); } - rpn = __pa(uaddr) >> TCE_SHIFT; + rpn = __pa(uaddr) >> tceshift; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; @@ -245,12 +248,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE); for (l = 0; l < limit; l++) { - tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); + tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift); rpn++; } rc = plpar_tce_put_indirect((u64)tbl->it_index, - (u64)tcenum << 12, + (u64)tcenum << tceshift, (u64)__pa(tcep), limit); @@ -277,12 +280,13 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return ret; } -static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages) +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, + long npages) { u64 rc; while (npages--) { - rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0); + rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0); if (rc && printk_ratelimit()) { printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); @@ -301,9 +305,11 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n u64 rc; if (!firmware_has_feature(FW_FEATURE_STUFF_TCE)) - return tce_free_pSeriesLP(tbl->it_index, tcenum, npages); + return tce_free_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, npages); - rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); + rc = plpar_tce_stuff((u64)tbl->it_index, + (u64)tcenum << tbl->it_page_shift, 0, npages); if (rc && printk_ratelimit()) { printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); @@ -319,7 +325,8 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) u64 rc; unsigned long tce_ret; - rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret); + rc = plpar_tce_get((u64)tbl->it_index, + (u64)tcenum << tbl->it_page_shift, &tce_ret); if (rc && printk_ratelimit()) { printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc); -- cgit From 3c33066a21903076722a2881556a92aa3cd7d359 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:20 -0300 Subject: powerpc/kernel/iommu: Add new iommu_table_in_use() helper Having a function to check if the iommu table has any allocation helps deciding if a tbl can be reset for using a new DMA window. It should be enough to replace all instances of !bitmap_empty(tbl...). iommu_table_in_use() skips reserved memory, so we don't need to worry about releasing it before testing. This causes iommu_table_release_pages() to become unnecessary, given it is only used to remove reserved memory for testing. Also, only allow storing reserved memory values in tbl if they are valid in the table, so there is no need to check it in the new helper. 
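The check reduces to two find_next_bit() scans that skip the reserved range, roughly (condensed sketch of the helper added below):

    /* any TCE allocated outside [it_reserved_start, it_reserved_end)? */
    start = (tbl->it_offset == 0) ? 1 : 0;   /* ignore reserved bit 0 */
    end = tbl->it_reserved_start - tbl->it_offset;
    if (find_next_bit(tbl->it_map, end, start) != end)
            return true;

    start = tbl->it_reserved_end - tbl->it_offset;
    end = tbl->it_size;
    return find_next_bit(tbl->it_map, end, start) != end;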
Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-3-leobras.c@gmail.com --- arch/powerpc/include/asm/iommu.h | 1 + arch/powerpc/kernel/iommu.c | 61 ++++++++++++++++++++-------------------- 2 files changed, 32 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index deef7c94d7b6..bf3b84128525 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -154,6 +154,7 @@ extern int iommu_tce_table_put(struct iommu_table *tbl); */ extern struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, unsigned long res_start, unsigned long res_end); +bool iommu_table_in_use(struct iommu_table *tbl); #define IOMMU_TABLE_GROUP_MAX_TABLES 2 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 2af89a5e379f..ed98ad63633e 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -690,32 +690,24 @@ static void iommu_table_reserve_pages(struct iommu_table *tbl, if (tbl->it_offset == 0) set_bit(0, tbl->it_map); - tbl->it_reserved_start = res_start; - tbl->it_reserved_end = res_end; - - /* Check if res_start..res_end isn't empty and overlaps the table */ - if (res_start && res_end && - (tbl->it_offset + tbl->it_size < res_start || - res_end < tbl->it_offset)) - return; + if (res_start < tbl->it_offset) + res_start = tbl->it_offset; - for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i) - set_bit(i - tbl->it_offset, tbl->it_map); -} + if (res_end > (tbl->it_offset + tbl->it_size)) + res_end = tbl->it_offset + tbl->it_size; -static void iommu_table_release_pages(struct iommu_table *tbl) -{ - int i; + /* Check if res_start..res_end is a valid range in the table */ + if (res_start >= res_end) { + tbl->it_reserved_start = tbl->it_offset; + tbl->it_reserved_end = tbl->it_offset; + return; + } - /* - * In case we have reserved the first bit, we should not emit - * the warning below. 
- */ - if (tbl->it_offset == 0) - clear_bit(0, tbl->it_map); + tbl->it_reserved_start = res_start; + tbl->it_reserved_end = res_end; for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i) - clear_bit(i - tbl->it_offset, tbl->it_map); + set_bit(i - tbl->it_offset, tbl->it_map); } /* @@ -779,6 +771,22 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, return tbl; } +bool iommu_table_in_use(struct iommu_table *tbl) +{ + unsigned long start = 0, end; + + /* ignore reserved bit0 */ + if (tbl->it_offset == 0) + start = 1; + end = tbl->it_reserved_start - tbl->it_offset; + if (find_next_bit(tbl->it_map, end, start) != end) + return true; + + start = tbl->it_reserved_end - tbl->it_offset; + end = tbl->it_size; + return find_next_bit(tbl->it_map, end, start) != end; +} + static void iommu_table_free(struct kref *kref) { struct iommu_table *tbl; @@ -795,10 +803,8 @@ static void iommu_table_free(struct kref *kref) iommu_debugfs_del(tbl); - iommu_table_release_pages(tbl); - /* verify that table contains no entries */ - if (!bitmap_empty(tbl->it_map, tbl->it_size)) + if (iommu_table_in_use(tbl)) pr_warn("%s: Unexpected TCEs\n", __func__); /* free bitmap */ @@ -1099,14 +1105,9 @@ int iommu_take_ownership(struct iommu_table *tbl) for (i = 0; i < tbl->nr_pools; i++) spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - iommu_table_release_pages(tbl); - - if (!bitmap_empty(tbl->it_map, tbl->it_size)) { + if (iommu_table_in_use(tbl)) { pr_err("iommu_tce: it_map is not empty"); ret = -EBUSY; - /* Undo iommu_table_release_pages, i.e. restore bit#0, etc */ - iommu_table_reserve_pages(tbl, tbl->it_reserved_start, - tbl->it_reserved_end); } else { memset(tbl->it_map, 0xff, sz); } -- cgit From 4ff8677a0b192a58d998d1d34fc5168203041a24 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:21 -0300 Subject: powerpc/pseries/iommu: Add iommu_pseries_alloc_table() helper Creates a helper to allow allocating a new iommu_table without the need to reallocate the iommu_group. This will be helpful for replacing the iommu_table for the new DMA window, after we remove the old one with iommu_tce_table_put(). 
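A usage sketch (hypothetical caller, not part of this patch; a later patch in this series uses the helper in essentially this way):

    /* allocate a fresh iommu_table while keeping the existing group */
    struct iommu_table *newtbl = iommu_pseries_alloc_table(pci->phb->node);

    if (newtbl) {
            /* ... fill in the new DMA window parameters ... */
            pci->table_group->tables[1] = newtbl;  /* no new iommu_group needed */
    }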
Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-4-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index b1b8d12bab39..33d82865d6e6 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -53,28 +53,31 @@ enum { DDW_EXT_QUERY_OUT_SIZE = 2 }; -static struct iommu_table_group *iommu_pseries_alloc_group(int node) +static struct iommu_table *iommu_pseries_alloc_table(int node) { - struct iommu_table_group *table_group; struct iommu_table *tbl; - table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL, - node); - if (!table_group) - return NULL; - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); if (!tbl) - goto free_group; + return NULL; INIT_LIST_HEAD_RCU(&tbl->it_group_list); kref_init(&tbl->it_kref); + return tbl; +} - table_group->tables[0] = tbl; +static struct iommu_table_group *iommu_pseries_alloc_group(int node) +{ + struct iommu_table_group *table_group; + + table_group = kzalloc_node(sizeof(*table_group), GFP_KERNEL, node); + if (!table_group) + return NULL; - return table_group; + table_group->tables[0] = iommu_pseries_alloc_table(node); + if (table_group->tables[0]) + return table_group; -free_group: kfree(table_group); return NULL; } -- cgit From 92a23219299cedde52e3298788484f4875d5ce0f Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:22 -0300 Subject: powerpc/pseries/iommu: Add ddw_list_new_entry() helper There are two functions creating direct_window_list entries in a similar way, so create a ddw_list_new_entry() helper to avoid duplication and simplify those functions.
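Each call site then shrinks to roughly this (sketch; the real hunks are in the diff below):

    window = ddw_list_new_entry(pdn, direct64);
    if (!window)
            break;  /* or take the error path, depending on the caller */

    spin_lock(&direct_window_list_lock);
    list_add(&window->list, &direct_window_list);
    spin_unlock(&direct_window_list_lock);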
Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-5-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 33d82865d6e6..712d1667144a 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -874,6 +874,21 @@ static u64 find_existing_ddw(struct device_node *pdn, int *window_shift) return dma_addr; } +static struct direct_window *ddw_list_new_entry(struct device_node *pdn, + const struct dynamic_dma_window_prop *dma64) +{ + struct direct_window *window; + + window = kzalloc(sizeof(*window), GFP_KERNEL); + if (!window) + return NULL; + + window->device = pdn; + window->prop = dma64; + + return window; +} + static int find_existing_ddw_windows(void) { int len; @@ -886,18 +901,15 @@ static int find_existing_ddw_windows(void) for_each_node_with_property(pdn, DIRECT64_PROPNAME) { direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); - if (!direct64) - continue; - - window = kzalloc(sizeof(*window), GFP_KERNEL); - if (!window || len < sizeof(struct dynamic_dma_window_prop)) { - kfree(window); + if (!direct64 || len < sizeof(*direct64)) { remove_ddw(pdn, true); continue; } - window->device = pdn; - window->prop = direct64; + window = ddw_list_new_entry(pdn, direct64); + if (!window) + break; + spin_lock(&direct_window_list_lock); list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); @@ -1307,7 +1319,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n", create.liobn, dn); - window = kzalloc(sizeof(*window), GFP_KERNEL); + window = ddw_list_new_entry(pdn, ddwprop); if (!window) goto out_clear_window; @@ -1326,8 +1338,6 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) goto out_free_window; } - window->device = pdn; - window->prop = ddwprop; spin_lock(&direct_window_list_lock); list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); -- cgit From 2ca73c54ce24489518a56d816331b774044c2445 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:23 -0300 Subject: powerpc/pseries/iommu: Allow DDW windows starting at 0x00 enable_ddw() currently returns the address of the DMA window, which is considered invalid if has the value 0x00. Also, it only considers valid an address returned from find_existing_ddw if it's not 0x00. Changing this behavior makes sense, given the users of enable_ddw() only need to know if direct mapping is possible. It can also allow a DMA window starting at 0x00 to be used. This will be helpful for using a DDW with indirect mapping, as the window address will be different than 0x00, but it will not map the whole partition. 
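The API change in miniature (sketch):

    /* before: 0x00 doubles as "no window", so a window at 0 is unusable */
    dma_addr = enable_ddw(dev, pdn);
    if (dma_addr != 0)
            /* direct mapping is available at dma_addr */;

    /* after: success is an explicit bool, the offset lands in the device */
    if (enable_ddw(pdev, pdn))  /* fills dev->dev.archdata.dma_offset */
            return true;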
Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-6-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 712d1667144a..b34b473bbdc1 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -853,25 +853,26 @@ static void remove_ddw(struct device_node *np, bool remove_prop) np, ret); } -static u64 find_existing_ddw(struct device_node *pdn, int *window_shift) +static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift) { struct direct_window *window; const struct dynamic_dma_window_prop *direct64; - u64 dma_addr = 0; + bool found = false; spin_lock(&direct_window_list_lock); /* check if we already created a window and dupe that config if so */ list_for_each_entry(window, &direct_window_list, list) { if (window->device == pdn) { direct64 = window->prop; - dma_addr = be64_to_cpu(direct64->dma_base); + *dma_addr = be64_to_cpu(direct64->dma_base); *window_shift = be32_to_cpu(direct64->window_shift); + found = true; break; } } spin_unlock(&direct_window_list_lock); - return dma_addr; + return found; } static struct direct_window *ddw_list_new_entry(struct device_node *pdn, @@ -1161,20 +1162,20 @@ static int iommu_get_page_shift(u32 query_page_size) * pdn: the parent pe node with the ibm,dma_window property * Future: also check if we can remap the base window for our base page size * - * returns the dma offset for use by the direct mapped DMA code. + * returns true if can map all pages (direct mapping), false otherwise.. */ -static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) +static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) { int len = 0, ret; int max_ram_len = order_base_2(ddw_memory_hotplug_max()); struct ddw_query_response query; struct ddw_create_response create; int page_shift; - u64 dma_addr; struct device_node *dn; u32 ddw_avail[DDW_APPLICABLE_SIZE]; struct direct_window *window; struct property *win64; + bool ddw_enabled = false; struct dynamic_dma_window_prop *ddwprop; struct failed_ddw_pdn *fpdn; bool default_win_removed = false; @@ -1186,9 +1187,10 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) mutex_lock(&direct_window_init_mutex); - dma_addr = find_existing_ddw(pdn, &len); - if (dma_addr != 0) + if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) { + ddw_enabled = true; goto out_unlock; + } /* * If we already went through this for a previous function of @@ -1342,7 +1344,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); - dma_addr = be64_to_cpu(ddwprop->dma_base); + dev->dev.archdata.dma_offset = be64_to_cpu(ddwprop->dma_base); + ddw_enabled = true; goto out_unlock; out_free_window: @@ -1374,10 +1377,10 @@ out_unlock: * as RAM, then we failed to create a window to cover persistent * memory and need to set the DMA limit. 
*/ - if (pmem_present && dma_addr && (len == max_ram_len)) - dev->dev.bus_dma_limit = dma_addr + (1ULL << len); + if (pmem_present && ddw_enabled && (len == max_ram_len)) + dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len); - return dma_addr; + return ddw_enabled; } static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) @@ -1456,11 +1459,8 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) break; } - if (pdn && PCI_DN(pdn)) { - pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn); - if (pdev->dev.archdata.dma_offset) - return true; - } + if (pdn && PCI_DN(pdn)) + return enable_ddw(pdev, pdn); return false; } -- cgit From 7ed2ed2db2685a285cb09ab330dc4efea0b64022 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:24 -0300 Subject: powerpc/pseries/iommu: Add ddw_property_create() and refactor enable_ddw() Code used to create a ddw property that was previously scattered in enable_ddw() is now gathered in ddw_property_create(), which deals with allocation and filling the property, letting it ready for of_property_add(), which now occurs in sequence. This created an opportunity to reorganize the second part of enable_ddw(): Without this patch enable_ddw() does, in order: kzalloc() property & members, create_ddw(), fill ddwprop inside property, ddw_list_new_entry(), do tce_setrange_multi_pSeriesLP_walk in all memory, of_add_property(), and list_add(). With this patch enable_ddw() does, in order: create_ddw(), ddw_property_create(), of_add_property(), ddw_list_new_entry(), do tce_setrange_multi_pSeriesLP_walk in all memory, and list_add(). This change requires of_remove_property() in case anything fails after of_add_property(), but we get to do tce_setrange_multi_pSeriesLP_walk in all memory, which looks the most expensive operation, only if everything else succeeds. Also, the error path got remove_ddw() replaced by a new helper __remove_dma_window(), which only removes the new DDW with an rtas-call. For this, a new helper clean_dma_window() was needed to clean anything that could left if walk_system_ram_range() fails. Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-7-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 129 +++++++++++++++++++++------------ 1 file changed, 84 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index b34b473bbdc1..00392582fe10 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -795,17 +795,10 @@ static int __init disable_ddw_setup(char *str) early_param("disable_ddw", disable_ddw_setup); -static void remove_dma_window(struct device_node *np, u32 *ddw_avail, - struct property *win) +static void clean_dma_window(struct device_node *np, struct dynamic_dma_window_prop *dwp) { - struct dynamic_dma_window_prop *dwp; - u64 liobn; int ret; - dwp = win->value; - liobn = (u64)be32_to_cpu(dwp->liobn); - - /* clear the whole window, note the arg is in kernel pages */ ret = tce_clearrange_multi_pSeriesLP(0, 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); if (ret) @@ -814,18 +807,39 @@ static void remove_dma_window(struct device_node *np, u32 *ddw_avail, else pr_debug("%pOF successfully cleared tces in window.\n", np); +} + +/* + * Call only if DMA window is clean. 
+ */ +static void __remove_dma_window(struct device_node *np, u32 *ddw_avail, u64 liobn) +{ + int ret; ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn); if (ret) - pr_warn("%pOF: failed to remove direct window: rtas returned " + pr_warn("%pOF: failed to remove DMA window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); else - pr_debug("%pOF: successfully removed direct window: rtas returned " + pr_debug("%pOF: successfully removed DMA window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); } +static void remove_dma_window(struct device_node *np, u32 *ddw_avail, + struct property *win) +{ + struct dynamic_dma_window_prop *dwp; + u64 liobn; + + dwp = win->value; + liobn = (u64)be32_to_cpu(dwp->liobn); + + clean_dma_window(np, dwp); + __remove_dma_window(np, ddw_avail, liobn); +} + static void remove_ddw(struct device_node *np, bool remove_prop) { struct property *win; @@ -1153,6 +1167,35 @@ static int iommu_get_page_shift(u32 query_page_size) return 0; } +static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr, + u32 page_shift, u32 window_shift) +{ + struct dynamic_dma_window_prop *ddwprop; + struct property *win64; + + win64 = kzalloc(sizeof(*win64), GFP_KERNEL); + if (!win64) + return NULL; + + win64->name = kstrdup(propname, GFP_KERNEL); + ddwprop = kzalloc(sizeof(*ddwprop), GFP_KERNEL); + win64->value = ddwprop; + win64->length = sizeof(*ddwprop); + if (!win64->name || !win64->value) { + kfree(win64->name); + kfree(win64->value); + kfree(win64); + return NULL; + } + + ddwprop->liobn = cpu_to_be32(liobn); + ddwprop->dma_base = cpu_to_be64(dma_addr); + ddwprop->tce_shift = cpu_to_be32(page_shift); + ddwprop->window_shift = cpu_to_be32(window_shift); + + return win64; +} + /* * If the PE supports dynamic dma windows, and there is space for a table * that can map all pages in a linear offset, then setup such a table, @@ -1171,12 +1214,12 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct ddw_query_response query; struct ddw_create_response create; int page_shift; + u64 win_addr; struct device_node *dn; u32 ddw_avail[DDW_APPLICABLE_SIZE]; struct direct_window *window; struct property *win64; bool ddw_enabled = false; - struct dynamic_dma_window_prop *ddwprop; struct failed_ddw_pdn *fpdn; bool default_win_removed = false; bool pmem_present; @@ -1293,72 +1336,68 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) 1ULL << page_shift); goto out_failed; } - win64 = kzalloc(sizeof(struct property), GFP_KERNEL); - if (!win64) { - dev_info(&dev->dev, - "couldn't allocate property for 64bit dma window\n"); - goto out_failed; - } - win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL); - win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL); - win64->length = sizeof(*ddwprop); - if (!win64->name || !win64->value) { - dev_info(&dev->dev, - "couldn't allocate property name and value\n"); - goto out_free_prop; - } ret = create_ddw(dev, ddw_avail, &create, page_shift, len); if (ret != 0) - goto out_free_prop; - - ddwprop->liobn = cpu_to_be32(create.liobn); - ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) | - create.addr_lo); - ddwprop->tce_shift = cpu_to_be32(page_shift); - ddwprop->window_shift = cpu_to_be32(len); + goto out_failed; dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n", create.liobn, dn); - window = ddw_list_new_entry(pdn, ddwprop); + 
win_addr = ((u64)create.addr_hi << 32) | create.addr_lo; + win64 = ddw_property_create(DIRECT64_PROPNAME, create.liobn, win_addr, + page_shift, len); + if (!win64) { + dev_info(&dev->dev, + "couldn't allocate property, property name, or value\n"); + goto out_remove_win; + } + + ret = of_add_property(pdn, win64); + if (ret) { + dev_err(&dev->dev, "unable to add dma window property for %pOF: %d", + pdn, ret); + goto out_free_prop; + } + + window = ddw_list_new_entry(pdn, win64->value); if (!window) - goto out_clear_window; + goto out_del_prop; ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, win64->value, tce_setrange_multi_pSeriesLP_walk); if (ret) { dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n", dn, ret); - goto out_free_window; - } - ret = of_add_property(pdn, win64); - if (ret) { - dev_err(&dev->dev, "unable to add dma window property for %pOF: %d", - pdn, ret); - goto out_free_window; + /* Make sure to clean DDW if any TCE was set*/ + clean_dma_window(pdn, win64->value); + goto out_del_list; } spin_lock(&direct_window_list_lock); list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); - dev->dev.archdata.dma_offset = be64_to_cpu(ddwprop->dma_base); + dev->dev.archdata.dma_offset = win_addr; ddw_enabled = true; goto out_unlock; -out_free_window: +out_del_list: kfree(window); -out_clear_window: - remove_ddw(pdn, true); +out_del_prop: + of_remove_property(pdn, win64); out_free_prop: kfree(win64->name); kfree(win64->value); kfree(win64); +out_remove_win: + /* DDW is clean, so it's ok to call this directly. */ + __remove_dma_window(pdn, ddw_avail, create.liobn); + out_failed: if (default_win_removed) reset_dma_window(dev, pdn); -- cgit From fc8cba8f989fb98e496b33a78476861e246c42a0 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:25 -0300 Subject: powerpc/pseries/iommu: Reorganize iommu_table_setparms*() with new helper Add a new helper iommu_table_setparms_common(), and use it in iommu_table_setparms() and iommu_table_setparms_lpar() to avoid duplicated code. Also, setting tbl->it_ops was happening outside iommu_table_setparms*(), so move it into the new helper. Since the iommu_table_ops need to be declared before they are used, declare iommu_table_lpar_multi_ops and iommu_table_pseries_ops before their respective iommu_table_setparms*().
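Both window setups then collapse into calls like these (sketch; the full versions follow in the diff):

    /* bare metal: TCE table at a kernel virtual address, 4K IOMMU pages */
    iommu_table_setparms_common(tbl, phb->bus->number, 0,
                                phb->dma_window_base_cur, phb->dma_window_size,
                                IOMMU_PAGE_SHIFT_4K, __va(*basep),
                                &iommu_table_pseries_ops);

    /* LPAR: hypervisor-managed table, so no base pointer */
    iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size,
                                IOMMU_PAGE_SHIFT_4K, NULL,
                                &iommu_table_lpar_multi_ops);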
Signed-off-by: Leonardo Bras Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-8-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 72 ++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 00392582fe10..a47f59a8f107 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -501,6 +501,24 @@ static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn, return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg); } +static void iommu_table_setparms_common(struct iommu_table *tbl, unsigned long busno, + unsigned long liobn, unsigned long win_addr, + unsigned long window_size, unsigned long page_shift, + void *base, struct iommu_table_ops *table_ops) +{ + tbl->it_busno = busno; + tbl->it_index = liobn; + tbl->it_offset = win_addr >> page_shift; + tbl->it_size = window_size >> page_shift; + tbl->it_page_shift = page_shift; + tbl->it_base = (unsigned long)base; + tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; + tbl->it_ops = table_ops; +} + +struct iommu_table_ops iommu_table_pseries_ops; + static void iommu_table_setparms(struct pci_controller *phb, struct device_node *dn, struct iommu_table *tbl) @@ -509,8 +527,13 @@ static void iommu_table_setparms(struct pci_controller *phb, const unsigned long *basep; const u32 *sizep; - node = phb->dn; + /* Test if we are going over 2GB of DMA space */ + if (phb->dma_window_base_cur + phb->dma_window_size > SZ_2G) { + udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + } + node = phb->dn; basep = of_get_property(node, "linux,tce-base", NULL); sizep = of_get_property(node, "linux,tce-size", NULL); if (basep == NULL || sizep == NULL) { @@ -519,33 +542,18 @@ static void iommu_table_setparms(struct pci_controller *phb, return; } - tbl->it_base = (unsigned long)__va(*basep); + iommu_table_setparms_common(tbl, phb->bus->number, 0, phb->dma_window_base_cur, + phb->dma_window_size, IOMMU_PAGE_SHIFT_4K, + __va(*basep), &iommu_table_pseries_ops); if (!is_kdump_kernel()) memset((void *)tbl->it_base, 0, *sizep); - tbl->it_busno = phb->bus->number; - tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; - - /* Units of tce entries */ - tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift; - - /* Test if we are going over 2GB of DMA space */ - if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { - udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); - panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); - } - phb->dma_window_base_cur += phb->dma_window_size; - - /* Set the tce table size - measured in entries */ - tbl->it_size = phb->dma_window_size >> tbl->it_page_shift; - - tbl->it_index = 0; - tbl->it_blocksize = 16; - tbl->it_type = TCE_PCI; } +struct iommu_table_ops iommu_table_lpar_multi_ops; + /* * iommu_table_setparms_lpar * @@ -557,17 +565,13 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, struct iommu_table_group *table_group, const __be32 *dma_window) { - unsigned long offset, size; + unsigned long offset, size, liobn; - of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size); + of_parse_dma_window(dn, dma_window, &liobn, &offset, &size); + + iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, 
IOMMU_PAGE_SHIFT_4K, NULL, + &iommu_table_lpar_multi_ops); - tbl->it_busno = phb->bus->number; - tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; - tbl->it_base = 0; - tbl->it_blocksize = 16; - tbl->it_type = TCE_PCI; - tbl->it_offset = offset >> tbl->it_page_shift; - tbl->it_size = size >> tbl->it_page_shift; table_group->tce32_start = offset; table_group->tce32_size = size; @@ -647,7 +651,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) tbl = pci->table_group->tables[0]; iommu_table_setparms(pci->phb, dn, tbl); - tbl->it_ops = &iommu_table_pseries_ops; + if (!iommu_init_table(tbl, pci->phb->node, 0, 0)) panic("Failed to initialize iommu table"); @@ -730,7 +734,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) tbl = ppci->table_group->tables[0]; iommu_table_setparms_lpar(ppci->phb, pdn, tbl, ppci->table_group, dma_window); - tbl->it_ops = &iommu_table_lpar_multi_ops; + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) panic("Failed to initialize iommu table"); iommu_register_group(ppci->table_group, @@ -760,7 +764,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node); tbl = PCI_DN(dn)->table_group->tables[0]; iommu_table_setparms(phb, dn, tbl); - tbl->it_ops = &iommu_table_pseries_ops; + if (!iommu_init_table(tbl, phb->node, 0, 0)) panic("Failed to initialize iommu table"); @@ -1461,7 +1465,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) tbl = pci->table_group->tables[0]; iommu_table_setparms_lpar(pci->phb, pdn, tbl, pci->table_group, dma_window); - tbl->it_ops = &iommu_table_lpar_multi_ops; + iommu_init_table(tbl, pci->phb->node, 0, 0); iommu_register_group(pci->table_group, pci_domain_nr(pci->phb->bus), 0); -- cgit From a5fd95120c653962a9e75e260a35436b96d2c991 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:26 -0300 Subject: powerpc/pseries/iommu: Update remove_dma_window() to accept property name Update remove_dma_window() so it can be used to remove DDW with a given property name. This enables the creation of new property names for DDW, so we can have different usage for it, like indirect mapping. Also, add return values to it so we can check if the property was found while removing the active DDW. This allows skipping the remaining property names while reducing the impact of multiple property names. 
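With a return value, a caller can try property names in order and stop at the first one that exists, e.g. (sketch; a later patch in this series uses exactly this pattern):

    /* remove_ddw() returns -EINVAL when win_name is not present */
    if (remove_ddw(np, false, DIRECT64_PROPNAME))
            remove_ddw(np, false, DMA64_PROPNAME);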
Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-9-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index a47f59a8f107..901f290999d0 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -844,31 +844,33 @@ static void remove_dma_window(struct device_node *np, u32 *ddw_avail, __remove_dma_window(np, ddw_avail, liobn); } -static void remove_ddw(struct device_node *np, bool remove_prop) +static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_name) { struct property *win; u32 ddw_avail[DDW_APPLICABLE_SIZE]; int ret = 0; + win = of_find_property(np, win_name, NULL); + if (!win) + return -EINVAL; + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", &ddw_avail[0], DDW_APPLICABLE_SIZE); if (ret) - return; + return 0; - win = of_find_property(np, DIRECT64_PROPNAME, NULL); - if (!win) - return; if (win->length >= sizeof(struct dynamic_dma_window_prop)) remove_dma_window(np, ddw_avail, win); if (!remove_prop) - return; + return 0; ret = of_remove_property(np, win); if (ret) pr_warn("%pOF: failed to remove direct window property: %d\n", np, ret); + return 0; } static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift) @@ -921,7 +923,7 @@ static int find_existing_ddw_windows(void) for_each_node_with_property(pdn, DIRECT64_PROPNAME) { direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); if (!direct64 || len < sizeof(*direct64)) { - remove_ddw(pdn, true); + remove_ddw(pdn, true, DIRECT64_PROPNAME); continue; } @@ -1565,7 +1567,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti * we have to remove the property when releasing * the device node. */ - remove_ddw(np, false); + remove_ddw(np, false, DIRECT64_PROPNAME); if (pci && pci->table_group) iommu_pseries_free_group(pci->table_group, np->full_name); -- cgit From 8599395d34f2dd7b77bef42da1d99798e7a3d58f Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:27 -0300 Subject: powerpc/pseries/iommu: Find existing DDW with given property name At the moment pseries stores information about the created directly-mapped DDW window in DIRECT64_PROPNAME. With the objective of implementing indirect DMA mapping with DDW, it's necessary to have another property name to make sure kexec'ing into older kernels does not break, as it would if we reuse DIRECT64_PROPNAME. In order to have this, find_existing_ddw_windows() needs to be able to look for different property names. Extract find_existing_ddw_windows() into find_existing_ddw_windows_named() and call it with the current property name.
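The remaining wrapper then just enumerates the known property names (sketch; the second name is only introduced by a later patch in this series):

    find_existing_ddw_windows_named(DIRECT64_PROPNAME);
    find_existing_ddw_windows_named(DMA64_PROPNAME);  /* added later */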
Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-10-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 25 +++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 901f290999d0..e11c00b2dc1e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -910,24 +910,21 @@ static struct direct_window *ddw_list_new_entry(struct device_node *pdn, return window; } -static int find_existing_ddw_windows(void) +static void find_existing_ddw_windows_named(const char *name) { int len; struct device_node *pdn; struct direct_window *window; - const struct dynamic_dma_window_prop *direct64; - - if (!firmware_has_feature(FW_FEATURE_LPAR)) - return 0; + const struct dynamic_dma_window_prop *dma64; - for_each_node_with_property(pdn, DIRECT64_PROPNAME) { - direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); - if (!direct64 || len < sizeof(*direct64)) { - remove_ddw(pdn, true, DIRECT64_PROPNAME); + for_each_node_with_property(pdn, name) { + dma64 = of_get_property(pdn, name, &len); + if (!dma64 || len < sizeof(*dma64)) { + remove_ddw(pdn, true, name); continue; } - window = ddw_list_new_entry(pdn, direct64); + window = ddw_list_new_entry(pdn, dma64); + if (!window) break; @@ -935,6 +932,14 @@ static int find_existing_ddw_windows(void) list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); } +} + +static int find_existing_ddw_windows(void) +{ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + find_existing_ddw_windows_named(DIRECT64_PROPNAME); return 0; } -- cgit From 381ceda88c4c4c8345cad1cffa6328892f15dca6 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:28 -0300 Subject: powerpc/pseries/iommu: Make use of DDW for indirect mapping So far it's assumed possible to map the guest RAM 1:1 to the bus, which works with a small number of devices. SRIOV changes it as the user can configure hundreds of VFs, and since phyp preallocates TCEs and does not allow IOMMU pages bigger than 64K, it has to limit the number of TCEs per PE to limit waste of physical pages. As of today, if the assumed direct mapping is not possible, DDW creation is skipped and the default DMA window "ibm,dma-window" is used instead. By using DDW, indirect mapping can get more TCEs than available for the default DMA window, and also get access to using much larger pagesizes (16MB as implemented in qemu vs 4k from default DMA window), causing a significant increase in the maximum amount of memory that can be IOMMU mapped at the same time. Indirect mapping will only be used if direct mapping is not a possibility. For indirect mapping, it's necessary to re-create the iommu_table with the new DMA window parameters, so iommu_alloc() can use it. Removing the default DMA window for using DDW with indirect mapping is only allowed if there is no current IOMMU memory allocated in the iommu_table. enable_ddw() is aborted otherwise. Even though there won't be both direct and indirect mappings at the same time, we can't reuse the DIRECT64_PROPNAME property name, or else an older kexec()ed kernel can assume direct mapping, and skip iommu_alloc(), causing undesirable behavior.
So a new property name DMA64_PROPNAME "linux,dma64-ddr-window-info" was created to represent a DDW that does not allow direct mapping. Signed-off-by: Leonardo Bras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-11-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 89 ++++++++++++++++++++++++++++------ 1 file changed, 74 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index e11c00b2dc1e..0eccc29f5573 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -375,6 +375,7 @@ static DEFINE_SPINLOCK(direct_window_list_lock); /* protects initializing window twice for same device */ static DEFINE_MUTEX(direct_window_init_mutex); #define DIRECT64_PROPNAME "linux,direct64-ddr-window-info" +#define DMA64_PROPNAME "linux,dma64-ddr-window-info" static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn, unsigned long num_pfn, const void *arg) @@ -940,6 +941,7 @@ static int find_existing_ddw_windows(void) return 0; find_existing_ddw_windows_named(DIRECT64_PROPNAME); + find_existing_ddw_windows_named(DMA64_PROPNAME); return 0; } @@ -1226,14 +1228,17 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct ddw_create_response create; int page_shift; u64 win_addr; + const char *win_name; struct device_node *dn; u32 ddw_avail[DDW_APPLICABLE_SIZE]; struct direct_window *window; struct property *win64; bool ddw_enabled = false; struct failed_ddw_pdn *fpdn; - bool default_win_removed = false; + bool default_win_removed = false, direct_mapping = false; bool pmem_present; + struct pci_dn *pci = PCI_DN(pdn); + struct iommu_table *tbl = pci->table_group->tables[0]; dn = of_find_node_by_type(NULL, "ibm,pmemory"); pmem_present = dn != NULL; @@ -1242,6 +1247,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) mutex_lock(&direct_window_init_mutex); if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) { + direct_mapping = (len >= max_ram_len); ddw_enabled = true; goto out_unlock; } @@ -1322,8 +1328,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) query.page_size); goto out_failed; } - /* verify the window * number of ptes will map the partition */ - /* check largest block * page size > max memory hotplug addr */ + + /* * The "ibm,pmemory" can appear anywhere in the address space. 
* Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS @@ -1339,13 +1345,25 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) dev_info(&dev->dev, "Skipping ibm,pmemory"); } + /* check if the available block * number of ptes will map everything */ if (query.largest_available_block < (1ULL << (len - page_shift))) { dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu %llu-sized pages\n", 1ULL << len, query.largest_available_block, 1ULL << page_shift); - goto out_failed; + + /* DDW + IOMMU on single window may fail if there is any allocation */ + if (default_win_removed && iommu_table_in_use(tbl)) { + dev_dbg(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); + goto out_failed; + } + + len = order_base_2(query.largest_available_block << page_shift); + win_name = DMA64_PROPNAME; + } else { + direct_mapping = true; + win_name = DIRECT64_PROPNAME; } ret = create_ddw(dev, ddw_avail, &create, page_shift, len); @@ -1356,8 +1374,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) create.liobn, dn); win_addr = ((u64)create.addr_hi << 32) | create.addr_lo; - win64 = ddw_property_create(DIRECT64_PROPNAME, create.liobn, win_addr, - page_shift, len); + win64 = ddw_property_create(win_name, create.liobn, win_addr, page_shift, len); + if (!win64) { dev_info(&dev->dev, "couldn't allocate property, property name, or value\n"); @@ -1375,15 +1393,54 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (!window) goto out_del_prop; - ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, - win64->value, tce_setrange_multi_pSeriesLP_walk); - if (ret) { - dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n", - dn, ret); + if (direct_mapping) { + /* DDW maps the whole partition, so enable direct DMA mapping */ + ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, + win64->value, tce_setrange_multi_pSeriesLP_walk); + if (ret) { + dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n", + dn, ret); /* Make sure to clean DDW if any TCE was set*/ clean_dma_window(pdn, win64->value); - goto out_del_list; + goto out_del_list; + } + } else { + struct iommu_table *newtbl; + int i; + + for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { + const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; + + /* Look for MMIO32 */ + if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) + break; + } + + if (i == ARRAY_SIZE(pci->phb->mem_resources)) + goto out_del_list; + + /* New table for using DDW instead of the default DMA window */ + newtbl = iommu_pseries_alloc_table(pci->phb->node); + if (!newtbl) { + dev_dbg(&dev->dev, "couldn't create new IOMMU table\n"); + goto out_del_list; + } + + iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr, + 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); + iommu_init_table(newtbl, pci->phb->node, pci->phb->mem_resources[i].start, + pci->phb->mem_resources[i].end); + + pci->table_group->tables[1] = newtbl; + + /* Keep default DMA window stuct if removed */ + if (default_win_removed) { + tbl->it_size = 0; + kfree(tbl->it_map); + } + + set_iommu_table_base(&dev->dev, newtbl); } spin_lock(&direct_window_list_lock); @@ -1427,10 +1484,10 @@ out_unlock: * as RAM, then we failed to create a window to cover persistent * memory and need to set the DMA limit. 
*/ - if (pmem_present && ddw_enabled && (len == max_ram_len)) + if (pmem_present && ddw_enabled && direct_mapping && len == max_ram_len) dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len); - return ddw_enabled; + return ddw_enabled && direct_mapping; } static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) @@ -1572,7 +1629,9 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti * we have to remove the property when releasing * the device node. */ - remove_ddw(np, false, DIRECT64_PROPNAME); + if (remove_ddw(np, false, DIRECT64_PROPNAME)) + remove_ddw(np, false, DMA64_PROPNAME); + if (pci && pci->table_group) iommu_pseries_free_group(pci->table_group, np->full_name); -- cgit From 57dbbe590f152e5e8a3ff8bf5ba163df34eeae0b Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:29 -0300 Subject: powerpc/pseries/iommu: Rename "direct window" to "dma window" A previous change introduced the usage of DDW as a bigger indirect DMA mapping when the DDW available size does not map the whole partition. As most of the code that manipulates direct mappings was reused for indirect mappings, it's necessary to rename all names and debug/info messages to reflect that it can be used for both kinds of mapping. This should cause no behavioural change, just adjust naming. Signed-off-by: Leonardo Bras Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-12-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 87 ++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 0eccc29f5573..dab5c56ffd0e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -349,7 +349,7 @@ struct dynamic_dma_window_prop { __be32 window_shift; /* ilog2(tce_window_size) */ }; -struct direct_window { +struct dma_win { struct device_node *device; const struct dynamic_dma_window_prop *prop; struct list_head list; @@ -369,11 +369,11 @@ struct ddw_create_response { u32 addr_lo; }; -static LIST_HEAD(direct_window_list); +static LIST_HEAD(dma_win_list); /* prevents races between memory on/offline and window creation */ -static DEFINE_SPINLOCK(direct_window_list_lock); +static DEFINE_SPINLOCK(dma_win_list_lock); /* protects initializing window twice for same device */ -static DEFINE_MUTEX(direct_window_init_mutex); +static DEFINE_MUTEX(dma_win_init_mutex); #define DIRECT64_PROPNAME "linux,direct64-ddr-window-info" #define DMA64_PROPNAME "linux,dma64-ddr-window-info" @@ -713,7 +713,10 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", dn); - /* Find nearest ibm,dma-window, walking up the device tree */ + /* + * Find nearest ibm,dma-window (default DMA window), walking up the + * device tree + */ for (pdn = dn; pdn != NULL; pdn = pdn->parent) { dma_window = of_get_property(pdn, "ibm,dma-window", NULL); if (dma_window != NULL) @@ -869,37 +872,37 @@ static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_ ret = of_remove_property(np, win); if (ret) - pr_warn("%pOF: failed to remove direct window property: %d\n", + pr_warn("%pOF: failed to remove DMA window property: %d\n", np, ret); return 0; } static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift) { - struct direct_window *window; - const 
struct dynamic_dma_window_prop *direct64; + struct dma_win *window; + const struct dynamic_dma_window_prop *dma64; bool found = false; - spin_lock(&direct_window_list_lock); + spin_lock(&dma_win_list_lock); /* check if we already created a window and dupe that config if so */ - list_for_each_entry(window, &direct_window_list, list) { + list_for_each_entry(window, &dma_win_list, list) { if (window->device == pdn) { - direct64 = window->prop; - *dma_addr = be64_to_cpu(direct64->dma_base); - *window_shift = be32_to_cpu(direct64->window_shift); + dma64 = window->prop; + *dma_addr = be64_to_cpu(dma64->dma_base); + *window_shift = be32_to_cpu(dma64->window_shift); found = true; break; } } - spin_unlock(&direct_window_list_lock); + spin_unlock(&dma_win_list_lock); return found; } -static struct direct_window *ddw_list_new_entry(struct device_node *pdn, - const struct dynamic_dma_window_prop *dma64) +static struct dma_win *ddw_list_new_entry(struct device_node *pdn, + const struct dynamic_dma_window_prop *dma64) { - struct direct_window *window; + struct dma_win *window; window = kzalloc(sizeof(*window), GFP_KERNEL); if (!window) @@ -915,7 +918,7 @@ static void find_existing_ddw_windows_named(const char *name) { int len; struct device_node *pdn; - struct direct_window *window; + struct dma_win *window; const struct dynamic_dma_window_prop *dma64; for_each_node_with_property(pdn, name) { @@ -929,9 +932,9 @@ static void find_existing_ddw_windows_named(const char *name) if (!window) break; - spin_lock(&direct_window_list_lock); - list_add(&window->list, &direct_window_list); - spin_unlock(&direct_window_list_lock); + spin_lock(&dma_win_list_lock); + list_add(&window->list, &dma_win_list); + spin_unlock(&dma_win_list_lock); } } @@ -1231,7 +1234,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) const char *win_name; struct device_node *dn; u32 ddw_avail[DDW_APPLICABLE_SIZE]; - struct direct_window *window; + struct dma_win *window; struct property *win64; bool ddw_enabled = false; struct failed_ddw_pdn *fpdn; @@ -1244,7 +1247,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) pmem_present = dn != NULL; of_node_put(dn); - mutex_lock(&direct_window_init_mutex); + mutex_lock(&dma_win_init_mutex); if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) { direct_mapping = (len >= max_ram_len); @@ -1324,8 +1327,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) page_shift = iommu_get_page_shift(query.page_size); if (!page_shift) { - dev_dbg(&dev->dev, "no supported direct page size in mask %x", - query.page_size); + dev_dbg(&dev->dev, "no supported page size in mask %x", + query.page_size); goto out_failed; } @@ -1384,7 +1387,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) ret = of_add_property(pdn, win64); if (ret) { - dev_err(&dev->dev, "unable to add dma window property for %pOF: %d", + dev_err(&dev->dev, "unable to add DMA window property for %pOF: %d", pdn, ret); goto out_free_prop; } @@ -1398,7 +1401,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, win64->value, tce_setrange_multi_pSeriesLP_walk); if (ret) { - dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n", + dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n", dn, ret); /* Make sure to clean DDW if any TCE was set*/ @@ -1443,9 +1446,9 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) 
set_iommu_table_base(&dev->dev, newtbl); } - spin_lock(&direct_window_list_lock); - list_add(&window->list, &direct_window_list); - spin_unlock(&direct_window_list_lock); + spin_lock(&dma_win_list_lock); + list_add(&window->list, &dma_win_list); + spin_unlock(&dma_win_list_lock); dev->dev.archdata.dma_offset = win_addr; ddw_enabled = true; @@ -1477,7 +1480,7 @@ out_failed: list_add(&fpdn->list, &failed_ddw_pdn_list); out_unlock: - mutex_unlock(&direct_window_init_mutex); + mutex_unlock(&dma_win_init_mutex); /* * If we have persistent memory and the window size is only as big @@ -1575,29 +1578,29 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { - struct direct_window *window; + struct dma_win *window; struct memory_notify *arg = data; int ret = 0; switch (action) { case MEM_GOING_ONLINE: - spin_lock(&direct_window_list_lock); - list_for_each_entry(window, &direct_window_list, list) { + spin_lock(&dma_win_list_lock); + list_for_each_entry(window, &dma_win_list, list) { ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, arg->nr_pages, window->prop); /* XXX log error */ } - spin_unlock(&direct_window_list_lock); + spin_unlock(&dma_win_list_lock); break; case MEM_CANCEL_ONLINE: case MEM_OFFLINE: - spin_lock(&direct_window_list_lock); - list_for_each_entry(window, &direct_window_list, list) { + spin_lock(&dma_win_list_lock); + list_for_each_entry(window, &dma_win_list, list) { ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, arg->nr_pages, window->prop); /* XXX log error */ } - spin_unlock(&direct_window_list_lock); + spin_unlock(&dma_win_list_lock); break; default: break; @@ -1618,7 +1621,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti struct of_reconfig_data *rd = data; struct device_node *np = rd->dn; struct pci_dn *pci = PCI_DN(np); - struct direct_window *window; + struct dma_win *window; switch (action) { case OF_RECONFIG_DETACH_NODE: @@ -1636,15 +1639,15 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti iommu_pseries_free_group(pci->table_group, np->full_name); - spin_lock(&direct_window_list_lock); - list_for_each_entry(window, &direct_window_list, list) { + spin_lock(&dma_win_list_lock); + list_for_each_entry(window, &dma_win_list, list) { if (window->device == np) { list_del(&window->list); kfree(window); break; } } - spin_unlock(&direct_window_list_lock); + spin_unlock(&dma_win_list_lock); break; default: err = NOTIFY_DONE; -- cgit From b76dd9302af7803aac62243ae94d53067fc819b4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jun 2021 10:34:34 +0200 Subject: um: make PCI emulation driver init/exit static The functions aren't used elsewhere, so they can be static. 
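For illustration, a minimal sketch of the idiom this patch adopts (the demo_* names are hypothetical, not from the driver): functions referenced only through module_init()/module_exit() need no external linkage, so marking them static both documents that and lets the compiler and sparse verify it.

	#include <linux/module.h>

	static int __init demo_init(void)
	{
		/* device/driver registration would go here */
		return 0;
	}

	static void __exit demo_exit(void)
	{
		/* teardown mirrors demo_init() */
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");

The module_init()/module_exit() macros record the function pointers for the module loader, so no other translation unit ever needs these symbols by name.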
Reported-by: kernel test robot Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver") Signed-off-by: Johannes Berg Acked-By: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/drivers/virt-pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index 0b802834f40a..70c17de16662 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -810,7 +810,7 @@ void *pci_root_bus_fwnode(struct pci_bus *bus) return um_pci_fwnode; } -int um_pci_init(void) +static int um_pci_init(void) { int err, i; @@ -884,7 +884,7 @@ free: } module_init(um_pci_init); -void um_pci_exit(void) +static void um_pci_exit(void) { unregister_virtio_driver(&um_pci_virtio_driver); irq_domain_remove(um_pci_msi_domain); -- cgit From 21976f2b747edd9350336cdd6700b792a3bc2170 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jun 2021 10:34:36 +0200 Subject: um: virtio_uml: include linux/virtio-uml.h This fixes a sparse warning, since the function defined here should have a declaration (or be static). Reported-by: kernel test robot Fixes: 43c590cb8666 ("um: virtio/pci: enable suspend/resume") Signed-off-by: Johannes Berg Acked-By: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/drivers/virtio_uml.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 4412d6febade..cb79fe33d84e 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include -- cgit From 7ad28e0df7ee9dbcb793bb88dd81d4d22bb9a10e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jun 2021 10:34:37 +0200 Subject: um: virtio_uml: fix memory leak on init failures If initialization fails, e.g. because the connection failed, we leak the 'vu_dev'. Fix that. Reported by smatch. Fixes: 5d38f324993f ("um: drivers: Add virtio vhost-user driver") Signed-off-by: Johannes Berg Acked-By: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/drivers/virtio_uml.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index cb79fe33d84e..d51e445df797 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1140,7 +1140,7 @@ static int virtio_uml_probe(struct platform_device *pdev) rc = os_connect_socket(pdata->socket_path); } while (rc == -EINTR); if (rc < 0) - return rc; + goto error_free; vu_dev->sock = rc; spin_lock_init(&vu_dev->sock_lock); @@ -1161,6 +1161,8 @@ static int virtio_uml_probe(struct platform_device *pdev) error_init: os_close_file(vu_dev->sock); +error_free: + kfree(vu_dev); return rc; } -- cgit From 4a22c4cebd61f67bd8a2c79f8dae10df074f62b9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 11 Aug 2021 16:58:25 +0200 Subject: um: virt-pci: don't do DMA from stack When enabling VMAP_STACK, SG helpers immediately complained that we were doing DMA from stack. Use per-CPU variables to avoid that. 
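A minimal sketch of the pattern (hypothetical demo_* names, not the patch itself): with VMAP_STACK the stack lives in vmalloc space, which has no lowmem linear mapping, so scatterlist/DMA helpers must not be handed stack addresses. Preallocated per-CPU buffers are in lowmem, and disabling preemption while one is in use is enough to serialize access:

	#include <linux/percpu.h>
	#include <linux/scatterlist.h>
	#include <linux/string.h>

	struct demo_msg_buf {
		u8 data[8];
	};

	/* allocated once at init: demo_bufs = alloc_percpu(struct demo_msg_buf); */
	static struct demo_msg_buf __percpu *demo_bufs;

	static void demo_fill_sg(struct scatterlist *sg, const void *src, size_t len)
	{
		/* get_cpu_ptr() disables preemption, making this CPU's buffer ours */
		struct demo_msg_buf *buf = get_cpu_ptr(demo_bufs);

		if (len > sizeof(buf->data))
			len = sizeof(buf->data);
		memcpy(buf->data, src, len);
		sg_init_one(sg, buf->data, len);	/* lowmem address: sg/DMA-safe */

		/* ... submit and wait for completion before releasing ... */

		put_cpu_ptr(demo_bufs);			/* re-enables preemption */
	}

The caller must not sleep or migrate between get_cpu_ptr() and put_cpu_ptr(); the patch below follows the same discipline with get_cpu_var()/put_cpu_var().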
Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/virt-pci.c | 104 +++++++++++++++++++++++++++++++++------------ 1 file changed, 78 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index 70c17de16662..c08066633023 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -56,6 +56,13 @@ static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)]; #define UM_VIRT_PCI_MAXDELAY 40000 +struct um_pci_message_buffer { + struct virtio_pcidev_msg hdr; + u8 data[8]; +}; + +static struct um_pci_message_buffer __percpu *um_pci_msg_bufs; + static int um_pci_send_cmd(struct um_pci_device *dev, struct virtio_pcidev_msg *cmd, unsigned int cmd_size, @@ -68,11 +75,12 @@ static int um_pci_send_cmd(struct um_pci_device *dev, [1] = extra ? &extra_sg : &in_sg, [2] = extra ? &in_sg : NULL, }; + struct um_pci_message_buffer *buf; int delay_count = 0; int ret, len; bool posted; - if (WARN_ON(cmd_size < sizeof(*cmd))) + if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) return -EINVAL; switch (cmd->op) { @@ -88,6 +96,9 @@ static int um_pci_send_cmd(struct um_pci_device *dev, break; } + buf = get_cpu_var(um_pci_msg_bufs); + memcpy(buf, cmd, cmd_size); + if (posted) { u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC); @@ -102,7 +113,10 @@ static int um_pci_send_cmd(struct um_pci_device *dev, } else { /* try without allocating memory */ posted = false; + cmd = (void *)buf; } + } else { + cmd = (void *)buf; } sg_init_one(&out_sg, cmd, cmd_size); @@ -118,11 +132,12 @@ static int um_pci_send_cmd(struct um_pci_device *dev, posted ? cmd : HANDLE_NO_FREE(cmd), GFP_ATOMIC); if (ret) - return ret; + goto out; if (posted) { virtqueue_kick(dev->cmd_vq); - return 0; + ret = 0; + goto out; } /* kick and poll for getting a response on the queue */ @@ -148,6 +163,8 @@ static int um_pci_send_cmd(struct um_pci_device *dev, } clear_bit(UM_PCI_STAT_WAITING, &dev->status); +out: + put_cpu_var(um_pci_msg_bufs); return ret; } @@ -161,12 +178,17 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, .size = size, .addr = offset, }; - /* maximum size - we may only use parts of it */ - u8 data[8]; + /* buf->data is maximum size - we may only use parts of it */ + struct um_pci_message_buffer *buf; + u8 *data; + unsigned long ret = ~0ULL; if (!dev) return ~0ULL; + buf = get_cpu_var(um_pci_msg_bufs); + data = buf->data; + memset(data, 0xff, sizeof(data)); switch (size) { @@ -179,27 +201,34 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, break; default: WARN(1, "invalid config space read size %d\n", size); - return ~0ULL; + goto out; } - if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, - data, sizeof(data))) - return ~0ULL; + if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, 8)) + goto out; switch (size) { case 1: - return data[0]; + ret = data[0]; + break; case 2: - return le16_to_cpup((void *)data); + ret = le16_to_cpup((void *)data); + break; case 4: - return le32_to_cpup((void *)data); + ret = le32_to_cpup((void *)data); + break; #ifdef CONFIG_64BIT case 8: - return le64_to_cpup((void *)data); + ret = le64_to_cpup((void *)data); + break; #endif default: - return ~0ULL; + break; } + +out: + put_cpu_var(um_pci_msg_bufs); + return ret; } static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, @@ -272,8 +301,13 @@ static void um_pci_bar_copy_from(void *priv, void *buffer, static unsigned long um_pci_bar_read(void 
*priv, unsigned int offset, int size) { - /* maximum size - we may only use parts of it */ - u8 data[8]; + /* buf->data is maximum size - we may only use parts of it */ + struct um_pci_message_buffer *buf; + u8 *data; + unsigned long ret = ~0ULL; + + buf = get_cpu_var(um_pci_msg_bufs); + data = buf->data; switch (size) { case 1: @@ -285,25 +319,33 @@ static unsigned long um_pci_bar_read(void *priv, unsigned int offset, break; default: WARN(1, "invalid config space read size %d\n", size); - return ~0ULL; + goto out; } um_pci_bar_copy_from(priv, data, offset, size); switch (size) { case 1: - return data[0]; + ret = data[0]; + break; case 2: - return le16_to_cpup((void *)data); + ret = le16_to_cpup((void *)data); + break; case 4: - return le32_to_cpup((void *)data); + ret = le32_to_cpup((void *)data); + break; #ifdef CONFIG_64BIT case 8: - return le64_to_cpup((void *)data); + ret = le64_to_cpup((void *)data); + break; #endif default: - return ~0ULL; + break; } + +out: + put_cpu_var(um_pci_msg_bufs); + return ret; } static void um_pci_bar_copy_to(void *priv, unsigned int offset, @@ -823,10 +865,16 @@ static int um_pci_init(void) "No virtio device ID configured for PCI - no PCI support\n")) return 0; - bridge = pci_alloc_host_bridge(0); - if (!bridge) + um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer); + if (!um_pci_msg_bufs) return -ENOMEM; + bridge = pci_alloc_host_bridge(0); + if (!bridge) { + err = -ENOMEM; + goto free; + } + um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci"); if (!um_pci_fwnode) { err = -ENOMEM; @@ -878,8 +926,11 @@ free: irq_domain_remove(um_pci_inner_domain); if (um_pci_fwnode) irq_domain_free_fwnode(um_pci_fwnode); - pci_free_resource_list(&bridge->windows); - pci_free_host_bridge(bridge); + if (bridge) { + pci_free_resource_list(&bridge->windows); + pci_free_host_bridge(bridge); + } + free_percpu(um_pci_msg_bufs); return err; } module_init(um_pci_init); @@ -891,5 +942,6 @@ static void um_pci_exit(void) irq_domain_remove(um_pci_inner_domain); pci_free_resource_list(&bridge->windows); pci_free_host_bridge(bridge); + free_percpu(um_pci_msg_bufs); } module_exit(um_pci_exit); -- cgit From bc5c49d79206b40ad16e055837a33b6dc6160bed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 11 Aug 2021 16:58:26 +0200 Subject: um: enable VMAP_STACK This works just fine, so select HAVE_ARCH_VMAP_STACK to let users enable VMAP_STACK if desired. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 0561b73cfd9a..389c9f5bcb2b 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -21,6 +21,7 @@ config UML select HAVE_GCC_PLUGINS select SET_FS select TTY # Needed for line.c + select HAVE_ARCH_VMAP_STACK config MMU bool -- cgit From adf9ae0d159d3dc94f58d788fc4757c8749ac0df Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Jul 2021 23:47:10 +0200 Subject: um: fix stub location calculation In commit 9f0b4807a44f ("um: rework userspace stubs to not hard-code stub location") I changed stub_segv_handler() to do a calculation with a pointer to a stack variable to find the data page that we're using for the stack and the rest of the data. This same commit was meant to do it as well for stub_clone_handler(), but the change inadvertently went into commit 84b2789d6115 ("um: separate child and parent errors in clone stub") instead. This was reported to not be compiled correctly by gcc 5, causing the code to crash here. 
I'm not sure why, perhaps it's UB because the var isn't initialized? In any case, this trick always seemed bad, so just create a new inline function that does the calculation in assembly. Reported-by: subashab@codeaurora.org Fixes: 9f0b4807a44f ("um: rework userspace stubs to not hard-code stub location") Fixes: 84b2789d6115 ("um: separate child and parent errors in clone stub") Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/kernel/skas/clone.c | 3 +-- arch/x86/um/shared/sysdep/stub_32.h | 12 ++++++++++++ arch/x86/um/shared/sysdep/stub_64.h | 12 ++++++++++++ arch/x86/um/stub_segv.c | 3 +-- 4 files changed, 26 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index 5afac0fef24e..ff5061f29167 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -24,8 +24,7 @@ void __attribute__ ((__section__ (".__syscall_stub"))) stub_clone_handler(void) { - int stack; - struct stub_data *data = (void *) ((unsigned long)&stack & ~(UM_KERN_PAGE_SIZE - 1)); + struct stub_data *data = get_stub_page(); long err; err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h index b95db9daf0e8..4c6c2be0c899 100644 --- a/arch/x86/um/shared/sysdep/stub_32.h +++ b/arch/x86/um/shared/sysdep/stub_32.h @@ -101,4 +101,16 @@ static inline void remap_stack_and_trap(void) "memory"); } +static __always_inline void *get_stub_page(void) +{ + unsigned long ret; + + asm volatile ( + "movl %%esp,%0 ;" + "andl %1,%0" + : "=a" (ret) + : "g" (~(UM_KERN_PAGE_SIZE - 1))); + + return (void *)ret; +} #endif diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h index 6e2626b77a2e..e9c4b2b38803 100644 --- a/arch/x86/um/shared/sysdep/stub_64.h +++ b/arch/x86/um/shared/sysdep/stub_64.h @@ -108,4 +108,16 @@ static inline void remap_stack_and_trap(void) __syscall_clobber, "r10", "r8", "r9"); } +static __always_inline void *get_stub_page(void) +{ + unsigned long ret; + + asm volatile ( + "movq %%rsp,%0 ;" + "andq %1,%0" + : "=a" (ret) + : "g" (~(UM_KERN_PAGE_SIZE - 1))); + + return (void *)ret; +} #endif diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c index 21836eaf1725..f7eefba034f9 100644 --- a/arch/x86/um/stub_segv.c +++ b/arch/x86/um/stub_segv.c @@ -11,9 +11,8 @@ void __attribute__ ((__section__ (".__syscall_stub"))) stub_segv_handler(int sig, siginfo_t *info, void *p) { - int stack; + struct faultinfo *f = get_stub_page(); ucontext_t *uc = p; - struct faultinfo *f = (void *)(((unsigned long)&stack) & ~(UM_KERN_PAGE_SIZE - 1)); GET_FAULTINFO_FROM_MC(*f, &uc->uc_mcontext); trap_myself(); -- cgit From d9820ff76f95fa26d33e412254a89cd65b23142d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 12 Aug 2021 12:54:43 -0700 Subject: ARC: mm: switch pgtable_t back to struct page * So far ARC pgtable_t has not been struct page based to avoid extra page_address() calls involved. However the differences are down to noise and get in the way of using generic code, hence this patch. This also allows us to reuse generic THP depost/withdraw code. There's some additional consideration for PGDIR_SHIFT in 4K page config. Now due to page tables being PAGE_SIZE deep only, the address split can't be really arbitrary. 
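A back-of-envelope illustration of that constraint (standalone user-space C sketch, assuming 4-byte table entries; ARC pte_t is wider when PAE40 is enabled): once every table level is a single page, each level can index at most PAGE_SIZE / sizeof(entry) entries, which is the condition the patch encodes with BUILD_BUG_ON().

	#include <assert.h>
	#include <stdio.h>

	/* Check a 2-level split of a 32-bit VA: every table must fit in one page. */
	static void check(unsigned int pgdir_shift, unsigned int page_shift)
	{
		unsigned int ptrs_per_pgd = 1u << (32 - pgdir_shift);
		unsigned int ptrs_per_pte = 1u << (pgdir_shift - page_shift);
		unsigned int page_bytes = 1u << page_shift;

		assert(ptrs_per_pgd * 4 <= page_bytes);
		assert(ptrs_per_pte * 4 <= page_bytes);
		printf("split %u:%u:%u fits\n", 32 - pgdir_shift,
		       pgdir_shift - page_shift, page_shift);
	}

	int main(void)
	{
		check(22, 12);	/* 4K pages: 10:10:12, the new default below */
		check(21, 13);	/* 8K pages: 11:8:13, unchanged */
		/* check(21, 12) would trip: 11 PGD bits need 8K, the page is 4K */
		return 0;
	}

This is why the 4K configuration moves PGDIR_SHIFT from 21 to 22 in the diff below, while the 8K split stays as it was.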
Tested-by: kernel test robot Suggested-by: Mike Rapoport Acked-by: Mike Rapoport Signed-off-by: Vineet Gupta --- arch/arc/include/asm/hugepage.h | 8 ----- arch/arc/include/asm/page.h | 2 +- arch/arc/include/asm/pgalloc.h | 57 +++++++++++------------------------ arch/arc/include/asm/pgtable-levels.h | 8 ++++- arch/arc/mm/init.c | 3 ++ arch/arc/mm/tlb.c | 37 ----------------------- 6 files changed, 28 insertions(+), 87 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 4eef17c5c1da..11b0ff26b97b 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -58,14 +58,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); -/* Generic variants assume pgtable_t is struct page *, hence need for these */ -#define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable); - -#define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); - #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 313e6f543d2d..28ed82b1800f 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -60,7 +60,7 @@ typedef struct { #define __pgprot(x) ((pgprot_t) { (x) }) #define pte_pgprot(x) __pgprot(pte_val(x)) -typedef pte_t * pgtable_t; +typedef struct page *pgtable_t; /* * Use virt_to_pfn with caution: diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 408bc4b0842d..8ab1af3da6e7 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -45,22 +45,17 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) set_pmd(pmd, __pmd((unsigned long)pte)); } -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) { - set_pmd(pmd, __pmd((unsigned long)pte)); -} - -static inline int __get_order_pgd(void) -{ - return get_order(PTRS_PER_PGD * sizeof(pgd_t)); + set_pmd(pmd, __pmd((unsigned long)page_address(pte_page))); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - int num, num2; - pgd_t *ret = (pgd_t *) __get_free_pages(GFP_KERNEL, __get_order_pgd()); + pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL); if (ret) { + int num, num2; num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE; memzero(ret, num * sizeof(pgd_t)); @@ -76,61 +71,43 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - free_pages((unsigned long)pgd, __get_order_pgd()); -} - - -/* - * With software-only page-tables, addr-split for traversal is tweakable and - * that directly governs how big tables would be at each level. - * Further, the MMU page size is configurable. - * Thus we need to programatically assert the size constraint - * All of this is const math, allowing gcc to do constant folding/propagation. 
- */ - -static inline int __get_order_pte(void) -{ - return get_order(PTRS_PER_PTE * sizeof(pte_t)); + free_page((unsigned long)pgd); } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, - __get_order_pte()); + pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_ZERO); return pte; } -static inline pgtable_t -pte_alloc_one(struct mm_struct *mm) +static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { - pgtable_t pte_pg; struct page *page; - pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte()); - if (!pte_pg) - return 0; - memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); - page = virt_to_page(pte_pg); + page = (pgtable_t)alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT); + if (!page) + return NULL; + if (!pgtable_pte_page_ctor(page)) { __free_page(page); - return 0; + return NULL; } - return pte_pg; + return page; } static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - free_pages((unsigned long)pte, __get_order_pte()); /* takes phy addr */ + free_page((unsigned long)pte); } -static inline void pte_free(struct mm_struct *mm, pgtable_t ptep) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte_page) { - pgtable_pte_page_dtor(virt_to_page(ptep)); - free_pages((unsigned long)ptep, __get_order_pte()); + pgtable_pte_page_dtor(pte_page); + __free_page(pte_page); } #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h index e65fb8c9da12..561dedab79ed 100644 --- a/arch/arc/include/asm/pgtable-levels.h +++ b/arch/arc/include/asm/pgtable-levels.h @@ -35,9 +35,15 @@ #else /* * No Super page case - * Default value provides 11:8:13 (8K), 11:9:12 (4K) + * Default value provides 11:8:13 (8K), 10:10:12 (4K) + * Limits imposed by pgtable_t only PAGE_SIZE long + * (so 4K page can only have 1K entries: or 10 bits) */ +#ifdef CONFIG_ARC_PAGE_SIZE_4K +#define PGDIR_SHIFT 22 +#else #define PGDIR_SHIFT 21 +#endif #endif diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index c083bf660cec..46ad9aee7a73 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -189,6 +189,9 @@ void __init mem_init(void) { memblock_free_all(); highmem_init(); + + BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); } #ifdef CONFIG_HIGHMEM diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index b68d5798327b..dfc0c1aba48f 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -534,43 +534,6 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, update_mmu_cache(vma, addr, &pte); } -void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable) -{ - struct list_head *lh = (struct list_head *) pgtable; - - assert_spin_locked(&mm->page_table_lock); - - /* FIFO */ - if (!pmd_huge_pte(mm, pmdp)) - INIT_LIST_HEAD(lh); - else - list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); - pmd_huge_pte(mm, pmdp) = pgtable; -} - -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) -{ - struct list_head *lh; - pgtable_t pgtable; - - assert_spin_locked(&mm->page_table_lock); - - pgtable = pmd_huge_pte(mm, pmdp); - lh = (struct list_head *) pgtable; - if (list_empty(lh)) - pmd_huge_pte(mm, pmdp) = NULL; - else { - pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; - list_del(lh); - } - - pte_val(pgtable[0]) = 0; - pte_val(pgtable[1]) = 0; - - return pgtable; -} - void 
local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { -- cgit From 9f3c76aedcbfee61dcdf299e708888141c7132fd Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 12 Aug 2021 14:31:36 -0700 Subject: ARC: mm: switch to asm-generic/pgalloc.h With previous patch ARC pgalloc functions are same as generic, hence switch to that. Suggested-by: Mike Rapoport Acked-by: Mike Rapoport Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgalloc.h | 42 +----------------------------------------- 1 file changed, 1 insertion(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 8ab1af3da6e7..0cde9e5eefd7 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -31,6 +31,7 @@ #include #include +#include static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) @@ -69,47 +70,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return ret; } -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - free_page((unsigned long)pgd); -} - -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - pte_t *pte; - - pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_ZERO); - - return pte; -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - struct page *page; - - page = (pgtable_t)alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT); - if (!page) - return NULL; - - if (!pgtable_pte_page_ctor(page)) { - __free_page(page); - return NULL; - } - - return page; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte_page) -{ - pgtable_pte_page_dtor(pte_page); - __free_page(pte_page); -} - #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) #endif /* _ASM_ARC_PGALLOC_H */ -- cgit From 2dde02ab6d1a725ddccc7144ff6bf5f55d37f916 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 30 Sep 2020 18:58:50 -0700 Subject: ARC: mm: support 3 levels of page tables ARCv2 MMU is software walked and Linux implements 2 levels of paging: pgd/pte. Forthcoming hw will have multiple levels, so this change preps mm code for same. It is also fun to try multi levels even on soft-walked code to ensure generic mm code is robust to handle. overview ________ 2 levels {pgd, pte} : pmd is folded but pmd_* macros are valid and operate on pgd 3 levels {pgd, pmd, pte}: - pud is folded and pud_* macros point to pgd - pmd_* macros operate on actual pmd code changes ____________ 1. #include 2. Define CONFIG_PGTABLE_LEVELS 3 3a. Define PMD_SHIFT, PMD_SIZE, PMD_MASK, pmd_t 3b. Define pmd_val() which actually deals with pmd (pmd_offset(), pmd_index() are provided by generic code) 3c. pmd_alloc_one()/pmd_free() also provided by generic code (pmd_populate/pmd_free already exist) 4. Define pud_none(), pud_bad() macros based on generic pud_val() which internally pertains to pgd now. 4b. 
define pud_populate() to just setup pgd Acked-by: Mike Rapoport Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 4 +++ arch/arc/include/asm/page.h | 11 ++++++ arch/arc/include/asm/pgalloc.h | 11 ++++++ arch/arc/include/asm/pgtable-levels.h | 63 ++++++++++++++++++++++++++++++++--- arch/arc/include/asm/processor.h | 2 +- arch/arc/mm/fault.c | 4 +++ arch/arc/mm/init.c | 1 + arch/arc/mm/tlb.c | 4 +-- arch/arc/mm/tlbex.S | 9 +++++ 9 files changed, 101 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 56aad105ad13..f631a7a60979 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -314,6 +314,10 @@ config ARC_HUGEPAGE_16M endchoice +config PGTABLE_LEVELS + int "Number of Page table levels" + default 2 + config ARC_COMPACT_IRQ_LEVELS depends on ISA_ARCOMPACT bool "Setup Timer IRQ as high Priority" diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 28ed82b1800f..5d7899d87c08 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -41,6 +41,17 @@ typedef struct { #define pgd_val(x) ((x).pgd) #define __pgd(x) ((pgd_t) { (x) }) +#if CONFIG_PGTABLE_LEVELS > 2 + +typedef struct { + unsigned long pmd; +} pmd_t; + +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) }) + +#endif + typedef struct { #ifdef CONFIG_ARC_HAS_PAE40 unsigned long long pte; diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 0cde9e5eefd7..781620d2e86f 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -70,6 +70,17 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return ret; } +#if CONFIG_PGTABLE_LEVELS > 2 + +static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) +{ + set_pud(pudp, __pud((unsigned long)pmdp)); +} + +#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) + +#endif + #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) #endif /* _ASM_ARC_PGALLOC_H */ diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h index 561dedab79ed..5dd8c58f7d70 100644 --- a/arch/arc/include/asm/pgtable-levels.h +++ b/arch/arc/include/asm/pgtable-levels.h @@ -10,6 +10,8 @@ #ifndef _ASM_ARC_PGTABLE_LEVELS_H #define _ASM_ARC_PGTABLE_LEVELS_H +#if CONFIG_PGTABLE_LEVELS == 2 + /* * 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS) * @@ -47,16 +49,38 @@ #endif -#define PGDIR_SIZE BIT(PGDIR_SHIFT) /* vaddr span, not PDG sz */ -#define PGDIR_MASK (~(PGDIR_SIZE - 1)) +#else /* CONFIG_PGTABLE_LEVELS != 2 */ + +/* + * A default 3 level paging testing setup in software walked MMU + * MMUv4 (8K page): <4> : <7> : <8> : <13> + */ +#define PGDIR_SHIFT 28 +#if CONFIG_PGTABLE_LEVELS > 2 +#define PMD_SHIFT 21 +#endif +#endif /* CONFIG_PGTABLE_LEVELS */ + +#define PGDIR_SIZE BIT(PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE - 1)) #define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT) -#define PTRS_PER_PTE BIT(PGDIR_SHIFT - PAGE_SHIFT) +#if CONFIG_PGTABLE_LEVELS > 2 +#define PMD_SIZE BIT(PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE - 1)) +#define PTRS_PER_PMD BIT(PGDIR_SHIFT - PMD_SHIFT) +#endif + +#define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT) #ifndef __ASSEMBLY__ +#if CONFIG_PGTABLE_LEVELS > 2 +#include +#else #include +#endif /* * 1st level paging: pgd @@ -67,9 +91,35 @@ #define pgd_ERROR(e) \ pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +#if CONFIG_PGTABLE_LEVELS > 2 + +/* In 3 level paging, pud_* macros work on pgd */ +#define pud_none(x) (!pud_val(x)) +#define 
pud_bad(x) ((pud_val(x) & ~PAGE_MASK)) +#define pud_present(x) (pud_val(x)) +#define pud_clear(xp) do { pud_val(*(xp)) = 0; } while (0) +#define pud_pgtable(pud) ((pmd_t *)(pud_val(pud) & PAGE_MASK)) +#define pud_page(pud) virt_to_page(pud_pgtable(pud)) +#define set_pud(pudp, pud) (*(pudp) = pud) + /* - * Due to the strange way generic pgtable level folding works, in a 2 level - * setup, pmd_val() returns pgd, so these pmd_* macros actually work on pgd + * 2nd level paging: pmd + */ +#define pmd_ERROR(e) \ + pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) + +#define pmd_pfn(pmd) ((pmd_val(pmd) & PMD_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) + +#endif + +/* + * Due to the strange way generic pgtable level folding works, the pmd_* macros + * - are valid even for 2 levels (which supposedly only has pgd - pte) + * - behave differently for 2 vs. 3 + * In 2 level paging (pgd -> pte), pmd_* macros work on pgd + * In 3+ level paging (pgd -> pmd -> pte), pmd_* macros work on pmd */ #define pmd_none(x) (!pmd_val(x)) #define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK)) @@ -80,6 +130,9 @@ #define set_pmd(pmdp, pmd) (*(pmdp) = pmd) #define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) +/* + * 3rd level paging: pte + */ #define pte_ERROR(e) \ pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index e4031ecd3c8c..f28afcf5c6d1 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -93,7 +93,7 @@ extern unsigned int get_wchan(struct task_struct *p); #define VMALLOC_START (PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20)) /* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */ -#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4) +#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PMD_SIZE * 4) #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 41f154320964..8da2f0ad8c69 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -39,6 +39,8 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address) if (!pgd_present(*pgd_k)) goto bad_area; + set_pgd(pgd, *pgd_k); + p4d = p4d_offset(pgd, address); p4d_k = p4d_offset(pgd_k, address); if (!p4d_present(*p4d_k)) @@ -49,6 +51,8 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address) if (!pud_present(*pud_k)) goto bad_area; + set_pud(pud, *pud_k); + pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 46ad9aee7a73..f7ba2a5d5ec8 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -191,6 +191,7 @@ void __init mem_init(void) highmem_init(); BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE); BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index dfc0c1aba48f..5f71445f26bd 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -621,8 +621,8 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n", - p_mmu->ver, p_mmu->pg_sz_k, super_pg, + "MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), 
uDTLB %d, uITLB %d%s%s\n", + p_mmu->ver, p_mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS, p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40)); diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index c4a5f16444ce..5f57eba1089d 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -173,6 +173,15 @@ ex_saved_reg1: tst r3, r3 bz do_slow_path_pf ; if no Page Table, do page fault +#if CONFIG_PGTABLE_LEVELS > 2 + lsr r0, r2, PMD_SHIFT ; Bits for indexing into PMD + and r0, r0, (PTRS_PER_PMD - 1) + ld.as r1, [r3, r0] ; PMD entry + tst r1, r1 + bz do_slow_path_pf + mov r3, r1 +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) add2.nz r1, r1, r0 -- cgit From 8747ff704ac886f6ef992b1b7eadcf77d151fd3a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 1 Oct 2020 15:46:42 -0700 Subject: ARC: mm: support 4 levels of page tables Acked-by: Mike Rapoport Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 11 +++++++++ arch/arc/include/asm/pgalloc.h | 11 +++++++++ arch/arc/include/asm/pgtable-levels.h | 45 +++++++++++++++++++++++++++++++---- arch/arc/mm/fault.c | 2 ++ arch/arc/mm/init.c | 1 + arch/arc/mm/tlbex.S | 9 +++++++ 6 files changed, 74 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 5d7899d87c08..9a62e1d87967 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -41,6 +41,17 @@ typedef struct { #define pgd_val(x) ((x).pgd) #define __pgd(x) ((pgd_t) { (x) }) +#if CONFIG_PGTABLE_LEVELS > 3 + +typedef struct { + unsigned long pud; +} pud_t; + +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) }) + +#endif + #if CONFIG_PGTABLE_LEVELS > 2 typedef struct { diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 781620d2e86f..096b8ef58edb 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -70,6 +70,17 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return ret; } +#if CONFIG_PGTABLE_LEVELS > 3 + +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) +{ + set_p4d(p4dp, __p4d((unsigned long)pudp)); +} + +#define __pud_free_tlb(tlb, pmd, addr) pud_free((tlb)->mm, pmd) + +#endif + #if CONFIG_PGTABLE_LEVELS > 2 static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h index 5dd8c58f7d70..8084ef2f6491 100644 --- a/arch/arc/include/asm/pgtable-levels.h +++ b/arch/arc/include/asm/pgtable-levels.h @@ -54,8 +54,13 @@ /* * A default 3 level paging testing setup in software walked MMU * MMUv4 (8K page): <4> : <7> : <8> : <13> + * A default 4 level paging testing setup in software walked MMU + * MMUv4 (8K page): <4> : <3> : <4> : <8> : <13> */ #define PGDIR_SHIFT 28 +#if CONFIG_PGTABLE_LEVELS > 3 +#define PUD_SHIFT 25 +#endif #if CONFIG_PGTABLE_LEVELS > 2 #define PMD_SHIFT 21 #endif @@ -66,17 +71,25 @@ #define PGDIR_MASK (~(PGDIR_SIZE - 1)) #define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT) +#if CONFIG_PGTABLE_LEVELS > 3 +#define PUD_SIZE BIT(PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1)) +#define PTRS_PER_PUD BIT(PGDIR_SHIFT - PUD_SHIFT) +#endif + #if CONFIG_PGTABLE_LEVELS > 2 #define PMD_SIZE BIT(PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE - 1)) -#define PTRS_PER_PMD BIT(PGDIR_SHIFT - PMD_SHIFT) +#define PTRS_PER_PMD BIT(PUD_SHIFT - PMD_SHIFT) #endif #define 
PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT) #ifndef __ASSEMBLY__ -#if CONFIG_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 3 +#include +#elif CONFIG_PGTABLE_LEVELS > 2 #include #else #include @@ -91,9 +104,31 @@ #define pgd_ERROR(e) \ pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +#if CONFIG_PGTABLE_LEVELS > 3 + +/* In 4 level paging, p4d_* macros work on pgd */ +#define p4d_none(x) (!p4d_val(x)) +#define p4d_bad(x) ((p4d_val(x) & ~PAGE_MASK)) +#define p4d_present(x) (p4d_val(x)) +#define p4d_clear(xp) do { p4d_val(*(xp)) = 0; } while (0) +#define p4d_pgtable(p4d) ((pud_t *)(p4d_val(p4d) & PAGE_MASK)) +#define p4d_page(p4d) virt_to_page(p4d_pgtable(p4d)) +#define set_p4d(p4dp, p4d) (*(p4dp) = p4d) + +/* + * 2nd level paging: pud + */ +#define pud_ERROR(e) \ + pr_crit("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) + +#endif + #if CONFIG_PGTABLE_LEVELS > 2 -/* In 3 level paging, pud_* macros work on pgd */ +/* + * In 3 level paging, pud_* macros work on pgd + * In 4 level paging, pud_* macros work on pud + */ #define pud_none(x) (!pud_val(x)) #define pud_bad(x) ((pud_val(x) & ~PAGE_MASK)) #define pud_present(x) (pud_val(x)) @@ -103,7 +138,7 @@ #define set_pud(pudp, pud) (*(pudp) = pud) /* - * 2nd level paging: pmd + * 3rd level paging: pmd */ #define pmd_ERROR(e) \ pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) @@ -131,7 +166,7 @@ #define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) /* - * 3rd level paging: pte + * 4th level paging: pte */ #define pte_ERROR(e) \ pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 8da2f0ad8c69..f8994164fa36 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -46,6 +46,8 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address) if (!p4d_present(*p4d_k)) goto bad_area; + set_p4d(p4d, *p4d_k); + pud = pud_offset(p4d, address); pud_k = pud_offset(p4d_k, address); if (!pud_present(*pud_k)) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index f7ba2a5d5ec8..699ecf119641 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -191,6 +191,7 @@ void __init mem_init(void) highmem_init(); BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE); BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE); BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); } diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 5f57eba1089d..e054780a8fe0 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -173,6 +173,15 @@ ex_saved_reg1: tst r3, r3 bz do_slow_path_pf ; if no Page Table, do page fault +#if CONFIG_PGTABLE_LEVELS > 3 + lsr r0, r2, PUD_SHIFT ; Bits for indexing into PUD + and r0, r0, (PTRS_PER_PUD - 1) + ld.as r1, [r3, r0] ; PMD entry + tst r1, r1 + bz do_slow_path_pf + mov r3, r1 +#endif + #if CONFIG_PGTABLE_LEVELS > 2 lsr r0, r2, PMD_SHIFT ; Bits for indexing into PMD and r0, r0, (PTRS_PER_PMD - 1) -- cgit From 56809a28d45fcad94b28cfd614600568c0d46545 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 13 Oct 2020 19:44:07 -0700 Subject: ARC: mm: vmalloc sync from kernel to user table to update PMD ... ... not PGD vmalloc() sets up the kernel page table (starting from @swapper_pg_dir). But when vmalloc area is accessed in context of a user task, say opening terminal in n_tty_open(), the user page tables need to be synced from kernel page tables so that TLB entry is created in "user context". 
The old code was doing this incorrectly, as it was updating the user pgd entry (first level itself) to point to the kernel pud table (2nd level), effectively yanking away the entire user space translation and replacing it with the kernel one. The correct way to do this is to ONLY update a user space pgd/pud/pmd entry if it is not populated already. This ensures that only the missing leaf pmd entry gets updated to point to the relevant kernel pte table. From a code change point of view, we are changing the pattern: p4d = p4d_offset(pgd, address); p4d_k = p4d_offset(pgd_k, address); if (!p4d_present(*p4d_k)) goto bad_area; set_p4d(p4d, *p4d_k); with p4d = p4d_offset(pgd, address); p4d_k = p4d_offset(pgd_k, address); if (p4d_none(*p4d_k)) goto bad_area; if (!p4d_present(*p4d)) set_p4d(p4d, *p4d_k); Signed-off-by: Vineet Gupta --- arch/arc/mm/fault.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index f8994164fa36..5787c261c9a4 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -36,31 +36,31 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address) pgd = pgd_offset(current->active_mm, address); pgd_k = pgd_offset_k(address); - if (!pgd_present(*pgd_k)) + if (pgd_none (*pgd_k)) goto bad_area; - - set_pgd(pgd, *pgd_k); + if (!pgd_present(*pgd)) + set_pgd(pgd, *pgd_k); p4d = p4d_offset(pgd, address); p4d_k = p4d_offset(pgd_k, address); - if (!p4d_present(*p4d_k)) + if (p4d_none(*p4d_k)) goto bad_area; - - set_p4d(p4d, *p4d_k); + if (!p4d_present(*p4d)) set_p4d(p4d, *p4d_k); pud = pud_offset(p4d, address); pud_k = pud_offset(p4d_k, address); - if (!pud_present(*pud_k)) + if (pud_none(*pud_k)) goto bad_area; - - set_pud(pud, *pud_k); + if (!pud_present(*pud)) + set_pud(pud, *pud_k); pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); - if (!pmd_present(*pmd_k)) + if (pmd_none(*pmd_k)) goto bad_area; - - set_pmd(pmd, *pmd_k); + if (!pmd_present(*pmd)) + set_pmd(pmd, *pmd_k); /* XXX: create the TLB entry here */ return 0; -- cgit From c24a19674258dcc968a198d8e0d4717c8f27700c Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Tue, 10 Aug 2021 21:51:05 +0800 Subject: riscv: add support for hugepage migration Generic arch_hugetlb_migration_supported can provide appropriate support for hugepage migration when ARCH_ENABLE_HUGEPAGE_MIGRATION is enabled, so enable it for hugepage migration in riscv. Signed-off-by: Chen Wandun Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index e26e255dce3c..78644e29f1fd 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -13,6 +13,7 @@ config 32BIT config RISCV def_bool y select ARCH_CLOCKSOURCE_INIT + select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_VIRTUAL if MMU -- cgit From 7119decf47d9867266459615be502e5d2cecedba Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Tue, 29 Jun 2021 10:55:30 +0200 Subject: KVM: s390: Enable specification exception interpretation When this feature is enabled the hardware is free to interpret specification exceptions generated by the guest, instead of causing program interruption interceptions. This benefits (test) programs that generate a lot of specification exceptions (roughly 4x increase in exceptions/sec). Interceptions will occur as before if ICTL_PINT is set, i.e. if guest debug is enabled.
There is no indication if this feature is available or not and the hardware is free to interpret or not. So we can simply set this bit and if the hardware ignores it we fall back to intercept 8 handling. Signed-off-by: Janis Schoetterl-Glausch Link: https://lore.kernel.org/linux-s390/20210706114714.3936825-1-scgl@linux.ibm.com/ Reviewed-by: David Hildenbrand Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/kvm-s390.c | 2 ++ arch/s390/kvm/vsie.c | 2 ++ 3 files changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 8925f3969478..118d5450c523 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -244,6 +244,7 @@ struct kvm_s390_sie_block { __u8 fpf; /* 0x0060 */ #define ECB_GS 0x40 #define ECB_TE 0x10 +#define ECB_SPECI 0x08 #define ECB_SRSI 0x04 #define ECB_HOSTPROTINT 0x02 __u8 ecb; /* 0x0061 */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f72f361d39dd..5b45c83ced21 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3180,6 +3180,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb |= ECB_SRSI; if (test_kvm_facility(vcpu->kvm, 73)) vcpu->arch.sie_block->ecb |= ECB_TE; + if (!kvm_is_ucontrol(vcpu->kvm)) + vcpu->arch.sie_block->ecb |= ECB_SPECI; if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 4002a24bc43a..acda4b6fc851 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -510,6 +510,8 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) prefix_unmapped(vsie_page); scb_s->ecb |= ECB_TE; } + /* specification exception interpretation */ + scb_s->ecb |= scb_o->ecb & ECB_SPECI; /* branch prediction */ if (test_kvm_facility(vcpu->kvm, 82)) scb_s->fpf |= scb_o->fpf & FPF_BPBC; -- cgit From a3e03bc1368c1bc16e19b001fc96dc7430573cc8 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Fri, 27 Aug 2021 14:54:29 +0200 Subject: KVM: s390: index kvm->arch.idle_mask by vcpu_idx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While in practice vcpu->vcpu_idx == vcpu->vcpu_id is often true, it may not always be, and we must not rely on this. Reason is that KVM decides the vcpu_idx, userspace decides the vcpu_id, thus the two might not match. Currently kvm->arch.idle_mask is indexed by vcpu_id, which implies that code like for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { vcpu = kvm_get_vcpu(kvm, vcpu_id); do_stuff(vcpu); } is not legit. Reason is that kvm_get_vcpu expects a vcpu_idx, not a vcpu_id. The trouble is, we do actually use kvm->arch.idle_mask like this. To fix this problem we have two options. Either use kvm_get_vcpu_by_id(vcpu_id), which would loop to find the right vcpu_id, or switch to indexing via vcpu_idx. The latter is preferable for obvious reasons. Let us switch from indexing kvm->arch.idle_mask by vcpu_id to indexing it by vcpu_idx. To keep gisa_int.kicked_mask indexed by the same index as idle_mask, let's make the same change for it as well.
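Condensed, the rule the patch enforces (a fragment distilled from the diff below, not new API): any bitmap whose set bits will later be fed to kvm_get_vcpu() must be populated and walked by vcpu_idx, obtained via kvm_vcpu_get_idx():

	/* right: bit position == vcpu_idx, the index kvm_get_vcpu() takes */
	set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);

	for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
		struct kvm_vcpu *v = kvm_get_vcpu(kvm, vcpu_idx);
		/* v is exactly the vcpu that set the bit */
	}

	/* wrong (the old code): vcpu_id is chosen by userspace and may not
	 * equal vcpu_idx, so the later lookup can hit the wrong vcpu */
	set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);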
Fixes: 1ee0bc559dc3 ("KVM: s390: get rid of local_int array") Signed-off-by: Halil Pasic Reviewed-by: Christian Bornträger Reviewed-by: Claudio Imbrenda Cc: # 3.15+ Link: https://lore.kernel.org/r/20210827125429.1912577-1-pasic@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/interrupt.c | 12 ++++++------ arch/s390/kvm/kvm-s390.c | 2 +- arch/s390/kvm/kvm-s390.h | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 118d5450c523..611f18ecde91 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -963,6 +963,7 @@ struct kvm_arch{ atomic64_t cmma_dirty_pages; /* subset of available cpu features enabled by user space */ DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); + /* indexed by vcpu_idx */ DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS); struct kvm_s390_gisa_interrupt gisa_int; struct kvm_s390_pv pv; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index d548d60caed2..16256e17a544 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) static void __set_cpu_idle(struct kvm_vcpu *vcpu) { kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); - set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); + set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); } static void __unset_cpu_idle(struct kvm_vcpu *vcpu) { kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); - clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); + clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); } static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) @@ -3050,18 +3050,18 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) { - int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus); + int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus); struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_vcpu *vcpu; - for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { - vcpu = kvm_get_vcpu(kvm, vcpu_id); + for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) { + vcpu = kvm_get_vcpu(kvm, vcpu_idx); if (psw_ioint_disabled(vcpu)) continue; deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); if (deliverable_mask) { /* lately kicked but not yet running */ - if (test_and_set_bit(vcpu_id, gi->kicked_mask)) + if (test_and_set_bit(vcpu_idx, gi->kicked_mask)) return; kvm_s390_vcpu_wakeup(vcpu); return; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5b45c83ced21..e144c8046ceb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4026,7 +4026,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) kvm_s390_patch_guest_per_regs(vcpu); } - clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask); + clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask); vcpu->arch.sie_block->icptcode = 0; cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 9fad25109b0d..ecd741ee3276 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) { - return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); + return 
test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); } static inline int kvm_is_ucontrol(struct kvm *kvm) -- cgit From 71f8817c28e2e1e5549138e2aef68c2fd784e162 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 23 Aug 2021 18:34:05 +0100 Subject: MIPS: ingenic: Unconditionally enable clock of CPU #0 Make sure that the PLL that feeds the CPU won't be stopped while the kernel is running. This fixes a problem on JZ4760 (and probably others) where under very specific conditions, the main PLL would be turned OFF when the kernel was shutting down, causing the shutdown process to fail. Signed-off-by: Paul Cercueil Signed-off-by: Thomas Bogendoerfer --- arch/mips/generic/board-ingenic.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'arch') diff --git a/arch/mips/generic/board-ingenic.c b/arch/mips/generic/board-ingenic.c index e86e0016c548..3f44f14bdb33 100644 --- a/arch/mips/generic/board-ingenic.c +++ b/arch/mips/generic/board-ingenic.c @@ -7,6 +7,8 @@ * Copyright (C) 2020 Paul Cercueil */ +#include +#include #include #include #include @@ -129,10 +131,36 @@ static const struct platform_suspend_ops ingenic_pm_ops __maybe_unused = { static int __init ingenic_pm_init(void) { + struct device_node *cpu_node; + struct clk *cpu0_clk; + int ret; + if (boot_cpu_type() == CPU_XBURST) { if (IS_ENABLED(CONFIG_PM_SLEEP)) suspend_set_ops(&ingenic_pm_ops); _machine_halt = ingenic_halt; + + /* + * Unconditionally enable the clock for the first CPU. + * This makes sure that the PLL that feeds the CPU won't be + * stopped while the kernel is running. + */ + cpu_node = of_get_cpu_node(0, NULL); + if (!cpu_node) { + pr_err("Unable to get CPU node\n"); + } else { + cpu0_clk = of_clk_get(cpu_node, 0); + if (IS_ERR(cpu0_clk)) { + pr_err("Unable to get CPU0 clock\n"); + return PTR_ERR(cpu0_clk); + } + + ret = clk_prepare_enable(cpu0_clk); + if (ret) { + pr_err("Unable to enable CPU0 clock\n"); + return ret; + } + } } return 0; -- cgit From 1d78dfde33a02da1d816279c2e3452978b7abd39 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 27 Aug 2021 14:07:06 +1000 Subject: KVM: PPC: Fix clearing never mapped TCEs in realmode Since commit e1a1ef84cd07 ("KVM: PPC: Book3S: Allocate guest TCEs on demand too"), pages for TCE tables for KVM guests are allocated only when needed. This allows skipping any update when clearing TCEs. This works mostly fine as TCE updates are handled when the MMU is enabled. The realmode handlers fail with H_TOO_HARD when pages are not yet allocated, except when clearing a TCE in which case KVM prints a warning and proceeds to dereference a NULL pointer, which crashes the host OS. This has not been caught so far as the change in commit e1a1ef84cd07 is reasonably new, and POWER9 runs mostly radix which does not use realmode handlers. With hash, the default TCE table is memset() by QEMU when the machine is reset which triggers page faults and the KVM TCE device's kvm_spapr_tce_fault() handles those with MMU on. And the huge DMA windows are not cleared by VMs which instead successfully create a DMA window big enough to map the VM memory 1:1 and then VMs just map everything without clearing. This started crashing now as commit 381ceda88c4c ("powerpc/pseries/iommu: Make use of DDW for indirect mapping") added a mode where a dynamic DMA window that is not big enough to map the VM memory 1:1 is used anyway, and the VM now is the first (i.e. not QEMU) to clear a just created table.
Note that upstream QEMU needs to be modified for the VM to trigger the host OS crash. This replaces WARN_ON_ONCE_RM() with a check and return, and adds another warning if TCE is not being cleared. Fixes: e1a1ef84cd07 ("KVM: PPC: Book3S: Allocate guest TCEs on demand too") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210827040706.517652-1-aik@ozlabs.ru --- arch/powerpc/kvm/book3s_64_vio_hv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index dc6591548f0c..636c6ae0939b 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -173,10 +173,13 @@ static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt, idx -= stt->offset; page = stt->pages[idx / TCES_PER_PAGE]; /* - * page must not be NULL in real mode, - * kvmppc_rm_ioba_validate() must have taken care of this. + * kvmppc_rm_ioba_validate() allows pages not be allocated if TCE is + * being cleared, otherwise it returns H_TOO_HARD and we skip this. */ - WARN_ON_ONCE_RM(!page); + if (!page) { + WARN_ON_ONCE_RM(tce != 0); + return; + } tbl = kvmppc_page_address(page); tbl[idx % TCES_PER_PAGE] = tce; -- cgit From 7f2dcc7371c1ab6e5d3678a7eff1cb0eb9725de9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Aug 2021 20:18:09 -0700 Subject: parisc: math-emu: Avoid "fmt" macro collision The printk "fmt" macro was colliding. Rename like the others with a "bits" suffix. Fixes a build failure: arch/parisc/math-emu/decode_exc.c: In function 'decode_fpu': arch/parisc/math-emu/decode_exc.c:49:14: error: expected identifier before numeric constant 49 | #define fmt 11 /* bits 19 & 20 */ | ^~ ./include/linux/printk.h:379:6: note: in expansion of macro 'fmt' 379 | .fmt = __builtin_constant_p(_fmt) ? (_fmt) : NULL, \ | ^~~ ./include/linux/printk.h:417:3: note: in expansion of macro '__printk_index_emit' 417 | __printk_index_emit(_fmt, NULL, NULL); \ | ^~~~~~~~~~~~~~~~~~~ ./include/linux/printk.h:446:26: note: in expansion of macro 'printk_index_wrap' 446 | #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__) | ^~~~~~~~~~~~~~~~~ arch/parisc/math-emu/decode_exc.c:339:3: note: in expansion of macro 'printk' 339 | printk("%s(%d) Unknown FPU exception 0x%x\n", __FILE__, | ^~~~~~ Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: linux-parisc@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Helge Deller --- arch/parisc/math-emu/decode_exc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/math-emu/decode_exc.c b/arch/parisc/math-emu/decode_exc.c index cd8ffc6ceadf..494ca41df05d 100644 --- a/arch/parisc/math-emu/decode_exc.c +++ b/arch/parisc/math-emu/decode_exc.c @@ -46,7 +46,7 @@ #define SIGNALCODE(signal, code) ((signal) << 24 | (code)) #define copropbit 1<<31-2 /* bit position 2 */ #define opclass 9 /* bits 21 & 22 */ -#define fmt 11 /* bits 19 & 20 */ +#define fmtbits 11 /* bits 19 & 20 */ #define df 13 /* bits 17 & 18 */ #define twobits 3 /* mask low-order 2 bits */ #define fivebits 31 /* mask low-order 5 bits */ @@ -57,7 +57,7 @@ #define Excp_instr(index) Instructionfield(Fpu_register[index]) #define Clear_excp_register(index) Allexception(Fpu_register[index]) = 0 #define Excp_format() \ - (current_ir >> ((current_ir>>opclass & twobits)==1 ? df : fmt) & twobits) + (current_ir >> ((current_ir>>opclass & twobits) == 1 ?
df : fmtbits) & twobits) /* Miscellaneous definitions */ #define Fpu_sgl(index) Fpu_register[index*2] -- cgit From 7bf82eb3873fbbee8273f60ddef584194b99f6c1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Jul 2021 14:46:12 +0100 Subject: parisc: Rename PMD_ORDER to PMD_TABLE_ORDER This is the order of the page table allocation, not the order of a PMD. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Helge Deller --- arch/parisc/include/asm/pgalloc.h | 6 +++--- arch/parisc/include/asm/pgtable.h | 4 ++-- arch/parisc/mm/init.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index 6a7e98e71f1d..54b63374579b 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -48,15 +48,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { pmd_t *pmd; - pmd = (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER); + pmd = (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_TABLE_ORDER); if (likely(pmd)) - memset ((void *)pmd, 0, PAGE_SIZE << PMD_ORDER); + memset ((void *)pmd, 0, PAGE_SIZE << PMD_TABLE_ORDER); return pmd; } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { - free_pages((unsigned long)pmd, PMD_ORDER); + free_pages((unsigned long)pmd, PMD_TABLE_ORDER); } #endif diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 43937af127b1..7badd872f05a 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -112,7 +112,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER) #if CONFIG_PGTABLE_LEVELS == 3 -#define PMD_ORDER 1 +#define PMD_TABLE_ORDER 1 #define PGD_ORDER 0 #else #define PGD_ORDER 1 @@ -131,7 +131,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE) #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY) +#define BITS_PER_PMD (PAGE_SHIFT + PMD_TABLE_ORDER - BITS_PER_PMD_ENTRY) #define PTRS_PER_PMD (1UL << BITS_PER_PMD) #else #define BITS_PER_PMD 0 diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 591a4e939415..3f7d6d5b56ac 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -378,8 +378,8 @@ static void __init map_pages(unsigned long start_vaddr, #if CONFIG_PGTABLE_LEVELS == 3 if (pud_none(*pud)) { - pmd = memblock_alloc(PAGE_SIZE << PMD_ORDER, - PAGE_SIZE << PMD_ORDER); + pmd = memblock_alloc(PAGE_SIZE << PMD_TABLE_ORDER, + PAGE_SIZE << PMD_TABLE_ORDER); if (!pmd) panic("pmd allocation failed.\n"); pud_populate(NULL, pud, pmd); -- cgit From d220da0967dbda232350c5dc39317e04e0892743 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Aug 2021 00:48:20 +0900 Subject: parisc: remove unused arch/parisc/boot/install.sh and its phony target Parisc has two similar installation scripts: arch/parisc/install.sh arch/parisc/boot/install.sh The latter is never used because 'make ARCH=parisc install' always invokes the 'install' target in arch/parisc/Makefile. The target in arch/parisc/boot/Makefile is not used either. 
Signed-off-by: Masahiro Yamada Signed-off-by: Helge Deller --- arch/parisc/boot/Makefile | 4 --- arch/parisc/boot/install.sh | 65 --------------------------------------------- 2 files changed, 69 deletions(-) delete mode 100644 arch/parisc/boot/install.sh (limited to 'arch') diff --git a/arch/parisc/boot/Makefile b/arch/parisc/boot/Makefile index 61f44142cfe1..b873ee4720ca 100644 --- a/arch/parisc/boot/Makefile +++ b/arch/parisc/boot/Makefile @@ -15,7 +15,3 @@ $(obj)/bzImage: $(obj)/compressed/vmlinux FORCE $(obj)/compressed/vmlinux: FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ - -install: $(CONFIGURE) $(obj)/bzImage - sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ - System.map "$(INSTALL_PATH)" diff --git a/arch/parisc/boot/install.sh b/arch/parisc/boot/install.sh deleted file mode 100644 index 8f7c365fad83..000000000000 --- a/arch/parisc/boot/install.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/sh -# -# arch/parisc/install.sh, derived from arch/i386/boot/install.sh -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 1995 by Linus Torvalds -# -# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin -# -# "make install" script for i386 architecture -# -# Arguments: -# $1 - kernel version -# $2 - kernel image file -# $3 - kernel map file -# $4 - default install path (blank if root directory) -# - -verify () { - if [ ! -f "$1" ]; then - echo "" 1>&2 - echo " *** Missing file: $1" 1>&2 - echo ' *** You need to run "make" before "make install".' 1>&2 - echo "" 1>&2 - exit 1 - fi -} - -# Make sure the files actually exist - -verify "$2" -verify "$3" - -# User may have a custom install script - -if [ -n "${INSTALLKERNEL}" ]; then - if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi - if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi -fi - -# Default install - -if [ "$(basename $2)" = "zImage" ]; then -# Compressed install - echo "Installing compressed kernel" - base=vmlinuz -else -# Normal install - echo "Installing normal kernel" - base=vmlinux -fi - -if [ -f $4/$base-$1 ]; then - mv $4/$base-$1 $4/$base-$1.old -fi -cat $2 > $4/$base-$1 - -# Install system map file -if [ -f $4/System.map-$1 ]; then - mv $4/System.map-$1 $4/System.map-$1.old -fi -cp $3 $4/System.map-$1 -- cgit From 87875c1084a28364dad8cd4f9ecbfdfe0b845ad5 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Sat, 7 Aug 2021 11:19:27 +0200 Subject: parisc: Make struct parisc_driver::remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The caller of this function (parisc_driver_remove() in arch/parisc/kernel/drivers.c) ignores the return value, so better don't return any value at all to not wake wrong expectations in driver authors. The only function that could return a non-zero value before was ipmi_parisc_remove() which returns the return value of ipmi_si_remove_by_dev(). Make this function return void, too, as for all other callers the value is ignored, too. 
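For driver authors, a hypothetical parisc driver now looks like the sketch below (the probe and remove bodies are made up, only the signatures matter):

        static int example_probe(struct parisc_device *dev)
        {
                /* claim resources; failure is still reported via int */
                return 0;
        }

        static void example_remove(struct parisc_device *dev)
        {
                /* tear everything down; there is nothing to report back */
        }

        static struct parisc_driver example_driver = {
                .name   = "example",
                .probe  = example_probe,
                .remove = example_remove,
        };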
Also fold in a small checkpatch fix for: WARNING: Unnecessary space before function pointer arguments + void (*remove) (struct parisc_device *dev); Acked-by: Dmitry Torokhov (for drivers/input) Signed-off-by: Uwe Kleine-König Acked-by: Sudip Mukherjee Acked-by: Jiri Slaby Signed-off-by: Helge Deller --- arch/parisc/include/asm/parisc-device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/parisc-device.h b/arch/parisc/include/asm/parisc-device.h index d02d144c6012..4de3b391d812 100644 --- a/arch/parisc/include/asm/parisc-device.h +++ b/arch/parisc/include/asm/parisc-device.h @@ -34,8 +34,8 @@ struct parisc_driver { struct parisc_driver *next; char *name; const struct parisc_device_id *id_table; - int (*probe) (struct parisc_device *dev); /* New device discovered */ - int (*remove) (struct parisc_device *dev); + int (*probe)(struct parisc_device *dev); /* New device discovered */ + void (*remove)(struct parisc_device *dev); struct device_driver drv; }; -- cgit From 6ef4661cad32b5098ffb31be3282c866befde85f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:41:16 +0900 Subject: parisc: move core-y in arch/parisc/Makefile to arch/parisc/Kbuild Use obj-y to clean up Makefile. Signed-off-by: Masahiro Yamada Signed-off-by: Helge Deller --- arch/parisc/Kbuild | 1 + arch/parisc/Makefile | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Kbuild b/arch/parisc/Kbuild index a4e40e534e6a..3c068b700a81 100644 --- a/arch/parisc/Kbuild +++ b/arch/parisc/Kbuild @@ -1 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-y += mm/ kernel/ math-emu/ diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index aed8ea29268b..6c27940eff03 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -111,9 +111,6 @@ KBUILD_CFLAGS += $(cflags-y) LIBGCC := $(shell $(CC) -print-libgcc-file-name) export LIBGCC -kernel-y := mm/ kernel/ math-emu/ - -core-y += $(addprefix arch/parisc/, $(kernel-y)) libs-y += arch/parisc/lib/ $(LIBGCC) boot := arch/parisc/boot -- cgit From 4b511d5bfa74b1926daefd1694205c7f1bcf677f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 30 Jul 2021 11:26:21 +0200 Subject: xen: fix setting of max_pfn in shared_info Xen PV guests are specifying the highest used PFN via the max_pfn field in shared_info. This value is used by the Xen tools when saving or migrating the guest. Unfortunately this field is misnamed, as in reality it is specifying the number of pages (including any memory holes) of the guest, so it is the highest used PFN + 1. Renaming isn't possible, as this is a public Xen hypervisor interface which needs to be kept stable. The kernel will set the value correctly initially at boot time, but when adding more pages (e.g. due to memory hotplug or ballooning) a real PFN number is stored in max_pfn. This is done when expanding the p2m array, and the PFN stored there is even possibly wrong, as it should be the last possible PFN of the just added P2M frame, and not one which led to the P2M expansion. Fix that by setting shared_info->max_pfn to the last possible PFN + 1. 
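A worked example of the corrected arithmetic, assuming the usual 64-bit P2M_PER_PAGE of 512 entries: when pfn 0x10000 triggers an expansion, the new P2M frame covers pfns 0x10000..0x101ff, so the value reported to Xen must be 0x10200:

        /* sketch of the fixed update with the example values inlined */
        if (pfn >= xen_p2m_last_pfn) {
                /* ALIGN(0x10000 + 1, 512) == 0x10200: the last possible
                 * PFN of the just added frame plus one, not the PFN that
                 * happened to trigger the expansion */
                xen_p2m_last_pfn = ALIGN(pfn + 1, P2M_PER_PAGE);
                HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
        }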
Fixes: 98dd166ea3a3c3 ("x86/xen/p2m: hint at the last populated P2M entry") Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Link: https://lore.kernel.org/r/20210730092622.9973-2-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/p2m.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index ac06ca32e9ef..5e6e236977c7 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -618,8 +618,8 @@ int xen_alloc_p2m_entry(unsigned long pfn) } /* Expanded the p2m? */ - if (pfn > xen_p2m_last_pfn) { - xen_p2m_last_pfn = pfn; + if (pfn >= xen_p2m_last_pfn) { + xen_p2m_last_pfn = ALIGN(pfn + 1, P2M_PER_PAGE); HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; } -- cgit From 2526cff7c4f944924f39043dc657189eccc4fe5c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 30 Jul 2021 09:18:03 +0200 Subject: xen: assume XENFEAT_mmu_pt_update_preserve_ad being set for pv guests XENFEAT_mmu_pt_update_preserve_ad is always set in Xen 4.0 and newer. Remove coding assuming it might be zero. Signed-off-by: Juergen Gross Acked-by: Peter Zijlstra (Intel) Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210730071804.4302-3-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 12 ++---------- arch/x86/xen/mmu_pv.c | 4 ++-- 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 03149422dce2..753f63734c13 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -116,9 +116,8 @@ static void __init xen_banner(void) HYPERVISOR_xen_version(XENVER_extraversion, &extra); pr_info("Booting paravirtualized kernel on %s\n", pv_info.name); - printk(KERN_INFO "Xen version: %d.%d%s%s\n", - version >> 16, version & 0xffff, extra.extraversion, - xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); + pr_info("Xen version: %d.%d%s (preserve-AD)\n", + version >> 16, version & 0xffff, extra.extraversion); } static void __init xen_pv_init_platform(void) @@ -1302,13 +1301,6 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_init_apic(); #endif - if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { - pv_ops.mmu.ptep_modify_prot_start = - xen_ptep_modify_prot_start; - pv_ops.mmu.ptep_modify_prot_commit = - xen_ptep_modify_prot_commit; - } - machine_ops = xen_machine_ops; /* diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index ade789e73ee4..1df5f01529e5 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2099,8 +2099,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .set_pte = xen_set_pte_init, .set_pmd = xen_set_pmd_hyper, - .ptep_modify_prot_start = __ptep_modify_prot_start, - .ptep_modify_prot_commit = __ptep_modify_prot_commit, + .ptep_modify_prot_start = xen_ptep_modify_prot_start, + .ptep_modify_prot_commit = xen_ptep_modify_prot_commit, .pte_val = PV_CALLEE_SAVE(xen_pte_val), .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), -- cgit From 1a0df28c09831dc4bd336fe6e090bbe85a07e79f Mon Sep 17 00:00:00 2001 From: zhaoxiao Date: Wed, 25 Aug 2021 19:41:11 +0800 Subject: x86: xen: platform-pci-unplug: use pr_err() and pr_warn() instead of raw printk() Since we have the nice helpers pr_err() and pr_warn(), use them instead of raw printk(). [jgross@suse.com] Move the "#define pr_fmt" above the #includes in order to avoid build warnings due to redefinition. 
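In sketch form, the ordering matters because printk.h installs a fallback pr_fmt() if none is visible yet, and the pr_*() helpers expand with whichever definition they see:

        /* must come before the #includes: otherwise printk.h defines
         * its default pr_fmt() first and this define redefines it */
        #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

        #include <linux/printk.h>

        /* pr_err("unrecognised magic value\n") now expands to
         * printk(KERN_ERR KBUILD_MODNAME ": " "unrecognised magic value\n") */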
Signed-off-by: zhaoxiao Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20210825114111.29009-1-zhaoxiao@uniontech.com Signed-off-by: Juergen Gross --- arch/x86/xen/platform-pci-unplug.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 96d7f7d39cb9..62ac4898df1a 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -7,6 +7,8 @@ * Copyright (c) 2010, Citrix */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -30,13 +32,13 @@ static int check_platform_magic(void) magic = inw(XEN_IOPORT_MAGIC); if (magic != XEN_IOPORT_MAGIC_VAL) { - printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n"); + pr_err("Xen Platform PCI: unrecognised magic value\n"); return XEN_PLATFORM_ERR_MAGIC; } protocol = inb(XEN_IOPORT_PROTOVER); - printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n", + pr_debug("Xen Platform PCI: I/O protocol version %d\n", protocol); switch (protocol) { @@ -44,12 +46,12 @@ static int check_platform_magic(void) outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM); outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER); if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) { - printk(KERN_ERR "Xen Platform: blacklisted by host\n"); + pr_err("Xen Platform: blacklisted by host\n"); return XEN_PLATFORM_ERR_BLACKLIST; } break; default: - printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version\n"); + pr_warn("Xen Platform PCI: unknown I/O protocol version\n"); return XEN_PLATFORM_ERR_PROTOCOL; } @@ -155,12 +157,12 @@ void xen_unplug_emulated_devices(void) * been compiled for this kernel (modules or built-in are both OK). */ if (!xen_emul_unplug) { if (xen_must_unplug_nics()) { - printk(KERN_INFO "Netfront and the Xen platform PCI driver have " + pr_info("Netfront and the Xen platform PCI driver have " "been compiled for this kernel: unplug emulated NICs.\n"); xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; } if (xen_must_unplug_disks()) { - printk(KERN_INFO "Blkfront and the Xen platform PCI driver have " + pr_info("Blkfront and the Xen platform PCI driver have " "been compiled for this kernel: unplug emulated disks.\n" "You might have to change the root device\n" "from /dev/hd[a-d] to /dev/xvd[a-d]\n" @@ -200,7 +202,7 @@ static int __init parse_xen_emul_unplug(char *arg) else if (!strncmp(p, "never", l)) xen_emul_unplug |= XEN_UNPLUG_NEVER; else - printk(KERN_WARNING "unrecognised option '%s' " + pr_warn("unrecognised option '%s' " "in parameter 'xen_emul_unplug'\n", p); } return 0; -- cgit From 436fc4feeabbf103d78d50a8e091b3aac28cc37f Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 27 Aug 2021 08:36:06 +0200 Subject: s390: add kmemleak annotation in stack_alloc() kmemleak with enabled auto scanning reports that our stack allocation is lost. This is because we're saving the pointer + STACK_INIT_OFFSET to lowcore. When kmemleak now scans the objects, it thinks that this one is lost because it can't find a corresponding pointer. 
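In sketch form (simplified from the patch below):

        void *stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE,
                                     THREADINFO_GFP, NUMA_NO_NODE,
                                     __builtin_return_address(0));

        /* the only reference that survives is offset into the object,
         * so kmemleak's scan never matches the allocation itself ... */
        S390_lowcore.kernel_stack = (unsigned long)stack + STACK_INIT_OFFSET;

        /* ... hence the object is explicitly excluded from the scan */
        kmemleak_not_leak(stack);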
Reported-by: Marc Hartmayer Signed-off-by: Sven Schnelle Tested-by: Marc Hartmayer Signed-off-by: Heiko Carstens --- arch/s390/kernel/setup.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index fe14beb338e5..1b8316212512 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -356,9 +357,12 @@ void *restart_stack; unsigned long stack_alloc(void) { #ifdef CONFIG_VMAP_STACK - return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE, - THREADINFO_GFP, NUMA_NO_NODE, - __builtin_return_address(0)); + void *ret; + + ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, + NUMA_NO_NODE, __builtin_return_address(0)); + kmemleak_not_leak(ret); + return (unsigned long)ret; #else return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); #endif -- cgit From 15256194eff64f9a774b33b7817ea663e352394a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 27 Aug 2021 13:45:14 +0200 Subject: s390/entry: make oklabel within CHKSTG macro local Make the oklabel within the CHKSTG macro local. This makes sure that tools like objdump and the crash debugging tool still disassemble full functions where the macro has been used instead of stopping half way where such a global label is used and one has to guess how to disassemble the rest of such a function: E.g.: 0000000000cb0270 : cb0270: b2 05 03 20 stck 800 ... cb0354: a7 74 00 97 jne cb0482 0000000000cb0358 : cb0358: c0 e0 00 22 4e 8f larl %r14,10fa076 ... Fixes: d35925b34996 ("s390/mcck: move storage error checks to assembler") Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index b9716a7e326d..4c9b967290ae 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -140,10 +140,10 @@ _LPP_OFFSET = __LC_LPP TSTMSK __LC_MCCK_CODE,(MCCK_CODE_STG_ERROR|MCCK_CODE_STG_KEY_ERROR) jnz \errlabel TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_DEGRAD - jz oklabel\@ + jz .Loklabel\@ TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_FAIL_ADDR jnz \errlabel -oklabel\@: +.Loklabel\@: .endm #if IS_ENABLED(CONFIG_KVM) -- cgit From 81912856e0fbc294deed595db568a7a00f962d95 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 28 Aug 2021 10:18:44 +0200 Subject: s390/configs: enable CONFIG_KFENCE in debug_defconfig Signed-off-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 11ffc7c37ada..37b6115ed80e 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -804,6 +804,7 @@ CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_DEBUG_PER_CPU_MAPS=y +CONFIG_KFENCE=y CONFIG_DEBUG_SHIRQ=y CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y -- cgit From 7851155a1a7c33f2270dcd979ab6532a89be5293 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 26 Aug 2021 22:59:44 +0930 Subject: openrisc/litex: Update uart address Recent litex socs will place the UART at 0xe0006800. 
Signed-off-by: Joel Stanley Signed-off-by: Stafford Horne --- arch/openrisc/boot/dts/or1klitex.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/openrisc/boot/dts/or1klitex.dts b/arch/openrisc/boot/dts/or1klitex.dts index 3f9867aa3844..baba4f49fa6b 100644 --- a/arch/openrisc/boot/dts/or1klitex.dts +++ b/arch/openrisc/boot/dts/or1klitex.dts @@ -41,10 +41,10 @@ interrupt-controller; }; - serial0: serial@e0002000 { + serial0: serial@e0006800 { device_type = "serial"; compatible = "litex,liteuart"; - reg = <0xe0002000 0x100>; + reg = <0xe0006800 0x100>; }; soc_ctrl0: soc_controller@e0000000 { -- cgit From 978c791491bce8cc1a27cb50392a2e8bbcea79d4 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 26 Aug 2021 22:59:45 +0930 Subject: openrisc/litex: Add ethernet device Add the liteeth ethernet device. Signed-off-by: Joel Stanley Signed-off-by: Stafford Horne --- arch/openrisc/boot/dts/or1klitex.dts | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/openrisc/boot/dts/or1klitex.dts b/arch/openrisc/boot/dts/or1klitex.dts index baba4f49fa6b..91c7173c50e6 100644 --- a/arch/openrisc/boot/dts/or1klitex.dts +++ b/arch/openrisc/boot/dts/or1klitex.dts @@ -52,4 +52,13 @@ reg = <0xe0000000 0xc>; status = "okay"; }; + + ethernet@e0001000 { + compatible = "litex,liteeth"; + reg = <0xe0001000 0x7c>, + <0xe0001800 0x0a>, + <0x80000000 0x2000>; + reg-names = "mac", "mdio", "buffer"; + interrupts = <2>; + }; }; -- cgit From 1955d843efc3b5cf3ab4878986a87ad4972ff4e1 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 26 Aug 2021 22:59:46 +0930 Subject: openrisc/litex: Update defconfig Add the liteeth network device and basic network options, and update the options by doing a savedefconfig. Signed-off-by: Joel Stanley Signed-off-by: Stafford Horne --- arch/openrisc/configs/or1klitex_defconfig | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/openrisc/configs/or1klitex_defconfig b/arch/openrisc/configs/or1klitex_defconfig index 3c2c70d3d740..d695879a4d26 100644 --- a/arch/openrisc/configs/or1klitex_defconfig +++ b/arch/openrisc/configs/or1klitex_defconfig @@ -1,18 +1,24 @@ CONFIG_BLK_DEV_INITRD=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y -CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y CONFIG_EMBEDDED=y +CONFIG_OPENRISC_BUILTIN_DTB="or1klitex" CONFIG_HZ_100=y -CONFIG_INITRAMFS_SOURCE="openrisc-rootfs.cpio.gz" +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y CONFIG_OF_OVERLAY=y -CONFIG_OPENRISC_BUILTIN_DTB="or1klitex" -CONFIG_PANIC_ON_OOPS=y -CONFIG_PRINTK_TIME=y -CONFIG_LITEX_SOC_CONTROLLER=y +CONFIG_NETDEVICES=y +CONFIG_LITEX_LITEETH=y CONFIG_SERIAL_LITEUART=y CONFIG_SERIAL_LITEUART_CONSOLE=y -CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_TTY_PRINTK=y +CONFIG_LITEX_SOC_CONTROLLER=y +CONFIG_TMPFS=y +CONFIG_PRINTK_TIME=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BUG_ON_DATA_CORRUPTION=y -- cgit From b14b8b1ed0e15b8f43fba9c25654278a31ee3c2f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 31 Aug 2021 23:51:51 +1000 Subject: powerpc/ptdump: Fix generic ptdump for 64-bit Since the conversion to generic ptdump we see crashes on 64-bit: BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000 Faulting instruction address: 0xc00000000045e5fc Oops: Kernel access of bad area, sig: 11 [#1] ... 
NIP __walk_page_range+0x2bc/0xce0 LR __walk_page_range+0x240/0xce0 Call Trace: __walk_page_range+0x240/0xce0 (unreliable) walk_page_range_novma+0x74/0xb0 ptdump_walk_pgd+0x98/0x170 ptdump_check_wx+0x88/0xd0 mark_rodata_ro+0x48/0x80 kernel_init+0x74/0x1a0 ret_from_kernel_thread+0x5c/0x64 What's happening is that we have walked off the end of the kernel page tables, and started dereferencing junk values. That happens because we initialised the ptdump_range to span all the way up to 0xffffffffffffffff: static struct ptdump_range ptdump_range[] __ro_after_init = { {TASK_SIZE_MAX, ~0UL}, But the kernel page tables don't span that far. So on 64-bit set the end of the range to be the address immediately past the end of the kernel page tables, to limit the page table walk to valid addresses. Fixes: e084728393a5 ("powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP") Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210831135151.886620-1-mpe@ellerman.id.au --- arch/powerpc/mm/ptdump/ptdump.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 2d80d775d15e..bf251191e78d 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -359,6 +359,8 @@ static int __init ptdump_init(void) ptdump_range[0].start = KERN_VIRT_START; else ptdump_range[0].start = PAGE_OFFSET; + + ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD); #endif populate_markers(); -- cgit From 4b92d4add5f6dcf21275185c997d6ecb800054cd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Aug 2021 13:48:34 +0200 Subject: drivers: base: cacheinfo: Get rid of DEFINE_SMP_CALL_CACHE_FUNCTION() DEFINE_SMP_CALL_CACHE_FUNCTION() was useful before the CPU hotplug rework to ensure that the cache-related functions are called on the upcoming CPU because the notifier itself could run on any online CPU. The hotplug state machine guarantees that the callbacks are invoked on the upcoming CPU. So there is no need to have this SMP function call obfuscation. That indirection was missed when the hotplug notifiers were converted. This also solves the problem of ARM64 init_cache_level() invoking ACPI functions which take a semaphore in that context. That's invalid as SMP function calls run with interrupts disabled. Running it just from the callback in the context of the CPU hotplug thread solves this.
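For reference, the removed wrapper looked roughly like the sketch below (paraphrased from memory of the old include/linux/cacheinfo.h, details may differ): it bounced the arch callback through an IPI even though cpuhp already runs it on the right CPU:

        #define DEFINE_SMP_CALL_CACHE_FUNCTION(func)                    \
        static inline void _##func(void *ret)                           \
        {                                                               \
                int cpu = smp_processor_id();                           \
                *(int *)ret = __##func(cpu);                            \
        }                                                               \
        int func(unsigned int cpu)                                      \
        {                                                               \
                int ret;                                                \
                smp_call_function_single(cpu, _##func, &ret, true);     \
                return ret;                                             \
        }

With the wrapper gone, the arch functions lose their __ prefix and static linkage and are called directly, which is exactly what the diff below does.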
Fixes: 8571890e1513 ("arm64: Add support for ACPI based firmware tables") Reported-by: Guenter Roeck Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Acked-by: Will Deacon Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/871r69ersb.ffs@tglx --- arch/arm64/kernel/cacheinfo.c | 7 ++----- arch/mips/kernel/cacheinfo.c | 7 ++----- arch/riscv/kernel/cacheinfo.c | 7 ++----- arch/x86/kernel/cpu/cacheinfo.c | 7 ++----- 4 files changed, 8 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 7fa6828bb488..587543c6c51c 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -43,7 +43,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->type = type; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { unsigned int ctype, level, leaves, fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -78,7 +78,7 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int level, idx; enum cache_type type; @@ -97,6 +97,3 @@ static int __populate_cache_leaves(unsigned int cpu) } return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c index 53d8ea7d36e6..495dd058231d 100644 --- a/arch/mips/kernel/cacheinfo.c +++ b/arch/mips/kernel/cacheinfo.c @@ -17,7 +17,7 @@ do { \ leaf++; \ } while (0) -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -74,7 +74,7 @@ static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map) cpumask_set_cpu(cpu1, cpu_map); } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -114,6 +114,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index d86781357044..90deabfe63ea 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -113,7 +113,7 @@ static void fill_cacheinfo(struct cacheinfo **this_leaf, } } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct device_node *np = of_cpu_device_node_get(cpu); @@ -155,7 +155,7 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; @@ -187,6 +187,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index d66af2950e06..b5e36bd0425b 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -985,7 +985,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, 
this_leaf->priv = base->nb; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1014,7 +1014,7 @@ static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs) id4_regs->id = c->apicid >> index_msb; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int idx, ret; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1033,6 +1033,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) -- cgit From 58e636039b512697554b579c2bb23774061877f5 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 25 Aug 2021 13:31:58 +0200 Subject: xen: remove stray preempt_disable() from PV AP startup code In cpu_bringup() there is a call of preempt_disable() without a paired preempt_enable(). This is not needed as interrupts are off initially. Additionally this will result in early boot messages like: BUG: scheduling while atomic: swapper/1/0/0x00000002 Signed-off-by: Juergen Gross Link: https://lore.kernel.org/r/20210825113158.11716-1-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/smp_pv.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index c2ac319f11a4..96afadf9878e 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -64,7 +64,6 @@ static void cpu_bringup(void) cr4_init(); cpu_init(); touch_softlockup_watchdog(); - preempt_disable(); /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */ if (!xen_feature(XENFEAT_supervisor_mode_kernel)) { -- cgit From e432fe97f3e5de325b40021e505cce53877586c5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 27 Aug 2021 22:51:06 +1000 Subject: powerpc/bug: Cast to unsigned long before passing to inline asm In commit 1e688dd2a3d6 ("powerpc/bug: Provide better flexibility to WARN_ON/__WARN_FLAGS() with asm goto") we changed WARN_ON(). Previously it would take the warning condition, x, and double negate it before converting the result to int, and passing that int to the underlying inline asm. ie: #define WARN_ON(x) ({ int __ret_warn_on = !!(x); if (__builtin_constant_p(__ret_warn_on)) { ... } else { BUG_ENTRY(PPC_TLNEI " %4, 0", BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), "r" (__ret_warn_on)); The asm then does a full register width comparison with zero and traps if it is non-zero (PPC_TLNEI). The new code instead passes the full expression, x, with some arbitrary type, to the inline asm: #define WARN_ON(x) ({ ... do { if (__builtin_constant_p((x))) { ... } else { ... 
WARN_ENTRY(PPC_TLNEI " %4, 0", BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), __label_warn_on, "r" (x)); As reported[1] by Nathan, when building with clang this can cause spurious warnings to fire repeatedly at boot: WARNING: CPU: 0 PID: 1 at lib/klist.c:62 .klist_add_tail+0x3c/0x110 Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 5.14.0-rc7-next-20210825 #1 NIP: c0000000007ff81c LR: c00000000090a038 CTR: 0000000000000000 REGS: c0000000073c32a0 TRAP: 0700 Tainted: G W (5.14.0-rc7-next-20210825) MSR: 8000000002029032 CR: 22000a40 XER: 00000000 CFAR: c00000000090a034 IRQMASK: 0 GPR00: c00000000090a038 c0000000073c3540 c000000001be3200 0000000000000001 GPR04: c0000000072d65c0 0000000000000000 c0000000091ba798 c0000000091bb0a0 GPR08: 0000000000000001 0000000000000000 c000000008581918 fffffffffffffc00 GPR12: 0000000044000240 c000000001dd0000 c000000000012300 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 c0000000017e3200 0000000000000000 c000000001a0e778 GPR28: c0000000072d65b0 c0000000072d65a8 c000000007de72c8 c0000000073c35d0 NIP .klist_add_tail+0x3c/0x110 LR .bus_add_driver+0x148/0x290 Call Trace: 0xc0000000073c35d0 (unreliable) .bus_add_driver+0x148/0x290 .driver_register+0xb8/0x190 .__hid_register_driver+0x70/0xd0 .redragon_driver_init+0x34/0x58 .do_one_initcall+0x130/0x3b0 .do_initcall_level+0xd8/0x188 .do_initcalls+0x7c/0xdc .kernel_init_freeable+0x178/0x21c .kernel_init+0x34/0x220 .ret_from_kernel_thread+0x58/0x60 Instruction dump: fba10078 7c7d1b78 38600001 fb810070 3b9d0008 fbc10080 7c9e2378 389d0018 fb9d0008 fb9d0010 90640000 fbdd0000 <0b1e0000> e87e0018 28230000 41820024 The instruction dump shows that we are trapping because r30 is not zero: tdnei r30,0 Where r30 = c000000007de72c8 The WARN_ON() comes from: static void knode_set_klist(struct klist_node *knode, struct klist *klist) { knode->n_klist = klist; /* no knode deserves to start its life dead */ WARN_ON(knode_dead(knode)); ^^^^^^^^^^^^^^^^^ Where: #define KNODE_DEAD 1LU static bool knode_dead(struct klist_node *knode) { return (unsigned long)knode->n_klist & KNODE_DEAD; } The full disassembly shows that clang has not generated any code to apply the "& KNODE_DEAD" to the n_klist pointer, which is surprising. Nathan filed an LLVM bug [2], in which Eli Friedman explained that clang believes it is only passing a single bit to the asm (ie. a bool) and so the mask of bit 0 with 1 can be omitted, and suggested that if we want the full 64-bit value passed to the inline asm we should cast to a 64-bit type (or 32-bit on 32-bits). In fact we already do that for BUG_ENTRY(), which was added to fix a possibly similar bug in 2005 in commit 32818c2eb6b8 ("[PATCH] ppc64: Fix issue with gcc 4.0 compiled kernels"). So cast the value we pass to the inline asm to long. For GCC this appears to have no effect on code generation, other than causing sign extension in some cases. 
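In sketch form, the difference in what reaches the "r" operand of the inline asm (kernel context assumed, the variable names are made up):

        /* before 1e688dd2a3d6: the condition was squashed to an int */
        int old_operand = !!((unsigned long)knode->n_klist & KNODE_DEAD);

        /* after 1e688dd2a3d6: the raw expression went in with its own
         * type, letting clang conclude that only bit 0 could ever be
         * set and drop the '& KNODE_DEAD' mask entirely */

        /* with this fix: widen first, forcing a full-register value */
        long new_operand = (long)((unsigned long)knode->n_klist & KNODE_DEAD);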
[1]: http://lore.kernel.org/r/YSa1O4fcX1nNKqN/@Ryzen-9-3900X.localdomain [2]: https://bugs.llvm.org/show_bug.cgi?id=51634 Fixes: 1e688dd2a3d6 ("powerpc/bug: Provide better flexibility to WARN_ON/__WARN_FLAGS() with asm goto") Reported-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210901112522.1085134-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/bug.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 1ee0f22313ee..02c08d1492f8 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -119,7 +119,8 @@ __label_warn_on: \ \ WARN_ENTRY(PPC_TLNEI " %4, 0", \ BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \ - __label_warn_on, "r" (x)); \ + __label_warn_on, \ + "r" ((__force long)(x))); \ break; \ __label_warn_on: \ __ret_warn_on = true; \ -- cgit From 5f6e0fe01b6b33894cf6f61b359ab5a6d2b7674e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Jun 2021 11:03:31 +0900 Subject: parisc: Fix compile failure when building 64-bit kernel natively Commit 23243c1ace9f ("arch: use cross_compiling to check whether it is a cross build or not") broke 64-bit parisc builds on 32-bit parisc systems. Helge mentioned: - 64-bit parisc userspace is not supported yet [1] - hppa gcc does not support "-m64" flag [2] That means, parisc developers working on a 32-bit parisc machine need to use hppa64-linux-gnu-gcc (cross compiler) for building the 64-bit parisc kernel. After the offending commit, gcc is used in such a case because both $(SRCARCH) and $(SUBARCH) are 'parisc', hence cross_compiling is unset. A correct way is to introduce ARCH=parisc64 because building the 64-bit parisc kernel on a 32-bit parisc system is not exactly a native build, but rather a semi-cross build. [1]: https://lore.kernel.org/linux-parisc/5dfd81eb-c8ca-b7f5-e80e-8632767c022d@gmx.de/#t [2]: https://lore.kernel.org/linux-parisc/89515325-fc21-31da-d238-6f7a9abbf9a0@gmx.de/ Fixes: 23243c1ace9f ("arch: use cross_compiling to check whether it is a cross build or not") Signed-off-by: Masahiro Yamada Reported-by: Meelis Roos Tested-by: Meelis Roos Cc: # v5.13+ Signed-off-by: Helge Deller --- arch/parisc/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 6c27940eff03..fcde3ffa0221 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -25,18 +25,18 @@ CHECKFLAGS += -D__hppa__=1 ifdef CONFIG_64BIT UTS_MACHINE := parisc64 CHECKFLAGS += -D__LP64__=1 -CC_ARCHES = hppa64 LD_BFD := elf64-hppa-linux else # 32-bit -CC_ARCHES = hppa hppa2.0 hppa1.1 LD_BFD := elf32-hppa-linux endif # select defconfig based on actual architecture -ifeq ($(shell uname -m),parisc64) +ifeq ($(ARCH),parisc64) KBUILD_DEFCONFIG := generic-64bit_defconfig + CC_ARCHES := hppa64 else KBUILD_DEFCONFIG := generic-32bit_defconfig + CC_ARCHES := hppa hppa2.0 hppa1.1 endif export LD_BFD -- cgit From 030f653078316a9cc9ca6bd1b0234dcf858be35d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 30 Aug 2021 05:42:27 -0400 Subject: parisc: fix crash with signals and alloca I was debugging some crashes on parisc and I found out that there is a crash possibility if a function using alloca is interrupted by a signal. The reason for the crash is that the gcc alloca implementation leaves garbage in the upper 32 bits of the sp register. 
This normally doesn't matter (the upper bits are ignored because the PSW W-bit is clear), however the signal delivery routine in the kernel uses full 64 bits of sp and it fails with -EFAULT if the upper 32 bits are not zero. I created this program that demonstrates the problem: #include #include #include #include static __attribute__((noinline,noclone)) void aa(int *size) { void * volatile p = alloca(-*size); while (1) ; } static void handler(int sig) { write(1, "signal delivered\n", 17); _exit(0); } int main(void) { int size = -0x100; signal(SIGALRM, handler); alarm(1); aa(&size); } If you compile it with optimizations, it will crash. The "aa" function has this disassembly: 000106a0 : 106a0: 08 03 02 41 copy r3,r1 106a4: 08 1e 02 43 copy sp,r3 106a8: 6f c1 00 80 stw,ma r1,40(sp) 106ac: 37 dc 3f c1 ldo -20(sp),ret0 106b0: 0c 7c 12 90 stw ret0,8(r3) 106b4: 0f 40 10 9c ldw 0(r26),ret0 ; ret0 = 0x00000000FFFFFF00 106b8: 97 9c 00 7e subi 3f,ret0,ret0 ; ret0 = 0xFFFFFFFF0000013F 106bc: d7 80 1c 1a depwi 0,31,6,ret0 ; ret0 = 0xFFFFFFFF00000100 106c0: 0b 9e 0a 1e add,l sp,ret0,sp ; sp = 0xFFFFFFFFxxxxxxxx 106c4: e8 1f 1f f7 b,l,n 106c4 ,r0 This patch fixes the bug by truncating the "usp" variable to 32 bits. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Helge Deller --- arch/parisc/kernel/signal.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index fb1e94a3982b..db1a47cf424d 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -237,6 +237,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, #endif usp = (regs->gr[30] & ~(0x01UL)); +#ifdef CONFIG_64BIT + if (is_compat_task()) { + /* The gcc alloca implementation leaves garbage in the upper 32 bits of sp */ + usp = (compat_uint_t)usp; + } +#endif /*FIXME: frame_size parameter is unused, remove it. 
*/ frame = get_sigframe(&ksig->ka, usp, sizeof(*frame)); -- cgit From 6f1fce595b78b775d7fb585c15c2dc3a6994f96e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 1 Sep 2021 22:18:18 +0200 Subject: parisc: math-emu: Fix fall-through warnings Fix lots of fallthrough warnings, e.g.: arch/parisc/math-emu/fpudispatch.c:323:33: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Helge Deller --- arch/parisc/math-emu/fpudispatch.c | 56 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/parisc/math-emu/fpudispatch.c b/arch/parisc/math-emu/fpudispatch.c index 7c46969ead9b..01ed133227c2 100644 --- a/arch/parisc/math-emu/fpudispatch.c +++ b/arch/parisc/math-emu/fpudispatch.c @@ -310,12 +310,15 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1]; return(NOEXCEPTION); } + BUG(); case 3: /* FABS */ switch (fmt) { case 2: /* illegal */ @@ -325,13 +328,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and clear sign bit */ fpregs[t] = fpregs[r1] & 0x7fffffff; return(NOEXCEPTION); } + BUG(); case 6: /* FNEG */ switch (fmt) { case 2: /* illegal */ @@ -341,13 +347,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and invert sign bit */ fpregs[t] = fpregs[r1] ^ 0x80000000; return(NOEXCEPTION); } + BUG(); case 7: /* FNEGABS */ switch (fmt) { case 2: /* illegal */ @@ -357,13 +366,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and set sign bit */ fpregs[t] = fpregs[r1] | 0x80000000; return(NOEXCEPTION); } + BUG(); case 4: /* FSQRT */ switch (fmt) { case 0: @@ -376,6 +388,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 5: /* FRND */ switch (fmt) { case 0: @@ -389,7 +402,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(MAJOR_0C_EXCP); } } /* end of switch (subop) */ - + BUG(); case 1: /* class 1 */ df = extru(ir,fpdfpos,2); /* get dest format */ if ((df & 2) || (fmt & 2)) { @@ -419,6 +432,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* dbl/dbl */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FCNVXF */ switch(fmt) { case 0: /* sgl/sgl */ @@ -434,6 +448,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvxf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 2: /* FCNVFX */ switch(fmt) { case 0: /* sgl/sgl */ @@ -449,6 +464,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfx(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 3: /* FCNVFXT */ switch(fmt) { case 0: /* sgl/sgl */ @@ -464,6 +480,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0, &fpregs[t],status)); } + 
BUG(); case 5: /* FCNVUF (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -479,6 +496,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvuf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 6: /* FCNVFU (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -494,6 +512,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfu(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 7: /* FCNVFUT (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -509,10 +528,11 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfut(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 4: /* undefined */ return(MAJOR_0C_EXCP); } /* end of switch subop */ - + BUG(); case 2: /* class 2 */ fpu_type_flags=fpregs[FPU_TYPE_FLAG_POS]; r2 = extru(ir, fpr2pos, 5) * sizeof(double)/sizeof(u_int); @@ -590,6 +610,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FTEST */ switch (fmt) { case 0: @@ -609,8 +630,10 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: return(MAJOR_0C_EXCP); } + BUG(); } /* end of switch subop */ } /* end of else for PA1.0 & PA1.1 */ + BUG(); case 3: /* class 3 */ r2 = extru(ir,fpr2pos,5) * sizeof(double)/sizeof(u_int); if (r2 == 0) @@ -633,6 +656,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FSUB */ switch (fmt) { case 0: @@ -645,6 +669,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 2: /* FMPY */ switch (fmt) { case 0: @@ -657,6 +682,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 3: /* FDIV */ switch (fmt) { case 0: @@ -669,6 +695,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 4: /* FREM */ switch (fmt) { case 0: @@ -681,6 +708,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); } /* end of class 3 switch */ } /* end of switch(class) */ @@ -736,10 +764,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1]; return(NOEXCEPTION); } + BUG(); case 3: /* FABS */ switch (fmt) { case 2: @@ -747,10 +777,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] & 0x7fffffff; return(NOEXCEPTION); } + BUG(); case 6: /* FNEG */ switch (fmt) { case 2: @@ -758,10 +790,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] ^ 0x80000000; return(NOEXCEPTION); } + BUG(); case 7: /* FNEGABS */ switch (fmt) { case 2: @@ -769,10 +803,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] | 0x80000000; return(NOEXCEPTION); } + BUG(); case 4: /* FSQRT */ switch (fmt) { case 0: @@ -785,6 +821,7 @@ u_int fpregs[]; case 3: return(MAJOR_0E_EXCP); } + BUG(); case 5: /* FRMD */ switch (fmt) { case 0: @@ -798,7 +835,7 @@ u_int fpregs[]; 
return(MAJOR_0E_EXCP); } } /* end of switch (subop */ - + BUG(); case 1: /* class 1 */ df = extru(ir,fpdfpos,2); /* get dest format */ /* @@ -826,6 +863,7 @@ u_int fpregs[]; case 3: /* dbl/dbl */ return(MAJOR_0E_EXCP); } + BUG(); case 1: /* FCNVXF */ switch(fmt) { case 0: /* sgl/sgl */ @@ -841,6 +879,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvxf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 2: /* FCNVFX */ switch(fmt) { case 0: /* sgl/sgl */ @@ -856,6 +895,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfx(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 3: /* FCNVFXT */ switch(fmt) { case 0: /* sgl/sgl */ @@ -871,6 +911,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 5: /* FCNVUF (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -886,6 +927,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvuf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 6: /* FCNVFU (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -901,6 +943,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfu(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 7: /* FCNVFUT (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -916,9 +959,11 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfut(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 4: /* undefined */ return(MAJOR_0C_EXCP); } /* end of switch subop */ + BUG(); case 2: /* class 2 */ /* * Be careful out there. @@ -994,6 +1039,7 @@ u_int fpregs[]; } } /* end of switch subop */ } /* end of else for PA1.0 & PA1.1 */ + BUG(); case 3: /* class 3 */ /* * Be careful out there. @@ -1026,6 +1072,7 @@ u_int fpregs[]; return(dbl_fadd(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 1: /* FSUB */ switch (fmt) { case 0: @@ -1035,6 +1082,7 @@ u_int fpregs[]; return(dbl_fsub(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 2: /* FMPY or XMPYU */ /* * check for integer multiply (x bit set) @@ -1071,6 +1119,7 @@ u_int fpregs[]; &fpregs[r2],&fpregs[t],status)); } } + BUG(); case 3: /* FDIV */ switch (fmt) { case 0: @@ -1080,6 +1129,7 @@ u_int fpregs[]; return(dbl_fdiv(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 4: /* FREM */ switch (fmt) { case 0: -- cgit From bea6a94a279bcbe6b2cde348782b28baf12255a5 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 2 Sep 2021 09:19:51 +0200 Subject: MIPS: Malta: fix alignment of the devicetree buffer Starting with following patch MIPS Malta is not able to boot: | commit 79edff12060fe7772af08607eff50c0e2486c5ba | Author: Rob Herring | scripts/dtc: Update to upstream version v1.6.0-51-g183df9e9c2b9 The reason is the alignment test added to the fdt_ro_probe_(). To fix this issue, we need to make sure that fdt_buf is aligned. Since the dtc patch was designed to uncover potential issue, I handle initial MIPS Malta patch as initial bug. 
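The new libfdt check that rejects the buffer is, paraphrased (the exact form lives in fdt_ro_probe_() in the updated scripts/dtc/libfdt):

        /* sketch: every blob handed to libfdt must now be 8-byte
         * aligned, otherwise all fdt_*() calls fail up front */
        if ((uintptr_t)fdt & 7)
                return -FDT_ERR_ALIGNMENT;

which is why the statically allocated fdt_buf below gains an explicit __aligned(8).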
Fixes: e81a8c7dabac ("MIPS: Malta: Setup RAM regions via DT") Signed-off-by: Oleksij Rempel Signed-off-by: Thomas Bogendoerfer --- arch/mips/mti-malta/malta-dtshim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/mti-malta/malta-dtshim.c b/arch/mips/mti-malta/malta-dtshim.c index 0ddf03df6268..f451268f6c38 100644 --- a/arch/mips/mti-malta/malta-dtshim.c +++ b/arch/mips/mti-malta/malta-dtshim.c @@ -22,7 +22,7 @@ #define ROCIT_CONFIG_GEN1_MEMMAP_SHIFT 8 #define ROCIT_CONFIG_GEN1_MEMMAP_MASK (0xf << 8) -static unsigned char fdt_buf[16 << 10] __initdata; +static unsigned char fdt_buf[16 << 10] __initdata __aligned(8); /* determined physical memory size, not overridden by command line args */ extern unsigned long physical_memsize; -- cgit From 0da14a19493da0f9b4c5ee5930ab05a4c61f5883 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 22 Aug 2021 20:49:20 +0200 Subject: x86/PCI: sta2x11: switch from 'pci_' to 'dma_' API The wrappers in include/linux/pci-dma-compat.h should go away. The patch has been generated with the coccinelle script below. It has been hand modified to use 'dma_set_mask_and_coherent()' instead of 'pci_set_dma_mask()/pci_set_consistent_dma_mask()' when applicable. This is less verbose. It has been compile tested. @@ @@ - PCI_DMA_BIDIRECTIONAL + DMA_BIDIRECTIONAL @@ @@ - PCI_DMA_TODEVICE + DMA_TO_DEVICE @@ @@ - PCI_DMA_FROMDEVICE + DMA_FROM_DEVICE @@ @@ - PCI_DMA_NONE + DMA_NONE @@ expression e1, e2, e3; @@ - pci_alloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3; @@ - pci_zalloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3, e4; @@ - pci_free_consistent(e1, e2, e3, e4) + dma_free_coherent(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_single(e1, e2, e3, e4) + dma_map_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_single(e1, e2, e3, e4) + dma_unmap_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4, e5; @@ - pci_map_page(e1, e2, e3, e4, e5) + dma_map_page(&e1->dev, e2, e3, e4, e5) @@ expression e1, e2, e3, e4; @@ - pci_unmap_page(e1, e2, e3, e4) + dma_unmap_page(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_sg(e1, e2, e3, e4) + dma_map_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_sg(e1, e2, e3, e4) + dma_unmap_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_cpu(e1, e2, e3, e4) + dma_sync_single_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_device(e1, e2, e3, e4) + dma_sync_single_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_cpu(e1, e2, e3, e4) + dma_sync_sg_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_device(e1, e2, e3, e4) + dma_sync_sg_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2; @@ - pci_dma_mapping_error(e1, e2) + dma_mapping_error(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_dma_mask(e1, e2) + dma_set_mask(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_consistent_dma_mask(e1, e2) + dma_set_coherent_mask(&e1->dev, e2) Link: https://lore.kernel.org/r/99656452963ba3c63a6cb12e151279d81da365eb.1629658069.git.christophe.jaillet@wanadoo.fr Link: https://lore.kernel.org/kernel-janitors/20200421081257.GA131897@infradead.org/ Signed-off-by: Christophe JAILLET Signed-off-by: Bjorn Helgaas --- arch/x86/pci/sta2x11-fixup.c | 3 +-- 1 file changed, 1 insertion(+), 2 
deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 7d2525691854..101081ad64b6 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -146,8 +146,7 @@ static void sta2x11_map_ep(struct pci_dev *pdev) dev_err(dev, "sta2x11: could not set DMA offset\n"); dev->bus_dma_limit = max_amba_addr; - pci_set_consistent_dma_mask(pdev, max_amba_addr); - pci_set_dma_mask(pdev, max_amba_addr); + dma_set_mask_and_coherent(&pdev->dev, max_amba_addr); /* Configure AHB mapping */ pci_write_config_dword(pdev, AHB_PEXLBASE(0), 0); -- cgit From e5a2cac908df691f1637f9272d4c6dec83239611 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 2 Sep 2021 23:54:03 +0200 Subject: parisc: Drop __arch_swab16(), arch_swab24(), _arch_swab32() and __arch_swab64() functions No need to keep those as inline assembly functions, the compiler now generates the same or even better optimized code. Signed-off-by: Helge Deller --- arch/parisc/include/uapi/asm/swab.h | 68 ------------------------------------- 1 file changed, 68 deletions(-) delete mode 100644 arch/parisc/include/uapi/asm/swab.h (limited to 'arch') diff --git a/arch/parisc/include/uapi/asm/swab.h b/arch/parisc/include/uapi/asm/swab.h deleted file mode 100644 index 35fb2d1bfbbd..000000000000 --- a/arch/parisc/include/uapi/asm/swab.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _PARISC_SWAB_H -#define _PARISC_SWAB_H - -#include -#include -#include - -#define __SWAB_64_THRU_32__ - -static inline __attribute_const__ __u16 __arch_swab16(__u16 x) -{ - __asm__("dep %0, 15, 8, %0\n\t" /* deposit 00ab -> 0bab */ - "shd %%r0, %0, 8, %0" /* shift 000000ab -> 00ba */ - : "=r" (x) - : "0" (x)); - return x; -} -#define __arch_swab16 __arch_swab16 - -static inline __attribute_const__ __u32 __arch_swab24(__u32 x) -{ - __asm__("shd %0, %0, 8, %0\n\t" /* shift xabcxabc -> cxab */ - "dep %0, 15, 8, %0\n\t" /* deposit cxab -> cbab */ - "shd %%r0, %0, 8, %0" /* shift 0000cbab -> 0cba */ - : "=r" (x) - : "0" (x)); - return x; -} - -static inline __attribute_const__ __u32 __arch_swab32(__u32 x) -{ - unsigned int temp; - __asm__("shd %0, %0, 16, %1\n\t" /* shift abcdabcd -> cdab */ - "dep %1, 15, 8, %1\n\t" /* deposit cdab -> cbab */ - "shd %0, %1, 8, %0" /* shift abcdcbab -> dcba */ - : "=r" (x), "=&r" (temp) - : "0" (x)); - return x; -} -#define __arch_swab32 __arch_swab32 - -#if __BITS_PER_LONG > 32 -/* -** From "PA-RISC 2.0 Architecture", HP Professional Books. -** See Appendix I page 8 , "Endian Byte Swapping". -** -** Pretty cool algorithm: (* == zero'd bits) -** PERMH 01234567 -> 67452301 into %0 -** HSHL 67452301 -> 7*5*3*1* into %1 -** HSHR 67452301 -> *6*4*2*0 into %0 -** OR %0 | %1 -> 76543210 into %0 (all done!) -*/ -static inline __attribute_const__ __u64 __arch_swab64(__u64 x) -{ - __u64 temp; - __asm__("permh,3210 %0, %0\n\t" - "hshl %0, 8, %1\n\t" - "hshr,u %0, 8, %0\n\t" - "or %1, %0, %0" - : "=r" (x), "=&r" (temp) - : "0" (x)); - return x; -} -#define __arch_swab64 __arch_swab64 -#endif /* __BITS_PER_LONG > 32 */ - -#endif /* _PARISC_SWAB_H */ -- cgit From 55a6d00ed0c1b4d20d7966d00fda6848d776658d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 19 Aug 2021 10:22:51 +0900 Subject: x86/build/vdso: fix missing FORCE for *.so build rule Add FORCE so that if_changed can detect the command line change. 
Signed-off-by: Masahiro Yamada --- arch/x86/entry/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 05c4abc2fdfd..a2dddcc189f6 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -131,7 +131,7 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE targets += vdsox32.lds $(vobjx32s-y) $(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table -$(obj)/%.so: $(obj)/%.so.dbg +$(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE -- cgit From 25c648a066c1cc5ed763f2354531fdff41ac83a1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 7 Apr 2021 07:34:17 +0200 Subject: kbuild: sh: remove unused install script The sh arch has an install.sh script, but no Makefile actually calls it. Remove it to keep anyone from accidentally calling it in the future. Signed-off-by: Greg Kroah-Hartman Reviewed-by: Kees Cook Signed-off-by: Masahiro Yamada --- arch/sh/boot/compressed/install.sh | 56 -------------------------------------- 1 file changed, 56 deletions(-) delete mode 100644 arch/sh/boot/compressed/install.sh (limited to 'arch') diff --git a/arch/sh/boot/compressed/install.sh b/arch/sh/boot/compressed/install.sh deleted file mode 100644 index f9f41818b17e..000000000000 --- a/arch/sh/boot/compressed/install.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/sh -# -# arch/sh/boot/install.sh -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 1995 by Linus Torvalds -# -# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin -# Adapted from code in arch/i386/boot/install.sh by Russell King -# Adapted from code in arch/arm/boot/install.sh by Stuart Menefy -# -# "make install" script for sh architecture -# -# Arguments: -# $1 - kernel version -# $2 - kernel image file -# $3 - kernel map file -# $4 - default install path (blank if root directory) -# - -# User may have a custom install script - -if [ -x /sbin/${INSTALLKERNEL} ]; then - exec /sbin/${INSTALLKERNEL} "$@" -fi - -if [ "$2" = "zImage" ]; then -# Compressed install - echo "Installing compressed kernel" - if [ -f $4/vmlinuz-$1 ]; then - mv $4/vmlinuz-$1 $4/vmlinuz.old - fi - - if [ -f $4/System.map-$1 ]; then - mv $4/System.map-$1 $4/System.old - fi - - cat $2 > $4/vmlinuz-$1 - cp $3 $4/System.map-$1 -else -# Normal install - echo "Installing normal kernel" - if [ -f $4/vmlinux-$1 ]; then - mv $4/vmlinux-$1 $4/vmlinux.old - fi - - if [ -f $4/System.map ]; then - mv $4/System.map $4/System.old - fi - - cat $2 > $4/vmlinux-$1 - cp $3 $4/System.map -fi -- cgit From 87c3cb564f3e34626f1afc0286b864045559b403 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Jul 2021 23:24:40 +0900 Subject: sparc: move the install rule to arch/sparc/Makefile Currently, the install target in arch/sparc/Makefile descends into arch/sparc/boot/Makefile to invoke the shell script, but there is no good reason to do so. arch/sparc/Makefile can run the shell script directly.
Signed-off-by: Masahiro Yamada --- arch/sparc/Makefile | 3 ++- arch/sparc/boot/Makefile | 4 ---- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 4e65245bc755..24fb5a99f439 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -72,7 +72,8 @@ image zImage uImage tftpboot.img vmlinux.aout: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ install: - $(Q)$(MAKE) $(build)=$(boot) $@ + sh $(srctree)/$(boot)/install.sh $(KERNELRELEASE) $(KBUILD_IMAGE) \ + System.map "$(INSTALL_PATH)" archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile index 380e2b018992..849236d4eca4 100644 --- a/arch/sparc/boot/Makefile +++ b/arch/sparc/boot/Makefile @@ -70,7 +70,3 @@ $(obj)/image: vmlinux FORCE $(obj)/tftpboot.img: $(obj)/image $(obj)/piggyback System.map $(ROOT_IMG) FORCE $(call if_changed,elftoaout) $(call if_changed,piggy) - -install: - sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/zImage \ - System.map "$(INSTALL_PATH)" -- cgit From ba3e87cfa2a0f2db889297b6fc8a9e91a35c2d21 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:43:12 +0900 Subject: ia64: move core-y in arch/ia64/Makefile to arch/ia64/Kbuild Use obj-y to clean up Makefile. Signed-off-by: Masahiro Yamada --- arch/ia64/Kbuild | 2 ++ arch/ia64/Makefile | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/Kbuild b/arch/ia64/Kbuild index a4e40e534e6a..e77cc76d228c 100644 --- a/arch/ia64/Kbuild +++ b/arch/ia64/Kbuild @@ -1 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-y += kernel/ mm/ +obj-$(CONFIG_IA64_SGI_UV) += uv/ diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 467b7e7f967c..7e548c654a29 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -47,8 +47,6 @@ KBUILD_CFLAGS += $(cflags-y) head-y := arch/ia64/kernel/head.o libs-y += arch/ia64/lib/ -core-y += arch/ia64/kernel/ arch/ia64/mm/ -core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/ drivers-y += arch/ia64/pci/ arch/ia64/hp/common/ -- cgit From ff00f64bceb164267dccc3648eb299aeafd3a342 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 16 Aug 2021 17:21:04 -0700 Subject: s390: replace cc-option-yn uses with cc-option cc-option-yn can be replaced with cc-option. ie. Checking for support: ifeq ($(call cc-option-yn,$(FLAG)),y) becomes: ifneq ($(call cc-option,$(FLAG)),) Checking for lack of support: ifeq ($(call cc-option-yn,$(FLAG)),n) becomes: ifeq ($(call cc-option,$(FLAG)),) This allows us to pursue removing cc-option-yn. 
Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Acked-by: Heiko Carstens Signed-off-by: Masahiro Yamada --- arch/s390/Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 1e3172877982..f5c399851d70 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -70,7 +70,7 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -ifeq ($(call cc-option-yn,-mpacked-stack -mbackchain -msoft-float),y) +ifneq ($(call cc-option,-mpacked-stack -mbackchain -msoft-float),) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK endif @@ -78,22 +78,22 @@ endif KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) -ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y) +ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),) cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE) -ifneq ($(call cc-option-yn,-mstack-size=8192),y) +ifeq ($(call cc-option,-mstack-size=8192),) cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD) endif endif ifdef CONFIG_WARN_DYNAMIC_STACK - ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) + ifneq ($(call cc-option,-mwarn-dynamicstack),) KBUILD_CFLAGS += -mwarn-dynamicstack KBUILD_CFLAGS_DECOMPRESSOR += -mwarn-dynamicstack endif endif ifdef CONFIG_EXPOLINE - ifeq ($(call cc-option-yn,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),y) + ifneq ($(call cc-option,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),) CC_FLAGS_EXPOLINE := -mindirect-branch=thunk CC_FLAGS_EXPOLINE += -mfunction-return=thunk CC_FLAGS_EXPOLINE += -mindirect-branch-table @@ -104,10 +104,10 @@ ifdef CONFIG_EXPOLINE endif ifdef CONFIG_FUNCTION_TRACER - ifeq ($(call cc-option-yn,-mfentry -mnop-mcount),n) + ifeq ($(call cc-option,-mfentry -mnop-mcount),) # make use of hotpatch feature if the compiler supports it cc_hotpatch := -mhotpatch=0,3 - ifeq ($(call cc-option-yn,$(cc_hotpatch)),y) + ifneq ($(call cc-option,$(cc_hotpatch)),) CC_FLAGS_FTRACE := $(cc_hotpatch) KBUILD_AFLAGS += -DCC_USING_HOTPATCH KBUILD_CFLAGS += -DCC_USING_HOTPATCH -- cgit From 43e6b58f793c47b0a6772a112e6586cd1e24b239 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 16 Aug 2021 17:21:06 -0700 Subject: arc: replace cc-option-yn uses with cc-option cc-option-yn can be replaced with cc-option. ie. Checking for support: ifeq ($(call cc-option-yn,$(FLAG)),y) becomes: ifneq ($(call cc-option,$(FLAG)),) Checking for lack of support: ifeq ($(call cc-option-yn,$(FLAG)),n) becomes: ifeq ($(call cc-option,$(FLAG)),) This allows us to pursue removing cc-option-yn. 
Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- arch/arc/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/Makefile b/arch/arc/Makefile index c0d87ac2e221..8782a03f24a8 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -18,8 +18,7 @@ ifeq ($(CONFIG_ARC_TUNE_MCPU),"") cflags-y += $(tune-mcpu-def-y) else tune-mcpu := $(shell echo $(CONFIG_ARC_TUNE_MCPU)) -tune-mcpu-ok := $(call cc-option-yn, $(tune-mcpu)) -ifeq ($(tune-mcpu-ok),y) +ifneq ($(call cc-option,$(tune-mcpu)),) cflags-y += $(tune-mcpu) else # The flag provided by 'CONFIG_ARC_TUNE_MCPU' option isn't known by this compiler -- cgit From 7ab44e9ee5f241c0ed19e350d17e80bd90bde93d Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 16 Aug 2021 17:21:07 -0700 Subject: x86: remove cc-option-yn test for -mtune= As noted in the comment, -mtune= has been supported since GCC 3.4. The minimum required version of GCC to build the kernel (as specified in Documentation/process/changes.rst) is GCC 4.9. tune is not immediately expanded. Instead, it defines a macro that will later test values for -mtune= via cc-option. But we can skip the test of whether to use -mtune= vs. -mcpu=. Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Reviewed-by: Miguel Ojeda Signed-off-by: Masahiro Yamada --- arch/x86/Makefile_32.cpu | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index cd3056759880..e7355f8b51c2 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -2,13 +2,7 @@ # CPU tuning section - shared with UML. # Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML. -#-mtune exists since gcc 3.4 -HAS_MTUNE := $(call cc-option-yn, -mtune=i386) -ifeq ($(HAS_MTUNE),y) tune = $(call cc-option,-mtune=$(1),$(2)) -else -tune = $(call cc-option,-mcpu=$(1),$(2)) -endif cflags-$(CONFIG_M486SX) += -march=i486 cflags-$(CONFIG_M486) += -march=i486 -- cgit From c42813b71a06a2ff4a155aa87ac609feeab76cf3 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 2 Sep 2021 23:24:42 +0200 Subject: parisc: Fix unaligned-access crash in bootloader Kernel v5.14 has various changes to optimize unaligned memory accesses, e.g. commit 0652035a5794 ("asm-generic: unaligned: remove byteshift helpers"). Those changes triggered an unalignment-exception and thus crashed the bootloader on parisc because the unaligned "output_len" variable was now suddenly read word-wise, while it was read byte-wise in the past. Fix this issue by declaring the external output_len variable as char, which then forces the compiler to generate byte accesses.
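The idea can be shown with a small standalone sketch (illustrative only, not part of the patch; get_le32_bytewise is a hypothetical helper): when a 32-bit little-endian value sits at a potentially unaligned address, assembling it from individual bytes never issues a word-sized load, which is exactly what the char declaration makes the compiler do for output_len.

#include <stdint.h>

/* Assemble a 32-bit little-endian value one byte at a time, so that
 * no word-sized load is ever issued against an unaligned address.
 */
static uint32_t get_le32_bytewise(const unsigned char *p)
{
	return (uint32_t)p[0] |
	       ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) |
	       ((uint32_t)p[3] << 24);
}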
Signed-off-by: Helge Deller Cc: Arnd Bergmann Cc: John David Anglin Bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102162 Fixes: 8c031ba63f8f ("parisc: Unbreak bootloader due to gcc-7 optimizations") Fixes: 0652035a5794 ("asm-generic: unaligned: remove byteshift helpers") Cc: # v5.14+ --- arch/parisc/boot/compressed/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c index 2d395998f524..7ee49f5881d1 100644 --- a/arch/parisc/boot/compressed/misc.c +++ b/arch/parisc/boot/compressed/misc.c @@ -26,7 +26,7 @@ extern char input_data[]; extern int input_len; /* output_len is inserted by the linker possibly at an unaligned address */ -extern __le32 output_len __aligned(1); +extern char output_len; extern char _text, _end; extern char _bss, _ebss; extern char _startcode_end; -- cgit From 42be8b42535183f84df99acbaf799e38724348f3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 22 Apr 2021 12:53:00 +0200 Subject: binfmt: don't use MAP_DENYWRITE when loading shared libraries via uselib() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit uselib() is the legacy system call for loading shared libraries. Nowadays, applications use dlopen() to load shared libraries, which is completely implemented in user space via mmap(). For example, glibc uses MAP_COPY to mmap shared libraries. While this maps to MAP_PRIVATE | MAP_DENYWRITE on Linux, Linux ignores any MAP_DENYWRITE specification from user space in mmap. With this change, all remaining in-tree users of MAP_DENYWRITE use it to map an executable. We will be able to open shared libraries loaded via uselib() writable, just as we already can via dlopen() from user space. This is one step in the direction of removing MAP_DENYWRITE from the kernel. This can be considered a minor user space visible change. Acked-by: "Eric W. Biederman" Acked-by: Christian König Signed-off-by: David Hildenbrand --- arch/x86/ia32/ia32_aout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 5e5b9fc2747f..321d7b22ad2d 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -293,7 +293,7 @@ static int load_aout_library(struct file *file) /* Now use mmap to map the library into memory. */ error = vm_mmap(file, start_addr, ex.a_text + ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT, + MAP_FIXED | MAP_PRIVATE | MAP_32BIT, N_TXTOFF(ex)); retval = error; if (error != start_addr) -- cgit From 4589ff7ca81516381393649dec8dd4948884b2b2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Apr 2021 09:42:41 +0200 Subject: binfmt: remove in-tree usage of MAP_DENYWRITE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At exec time, when we mmap the new executable via MAP_DENYWRITE, we have it opened via do_open_execat() and already deny_write_access()'ed the file successfully. Once exec completes, we allow_write_access(); however, we set mm->exe_file in begin_new_exec() via set_mm_exe_file() and also deny_write_access() as long as mm->exe_file remains set. We'll effectively deny write access to our executable via mm->exe_file until mm->exe_file is changed -- when the process is removed, on new exec, or via sys_prctl(PR_SET_MM_MAP/EXE_FILE). Let's remove all usage of MAP_DENYWRITE, it's no longer necessary for mm->exe_file.
In the case of an ELF interpreter, we'll now only deny write access to the file during exec. This is somewhat okay, because the interpreter behaves like (and sometimes is) a shared library; all shared libraries, especially the ones loaded directly in user space, such as via dlopen(), won't ever be mapped via MAP_DENYWRITE, because we ignore that from user space completely; these shared libraries can always be modified while mapped and executed. Let's only special-case the main executable, denying write access while it is being executed by a process. This can be considered a minor user space visible change. While this is a cleanup, it also fixes part of a problem reported with VM_DENYWRITE on overlayfs, as VM_DENYWRITE is effectively unused with this patch and will be removed next: "Overlayfs did not honor positive i_writecount on realfile for VM_DENYWRITE mappings." [1] [1] https://lore.kernel.org/r/YNHXzBgzRrZu1MrD@miu.piliscsaba.redhat.com/ Reported-by: Chengguang Xu Acked-by: "Eric W. Biederman" Acked-by: Christian König Signed-off-by: David Hildenbrand --- arch/x86/ia32/ia32_aout.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 321d7b22ad2d..9bd15241fadb 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -202,8 +202,7 @@ static int load_aout_binary(struct linux_binprm *bprm) error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | - MAP_32BIT, + MAP_FIXED | MAP_PRIVATE | MAP_32BIT, fd_offset); if (error != N_TXTADDR(ex)) @@ -211,8 +210,7 @@ static int load_aout_binary(struct linux_binprm *bprm) error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | - MAP_32BIT, + MAP_FIXED | MAP_PRIVATE | MAP_32BIT, fd_offset + ex.a_text); if (error != N_DATADDR(ex)) return error; -- cgit From 577706de69c1e53bc23893953770d829a2242c38 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 2 Sep 2021 14:49:58 -0700 Subject: ia64: fix typo in a comment s/when when/when/ Link: https://lkml.kernel.org/r/20210817112500.12848-1-wangborong@cdjrlc.com Signed-off-by: Jason Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index e2af6b172200..96d13cb7c19f 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -906,6 +906,6 @@ EXPORT_SYMBOL(acpi_unregister_ioapic); /* * acpi_suspend_lowlevel() - save kernel state and suspend. * - * TBD when when IA64 starts to support suspend... + * TBD when IA64 starts to support suspend... */ int acpi_suspend_lowlevel(void) { return 0; } -- cgit From 1d1f4bf845d36d73b27ed37790cae0da3112ca77 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 2 Sep 2021 14:50:01 -0700 Subject: ia64: fix #endif comment for reserve_elfcorehdr() Patch series "ia64: Miscellaneous fixes and cleanups". This patch series contains some miscellaneous fixes and cleanups for ia64. The second patch fixes a naming conflict triggered by a patch for the FDT code. This patch (of 3): The definition of reserve_elfcorehdr() depends on CONFIG_CRASH_DUMP, not CONFIG_PROC_VMCORE.
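As a minimal illustration (a generic sketch, not code from the patch), the comment on an #endif should name the exact condition of the #ifdef it closes, so that long conditional blocks stay readable and greppable:

/* The #endif comment must match the guard it terminates. */
#ifdef CONFIG_CRASH_DUMP
extern int reserve_elfcorehdr(u64 *start, u64 *end);
#endif /* CONFIG_CRASH_DUMP */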
Link: https://lkml.kernel.org/r/cover.1629884459.git.geert+renesas@glider.be Link: https://lkml.kernel.org/r/77b4c0648f200cab7e1c2c5171c06763e09362aa.1629884459.git.geert+renesas@glider.be Fixes: d9a9855d0b06ca6d ("always reserve elfcore header memory in crash kernel") Fixes: 17c1f07ed70afa4f ("[IA64] Reserve elfcorehdr memory in CONFIG_CRASH_DUMP") Signed-off-by: Geert Uytterhoeven Cc: Simon Horman Cc: Tony Luck Cc: Jay Lan Cc: Magnus Damm Cc: Mike Rapoport Cc: Rob Herring Cc: Frank Rowand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index dd595fbd8006..fbd931f1eb27 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -546,7 +546,7 @@ int __init reserve_elfcorehdr(u64 *start, u64 *end) return 0; } -#endif /* CONFIG_PROC_VMCORE */ +#endif /* CONFIG_CRASH_DUMP */ void __init setup_arch (char **cmdline_p) -- cgit From 70b2e9912a018e9fddb3a1e0cbb397d4d3c0e98f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 2 Sep 2021 14:50:04 -0700 Subject: ia64: make reserve_elfcorehdr() static There never was a reason for reserve_elfcorehdr() to be global. Make the function static, and move it before its sole caller. Link: https://lkml.kernel.org/r/fe236cd73b64abc4abd03dd808cb015c907f4c8c.1629884459.git.geert+renesas@glider.be Fixes: cee87af2a5f75713 ("[IA64] kexec: Use EFI_LOADER_DATA for ELF core header") Signed-off-by: Geert Uytterhoeven Cc: Frank Rowand Cc: Jay Lan Cc: Magnus Damm Cc: Mike Rapoport Cc: Rob Herring Cc: Simon Horman Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/meminit.h | 1 - arch/ia64/kernel/setup.c | 51 ++++++++++++++++++++--------------------- 2 files changed, 25 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h index 6c47a239fc26..2738f62b5f98 100644 --- a/arch/ia64/include/asm/meminit.h +++ b/arch/ia64/include/asm/meminit.h @@ -40,7 +40,6 @@ extern unsigned long efi_memmap_init(u64 *s, u64 *e); extern int find_max_min_low_pfn (u64, u64, void *); extern unsigned long vmcore_find_descriptor_size(unsigned long address); -extern int reserve_elfcorehdr(u64 *start, u64 *end); /* * For rounding an address to the next IA64_GRANULE_SIZE or order diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index fbd931f1eb27..5e6ee8939092 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -325,6 +325,31 @@ static inline void __init setup_crashkernel(unsigned long total, int *n) {} #endif +#ifdef CONFIG_CRASH_DUMP +static int __init reserve_elfcorehdr(u64 *start, u64 *end) +{ + u64 length; + + /* We get the address using the kernel command line, + * but the size is extracted from the EFI tables. + * Both address and size are required for reservation + * to work properly. 
+ */ + + if (!is_vmcore_usable()) + return -EINVAL; + + if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) { + vmcore_unusable(); + return -EINVAL; + } + + *start = (unsigned long)__va(elfcorehdr_addr); + *end = *start + length; + return 0; +} +#endif /* CONFIG_CRASH_DUMP */ + /** * reserve_memory - setup reserved memory areas * @@ -522,32 +547,6 @@ static __init int setup_nomca(char *s) } early_param("nomca", setup_nomca); -#ifdef CONFIG_CRASH_DUMP -int __init reserve_elfcorehdr(u64 *start, u64 *end) -{ - u64 length; - - /* We get the address using the kernel command line, - * but the size is extracted from the EFI tables. - * Both address and size are required for reservation - * to work properly. - */ - - if (!is_vmcore_usable()) - return -EINVAL; - - if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) { - vmcore_unusable(); - return -EINVAL; - } - - *start = (unsigned long)__va(elfcorehdr_addr); - *end = *start + length; - return 0; -} - -#endif /* CONFIG_CRASH_DUMP */ - void __init setup_arch (char **cmdline_p) { -- cgit From 7e4265c88968d4e53b164944c05e12e79d8ff9c6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 2 Sep 2021 14:50:08 -0700 Subject: ia64: make num_rsvd_regions static Commit f62800992e5917f2 ("ia64: switch to NO_BOOTMEM") removed the last user of num_rsvd_regions outside arch/ia64/kernel/setup.c. Link: https://lkml.kernel.org/r/a377b5437e3e9da93d02f996fe06a2b956cb0990.1629884459.git.geert+renesas@glider.be Signed-off-by: Geert Uytterhoeven Cc: Frank Rowand Cc: Jay Lan Cc: Magnus Damm Cc: Mike Rapoport Cc: Rob Herring Cc: Simon Horman Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/meminit.h | 1 - arch/ia64/kernel/setup.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h index 2738f62b5f98..f1d5bf2ba847 100644 --- a/arch/ia64/include/asm/meminit.h +++ b/arch/ia64/include/asm/meminit.h @@ -29,7 +29,6 @@ struct rsvd_region { }; extern struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1]; -extern int num_rsvd_regions; extern void find_memory (void); extern void reserve_memory (void); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 5e6ee8939092..31fb84de2d21 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -131,7 +131,7 @@ unsigned long ia64_cache_stride_shift = ~0; * We use a special marker for the end of memory and it uses the extra (+1) slot */ struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata; -int num_rsvd_regions __initdata; +static int num_rsvd_regions __initdata; /* -- cgit From 4bdffd2708d65e68ff254d90793bb167d828219f Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Thu, 2 Sep 2021 14:50:20 -0700 Subject: arch/csky/kernel/probes/kprobes.c: fix bugon.cocci warnings Use BUG_ON instead of an if condition followed by BUG.
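The transformation is easiest to see on a generic before/after pair (cond stands for any boolean expression; this sketch is not the patch itself):

/* before: open-coded conditional BUG() */
if (!cond)
	BUG();

/* after: same semantics, one statement */
BUG_ON(!cond);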
Generated by: scripts/coccinelle/misc/bugon.cocci Link: https://lkml.kernel.org/r/alpine.DEB.2.22.394.2107061049150.7197@hadrien Fixes: 7d37cb2c912d ("lib: fix kconfig dependency on ARCH_WANT_FRAME_POINTERS") Signed-off-by: kernel test robot Signed-off-by: Julia Lawall Reported-by: kernel test robot Cc: Julian Braha Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/csky/kernel/probes/kprobes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c index 68b22b499aeb..8fffa34d4e1c 100644 --- a/arch/csky/kernel/probes/kprobes.c +++ b/arch/csky/kernel/probes/kprobes.c @@ -283,8 +283,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr) * normal page fault. */ regs->pc = (unsigned long) cur->addr; - if (!instruction_pointer(regs)) - BUG(); + BUG_ON(!instruction_pointer(regs)); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); -- cgit From 9857a17f206ff374aea78bccfb687f145368be2e Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Thu, 2 Sep 2021 14:53:54 -0700 Subject: mm/gup: remove try_get_page(), call try_get_compound_head() directly try_get_page() is very similar to try_get_compound_head(), and in fact try_get_page() has fallen a little behind in terms of maintenance: try_get_compound_head() handles speculative page references more thoroughly. There are only two try_get_page() callsites, so just call try_get_compound_head() directly from those, and remove try_get_page() entirely. Also, seeing as how this changes try_get_compound_head() into a non-static function, provide some kerneldoc documentation for it. Link: https://lkml.kernel.org/r/20210813044133.1536842-4-jhubbard@nvidia.com Signed-off-by: John Hubbard Reviewed-by: Christoph Hellwig Cc: Matthew Wilcox Cc: Christian Borntraeger Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e33c43b38afe..81d760749987 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -817,7 +817,7 @@ void do_secure_storage_access(struct pt_regs *regs) break; case KERNEL_FAULT: page = phys_to_page(addr); - if (unlikely(!try_get_page(page))) + if (unlikely(!try_get_compound_head(page, 1))) break; rc = arch_make_page_accessible(page); put_page(page); -- cgit From ec403e2ae0dfc85996aad6e944a98a16e6dfcc6d Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 2 Sep 2021 14:55:43 -0700 Subject: memcg: enable accounting for ldt_struct objects Each task can request its own LDT and force the kernel to allocate up to 64Kb of memory per mm. There are legitimate workloads with hundreds of processes and there can be hundreds of workloads running on large machines. The unaccounted memory can cause isolation issues between the workloads, particularly on highly utilized machines. It makes sense to account for these objects to restrict the host's memory consumption from inside the memcg-limited container. Link: https://lkml.kernel.org/r/38010594-50fe-c06d-7cb0-d1f77ca422f3@virtuozzo.com Signed-off-by: Vasily Averin Acked-by: Borislav Petkov Reviewed-by: Shakeel Butt Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Borislav Petkov Cc: Christian Brauner Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "J.
Bruce Fields" Cc: Jeff Layton Cc: Jens Axboe Cc: Jiri Slaby Cc: Johannes Weiner Cc: Kirill Tkhai Cc: Michal Hocko Cc: Oleg Nesterov Cc: Roman Gushchin Cc: Serge Hallyn Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: Yutian Yang Cc: Zefan Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/ldt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index aa15132228da..525876e7b9f4 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -154,7 +154,7 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) if (num_entries > LDT_ENTRIES) return NULL; - new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); + new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL_ACCOUNT); if (!new_ldt) return NULL; @@ -168,9 +168,9 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) * than PAGE_SIZE. */ if (alloc_size > PAGE_SIZE) - new_ldt->entries = vzalloc(alloc_size); + new_ldt->entries = __vmalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); else - new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL); + new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!new_ldt->entries) { kfree(new_ldt); -- cgit From f358afc52c3066f4e8cd7b3a2d75b31e822519e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Sep 2021 14:56:36 -0700 Subject: mm: remove flush_kernel_dcache_page flush_kernel_dcache_page is a rather confusing interface that implements a subset of flush_dcache_page by not being able to properly handle page cache mapped pages. The only callers left are in the exec code as all other previous callers were incorrect as they could have dealt with page cache pages. Replace the calls to flush_kernel_dcache_page with calls to flush_dcache_page, which for all architectures does either exactly the same thing, can contains one or more of the following: 1) an optimization to defer the cache flush for page cache pages not mapped into userspace 2) additional flushing for mapped page cache pages if cache aliases are possible Link: https://lkml.kernel.org/r/20210712060928.4161649-7-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds Reviewed-by: Ira Weiny Cc: Alex Shi Cc: Geoff Levand Cc: Greentime Hu Cc: Guo Ren Cc: Helge Deller Cc: "James E.J. 
Bottomley" Cc: Nick Hu Cc: Paul Cercueil Cc: Rich Felker Cc: Russell King Cc: Thomas Bogendoerfer Cc: Ulf Hansson Cc: Vincent Chen Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/cacheflush.h | 4 +--- arch/arm/mm/flush.c | 33 --------------------------------- arch/arm/mm/nommu.c | 6 ------ arch/csky/abiv1/cacheflush.c | 11 ----------- arch/csky/abiv1/inc/abi/cacheflush.h | 4 +--- arch/mips/include/asm/cacheflush.h | 8 +------- arch/nds32/include/asm/cacheflush.h | 3 +-- arch/nds32/mm/cacheflush.c | 9 --------- arch/parisc/include/asm/cacheflush.h | 8 ++------ arch/parisc/kernel/cache.c | 3 +-- arch/sh/include/asm/cacheflush.h | 8 ++------ 11 files changed, 9 insertions(+), 88 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 2e24e765e6d3..5e56288e343b 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -291,6 +291,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) { if ((cache_is_vivt() || cache_is_vipt_aliasing())) @@ -312,9 +313,6 @@ static inline void flush_anon_page(struct vm_area_struct *vma, __flush_anon_page(vma, page, vmaddr); } -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -extern void flush_kernel_dcache_page(struct page *); - #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 6d89db7895d1..7ff9feea13a6 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -345,39 +345,6 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); -/* - * Ensure cache coherency for the kernel mapping of this page. We can - * assume that the page is pinned via kmap. - * - * If the page only exists in the page cache and there are no user - * space mappings, this is a no-op since the page was already marked - * dirty at creation. Otherwise, we need to flush the dirty kernel - * cache lines directly. - */ -void flush_kernel_dcache_page(struct page *page) -{ - if (cache_is_vivt() || cache_is_vipt_aliasing()) { - struct address_space *mapping; - - mapping = page_mapping_file(page); - - if (!mapping || mapping_mapped(mapping)) { - void *addr; - - addr = page_address(page); - /* - * kmap_atomic() doesn't set the page virtual - * address for highmem pages, and - * kunmap_atomic() takes care of cache - * flushing already. - */ - if (!IS_ENABLED(CONFIG_HIGHMEM) || addr) - __cpuc_flush_dcache_area(addr, PAGE_SIZE); - } - } -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - /* * Flush an anonymous page so that users of get_user_pages() * can safely access the data. 
The expected sequence is: diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 8b3d7191e2b8..2658f52903da 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -166,12 +166,6 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); -void flush_kernel_dcache_page(struct page *page) -{ - __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len) diff --git a/arch/csky/abiv1/cacheflush.c b/arch/csky/abiv1/cacheflush.c index 07ff17ea33de..fb91b069dc69 100644 --- a/arch/csky/abiv1/cacheflush.c +++ b/arch/csky/abiv1/cacheflush.c @@ -56,17 +56,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, } } -void flush_kernel_dcache_page(struct page *page) -{ - struct address_space *mapping; - - mapping = page_mapping_file(page); - - if (!mapping || mapping_mapped(mapping)) - dcache_wbinv_all(); -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h index 6cab7afae962..ed62e2066ba7 100644 --- a/arch/csky/abiv1/inc/abi/cacheflush.h +++ b/arch/csky/abiv1/inc/abi/cacheflush.h @@ -14,12 +14,10 @@ extern void flush_dcache_page(struct page *); #define flush_cache_page(vma, page, pfn) cache_wbinv_all() #define flush_cache_dup_mm(mm) cache_wbinv_all() -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -extern void flush_kernel_dcache_page(struct page *); - #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) { dcache_wbinv_all(); diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h index d687b40b9fbb..b3dc9c589442 100644 --- a/arch/mips/include/asm/cacheflush.h +++ b/arch/mips/include/asm/cacheflush.h @@ -125,13 +125,7 @@ static inline void kunmap_noncoherent(void) kunmap_coherent(); } -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ - BUG_ON(cpu_has_dc_aliases && PageHighMem(page)); - flush_dcache_page(page); -} - +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 /* * For now flush_kernel_vmap_range and invalidate_kernel_vmap_range both do a * cache writeback and invalidate operation. 
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index 7d6824f7c0e8..c2a222ebfa2a 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -36,8 +36,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page, void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr); -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -void flush_kernel_dcache_page(struct page *page); +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 void flush_kernel_vmap_range(void *addr, int size); void invalidate_kernel_vmap_range(void *addr, int size); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages) diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index ad5344ef5d33..07aac65d1cab 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -318,15 +318,6 @@ void flush_anon_page(struct vm_area_struct *vma, local_irq_restore(flags); } -void flush_kernel_dcache_page(struct page *page) -{ - unsigned long flags; - local_irq_save(flags); - cpu_dcache_wbinval_page((unsigned long)page_address(page)); - local_irq_restore(flags); -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - void flush_kernel_vmap_range(void *addr, int size) { unsigned long flags; diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 99663fc1f997..eef0096db5f8 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -36,16 +36,12 @@ void flush_cache_all_local(void); void flush_cache_all(void); void flush_cache_mm(struct mm_struct *mm); -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE void flush_kernel_dcache_page_addr(void *addr); -static inline void flush_kernel_dcache_page(struct page *page) -{ - flush_kernel_dcache_page_addr(page_address(page)); -} #define flush_kernel_dcache_range(start,size) \ flush_kernel_dcache_range_asm((start), (start)+(size)); +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 void flush_kernel_vmap_range(void *vaddr, int size); void invalidate_kernel_vmap_range(void *vaddr, int size); @@ -59,7 +55,7 @@ extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) #define flush_icache_page(vma,page) do { \ - flush_kernel_dcache_page(page); \ + flush_kernel_dcache_page_addr(page_address(page)); \ flush_kernel_icache_page(page_address(page)); \ } while (0) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 86a1a63563fd..39e02227e231 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -334,7 +334,7 @@ void flush_dcache_page(struct page *page) return; } - flush_kernel_dcache_page(page); + flush_kernel_dcache_page_addr(page_address(page)); if (!mapping) return; @@ -375,7 +375,6 @@ EXPORT_SYMBOL(flush_dcache_page); /* Defined in arch/parisc/kernel/pacache.S */ EXPORT_SYMBOL(flush_kernel_dcache_range_asm); -EXPORT_SYMBOL(flush_kernel_dcache_page_asm); EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index 4486a865ff62..372afa82fee6 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -63,6 +63,8 @@ static inline void flush_anon_page(struct vm_area_struct *vma, if (boot_cpu_data.dcache.n_aliases && PageAnon(page)) __flush_anon_page(page, vmaddr); } + +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void 
flush_kernel_vmap_range(void *addr, int size) { __flush_wback_region(addr, size); @@ -72,12 +74,6 @@ static inline void invalidate_kernel_vmap_range(void *addr, int size) __flush_invalidate_region(addr, size); } -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ - flush_dcache_page(page); -} - extern void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, unsigned long len); -- cgit From fc1f5e980a463325cf41d39ac6a69aa3cca73995 Mon Sep 17 00:00:00 2001 From: Ohhoon Kwon Date: Thu, 2 Sep 2021 14:57:01 -0700 Subject: mm: sparse: pass section_nr to find_memory_block With CONFIG_SPARSEMEM_EXTREME enabled, __section_nr(), which converts a mem_section to a section_nr, could be costly since it iterates all section roots to check if the given mem_section is in its range. On the other hand, __nr_to_section(), which converts a section_nr to a mem_section, can be done in O(1). Let's pass section_nr instead of a mem_section pointer to find_memory_block() in order to reduce needless iterations. Link: https://lkml.kernel.org/r/20210707150212.855-3-ohoono.kwon@samsung.com Signed-off-by: Ohhoon Kwon Acked-by: Michal Hocko Acked-by: Mike Rapoport Reviewed-by: David Hildenbrand Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/pseries/hotplug-memory.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 377d852f5a9a..d4f28ee4d5dc 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -211,13 +211,11 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb) static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb) { unsigned long section_nr; - struct mem_section *mem_sect; struct memory_block *mem_block; section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); - mem_sect = __nr_to_section(section_nr); - mem_block = find_memory_block(mem_sect); + mem_block = find_memory_block(section_nr); return mem_block; } -- cgit From c3ab6baf6a004eab7344a1d8880a971f2414e1b6 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 2 Sep 2021 14:57:56 -0700 Subject: mm/page_alloc: always initialize memory map for the holes Patch series "mm: ensure consistency of memory map poisoning". Currently memory map allocation for the FLATMEM case does not poison the struct pages regardless of the CONFIG_PAGE_POISONING setting. This happens because allocation of the memory map for FLATMEM and SPARSEMEM uses different memblock functions, and those used for the SPARSEMEM case (namely memblock_alloc_try_nid_raw() and memblock_alloc_exact_nid_raw()) implicitly poison the allocated memory. Another side effect of this implicit poisoning is that early setup code that uses the same functions to allocate memory burns cycles for the memory poisoning even if it was not intended. These patches introduce a memmap_alloc() wrapper that ensures that the memory map allocation is consistent for different memory models. This patch (of 4): Currently the memory map for holes is initialized only when the SPARSEMEM memory model is used. Yet, even with FLATMEM there could be holes in the physical memory layout that have memory map entries.
For instance, the memory reserved using the e820 API on i386, or "reserved-memory" nodes in the device tree, would not appear in memblock.memory, and hence the struct pages for such holes will be skipped during memory map initialization. These struct pages will be zeroed because the memory map for FLATMEM systems is allocated with memblock_alloc_node(), which clears the allocated memory. While zeroed struct pages do not cause immediate problems, the correct behaviour is to initialize every page using __init_single_page(). Besides, enabling page poisoning for the FLATMEM case will trigger PF_POISONED_CHECK() unless the memory map is properly initialized. Make sure init_unavailable_range() is called for both SPARSEMEM and FLATMEM so that struct pages representing memory holes would appear as PG_Reserved with any memory layout. [rppt@kernel.org: fix microblaze] Link: https://lkml.kernel.org/r/YQWW3RCE4eWBuMu/@kernel.org Link: https://lkml.kernel.org/r/20210714123739.16493-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20210714123739.16493-2-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: David Hildenbrand Tested-by: Guenter Roeck Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/include/asm/page.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index ce550978f4fc..4b8b2fa78fc5 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -112,8 +112,7 @@ extern int page_is_ram(unsigned long pfn); # define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) # define ARCH_PFN_OFFSET (memory_start >> PAGE_SHIFT) -# define pfn_valid(pfn) ((pfn) < (max_mapnr + ARCH_PFN_OFFSET)) - +# define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < (max_mapnr + ARCH_PFN_OFFSET)) # endif /* __ASSEMBLY__ */ #define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) -- cgit From 22e7878102f94a50e9a4c2c19f909a9a0898c4ce Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 2 Sep 2021 14:57:59 -0700 Subject: microblaze: simplify pte_alloc_one_kernel() The microblaze implementation of pte_alloc_one_kernel() used memblock_alloc_try_nid_raw() along with clear_page() to allocate a zeroed page during early setup. Replace calls of these functions with a call to memblock_alloc_try_nid(), which already returns a zeroed page and respects the same allocation limits as memblock_alloc_try_nid_raw(). While at it, drop the early_get_page() wrapper, which was only used in pte_alloc_one_kernel().
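A minimal sketch of the simplification (illustrative only; min_addr, max_addr and nid stand for the caller's allocation limits, and it relies on memblock_alloc_try_nid() returning already-zeroed memory):

/* before: raw allocation, the caller has to zero the page itself */
pte = memblock_alloc_try_nid_raw(PAGE_SIZE, PAGE_SIZE,
				 min_addr, max_addr, nid);
if (pte)
	clear_page(pte);

/* after: the allocator hands back zeroed memory, no clear_page() */
pte = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
			     min_addr, max_addr, nid);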
Link: https://lkml.kernel.org/r/20210714123739.16493-3-rppt@kernel.org Signed-off-by: Mike Rapoport Reviewed-by: David Hildenbrand Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/include/asm/pgtable.h | 2 -- arch/microblaze/mm/init.c | 12 ------------ arch/microblaze/mm/pgtable.c | 17 ++++++++--------- 3 files changed, 8 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 71cd547655d9..c136a01e467e 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -443,8 +443,6 @@ extern int mem_init_done; asmlinkage void __init mmu_init(void); -void __init *early_get_page(void); - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index ab55c70380a5..952f35b335b2 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -265,18 +265,6 @@ asmlinkage void __init mmu_init(void) dma_contiguous_reserve(memory_start + lowmem_size - 1); } -/* This is only called until mem_init is done. */ -void __init *early_get_page(void) -{ - /* - * Mem start + kernel_tlb -> here is limit - * because of mem mapping from head.S - */ - return memblock_alloc_try_nid_raw(PAGE_SIZE, PAGE_SIZE, - MEMBLOCK_LOW_LIMIT, memory_start + kernel_tlb, - NUMA_NO_NODE); -} - void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 38ccb909bc9d..c1833b159d3b 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -33,6 +33,7 @@ #include #include #include +#include <linux/memblock.h> #include #include @@ -242,15 +243,13 @@ unsigned long iopa(unsigned long addr) __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { - pte_t *pte; - if (mem_init_done) { - pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - } else { - pte = (pte_t *)early_get_page(); - if (pte) - clear_page(pte); - } - return pte; + if (mem_init_done) + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + else + return memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, + MEMBLOCK_LOW_LIMIT, + memory_start + kernel_tlb, + NUMA_NO_NODE); } void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) -- cgit From a7259df7670240ee03b0cfce8a3e5d3773911e24 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 2 Sep 2021 15:00:26 -0700 Subject: memblock: make memblock_find_in_range method private There are a lot of uses of memblock_find_in_range() along with memblock_reserve() from the times when memblock allocation APIs did not exist. memblock_find_in_range() is the very core of memblock allocations, so any future changes to its internal behaviour would mandate updates of all the users outside memblock. Replace the calls to memblock_find_in_range() with equivalent calls to memblock_phys_alloc() and memblock_phys_alloc_range() and make memblock_find_in_range() a private method of memblock. This simplifies the callers, ensures that (unlikely) errors in memblock_reserve() are handled and improves maintainability of memblock_find_in_range(). Link: https://lkml.kernel.org/r/20210816122622.30279-1-rppt@kernel.org Signed-off-by: Mike Rapoport Reviewed-by: Catalin Marinas [arm64] Acked-by: Kirill A. Shutemov Acked-by: Rafael J.
Wysocki [ACPI] Acked-by: Russell King (Oracle) Acked-by: Nick Kossifidis [riscv] Tested-by: Guenter Roeck Acked-by: Rob Herring Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/setup.c | 20 +++++++----------- arch/arm64/kvm/hyp/reserved_mem.c | 9 +++----- arch/arm64/mm/init.c | 36 ++++++++++---------------------- arch/mips/kernel/setup.c | 14 ++++++------- arch/riscv/mm/init.c | 44 +++++++++++++-------------------------- arch/s390/kernel/setup.c | 9 +++++--- arch/x86/kernel/aperture_64.c | 5 ++--- arch/x86/mm/init.c | 23 +++++++++++++------- arch/x86/mm/numa.c | 5 ++--- arch/x86/mm/numa_emulation.c | 5 ++--- arch/x86/realmode/init.c | 2 +- 11 files changed, 69 insertions(+), 103 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index f97eb2371672..284a80c0b6e1 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1012,31 +1012,25 @@ static void __init reserve_crashkernel(void) unsigned long long lowmem_max = __pa(high_memory - 1) + 1; if (crash_max > lowmem_max) crash_max = lowmem_max; - crash_base = memblock_find_in_range(CRASH_ALIGN, crash_max, - crash_size, CRASH_ALIGN); + + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, + CRASH_ALIGN, crash_max); if (!crash_base) { pr_err("crashkernel reservation failed - No suitable area found.\n"); return; } } else { + unsigned long long crash_max = crash_base + crash_size; unsigned long long start; - start = memblock_find_in_range(crash_base, - crash_base + crash_size, - crash_size, SECTION_SIZE); - if (start != crash_base) { + start = memblock_phys_alloc_range(crash_size, SECTION_SIZE, + crash_base, crash_max); + if (!start) { pr_err("crashkernel reservation failed - memory is in use.\n"); return; } } - ret = memblock_reserve(crash_base, crash_size); - if (ret < 0) { - pr_warn("crashkernel reservation failed - memory is in use (0x%lx)\n", - (unsigned long)crash_base); - return; - } - pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n", (unsigned long)(crash_size >> 20), (unsigned long)(crash_base >> 20), diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c index d654921dd09b..578670e3f608 100644 --- a/arch/arm64/kvm/hyp/reserved_mem.c +++ b/arch/arm64/kvm/hyp/reserved_mem.c @@ -92,12 +92,10 @@ void __init kvm_hyp_reserve(void) * this is unmapped from the host stage-2, and fallback to PAGE_SIZE. 
*/ hyp_mem_size = hyp_mem_pages << PAGE_SHIFT; - hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), - ALIGN(hyp_mem_size, PMD_SIZE), - PMD_SIZE); + hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE), + PMD_SIZE); if (!hyp_mem_base) - hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), - hyp_mem_size, PAGE_SIZE); + hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE); else hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE); @@ -105,7 +103,6 @@ void __init kvm_hyp_reserve(void) kvm_err("Failed to reserve hyp memory\n"); return; } - memblock_reserve(hyp_mem_base, hyp_mem_size); kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20, hyp_mem_base); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1fdb7bb7c198..bf5b8a5cd451 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -74,6 +74,7 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init; static void __init reserve_crashkernel(void) { unsigned long long crash_base, crash_size; + unsigned long long crash_max = arm64_dma_phys_limit; int ret; ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), @@ -84,33 +85,18 @@ static void __init reserve_crashkernel(void) crash_size = PAGE_ALIGN(crash_size); - if (crash_base == 0) { - /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma_phys_limit, - crash_size, SZ_2M); - if (crash_base == 0) { - pr_warn("cannot allocate crashkernel (size:0x%llx)\n", - crash_size); - return; - } - } else { - /* User specifies base address explicitly. */ - if (!memblock_is_region_memory(crash_base, crash_size)) { - pr_warn("cannot reserve crashkernel: region is not memory\n"); - return; - } + /* User specifies base address explicitly. */ + if (crash_base) + crash_max = crash_base + crash_size; - if (memblock_is_region_reserved(crash_base, crash_size)) { - pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n"); - return; - } - - if (!IS_ALIGNED(crash_base, SZ_2M)) { - pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n"); - return; - } + /* Current arm64 boot protocol requires 2MB alignment */ + crash_base = memblock_phys_alloc_range(crash_size, SZ_2M, + crash_base, crash_max); + if (!crash_base) { + pr_warn("cannot allocate crashkernel (size:0x%llx)\n", + crash_size); + return; } - memblock_reserve(crash_base, crash_size); pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", crash_base, crash_base + crash_size, crash_size >> 20); diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 23a140327a0b..f979adfd4fc2 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -452,8 +452,9 @@ static void __init mips_parse_crashkernel(void) return; if (crash_base <= 0) { - crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_MAX, - crash_size, CRASH_ALIGN); + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, + CRASH_ALIGN, + CRASH_ADDR_MAX); if (!crash_base) { pr_warn("crashkernel reservation failed - No suitable area found.\n"); return; @@ -461,8 +462,9 @@ static void __init mips_parse_crashkernel(void) } else { unsigned long long start; - start = memblock_find_in_range(crash_base, crash_base + crash_size, - crash_size, 1); + start = memblock_phys_alloc_range(crash_size, 1, + crash_base, + crash_base + crash_size); if (start != crash_base) { pr_warn("Invalid memory region reserved for crash kernel\n"); return; @@ -656,10 +658,6 @@ static void __init arch_mem_init(char **cmdline_p) 
mips_reserve_vmcore(); mips_parse_crashkernel(); -#ifdef CONFIG_KEXEC - if (crashk_res.start != crashk_res.end) - memblock_reserve(crashk_res.start, resource_size(&crashk_res)); -#endif device_tree_init(); /* diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 7cb4f391d106..e6cac495a9e8 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -819,38 +819,22 @@ static void __init reserve_crashkernel(void) crash_size = PAGE_ALIGN(crash_size); - if (crash_base == 0) { - /* - * Current riscv boot protocol requires 2MB alignment for - * RV64 and 4MB alignment for RV32 (hugepage size) - */ - crash_base = memblock_find_in_range(search_start, search_end, - crash_size, PMD_SIZE); - - if (crash_base == 0) { - pr_warn("crashkernel: couldn't allocate %lldKB\n", - crash_size >> 10); - return; - } - } else { - /* User specifies base address explicitly. */ - if (!memblock_is_region_memory(crash_base, crash_size)) { - pr_warn("crashkernel: requested region is not memory\n"); - return; - } - - if (memblock_is_region_reserved(crash_base, crash_size)) { - pr_warn("crashkernel: requested region is reserved\n"); - return; - } - + if (crash_base) { + search_start = crash_base; + search_end = crash_base + crash_size; + } - if (!IS_ALIGNED(crash_base, PMD_SIZE)) { - pr_warn("crashkernel: requested region is misaligned\n"); - return; - } + /* + * Current riscv boot protocol requires 2MB alignment for + * RV64 and 4MB alignment for RV32 (hugepage size) + */ + crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, + search_start, search_end); + if (crash_base == 0) { + pr_warn("crashkernel: couldn't allocate %lldKB\n", + crash_size >> 10); + return; } - memblock_reserve(crash_base, crash_size); pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n", crash_base, crash_base + crash_size, crash_size >> 20); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ff0f9e838916..0bab57d6413b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -626,8 +626,9 @@ static void __init reserve_crashkernel(void) return; } low = crash_base ?: low; - crash_base = memblock_find_in_range(low, high, crash_size, - KEXEC_CRASH_MEM_ALIGN); + crash_base = memblock_phys_alloc_range(crash_size, + KEXEC_CRASH_MEM_ALIGN, + low, high); } if (!crash_base) { @@ -636,8 +637,10 @@ static void __init reserve_crashkernel(void) return; } - if (register_memory_notifier(&kdump_mem_nb)) + if (register_memory_notifier(&kdump_mem_nb)) { + memblock_free(crash_base, crash_size); return; + } if (!OLDMEM_BASE && MACHINE_IS_VM) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 294ed4392a0e..10562885f5fc 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -109,14 +109,13 @@ static u32 __init allocate_aperture(void) * memory. Unfortunately we cannot move it up because that would * make the IOMMU useless. 
*/ - addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, - aper_size, aper_size); + addr = memblock_phys_alloc_range(aper_size, aper_size, + GART_MIN_ADDR, GART_MAX_ADDR); if (!addr) { pr_err("Cannot allocate aperture memory hole [mem %#010lx-%#010lx] (%uKB)\n", addr, addr + aper_size - 1, aper_size >> 10); return 0; } - memblock_reserve(addr, aper_size); pr_info("Mapping aperture over RAM [mem %#010lx-%#010lx] (%uKB)\n", addr, addr + aper_size - 1, aper_size >> 10); register_nosave_region(addr >> PAGE_SHIFT, diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 75ef19aa8903..23a14d82e783 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -127,14 +127,12 @@ __ref void *alloc_low_pages(unsigned int num) unsigned long ret = 0; if (min_pfn_mapped < max_pfn_mapped) { - ret = memblock_find_in_range( + ret = memblock_phys_alloc_range( + PAGE_SIZE * num, PAGE_SIZE, min_pfn_mapped << PAGE_SHIFT, - max_pfn_mapped << PAGE_SHIFT, - PAGE_SIZE * num , PAGE_SIZE); + max_pfn_mapped << PAGE_SHIFT); } - if (ret) - memblock_reserve(ret, PAGE_SIZE * num); - else if (can_use_brk_pgt) + if (!ret && can_use_brk_pgt) ret = __pa(extend_brk(PAGE_SIZE * num, PAGE_SIZE)); if (!ret) @@ -610,8 +608,17 @@ static void __init memory_map_top_down(unsigned long map_start, unsigned long addr; unsigned long mapped_ram_size = 0; - /* xen has big range in reserved near end of ram, skip it at first.*/ - addr = memblock_find_in_range(map_start, map_end, PMD_SIZE, PMD_SIZE); + /* + * Systems that have many reserved areas near top of the memory, + * e.g. QEMU with less than 1G RAM and EFI enabled, or Xen, will + * require lots of 4K mappings which may exhaust pgt_buf. + * Start with top-most PMD_SIZE range aligned at PMD_SIZE to ensure + * there is enough mapped memory that can be allocated from + * memblock. 
+ */ + addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start, + map_end); + memblock_free(addr, PMD_SIZE); real_end = addr + PMD_SIZE; /* step_size need to be small so pgt_buf from BRK could cover it */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index e94da744386f..a1b5c71099e6 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -376,15 +376,14 @@ static int __init numa_alloc_distance(void) cnt++; size = cnt * cnt * sizeof(numa_distance[0]); - phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), - size, PAGE_SIZE); + phys = memblock_phys_alloc_range(size, PAGE_SIZE, 0, + PFN_PHYS(max_pfn_mapped)); if (!phys) { pr_warn("Warning: can't allocate distance table!\n"); /* don't retry until explicitly reset */ numa_distance = (void *)1LU; return -ENOMEM; } - memblock_reserve(phys, size); numa_distance = __va(phys); numa_distance_cnt = cnt; diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index 87d77cc52f86..737491b13728 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -447,13 +447,12 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) if (numa_dist_cnt) { u64 phys; - phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), - phys_size, PAGE_SIZE); + phys = memblock_phys_alloc_range(phys_size, PAGE_SIZE, 0, + PFN_PHYS(max_pfn_mapped)); if (!phys) { pr_warn("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); goto no_emu; } - memblock_reserve(phys, phys_size); phys_dist = __va(phys); for (i = 0; i < numa_dist_cnt; i++) diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 6534c92d0f83..31b5856010cb 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -28,7 +28,7 @@ void __init reserve_real_mode(void) WARN_ON(slab_is_available()); /* Has to be under 1M so we can execute real-mode AP code. */ - mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); + mem = memblock_phys_alloc_range(size, PAGE_SIZE, 0, 1<<20); if (!mem) pr_info("No sub-1M memory is available for the trampoline\n"); else -- cgit From dce49103962840dd61423d7627748d6c558d58c5 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 2 Sep 2021 15:00:33 -0700 Subject: mm: wire up syscall process_mrelease Split off from prev patch in the series that implements the syscall. 
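For illustration, a minimal user-space sketch of the call being wired up here (not part of this patch, which only touches the syscall tables; it assumes uapi headers new enough to define SYS_pidfd_open and SYS_process_mrelease, and that process_mrelease() is only valid for an exiting process, so SIGKILL is sent first):

#define _GNU_SOURCE
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>

/* Kill a process and eagerly reap its memory through its pidfd. */
static int kill_and_reap(pid_t pid)
{
	int pidfd, ret;

	pidfd = syscall(SYS_pidfd_open, pid, 0);	/* flags must be 0 */
	if (pidfd < 0)
		return -1;
	ret = kill(pid, SIGKILL);
	if (!ret)
		ret = syscall(SYS_process_mrelease, pidfd, 0);	/* flags must be 0 */
	close(pidfd);
	return ret;
}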
Link: https://lkml.kernel.org/r/20210809185259.405936-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Geert Uytterhoeven Cc: Andy Lutomirski Cc: Christian Brauner Cc: Christoph Hellwig Cc: David Hildenbrand Cc: David Rientjes Cc: Florian Weimer Cc: Jan Engelhardt Cc: Jann Horn Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Rik van Riel Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tim Murray Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/syscalls/syscall.tbl | 2 ++ arch/arm/tools/syscall.tbl | 2 ++ arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ arch/ia64/kernel/syscalls/syscall.tbl | 2 ++ arch/m68k/kernel/syscalls/syscall.tbl | 2 ++ arch/microblaze/kernel/syscalls/syscall.tbl | 2 ++ arch/mips/kernel/syscalls/syscall_n32.tbl | 2 ++ arch/mips/kernel/syscalls/syscall_n64.tbl | 2 ++ arch/mips/kernel/syscalls/syscall_o32.tbl | 2 ++ arch/parisc/kernel/syscalls/syscall.tbl | 2 ++ arch/powerpc/kernel/syscalls/syscall.tbl | 2 ++ arch/s390/kernel/syscalls/syscall.tbl | 2 ++ arch/sh/kernel/syscalls/syscall.tbl | 2 ++ arch/sparc/kernel/syscalls/syscall.tbl | 2 ++ arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 2 ++ 18 files changed, 33 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index a17687ed4b51..605645eae04c 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -486,3 +486,5 @@ 554 common landlock_create_ruleset sys_landlock_create_ruleset 555 common landlock_add_rule sys_landlock_add_rule 556 common landlock_restrict_self sys_landlock_restrict_self +# 557 reserved for memfd_secret +558 common process_mrelease sys_process_mrelease diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index c5df1179fc5d..2f32eb8beca8 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -460,3 +460,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 727bfc3be99b..3cb206aea3db 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 447 +#define __NR_compat_syscalls 449 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 99ffcafc736c..0f49cdb180dd 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -901,6 +901,8 @@ __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) #define __NR_landlock_restrict_self 446 __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) +#define __NR_process_mrelease 448 +__SYSCALL(__NR_process_mrelease, sys_process_mrelease) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 6d07742c57b8..9bf45f2be966 100644 --- 
a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -367,3 +367,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 541bc1b3a8f9..f1f98ee6c82d 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -446,3 +446,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a176faca2927..da49ddd4bb54 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -452,3 +452,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index c2d2e19abea8..56c8d3cf42ed 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -385,3 +385,5 @@ 444 n32 landlock_create_ruleset sys_landlock_create_ruleset 445 n32 landlock_add_rule sys_landlock_add_rule 446 n32 landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 n32 process_mrelease sys_process_mrelease diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index ac653d08b1ea..1ca7bc337932 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -361,3 +361,5 @@ 444 n64 landlock_create_ruleset sys_landlock_create_ruleset 445 n64 landlock_add_rule sys_landlock_add_rule 446 n64 landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 n64 process_mrelease sys_process_mrelease diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 253f2cd70b6b..fd3a9df60ec2 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -434,3 +434,5 @@ 444 o32 landlock_create_ruleset sys_landlock_create_ruleset 445 o32 landlock_add_rule sys_landlock_add_rule 446 o32 landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 o32 process_mrelease sys_process_mrelease diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index e26187b9ab87..040df1b7a589 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -444,3 +444,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index aef2a290e71a..d8ebd7d37c0f 100644 --- 
a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -526,3 +526,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 64d51ab5a8b4..57233ace30cb 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -449,3 +449,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease sys_process_mrelease diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index e0a70be77d84..2f6e95eb4690 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -449,3 +449,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 603f5a821502..42fc2906215d 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -492,3 +492,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ce763a12311c..661a03bcfbd1 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -452,3 +452,4 @@ 445 i386 landlock_add_rule sys_landlock_add_rule 446 i386 landlock_restrict_self sys_landlock_restrict_self 447 i386 memfd_secret sys_memfd_secret +448 i386 process_mrelease sys_process_mrelease diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index f6b57799c1ea..807b6a1de8e8 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -369,6 +369,7 @@ 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self 447 common memfd_secret sys_memfd_secret +448 common process_mrelease sys_process_mrelease # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 235d67d6ceb4..f4384951f393 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -417,3 +417,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease -- cgit From a717a780fc4e1dddd3fbf9ad08f180eec2a78194 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 23 Aug 2021 
17:58:24 -0700 Subject: KVM: x86/mmu: Don't freak out if pml5_root is NULL on 4-level host Include pml5_root in the set of special roots if and only if the host, and thus NPT, is using 5-level paging. mmu_alloc_special_roots() expects special roots to be allocated as a bundle, i.e. they're either all valid or all NULL. But for pml5_root, that expectation only holds true if the host uses 5-level paging, which causes KVM to WARN about pml5_root being NULL when the other special roots are valid. The silver lining of 4-level vs. 5-level NPT being tied to the host kernel's paging level is that KVM's shadow root level is constant; unlike VMX's EPT, KVM can't choose 4-level NPT based on guest.MAXPHYADDR. That means KVM can still expect pml5_root to be bundled with the other special roots, it just needs to be conditioned on the shadow root level. Fixes: cb0f722aff6e ("KVM: x86/mmu: Support shadowing NPT when 5-level paging is enabled in host") Reported-by: Maxim Levitsky Reviewed-by: Maxim Levitsky Signed-off-by: Sean Christopherson Message-Id: <20210824005824.205536-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c04e30f6e0db..e7bac871747c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3588,6 +3588,7 @@ out_unlock: static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; + bool need_pml5 = mmu->shadow_root_level > PT64_ROOT_4LEVEL; u64 *pml5_root = NULL; u64 *pml4_root = NULL; u64 *pae_root; @@ -3602,7 +3603,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) mmu->shadow_root_level < PT64_ROOT_4LEVEL) return 0; - if (mmu->pae_root && mmu->pml4_root && mmu->pml5_root) + /* + * NPT, the only paging mode that uses this horror, uses a fixed number + * of levels for the shadow page tables, e.g. all MMUs are 4-level or + * all MMus are 5-level. Thus, this can safely require that pml5_root + * is allocated if the other roots are valid and pml5 is needed, as any + * prior MMU would also have required pml5. + */ + if (mmu->pae_root && mmu->pml4_root && (!need_pml5 || mmu->pml5_root)) return 0; /* @@ -3610,7 +3618,7 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) * bail if KVM ends up in a state where only one of the roots is valid. */ if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root || - mmu->pml5_root)) + (need_pml5 && mmu->pml5_root))) return -EIO; /* @@ -3626,7 +3634,7 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) if (!pml4_root) goto err_pml4; - if (mmu->shadow_root_level > PT64_ROOT_4LEVEL) { + if (need_pml5) { pml5_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!pml5_root) goto err_pml5; -- cgit From 81b4b56d4f8130bbb99cf4e2b48082e5b4cfccb9 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 26 Aug 2021 12:57:49 +0300 Subject: KVM: VMX: avoid running vmx_handle_exit_irqoff in case of emulation If we are emulating an invalid guest state, we don't have a correct exit reason, and thus we shouldn't do anything in this function. 
Signed-off-by: Maxim Levitsky Message-Id: <20210826095750.1650467-2-mlevitsk@redhat.com> Cc: stable@vger.kernel.org Fixes: 95b5a48c4f2b ("KVM: VMX: Handle NMIs, #MCs and async #PFs in common irqs-disabled fn", 2019-06-18) Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fada1055f325..0c2c0d5ae873 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6382,6 +6382,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + if (vmx->emulation_required) + return; + if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT) handle_external_interrupt_irqoff(vcpu); else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI) -- cgit From 4ddacd525a2f16cebec8ba409fbce6b6fb45e987 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 3 Sep 2021 17:15:58 -0400 Subject: kvm: x86: Set KVM_MAX_VCPU_ID to 4*KVM_MAX_VCPUS Instead of requiring KVM_MAX_VCPU_ID to be manually increased every time we increase KVM_MAX_VCPUS, set it to 4*KVM_MAX_VCPUS. This should be enough for CPU topologies where Cores-per-Package and Packages-per-Socket are not powers of 2. In practice, this increases KVM_MAX_VCPU_ID from 1023 to 1152. The only side effect of this change is making some fields in struct kvm_ioapic larger, increasing the struct size from 1628 to 1780 bytes (in x86_64). Signed-off-by: Eduardo Habkost Message-Id: <20210903211600.2002377-2-ehabkost@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0532f4e84308..59da1fbf5248 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -39,7 +39,19 @@ #define KVM_MAX_VCPUS 288 #define KVM_SOFT_MAX_VCPUS 240 -#define KVM_MAX_VCPU_ID 1023 + +/* + * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs + * might be larger than the actual number of VCPUs because the + * APIC ID encodes CPU topology information. + * + * In the worst case, we'll need less than one extra bit for the + * Core ID, and less than one extra bit for the Package (Die) ID, + * so ratio of 4 should be enough. + */ +#define KVM_VCPU_ID_RATIO 4 +#define KVM_MAX_VCPU_ID (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) + /* memory slots that are not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 3 -- cgit From 074c82c8f7cf8a46c3b81965f122599e3a133450 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 3 Sep 2021 17:15:59 -0400 Subject: kvm: x86: Increase MAX_VCPUS to 1024 Increase KVM_MAX_VCPUS to 1024, so we can test larger VMs. I'm not changing KVM_SOFT_MAX_VCPUS yet because I'm afraid it might involve complicated questions around the meaning of "supported" and "recommended" in the upstream tree. KVM_SOFT_MAX_VCPUS will be changed in a separate patch. 
For reference, visible effects of this change are: - KVM_CAP_MAX_VCPUS will now return 1024 (of course) - Default value for CPUID[HYPERV_CPUID_IMPLEMENT_LIMITS (0x40000005)].EAX will now be 1024 - KVM_MAX_VCPU_ID will change from 1151 to 4096 - Size of struct kvm will increase from 19328 to 22272 bytes (in x86_64) - Size of struct kvm_ioapic will increase from 1780 to 5084 bytes (in x86_64) - Bitmap stack variables that will grow: - At kvm_hv_flush_tlb() and kvm_hv_send_ipi(), vp_bitmap[] and vcpu_bitmap[] will now be 128 bytes long - vcpu_bitmap at ioapic_write_indirect() will be 128 bytes long once patch "KVM: x86: Fix stack-out-of-bounds memory access from ioapic_write_indirect()" is applied Signed-off-by: Eduardo Habkost Message-Id: <20210903211600.2002377-3-ehabkost@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 59da1fbf5248..52ca8da4ab52 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -37,7 +37,7 @@ #define __KVM_HAVE_ARCH_VCPU_DEBUGFS -#define KVM_MAX_VCPUS 288 +#define KVM_MAX_VCPUS 1024 #define KVM_SOFT_MAX_VCPUS 240 /* -- cgit From 1dbaf04cb91b2b680d10f9a8f9f823d94c7087bf Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 3 Sep 2021 17:16:00 -0400 Subject: kvm: x86: Increase KVM_SOFT_MAX_VCPUS to 710 Support for 710 VCPUs was tested by Red Hat since RHEL-8.4, so increase KVM_SOFT_MAX_VCPUS to 710. Signed-off-by: Eduardo Habkost Message-Id: <20210903211600.2002377-4-ehabkost@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 52ca8da4ab52..f8f48a7ec577 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -38,7 +38,7 @@ #define __KVM_HAVE_ARCH_VCPU_DEBUGFS #define KVM_MAX_VCPUS 1024 -#define KVM_SOFT_MAX_VCPUS 240 +#define KVM_SOFT_MAX_VCPUS 710 /* * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs -- cgit From 678a305b85d95f288c12e3d69a32d3351b34f2bb Mon Sep 17 00:00:00 2001 From: Jia He Date: Mon, 30 Aug 2021 22:53:36 +0800 Subject: KVM: x86/mmu: Remove unused field mmio_cached in struct kvm_mmu_page After the fast tlb invalidation patch series was reverted and restored, the mmio_cached field was not removed. Hence an unused field is left in kvm_mmu_page. Cc: Sean Christopherson Signed-off-by: Jia He Message-Id: <20210830145336.27183-1-justin.he@arm.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu_internal.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 62bb8f758b3f..6b6f10895710 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -37,7 +37,6 @@ struct kvm_mmu_page { bool unsync; u8 mmu_valid_gen; - bool mmio_cached; bool lpage_disallowed; /* Can't be replaced by an equiv large page */ /* -- cgit From e7177339d7b5f9594b316842122b5fda9513d5e2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 31 Aug 2021 09:42:22 -0700 Subject: Revert "KVM: x86: mmu: Add guest physical address check in translate_gpa()" Revert a misguided illegal GPA check when "translating" a non-nested GPA.
The check is woefully incomplete as it does not fill in @exception as expected by all callers, which leads to KVM attempting to inject a bogus exception, potentially exposing kernel stack information in the process. WARNING: CPU: 0 PID: 8469 at arch/x86/kvm/x86.c:525 exception_type+0x98/0xb0 arch/x86/kvm/x86.c:525 CPU: 1 PID: 8469 Comm: syz-executor531 Not tainted 5.14.0-rc7-syzkaller #0 RIP: 0010:exception_type+0x98/0xb0 arch/x86/kvm/x86.c:525 Call Trace: x86_emulate_instruction+0xef6/0x1460 arch/x86/kvm/x86.c:7853 kvm_mmu_page_fault+0x2f0/0x1810 arch/x86/kvm/mmu/mmu.c:5199 handle_ept_misconfig+0xdf/0x3e0 arch/x86/kvm/vmx/vmx.c:5336 __vmx_handle_exit arch/x86/kvm/vmx/vmx.c:6021 [inline] vmx_handle_exit+0x336/0x1800 arch/x86/kvm/vmx/vmx.c:6038 vcpu_enter_guest+0x2a1c/0x4430 arch/x86/kvm/x86.c:9712 vcpu_run arch/x86/kvm/x86.c:9779 [inline] kvm_arch_vcpu_ioctl_run+0x47d/0x1b20 arch/x86/kvm/x86.c:10010 kvm_vcpu_ioctl+0x49e/0xe50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3652 The bug has escaped notice because practically speaking the GPA check is useless. The GPA check in question only comes into play when KVM is walking guest page tables (or "translating" CR3), and KVM already handles illegal GPA checks by setting reserved bits in rsvd_bits_mask for each PxE, or in the case of CR3 for loading PTDPTRs, manually checks for an illegal CR3. This particular failure doesn't hit the existing reserved bits checks because syzbot sets guest.MAXPHYADDR=1, and IA32 architecture simply doesn't allow for such an absurd MAXPHYADDR, e.g. 32-bit paging doesn't define any reserved PA bits checks, which KVM emulates by only incorporating the reserved PA bits into the "high" bits, i.e. bits 63:32. Simply remove the bogus check. There is zero meaningful value and no architectural justification for supporting guest.MAXPHYADDR < 32, and properly filling the exception would introduce non-trivial complexity. This reverts commit ec7771ab471ba6a945350353617e2e3385d0e013. Fixes: ec7771ab471b ("KVM: x86: mmu: Add guest physical address check in translate_gpa()") Cc: stable@vger.kernel.org Reported-by: syzbot+200c08e88ae818f849ce@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Message-Id: <20210831164224.1119728-2-seanjc@google.com> Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e7bac871747c..9b0cdec8b62d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -334,12 +334,6 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception) { - /* Check if guest physical address doesn't exceed guest maximum */ - if (kvm_vcpu_is_illegal_gpa(vcpu, gpa)) { - exception->error_code |= PFERR_RSVD_MASK; - return UNMAPPED_GVA; - } - return gpa; } -- cgit From ca41c34cab1f50f13ab5ac95739483871637a684 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 1 Sep 2021 15:10:22 -0700 Subject: KVM: x86/mmu: Relocate kvm_mmu_page.tdp_mmu_page for better cache locality Move "tdp_mmu_page" into the 1-byte void left by the recently removed "mmio_cached" so that it resides in the first 64 bytes of kvm_mmu_page, i.e. in the same cache line as the most commonly accessed fields. 
Don't bother wrapping tdp_mmu_page in CONFIG_X86_64, including the field in 32-bit builds doesn't affect the size of kvm_mmu_page, and a future patch can always wrap the field in the unlikely event KVM gains a 1-byte flag that is 32-bit specific. Note, the size of kvm_mmu_page is also unchanged on CONFIG_X86_64=y due to it previously sharing an 8-byte chunk with write_flooding_count. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210901221023.1303578-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu_internal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 6b6f10895710..4e7b634efa38 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -35,6 +35,7 @@ struct kvm_mmu_page { struct hlist_node hash_link; struct list_head lpage_disallowed_link; + bool tdp_mmu_page; bool unsync; u8 mmu_valid_gen; bool lpage_disallowed; /* Can't be replaced by an equiv large page */ @@ -70,8 +71,6 @@ struct kvm_mmu_page { atomic_t write_flooding_count; #ifdef CONFIG_X86_64 - bool tdp_mmu_page; - /* Used for freeing the page asynchronously if it is a TDP MMU page. */ struct rcu_head rcu_head; #endif -- cgit From 1148bfc47be3f885a10945ecbc1e3789a0313603 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 1 Sep 2021 15:10:23 -0700 Subject: KVM: x86/mmu: Move lpage_disallowed_link further "down" in kvm_mmu_page Move "lpage_disallowed_link" out of the first 64 bytes, i.e. out of the first cache line, of kvm_mmu_page so that "spt" and to a lesser extent "gfns" land in the first cache line. "lpage_disallowed_link" is accessed relatively infrequently compared to "spt", which is accessed any time KVM is walking and/or manipulating the shadow page tables. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210901221023.1303578-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu_internal.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 4e7b634efa38..bf2bdbf333c2 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -31,9 +31,12 @@ extern bool dbg; #define IS_VALID_PAE_ROOT(x) (!!(x)) struct kvm_mmu_page { + /* + * Note, "link" through "spt" fit in a single 64 byte cache line on + * 64-bit kernels, keep it that way unless there's a reason not to. + */ struct list_head link; struct hlist_node hash_link; - struct list_head lpage_disallowed_link; bool tdp_mmu_page; bool unsync; @@ -59,6 +62,7 @@ struct kvm_mmu_page { struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ DECLARE_BITMAP(unsync_child_bitmap, 512); + struct list_head lpage_disallowed_link; #ifdef CONFIG_X86_32 /* * Used out of the mmu-lock to avoid reading spte values while an -- cgit From 3cc4e148b96263313e3dce926eae569c942bb74e Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 17 Aug 2021 00:26:39 +0000 Subject: KVM: stats: Add VM stat for remote tlb flush requests Add a new stat that counts the number of times a remote TLB flush is requested, regardless of whether it kicks vCPUs out of guest mode. This allows us to look at how often flushes are initiated. Unlike remote_tlb_flush, this one applies to ARM's instruction-set-based TLB flush implementation, so apply it there too. 
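As a usage note (illustrative, not part of this patch): stats in kvm->stat.generic are exported per VM, so the new counter would be expected to be readable through the binary stats interface (KVM_GET_STATS_FD) or, where KVM debugfs is enabled, along the lines of

  cat /sys/kernel/debug/kvm/<pid>-<vm-fd>/remote_tlb_flush_requests

The exact debugfs path is an assumption based on KVM's usual <pid>-<fd> per-VM directory layout.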
Original-by: David Matlack Signed-off-by: Jing Zhang Message-Id: <20210817002639.3856694-1-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0625bf2353c2..f5bb235bbb59 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -80,6 +80,7 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) */ void kvm_flush_remote_tlbs(struct kvm *kvm) { + ++kvm->stat.generic.remote_tlb_flush_requests; kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); } -- cgit From a40b2fd064bb7c0425c7cbdca2120cf0609a90a2 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 14 Aug 2021 11:51:29 +0800 Subject: x86/kvm: Don't enable IRQ when IRQ enabled in kvm_wait Commit f4e61f0c9add3 ("x86/kvm: Fix broken irq restoration in kvm_wait") replaced "local_irq_restore() when IRQ enabled" with "local_irq_enable() when IRQ enabled" to suppress a warning. Although there is no similar debug warning for calling local_irq_enable() with IRQs already enabled, as there is for local_irq_restore() in the same situation, doing so is no less broken than doing local_irq_restore(), and we'd better avoid it. Cc: Mark Rutland Cc: Peter Zijlstra (Intel) Signed-off-by: Lai Jiangshan Message-Id: <20210814035129.154242-1-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a26643dc6bd6..b656456c3a94 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -884,10 +884,11 @@ static void kvm_wait(u8 *ptr, u8 val) } else { local_irq_disable(); + /* safe_halt() will enable IRQ */ if (READ_ONCE(*ptr) == val) safe_halt(); - - local_irq_enable(); + else + local_irq_enable(); } } -- cgit From a3cf527e70bd1553500746ef2f38db41e2af1d9e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 6 Apr 2021 10:49:11 +0800 Subject: KVM: MIPS: Remove a "set but not used" variable This fixes a build warning: arch/mips/kvm/vz.c: In function '_kvm_vz_restore_htimer': >> arch/mips/kvm/vz.c:392:10: warning: variable 'freeze_time' set but not used [-Wunused-but-set-variable] 392 | ktime_t freeze_time; | ^~~~~~~~~~~ Reported-by: kernel test robot Signed-off-by: Huacai Chen Message-Id: <20210406024911.2008046-1-chenhuacai@loongson.cn> Signed-off-by: Paolo Bonzini --- arch/mips/kvm/vz.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 43cad10b877d..4adca5abbc72 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -388,7 +388,6 @@ static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu, u32 compare, u32 cause) { u32 start_count, after_count; - ktime_t freeze_time; unsigned long flags; /* @@ -396,7 +395,7 @@ static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu, * this with interrupts disabled to avoid latency.
 */ local_irq_save(flags); - freeze_time = kvm_mips_freeze_hrtimer(vcpu, &start_count); + kvm_mips_freeze_hrtimer(vcpu, &start_count); write_c0_gtoffset(start_count - read_c0_count()); local_irq_restore(flags); -- cgit From 4ac214574d2d83b7ef20c603d75f1937c912bfd6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 6 Sep 2021 05:52:23 -0400 Subject: KVM: MMU: mark role_regs and role accessors as maybe unused It is reasonable for these functions to be used only in some configurations, for example only if the host is 64-bits (and therefore supports 64-bit guests). It is also reasonable to keep the role_regs and role accessors in sync even though some of the accessors may be used only for one of the two sets (as is the case currently for CR4.LA57). Because clang reports warnings for unused inlines declared in a .c file, mark both sets of accessors as __maybe_unused. Reported-by: kernel test robot Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 9b0cdec8b62d..2d7e61122af8 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -204,7 +204,7 @@ struct kvm_mmu_role_regs { * the single source of truth for the MMU's state. */ #define BUILD_MMU_ROLE_REGS_ACCESSOR(reg, name, flag) \ -static inline bool ____is_##reg##_##name(struct kvm_mmu_role_regs *regs)\ +static inline bool __maybe_unused ____is_##reg##_##name(struct kvm_mmu_role_regs *regs)\ { \ return !!(regs->reg & flag); \ } @@ -226,7 +226,7 @@ BUILD_MMU_ROLE_REGS_ACCESSOR(efer, lma, EFER_LMA); * and the vCPU may be incorrect/irrelevant. */ #define BUILD_MMU_ROLE_ACCESSOR(base_or_ext, reg, name) \ -static inline bool is_##reg##_##name(struct kvm_mmu *mmu) \ +static inline bool __maybe_unused is_##reg##_##name(struct kvm_mmu *mmu) \ { \ return !!(mmu->mmu_role. base_or_ext . reg##_##name); \ } -- cgit From d9130a2dfdd4b21736c91b818f87dbc0ccd1e757 Mon Sep 17 00:00:00 2001 From: Zelin Deng Date: Wed, 28 Apr 2021 10:22:01 +0800 Subject: KVM: x86: Update vCPU's hv_clock before back to guest when tsc_offset is adjusted When MSR_IA32_TSC_ADJUST is written by the guest via the TSC ADJUST feature and there is a big TSC warp (e.g. a new vCPU is hot-added into a VM which has been up for a long time), a large value is added to tsc_offset before returning to the guest. This makes the system time jump, since tsc_timestamp is not adjusted in the meantime, and breaks the monotonic character of pvclock. To fix this, notify kvm to update the vCPU's guest time before going back to the guest. Cc: stable@vger.kernel.org Signed-off-by: Zelin Deng Signed-off-by: Paolo Bonzini Message-Id: <1619576521-81399-2-git-send-email-zelin.deng@linux.alibaba.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1a00af1b076b..28ef14155726 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3321,6 +3321,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; adjust_tsc_offset_guest(vcpu, adj); + /* Before back to guest, tsc_timestamp must be adjusted * as well, otherwise guest's percpu pvclock time could jump.
+ */ + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); } vcpu->arch.ia32_tsc_adjust_msr = data; } -- cgit From 88b604263f3d6eedae0b1c2c3bbd602d1e2e8775 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 1 Sep 2021 16:05:59 +0200 Subject: s390/unwind: use current_frame_address() to unwind current task current_stack_pointer() simply returns the current value of %r15. If the caller of current_stack_pointer() allocates a stack frame (which is the case in the unwind code), %r15 points to the stack frame allocated for callees, meaning the caller (e.g. stack_trace_save) will end up in the stacktrace. This is not expected by stack_trace_save*() callers and causes problems. current_frame_address(), on the other hand, returns the function's stack frame address, which matches %r15 upon function invocation. Using it in get_stack_pointer() aligns the code more closely with the x86 implementation (according to BACKTRACE_SELF_TEST output) and meets the expectations of stack_trace_save*() callers, notably KCSAN. Also make sure unwind_start is always inlined. Reported-by: Nathan Chancellor Suggested-by: Marco Elver Signed-off-by: Vasily Gorbik Tested-by: Marco Elver Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/patch.git-04dd26be3043.your-ad-here.call-01630504868-ext-6188@work.hours Signed-off-by: Heiko Carstens --- arch/s390/include/asm/stacktrace.h | 20 ++++++++++---------- arch/s390/include/asm/unwind.h | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 3d8a4b94c620..dd00d98804ec 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -34,16 +34,6 @@ static inline bool on_stack(struct stack_info *info, return addr >= info->begin && addr + len <= info->end; } -static __always_inline unsigned long get_stack_pointer(struct task_struct *task, - struct pt_regs *regs) -{ - if (regs) - return (unsigned long) kernel_stack_pointer(regs); - if (task == current) - return current_stack_pointer(); - return (unsigned long) task->thread.ksp; -} - /* * Stack layout of a C stack frame. */ @@ -74,6 +64,16 @@ struct stack_frame { ((unsigned long)__builtin_frame_address(0) - \ offsetof(struct stack_frame, back_chain)) +static __always_inline unsigned long get_stack_pointer(struct task_struct *task, + struct pt_regs *regs) +{ + if (regs) + return (unsigned long)kernel_stack_pointer(regs); + if (task == current) + return current_frame_address(); + return (unsigned long)task->thread.ksp; +} + /* * To keep this simple mark register 2-6 as being changed (volatile) * by the called function, even though register 6 is saved/nonvolatile.
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index de9006b0cfeb..5ebf534ef753 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -55,10 +55,10 @@ static inline bool unwind_error(struct unwind_state *state) return state->error; } -static inline void unwind_start(struct unwind_state *state, - struct task_struct *task, - struct pt_regs *regs, - unsigned long first_frame) +static __always_inline void unwind_start(struct unwind_state *state, + struct task_struct *task, + struct pt_regs *regs, + unsigned long first_frame) { task = task ?: current; first_frame = first_frame ?: get_stack_pointer(task, regs); -- cgit From a052096bdd6809eeab809202726634d1ac975aa1 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 27 Aug 2021 20:21:05 +0200 Subject: s390/topology: fix topology information when calling cpu hotplug notifiers The cpu hotplug notifiers are called without updating the core/thread masks when a new CPU is added. This causes problems with code setting up data structures in a cpu hotplug notifier, and relying on that later in normal code. This caused a crash in the new core scheduling code (SCHED_CORE), where rq->core was set up in a notifier depending on cpu masks. To fix this, add a cpu_setup_mask which is used in update_cpu_masks() instead of the cpu_online_mask to determine whether the cpu masks should be set for a certain cpu. Also move update_cpu_masks() to update the masks before calling notify_cpu_starting() so that the notifiers are seeing the updated masks. Signed-off-by: Sven Schnelle Cc: [hca@linux.ibm.com: get rid of cpu_online_mask handling] Signed-off-by: Heiko Carstens --- arch/s390/include/asm/smp.h | 1 + arch/s390/kernel/smp.c | 9 +++++++-- arch/s390/kernel/topology.c | 13 +++++++------ 3 files changed, 15 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index e317fd4866c1..f16f4d054ae2 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -18,6 +18,7 @@ extern struct mutex smp_cpu_state_mutex; extern unsigned int smp_cpu_mt_shift; extern unsigned int smp_cpu_mtid; extern __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; +extern cpumask_t cpu_setup_mask; extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2a991e43ead3..1a04e5bdf655 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -95,6 +95,7 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; #endif static unsigned int smp_max_threads __initdata = -1U; +cpumask_t cpu_setup_mask; static int __init early_nosmt(char *s) { @@ -902,13 +903,14 @@ static void smp_start_secondary(void *cpuvoid) vtime_init(); vdso_getcpu_init(); pfault_init(); + cpumask_set_cpu(cpu, &cpu_setup_mask); + update_cpu_masks(); notify_cpu_starting(cpu); if (topology_cpu_dedicated(cpu)) set_cpu_flag(CIF_DEDICATED_CPU); else clear_cpu_flag(CIF_DEDICATED_CPU); set_cpu_online(cpu, true); - update_cpu_masks(); inc_irq_stat(CPU_RST); local_irq_enable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); @@ -950,10 +952,13 @@ early_param("possible_cpus", _setup_possible_cpus); int __cpu_disable(void) { unsigned long cregs[16]; + int cpu; /* Handle possible pending IPIs */ smp_handle_ext_call(); - set_cpu_online(smp_processor_id(), false); + cpu = smp_processor_id(); + set_cpu_online(cpu, false); + cpumask_clear_cpu(cpu, &cpu_setup_mask); update_cpu_masks(); /* Disable pseudo page faults 
on this cpu. */ pfault_fini(); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index d2458a29618f..58f8291950cb 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -67,7 +67,7 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c static cpumask_t mask; cpumask_clear(&mask); - if (!cpu_online(cpu)) + if (!cpumask_test_cpu(cpu, &cpu_setup_mask)) goto out; cpumask_set_cpu(cpu, &mask); switch (topology_mode) { @@ -88,7 +88,7 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c case TOPOLOGY_MODE_SINGLE: break; } - cpumask_and(&mask, &mask, cpu_online_mask); + cpumask_and(&mask, &mask, &cpu_setup_mask); out: cpumask_copy(dst, &mask); } @@ -99,16 +99,16 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) int i; cpumask_clear(&mask); - if (!cpu_online(cpu)) + if (!cpumask_test_cpu(cpu, &cpu_setup_mask)) goto out; cpumask_set_cpu(cpu, &mask); if (topology_mode != TOPOLOGY_MODE_HW) goto out; cpu -= cpu % (smp_cpu_mtid + 1); - for (i = 0; i <= smp_cpu_mtid; i++) - if (cpu_present(cpu + i)) + for (i = 0; i <= smp_cpu_mtid; i++) { + if (cpumask_test_cpu(cpu + i, &cpu_setup_mask)) cpumask_set_cpu(cpu + i, &mask); - cpumask_and(&mask, &mask, cpu_online_mask); + } out: cpumask_copy(dst, &mask); } @@ -569,6 +569,7 @@ void __init topology_init_early(void) alloc_masks(info, &book_info, 2); alloc_masks(info, &drawer_info, 3); out: + cpumask_set_cpu(0, &cpu_setup_mask); __arch_update_cpu_topology(); __arch_update_dedicated_flag(NULL); } -- cgit From 2e8275285a60868231eaf63abd9552cc27e512a2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 2 Sep 2021 19:47:37 +0200 Subject: s390/mm: fix kernel doc comments Signed-off-by: Heiko Carstens --- arch/s390/mm/gmap.c | 11 +++++------ arch/s390/mm/pgtable.c | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 9bb2c7512cd5..4d3b33ce81c6 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -27,7 +27,6 @@ /** * gmap_alloc - allocate and initialize a guest address space - * @mm: pointer to the parent mm_struct * @limit: maximum address of the gmap address space * * Returns a guest address space structure. 
@@ -504,7 +503,7 @@ EXPORT_SYMBOL_GPL(gmap_translate); /** * gmap_unlink - disconnect a page table from the gmap shadow tables - * @gmap: pointer to guest mapping meta data structure + * @mm: pointer to the parent mm_struct * @table: pointer to the host page table * @vmaddr: vm address associated with the host page table */ @@ -527,7 +526,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new, unsigned long gaddr); /** - * gmap_link - set up shadow page tables to connect a host to a guest address + * __gmap_link - set up shadow page tables to connect a host to a guest address * @gmap: pointer to guest mapping meta data structure * @gaddr: guest address * @vmaddr: vm address @@ -1971,7 +1970,7 @@ out_free: EXPORT_SYMBOL_GPL(gmap_shadow_sgt); /** - * gmap_shadow_lookup_pgtable - find a shadow page table + * gmap_shadow_pgt_lookup - find a shadow page table * @sg: pointer to the shadow guest address space structure * @saddr: the address in the shadow aguest address space * @pgt: parent gmap address of the page table to get shadowed @@ -2165,7 +2164,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) } EXPORT_SYMBOL_GPL(gmap_shadow_page); -/** +/* * gmap_shadow_notify - handle notifications for shadow gmap * * Called with sg->parent->shadow_lock. @@ -2225,7 +2224,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, /** * ptep_notify - call all invalidation callbacks for a specific pte. * @mm: pointer to the process mm_struct - * @addr: virtual address in the process address space + * @vmaddr: virtual address in the process address space * @pte: pointer to the page table entry * @bits: bits from the pgste that caused the notify call * diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index eec3a9d7176e..034721a68d8f 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -834,7 +834,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(set_guest_storage_key); -/** +/* * Conditionally set a guest storage key (handling csske). * oldkey will be updated when either mr or mc is set and a pointer is given. * @@ -867,7 +867,7 @@ int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(cond_set_guest_storage_key); -/** +/* * Reset a guest reference bit (rrbe), returning the reference and changed bit. * * Returns < 0 in case of error, otherwise the cc to be reported to the guest. 
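The hunks above all enforce the same kernel-doc rule: only a comment that scripts/kernel-doc should parse starts with /** and must name the function's real parameters; everything else is demoted to a plain /* comment. A minimal well-formed kernel-doc header looks like this (a made-up example for illustration, not from this patch):

/**
 * example_translate - translate a guest address into a host address
 * @gmap: pointer to the guest mapping meta data structure
 * @gaddr: guest address to translate
 *
 * Returns the host address, or -EFAULT in case of an error.
 */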
-- cgit From 5dddfaac4c258f5eda7b3dba7f9d851262fcbbab Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 2 Sep 2021 19:51:59 +0200 Subject: s390/cpum_cf: move array from header to C file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the array from the header to a C file to avoid defining it in every C file that includes the header: In file included from arch/s390/kernel/perf_cpum_cf_common.c:19: ./arch/s390/include/asm/cpu_mcf.h:27:18: warning: ‘cpumf_ctr_ctl’ defined but not used [-Wunused-const-variable=] 27 | static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { | ^~~~~~~~~~~~~ Signed-off-by: Heiko Carstens --- arch/s390/include/asm/cpu_mcf.h | 7 ------- arch/s390/kernel/perf_cpum_cf.c | 8 ++++++++ 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h index ca0e0e5ddbc4..f87a4788c19c 100644 --- a/arch/s390/include/asm/cpu_mcf.h +++ b/arch/s390/include/asm/cpu_mcf.h @@ -24,13 +24,6 @@ enum cpumf_ctr_set { #define CPUMF_LCCTL_ENABLE_SHIFT 16 #define CPUMF_LCCTL_ACTCTL_SHIFT 0 -static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { - [CPUMF_CTR_SET_BASIC] = 0x02, - [CPUMF_CTR_SET_USER] = 0x04, - [CPUMF_CTR_SET_CRYPTO] = 0x08, - [CPUMF_CTR_SET_EXT] = 0x01, - [CPUMF_CTR_SET_MT_DIAG] = 0x20, -}; static inline void ctr_set_enable(u64 *state, u64 ctrsets) { diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 2e3bb633acf6..4a99154fe651 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -158,6 +158,14 @@ static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, return need; } +static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { + [CPUMF_CTR_SET_BASIC] = 0x02, + [CPUMF_CTR_SET_USER] = 0x04, + [CPUMF_CTR_SET_CRYPTO] = 0x08, + [CPUMF_CTR_SET_EXT] = 0x01, + [CPUMF_CTR_SET_MT_DIAG] = 0x20, +}; /* Read out all counter sets and save them in the provided data buffer. * The last 64 byte host an artificial trailer entry. */ -- cgit From ebd9cc6593691e6bc8526e368cedbdfc8034f403 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 3 Sep 2021 18:27:01 +0200 Subject: s390/pci: fix clp_get_state() handling of -ENODEV With commit cc049eecfb7a ("s390/pci: simplify CLP List PCI handling") clp_get_state() was changed to make use of the new clp_find_pci() helper function to query a specific function. This however returns -ENODEV when the device is not found at all, and this error was passed to the caller. It was missed, however, that the callers actually expect a success return from clp_get_state() if the device is gone. Fix this by handling the -ENODEV return of clp_find_pci() explicitly in clp_get_state(): return success and set the state parameter to ZPCI_FN_STATE_RESERVED, matching the design concept that a PCI function that disappeared must have been reserved elsewhere. All other error returns continue to be passed on to the caller.
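For illustration, a sketch of the caller-visible contract after this fix (not code from the patch): a function that has vanished from the CLP list is now reported as success together with the RESERVED state rather than as an error:

	enum zpci_state state;
	int rc = clp_get_state(fid, &state);

	if (!rc && state == ZPCI_FN_STATE_RESERVED) {
		/* device is gone; tear it down rather than treating this as an error */
	}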
Reviewed-by: Matthew Rosato Fixes: cc049eecfb7a ("s390/pci: simplify CLP List PCI handling") Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_clp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 51dc2215a2b7..df895d98a56b 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -449,14 +449,17 @@ int clp_get_state(u32 fid, enum zpci_state *state) struct clp_fh_list_entry entry; int rc; - *state = ZPCI_FN_STATE_RESERVED; rrb = clp_alloc_block(GFP_ATOMIC); if (!rrb) return -ENOMEM; rc = clp_find_pci(rrb, fid, &entry); - if (!rc) + if (!rc) { *state = entry.config_state; + } else if (rc == -ENODEV) { + *state = ZPCI_FN_STATE_RESERVED; + rc = 0; + } clp_free_block(rrb); return rc; -- cgit From 85ad27215ca57d02bb7c43d76f65a865dc863b59 Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Mon, 6 Sep 2021 08:49:44 +0200 Subject: s390/pci: read clp_list_pci_req only once We do not have to reset the fh_list in the loop. Signed-off-by: Pierre Morel Reviewed-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_clp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index df895d98a56b..be077b39da33 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -383,8 +383,8 @@ static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid, rc = clp_list_pci_req(rrb, &resume_token, &nentries); if (rc) return rc; + fh_list = rrb->response.fh_list; for (i = 0; i < nentries; i++) { - fh_list = rrb->response.fh_list; if (fh_list[i].fid == fid) { *entry = fh_list[i]; return 0; -- cgit From 68c32eb2707aed0a3be1a60b0f206943a25e8f34 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Sep 2021 13:59:26 +0200 Subject: s390: remove xpram device driver Support for expanded storage was only available until z13 and z/VM 6.3 respectively. However, there have not been any use cases for this device driver for a long time, even before that. Therefore remove it. Acked-by: Christian Borntraeger Signed-off-by: Heiko Carstens --- arch/s390/configs/defconfig | 1 - arch/s390/configs/zfcpdump_defconfig | 1 - 2 files changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index e1642d2cba59..56a1cc85c5d7 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -397,7 +397,6 @@ CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -# CONFIG_BLK_DEV_XPRAM is not set CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_RBD=m CONFIG_BLK_DEV_NVME=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index d576aaab27c9..aceccf3b9a88 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -35,7 +35,6 @@ CONFIG_NET=y # CONFIG_ETHTOOL_NETLINK is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_RAM=y -# CONFIG_BLK_DEV_XPRAM is not set # CONFIG_DCSSBLK is not set # CONFIG_DASD is not set CONFIG_ENCLOSURE_SERVICES=y -- cgit From cd1adf1b63a112d762832e9c64b0a886fbb840d6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 7 Sep 2021 11:03:45 -0700 Subject: Revert "mm/gup: remove try_get_page(), call try_get_compound_head() directly" This reverts commit 9857a17f206ff374aea78bccfb687f145368be2e. That commit was completely broken, and I should have caught on to it earlier.
But happily, the kernel test robot noticed the breakage fairly quickly. The breakage is because "try_get_page()" is about avoiding the page reference count overflow case, but is otherwise the exact same as a plain "get_page()". In contrast, "try_get_compound_head()" is an entirely different beast, and uses __page_cache_add_speculative() because it's not just about the page reference count, but also about possibly racing with the underlying page going away. So all the commentary about how "try_get_page() has fallen a little behind in terms of maintenance, try_get_compound_head() handles speculative page references more thoroughly" was just completely wrong: yes, try_get_compound_head() handles speculative page references, but the point is that try_get_page() does not, and must not. So there's no lack of maintenance - there are fundamentally different semantics. A speculative page reference would be entirely wrong in "get_page()", and it's entirely wrong in "try_get_page()". It's not about speculation, it's purely about "uhhuh, you can't get this page because you've tried to increment the reference count too much already". The reason the kernel test robot noticed this bug was that it hit the VM_BUG_ON() in __page_cache_add_speculative(), which is all about verifying that the context of any speculative page access is correct. But since that isn't what try_get_page() is all about, the VM_BUG_ON() tests things that are not correct to test for try_get_page(). Reported-by: kernel test robot Cc: John Hubbard Cc: Christoph Hellwig Cc: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a834e4672f72..212632d57db9 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -822,7 +822,7 @@ void do_secure_storage_access(struct pt_regs *regs) break; case KERNEL_FAULT: page = phys_to_page(addr); - if (unlikely(!try_get_compound_head(page, 1))) + if (unlikely(!try_get_page(page))) break; rc = arch_make_page_accessible(page); put_page(page); -- cgit From 9652cb805c44b74ba935c84bfd59745cb1962de8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 7 Sep 2021 20:37:59 +0200 Subject: s390/ftrace: remove incorrect __va usage The address of ftrace_graph_caller is already virtual. Using __va() to translate the address is wrong. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 0a464d328467..1d94ffdf347b 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -341,13 +341,13 @@ NOKPROBE_SYMBOL(prepare_ftrace_return); */ int ftrace_enable_ftrace_graph_caller(void) { - brcl_disable(__va(ftrace_graph_caller)); + brcl_disable(ftrace_graph_caller); return 0; } int ftrace_disable_ftrace_graph_caller(void) { - brcl_enable(__va(ftrace_graph_caller)); + brcl_enable(ftrace_graph_caller); return 0; } -- cgit From bb9c14ad267d25dd77ceccbcfd69804bdb7240f5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 8 Sep 2021 17:45:06 +0200 Subject: hugetlbfs: s390 is always 64bit No need to check for 64BIT. While at it, let's just select ARCH_SUPPORTS_HUGETLBFS from arch/s390/Kconfig.
Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20210908154506.20764-1-david@redhat.com Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 92c0a1b4c528..d42c8161a0ed 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -110,6 +110,7 @@ config S390 select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC + select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF -- cgit From 65a2aa5f482ed0c1b5afb9e6b0b9e0b16bb8b616 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:04 -0700 Subject: mm/memory_hotplug: remove nid parameter from arch_remove_memory() The parameter is unused, let's remove it. Link: https://lkml.kernel.org/r/20210712124052.26491-3-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Catalin Marinas Acked-by: Michael Ellerman [powerpc] Acked-by: Heiko Carstens [s390] Reviewed-by: Pankaj Gupta Reviewed-by: Oscar Salvador Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Yoshinori Sato Cc: Rich Felker Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Pavel Tatashin Cc: Baoquan He Cc: Laurent Dufour Cc: Sergei Trofimovich Cc: Kefeng Wang Cc: Michel Lespinasse Cc: Christophe Leroy Cc: "Aneesh Kumar K.V" Cc: Thiago Jung Bauermann Cc: Joe Perches Cc: Pierre Morel Cc: Jia He Cc: Anton Blanchard Cc: Dan Williams Cc: Dave Jiang Cc: Jason Wang Cc: Len Brown Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Nathan Lynch Cc: Pankaj Gupta Cc: "Rafael J. Wysocki" Cc: "Rafael J. 
Wysocki" Cc: Scott Cheloha Cc: Vishal Verma Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 3 +-- arch/ia64/mm/init.c | 3 +-- arch/powerpc/mm/mem.c | 3 +-- arch/s390/mm/init.c | 3 +-- arch/sh/mm/init.c | 3 +-- arch/x86/mm/init_32.c | 3 +-- arch/x86/mm/init_64.c | 3 +-- 7 files changed, 7 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9ff0de1b2b93..cfd9deb347c3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1502,8 +1502,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 064a967a7b6e..5c6da8d83c1a 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -484,8 +484,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ad198b439222..c3c4e31462ec 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -119,8 +119,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, return rc; } -void __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8ac710de1ab1..d85bd7f5d8dc 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -306,8 +306,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return rc; } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index ce26c7f8950a..506784702430 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -414,8 +414,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 74b78840182d..bd90b8fe81e4 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -801,8 +801,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return __add_pages(nid, start_pfn, nr_pages, params); } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ddeaba947eb3..a6e11763763f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c 
@@ -1255,8 +1255,7 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) remove_pagetable(start, end, true, NULL); } -void __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; -- cgit From e1c158e4956612e7bada4c03dfb99210af4d6cde Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:09 -0700 Subject: mm/memory_hotplug: remove nid parameter from remove_memory() and friends There is only a single user remaining. We can simply lookup the nid only used for node offlining purposes when walking our memory blocks. We don't expect to remove multi-nid ranges; and if we'd ever do, we most probably don't care about removing multi-nid ranges that actually result in empty nodes. If ever required, we can detect the "multi-nid" scenario and simply try offlining all online nodes. Link: https://lkml.kernel.org/r/20210712124052.26491-4-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Michael Ellerman (powerpc) Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Dan Williams Cc: Vishal Verma Cc: Dave Jiang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Nathan Lynch Cc: Laurent Dufour Cc: "Aneesh Kumar K.V" Cc: Scott Cheloha Cc: Anton Blanchard Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Baoquan He Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Dave Hansen Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jia He Cc: Joe Perches Cc: Kefeng Wang Cc: Michal Hocko Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Pankaj Gupta Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Pierre Morel Cc: "Rafael J. 
Wysocki" Cc: Rich Felker Cc: Sergei Trofimovich Cc: Thiago Jung Bauermann Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/pseries/hotplug-memory.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 377d852f5a9a..ef5c24b42cf1 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -286,7 +286,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si { unsigned long block_sz, start_pfn; int sections_per_block; - int i, nid; + int i; start_pfn = base >> PAGE_SHIFT; @@ -297,10 +297,9 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si block_sz = pseries_memory_block_size(); sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; - nid = memory_add_physaddr_to_nid(base); for (i = 0; i < sections_per_block; i++) { - __remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); + __remove_memory(base, MIN_MEMORY_BLOCK_SIZE); base += MIN_MEMORY_BLOCK_SIZE; } @@ -387,7 +386,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) block_sz = pseries_memory_block_size(); - __remove_memory(mem_block->nid, lmb->base_addr, block_sz); + __remove_memory(lmb->base_addr, block_sz); put_device(&mem_block->dev); /* Update memory regions for memory remove */ @@ -660,7 +659,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - __remove_memory(nid, lmb->base_addr, block_sz); + __remove_memory(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; -- cgit From 8350229ffceb372621e277d4d36f1ffca6212135 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Sep 2021 19:55:58 -0700 Subject: riscv: only select GENERIC_IOREMAP if MMU support is enabled nommu ioremap is an inline stub in asm-generic/io.h. Link: https://lkml.kernel.org/r/20210825072036.GA29161@lst.de Signed-off-by: Christoph Hellwig Cc: Nicholas Piggin Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 4f7b70ae7c31..8598e3d103a3 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -48,7 +48,7 @@ config RISCV select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_EARLY_IOREMAP select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO - select GENERIC_IOREMAP + select GENERIC_IOREMAP if MMU select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_SHOW select GENERIC_LIB_DEVMEM_IS_ALLOWED -- cgit From 5ecae8f6aafe6ae7c8fb10c3c175f85baf779814 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 7 Sep 2021 19:57:21 -0700 Subject: alpha: agp: make empty macros use do-while-0 style Copy these macros from ia64/include/asm/agp.h to avoid the "empty-body" in 'if' statment warning. 
drivers/char/agp/generic.c: In function 'agp_generic_destroy_page': ../drivers/char/agp/generic.c:1265:42: warning: suggest braces around empty body in an 'if' statement [-Wempty-body] 1265 | unmap_page_from_agp(page); Link: https://lkml.kernel.org/r/20210809030822.20658-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: David Airlie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/agp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/agp.h b/arch/alpha/include/asm/agp.h index 7173eada1567..7874f063d000 100644 --- a/arch/alpha/include/asm/agp.h +++ b/arch/alpha/include/asm/agp.h @@ -6,8 +6,8 @@ /* dummy for now */ -#define map_page_into_agp(page) -#define unmap_page_from_agp(page) +#define map_page_into_agp(page) do { } while (0) +#define unmap_page_from_agp(page) do { } while (0) #define flush_agp_cache() mb() /* GATT allocation. Returns/accepts GATT kernel virtual address. */ -- cgit From 0a9d991c424b4cda5db574b6355f0c627ae59109 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 7 Sep 2021 19:57:24 -0700 Subject: alpha: pci-sysfs: fix all kernel-doc warnings Fix all kernel-doc warnings in arch/alpha/kernel/pci-sysfs.c: arch/alpha/kernel/pci-sysfs.c:67: warning: No description found for return value of 'pci_mmap_resource' arch/alpha/kernel/pci-sysfs.c:115: warning: Function parameter or member 'pdev' not described in 'pci_remove_resource_files' arch/alpha/kernel/pci-sysfs.c:115: warning: Excess function parameter 'dev' description in 'pci_remove_resource_files' arch/alpha/kernel/pci-sysfs.c:230: warning: Function parameter or member 'pdev' not described in 'pci_create_resource_files' arch/alpha/kernel/pci-sysfs.c:230: warning: Excess function parameter 'dev' description in 'pci_create_resource_files' arch/alpha/kernel/pci-sysfs.c:232: warning: No description found for return value of 'pci_create_resource_files' arch/alpha/kernel/pci-sysfs.c:305: warning: Function parameter or member 'bus' not described in 'pci_adjust_legacy_attr' arch/alpha/kernel/pci-sysfs.c:305: warning: Excess function parameter 'b' description in 'pci_adjust_legacy_attr' Link: https://lkml.kernel.org/r/20210808185249.31442-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/pci-sysfs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c index 0021580d79ad..5808a66e2a81 100644 --- a/arch/alpha/kernel/pci-sysfs.c +++ b/arch/alpha/kernel/pci-sysfs.c @@ -60,6 +60,8 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num, * @sparse: address space type * * Use the bus mapping routines to map a PCI resource into userspace. + * + * Return: %0 on success, negative error code otherwise */ static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, @@ -106,7 +108,7 @@ static int pci_mmap_resource_dense(struct file *filp, struct kobject *kobj, /** * pci_remove_resource_files - cleanup resource files - * @dev: dev to cleanup + * @pdev: pci_dev to cleanup * * If we created resource files for @dev, remove them from sysfs and * free their resources. 
@@ -221,10 +223,12 @@ static int pci_create_attr(struct pci_dev *pdev, int num) } /** - * pci_create_resource_files - create resource files in sysfs for @dev - * @dev: dev in question + * pci_create_resource_files - create resource files in sysfs for @pdev + * @pdev: pci_dev in question * * Walk the resources in @dev creating files for each resource available. + * + * Return: %0 on success, or negative error code */ int pci_create_resource_files(struct pci_dev *pdev) { @@ -296,7 +300,7 @@ int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma, /** * pci_adjust_legacy_attr - adjustment of legacy file attributes - * @b: bus to create files under + * @bus: bus to create files under * @mmap_type: I/O port or memory * * Adjust file name and size for sparse mappings. -- cgit From c226bc3cd99bd3535f6a7264c7c7fb0bce16db12 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 7 Sep 2021 19:57:38 -0700 Subject: arch: Kconfig: fix spelling mistake "seperate" -> "separate" There is a spelling mistake in the Kconfig text. Fix it. Link: https://lkml.kernel.org/r/20210704095207.37342-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 129df498a8e1..766e68b1e703 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -886,7 +886,7 @@ config HAVE_SOFTIRQ_ON_OWN_STACK bool help Architecture provides a function to run __do_softirq() on a - seperate stack. + separate stack. config PGTABLE_LEVELS int -- cgit From 8b097881b54cbc23dd78262ed88c9924d00ea457 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 7 Sep 2021 20:16:06 -0700 Subject: trap: cleanup trap_init() There are some empty trap_init() definitions in different ARCHs. Introduce a new weak trap_init() function to clean them up. Link: https://lkml.kernel.org/r/20210812123602.76356-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Russell King (Oracle) [arm32] Acked-by: Vineet Gupta [arc] Acked-by: Michael Ellerman [powerpc] Cc: Yoshinori Sato Cc: Ley Foon Tan Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: James E.J.
Bottomley Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Jeff Dike Cc: Richard Weinberger Cc: Anton Ivanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/kernel/traps.c | 5 ----- arch/arm/kernel/traps.c | 5 ----- arch/h8300/kernel/traps.c | 4 ---- arch/hexagon/kernel/traps.c | 4 ---- arch/nds32/kernel/traps.c | 5 ----- arch/nios2/kernel/traps.c | 5 ----- arch/openrisc/kernel/traps.c | 5 ----- arch/parisc/kernel/traps.c | 4 ---- arch/powerpc/kernel/traps.c | 5 ----- arch/riscv/kernel/traps.c | 5 ----- arch/um/kernel/trap.c | 4 ---- 11 files changed, 51 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index 57235e5c0cea..6b83e3f2b41c 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -20,11 +20,6 @@ #include #include -void __init trap_init(void) -{ - return; -} - void die(const char *str, struct pt_regs *regs, unsigned long address) { show_kernel_fault_diag(str, regs, address); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 64308e3a5d0c..e9b4f2b49bd8 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -781,11 +781,6 @@ void abort(void) panic("Oops failed to kill thread"); } -void __init trap_init(void) -{ - return; -} - #ifdef CONFIG_KUSER_HELPERS static void __init kuser_init(void *vectors) { diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index 5d8b969cd8f3..bdbe988d8dbc 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -39,10 +39,6 @@ void __init base_trap_init(void) { } -void __init trap_init(void) -{ -} - asmlinkage void set_esp0(unsigned long ssp) { current->thread.esp0 = ssp; diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 904134b37232..edfc35dafeb1 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -28,10 +28,6 @@ #define TRAP_SYSCALL 1 #define TRAP_DEBUG 0xdb -void __init trap_init(void) -{ -} - #ifdef CONFIG_GENERIC_BUG /* Maybe should resemble arch/sh/kernel/traps.c ?? 
*/ int is_valid_bugaddr(unsigned long addr) diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index ee0d9ae192a5..f06421c645af 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -183,11 +183,6 @@ void __pgd_error(const char *file, int line, unsigned long val) } extern char *exception_vector, *exception_vector_end; -void __init trap_init(void) -{ - return; -} - void __init early_trap_init(void) { unsigned long ivb = 0; diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index b172da4eb1a9..596986a74a26 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -105,11 +105,6 @@ void show_stack(struct task_struct *task, unsigned long *stack, printk("%s\n", loglvl); } -void __init trap_init(void) -{ - /* Nothing to do here */ -} - /* Breakpoint handler */ asmlinkage void breakpoint_c(struct pt_regs *fp) { diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 4d61333c2623..aa1e709405ac 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -231,11 +231,6 @@ void unhandled_exception(struct pt_regs *regs, int ea, int vector) die("Oops", regs, 9); } -void __init trap_init(void) -{ - /* Nothing needs to be done */ -} - asmlinkage void do_trap(struct pt_regs *regs, unsigned long address) { force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc); diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 8d8441d4562a..747c328fb886 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -859,7 +859,3 @@ void __init early_trap_init(void) initialize_ivt(&fault_vector_20); } - -void __init trap_init(void) -{ -} diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d56254f05e17..7819231242b2 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2215,11 +2215,6 @@ DEFINE_INTERRUPT_HANDLER(kernel_bad_stack) die("Bad kernel stack pointer", regs, SIGABRT); } -void __init trap_init(void) -{ -} - - #ifdef CONFIG_PPC_EMULATED_STATS #define WARN_EMULATED_SETUP(type) .type = { .name = #type } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 0a98fd0ddfe9..0daaa3e4630d 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -199,11 +199,6 @@ int is_valid_bugaddr(unsigned long pc) } #endif /* CONFIG_GENERIC_BUG */ -/* stvec & scratch is already set from head.S */ -void __init trap_init(void) -{ -} - #ifdef CONFIG_VMAP_STACK static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)__aligned(16); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ad12f78bda7e..3198c4767387 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -311,7 +311,3 @@ void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { do_IRQ(WINCH_IRQ, regs); } - -void trap_init(void) -{ -} -- cgit From 4cb398fe1bf127c1e9eff40969454b289f219c43 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 7 Sep 2021 20:00:44 -0700 Subject: configs: remove the obsolete CONFIG_INPUT_POLLDEV This CONFIG option was removed in commit 278b13ce3a89 ("Input: remove input_polled_dev implementation") so there's no point to keep it in defconfigs any longer. Get rid of the leftover for all arches. 
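(A quick way to verify the cleanup is complete, as a hypothetical check rather than part of this patch: a search such as "git grep -n INPUT_POLLDEV arch/*/configs" should produce no hits once the patch is applied.)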
Link: https://lkml.kernel.org/r/20210726074741.1062-1-yuzenghui@huawei.com Signed-off-by: Zenghui Yu Cc: Dmitry Torokhov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/configs/dove_defconfig | 1 - arch/arm/configs/pxa_defconfig | 1 - arch/mips/configs/lemote2f_defconfig | 1 - arch/mips/configs/pic32mzda_defconfig | 1 - arch/mips/configs/rt305x_defconfig | 1 - arch/mips/configs/xway_defconfig | 1 - arch/parisc/configs/generic-32bit_defconfig | 1 - arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - 9 files changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index b935162a8bba..33074fdab2ea 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -56,7 +56,6 @@ CONFIG_ATA=y CONFIG_SATA_MV=y CONFIG_NETDEVICES=y CONFIG_MV643XX_ETH=y -CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 363f1b1b08e3..58f4834289e6 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -284,7 +284,6 @@ CONFIG_RT2800USB=m CONFIG_MWIFIEX=m CONFIG_MWIFIEX_SDIO=m CONFIG_INPUT_FF_MEMLESS=m -CONFIG_INPUT_POLLDEV=y CONFIG_INPUT_MATRIXKMAP=y CONFIG_INPUT_MOUSEDEV=m CONFIG_INPUT_MOUSEDEV_SCREEN_X=640 diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index aaf9d5e0aa2c..791894c4d8fb 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -116,7 +116,6 @@ CONFIG_8139TOO=y CONFIG_R8169=y CONFIG_USB_USBNET=m CONFIG_USB_NET_CDC_EEM=m -CONFIG_INPUT_POLLDEV=m CONFIG_INPUT_EVDEV=y # CONFIG_MOUSE_PS2_ALPS is not set # CONFIG_MOUSE_PS2_LOGIPS2PP is not set diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig index 63fe2da1b37f..fd567247adc7 100644 --- a/arch/mips/configs/pic32mzda_defconfig +++ b/arch/mips/configs/pic32mzda_defconfig @@ -34,7 +34,6 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SCAN_ASYNC=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_INPUT_LEDS=m -CONFIG_INPUT_POLLDEV=y CONFIG_INPUT_MOUSEDEV=m CONFIG_INPUT_EVDEV=y CONFIG_INPUT_EVBUG=m diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig index fec5851c164b..eb359db15dba 100644 --- a/arch/mips/configs/rt305x_defconfig +++ b/arch/mips/configs/rt305x_defconfig @@ -90,7 +90,6 @@ CONFIG_PPPOE=m CONFIG_PPP_ASYNC=m CONFIG_ISDN=y CONFIG_INPUT=m -CONFIG_INPUT_POLLDEV=m # CONFIG_KEYBOARD_ATKBD is not set # CONFIG_INPUT_MOUSE is not set CONFIG_INPUT_MISC=y diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig index 9abbc0debc2a..eeb689f715cb 100644 --- a/arch/mips/configs/xway_defconfig +++ b/arch/mips/configs/xway_defconfig @@ -96,7 +96,6 @@ CONFIG_PPPOE=m CONFIG_PPP_ASYNC=m CONFIG_ISDN=y CONFIG_INPUT=m -CONFIG_INPUT_POLLDEV=m # CONFIG_KEYBOARD_ATKBD is not set # CONFIG_INPUT_MOUSE is not set CONFIG_INPUT_MISC=y diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 7611d48c599e..dd14e3131325 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -111,7 +111,6 @@ CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m CONFIG_PPPOE=m # CONFIG_WLAN is not set -CONFIG_INPUT_POLLDEV=y CONFIG_KEYBOARD_HIL_OLD=m CONFIG_KEYBOARD_HIL=m CONFIG_MOUSE_SERIAL=y diff --git a/arch/x86/configs/i386_defconfig 
b/arch/x86/configs/i386_defconfig index 9c9c4a888b1d..e81885384f60 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -156,7 +156,6 @@ CONFIG_FORCEDETH=y CONFIG_8139TOO=y # CONFIG_8139TOO_PIO is not set CONFIG_R8169=y -CONFIG_INPUT_POLLDEV=y CONFIG_INPUT_EVDEV=y CONFIG_INPUT_JOYSTICK=y CONFIG_INPUT_TABLET=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index b60bd2d86034..e8a7a0af2bda 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -148,7 +148,6 @@ CONFIG_SKY2=y CONFIG_FORCEDETH=y CONFIG_8139TOO=y CONFIG_R8169=y -CONFIG_INPUT_POLLDEV=y CONFIG_INPUT_EVDEV=y CONFIG_INPUT_JOYSTICK=y CONFIG_INPUT_TABLET=y -- cgit From 59ab844eed9c6b01d32dcb27b57accc23771b324 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Sep 2021 15:18:25 -0700 Subject: compat: remove some compat entry points These are all handled correctly when calling the native system call entry point, so remove the special cases. Link: https://lkml.kernel.org/r/20210727144859.4150043-6-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Cc: Al Viro Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Eric Biederman Cc: Feng Tang Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Paul Mackerras Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/unistd32.h | 10 +++++----- arch/mips/kernel/syscalls/syscall_n32.tbl | 10 +++++----- arch/mips/kernel/syscalls/syscall_o32.tbl | 10 +++++----- arch/parisc/kernel/syscalls/syscall.tbl | 8 ++++---- arch/powerpc/kernel/syscalls/syscall.tbl | 10 +++++----- arch/s390/kernel/syscalls/syscall.tbl | 10 +++++----- arch/sparc/kernel/syscalls/syscall.tbl | 10 +++++----- arch/x86/entry/syscalls/syscall_32.tbl | 4 ++-- arch/x86/entry/syscalls/syscall_64.tbl | 2 +- 9 files changed, 37 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 4e99e4b912ef..844f6ae58662 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -649,11 +649,11 @@ __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) #define __NR_inotify_rm_watch 318 __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) #define __NR_mbind 319 -__SYSCALL(__NR_mbind, compat_sys_mbind) +__SYSCALL(__NR_mbind, sys_mbind) #define __NR_get_mempolicy 320 -__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy) +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) #define __NR_set_mempolicy 321 -__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy) +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) #define __NR_openat 322 __SYSCALL(__NR_openat, compat_sys_openat) #define __NR_mkdirat 323 @@ -699,7 +699,7 @@ __SYSCALL(__NR_tee, sys_tee) #define __NR_vmsplice 343 __SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_move_pages 344 -__SYSCALL(__NR_move_pages, compat_sys_move_pages) +__SYSCALL(__NR_move_pages, sys_move_pages) #define __NR_getcpu 345 __SYSCALL(__NR_getcpu, sys_getcpu) #define __NR_epoll_pwait 346 @@ -811,7 +811,7 @@ __SYSCALL(__NR_rseq, sys_rseq) #define __NR_io_pgetevents 399 __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents) #define __NR_migrate_pages 400 -__SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages) 
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages) #define __NR_kexec_file_load 401 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) /* 402 is unused */ diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 56c8d3cf42ed..70e32de2bcaa 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -239,9 +239,9 @@ 228 n32 clock_nanosleep sys_clock_nanosleep_time32 229 n32 tgkill sys_tgkill 230 n32 utimes sys_utimes_time32 -231 n32 mbind compat_sys_mbind -232 n32 get_mempolicy compat_sys_get_mempolicy -233 n32 set_mempolicy compat_sys_set_mempolicy +231 n32 mbind sys_mbind +232 n32 get_mempolicy sys_get_mempolicy +233 n32 set_mempolicy sys_set_mempolicy 234 n32 mq_open compat_sys_mq_open 235 n32 mq_unlink sys_mq_unlink 236 n32 mq_timedsend sys_mq_timedsend_time32 @@ -258,7 +258,7 @@ 247 n32 inotify_init sys_inotify_init 248 n32 inotify_add_watch sys_inotify_add_watch 249 n32 inotify_rm_watch sys_inotify_rm_watch -250 n32 migrate_pages compat_sys_migrate_pages +250 n32 migrate_pages sys_migrate_pages 251 n32 openat sys_openat 252 n32 mkdirat sys_mkdirat 253 n32 mknodat sys_mknodat @@ -279,7 +279,7 @@ 268 n32 sync_file_range sys_sync_file_range 269 n32 tee sys_tee 270 n32 vmsplice sys_vmsplice -271 n32 move_pages compat_sys_move_pages +271 n32 move_pages sys_move_pages 272 n32 set_robust_list compat_sys_set_robust_list 273 n32 get_robust_list compat_sys_get_robust_list 274 n32 kexec_load compat_sys_kexec_load diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 201237fd0f43..a61c35edaa74 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -279,9 +279,9 @@ 265 o32 clock_nanosleep sys_clock_nanosleep_time32 266 o32 tgkill sys_tgkill 267 o32 utimes sys_utimes_time32 -268 o32 mbind sys_mbind compat_sys_mbind -269 o32 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -270 o32 set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +268 o32 mbind sys_mbind +269 o32 get_mempolicy sys_get_mempolicy +270 o32 set_mempolicy sys_set_mempolicy 271 o32 mq_open sys_mq_open compat_sys_mq_open 272 o32 mq_unlink sys_mq_unlink 273 o32 mq_timedsend sys_mq_timedsend_time32 @@ -298,7 +298,7 @@ 284 o32 inotify_init sys_inotify_init 285 o32 inotify_add_watch sys_inotify_add_watch 286 o32 inotify_rm_watch sys_inotify_rm_watch -287 o32 migrate_pages sys_migrate_pages compat_sys_migrate_pages +287 o32 migrate_pages sys_migrate_pages 288 o32 openat sys_openat compat_sys_openat 289 o32 mkdirat sys_mkdirat 290 o32 mknodat sys_mknodat @@ -319,7 +319,7 @@ 305 o32 sync_file_range sys_sync_file_range sys32_sync_file_range 306 o32 tee sys_tee 307 o32 vmsplice sys_vmsplice -308 o32 move_pages sys_move_pages compat_sys_move_pages +308 o32 move_pages sys_move_pages 309 o32 set_robust_list sys_set_robust_list compat_sys_set_robust_list 310 o32 get_robust_list sys_get_robust_list compat_sys_get_robust_list 311 o32 kexec_load sys_kexec_load compat_sys_kexec_load diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 0bf854b70612..bf751e0732b7 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -292,9 +292,9 @@ 258 32 clock_nanosleep sys_clock_nanosleep_time32 258 64 clock_nanosleep sys_clock_nanosleep 259 common tgkill sys_tgkill -260 common mbind sys_mbind compat_sys_mbind -261 common get_mempolicy sys_get_mempolicy 
compat_sys_get_mempolicy -262 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +260 common mbind sys_mbind +261 common get_mempolicy sys_get_mempolicy +262 common set_mempolicy sys_set_mempolicy # 263 was vserver 264 common add_key sys_add_key 265 common request_key sys_request_key @@ -331,7 +331,7 @@ 292 64 sync_file_range sys_sync_file_range 293 common tee sys_tee 294 common vmsplice sys_vmsplice -295 common move_pages sys_move_pages compat_sys_move_pages +295 common move_pages sys_move_pages 296 common getcpu sys_getcpu 297 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 298 common statfs64 sys_statfs64 compat_sys_statfs64 diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 29b55e2e035c..7bef917cc84e 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -330,10 +330,10 @@ 256 64 sys_debug_setcontext sys_ni_syscall 256 spu sys_debug_setcontext sys_ni_syscall # 257 reserved for vserver -258 nospu migrate_pages sys_migrate_pages compat_sys_migrate_pages -259 nospu mbind sys_mbind compat_sys_mbind -260 nospu get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +258 nospu migrate_pages sys_migrate_pages +259 nospu mbind sys_mbind +260 nospu get_mempolicy sys_get_mempolicy +261 nospu set_mempolicy sys_set_mempolicy 262 nospu mq_open sys_mq_open compat_sys_mq_open 263 nospu mq_unlink sys_mq_unlink 264 32 mq_timedsend sys_mq_timedsend_time32 @@ -381,7 +381,7 @@ 298 common faccessat sys_faccessat 299 common get_robust_list sys_get_robust_list compat_sys_get_robust_list 300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list -301 common move_pages sys_move_pages compat_sys_move_pages +301 common move_pages sys_move_pages 302 common getcpu sys_getcpu 303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 304 32 utimensat sys_utimensat_time32 diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index aa9d68b8ee14..df5261e5cfe1 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -274,9 +274,9 @@ 265 common statfs64 sys_statfs64 compat_sys_statfs64 266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages -268 common mbind sys_mbind compat_sys_mbind -269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +268 common mbind sys_mbind sys_mbind +269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy +270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy 271 common mq_open sys_mq_open compat_sys_mq_open 272 common mq_unlink sys_mq_unlink sys_mq_unlink 273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 @@ -293,7 +293,7 @@ 284 common inotify_init sys_inotify_init sys_inotify_init 285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch 286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch -287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages +287 common migrate_pages sys_migrate_pages sys_migrate_pages 288 common openat sys_openat compat_sys_openat 289 common mkdirat sys_mkdirat sys_mkdirat 290 common mknodat sys_mknodat sys_mknodat @@ -317,7 +317,7 @@ 307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range 308 common tee sys_tee sys_tee 309 common vmsplice 
sys_vmsplice sys_vmsplice -310 common move_pages sys_move_pages compat_sys_move_pages +310 common move_pages sys_move_pages sys_move_pages 311 common getcpu sys_getcpu sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 313 common utimes sys_utimes sys_utimes_time32 diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 7893104718c2..c37764dc764d 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -365,12 +365,12 @@ 299 common unshare sys_unshare 300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list 301 common get_robust_list sys_get_robust_list compat_sys_get_robust_list -302 common migrate_pages sys_migrate_pages compat_sys_migrate_pages -303 common mbind sys_mbind compat_sys_mbind -304 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -305 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +302 common migrate_pages sys_migrate_pages +303 common mbind sys_mbind +304 common get_mempolicy sys_get_mempolicy +305 common set_mempolicy sys_set_mempolicy 306 common kexec_load sys_kexec_load compat_sys_kexec_load -307 common move_pages sys_move_pages compat_sys_move_pages +307 common move_pages sys_move_pages 308 common getcpu sys_getcpu 309 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 310 32 utimensat sys_utimensat_time32 diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 61f18b72552b..960a021d543e 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -286,7 +286,7 @@ 272 i386 fadvise64_64 sys_ia32_fadvise64_64 273 i386 vserver 274 i386 mbind sys_mbind -275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy +275 i386 get_mempolicy sys_get_mempolicy 276 i386 set_mempolicy sys_set_mempolicy 277 i386 mq_open sys_mq_open compat_sys_mq_open 278 i386 mq_unlink sys_mq_unlink @@ -328,7 +328,7 @@ 314 i386 sync_file_range sys_ia32_sync_file_range 315 i386 tee sys_tee 316 i386 vmsplice sys_vmsplice -317 i386 move_pages sys_move_pages compat_sys_move_pages +317 i386 move_pages sys_move_pages 318 i386 getcpu sys_getcpu 319 i386 epoll_pwait sys_epoll_pwait 320 i386 utimensat sys_utimensat_time32 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 807b6a1de8e8..18b5500ea8bf 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -398,7 +398,7 @@ 530 x32 set_robust_list compat_sys_set_robust_list 531 x32 get_robust_list compat_sys_get_robust_list 532 x32 vmsplice sys_vmsplice -533 x32 move_pages compat_sys_move_pages +533 x32 move_pages sys_move_pages 534 x32 preadv compat_sys_preadv64 535 x32 pwritev compat_sys_pwritev64 536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -- cgit From a7a08b275a8bbade798c4bdaad07ade68fe7003c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Sep 2021 15:18:29 -0700 Subject: arch: remove compat_alloc_user_space All users of compat_alloc_user_space() and copy_in_user() have been removed from the kernel, only a few functions in sparc remain that can be changed to calling arch_copy_in_user() instead. Link: https://lkml.kernel.org/r/20210727144859.4150043-7-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Cc: Al Viro Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: "David S. 
Miller" Cc: Eric Biederman Cc: Feng Tang Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Paul Mackerras Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/compat.h | 5 --- arch/arm64/include/asm/uaccess.h | 11 ----- arch/arm64/lib/Makefile | 2 +- arch/arm64/lib/copy_in_user.S | 77 --------------------------------- arch/mips/cavium-octeon/octeon-memcpy.S | 2 - arch/mips/include/asm/compat.h | 8 ---- arch/mips/include/asm/uaccess.h | 26 ----------- arch/mips/lib/memcpy.S | 11 ----- arch/parisc/include/asm/compat.h | 6 --- arch/parisc/include/asm/uaccess.h | 2 - arch/parisc/lib/memcpy.c | 9 ---- arch/powerpc/include/asm/compat.h | 16 ------- arch/s390/include/asm/compat.h | 10 ----- arch/s390/include/asm/uaccess.h | 3 -- arch/s390/lib/uaccess.c | 63 --------------------------- arch/sparc/include/asm/compat.h | 19 -------- arch/sparc/kernel/process_64.c | 2 +- arch/sparc/kernel/signal32.c | 12 ++--- arch/sparc/kernel/signal_64.c | 8 ++-- arch/x86/include/asm/compat.h | 13 ------ arch/x86/include/asm/uaccess_64.h | 7 --- 21 files changed, 12 insertions(+), 300 deletions(-) delete mode 100644 arch/arm64/lib/copy_in_user.S (limited to 'arch') diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 79c1a750e357..eaa6ca062d89 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -107,11 +107,6 @@ struct compat_statfs { #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) #define COMPAT_MINSIGSTKSZ 2048 -static inline void __user *arch_compat_alloc_user_space(long len) -{ - return (void __user *)compat_user_stack_pointer() - len; -} - struct compat_ipc64_perm { compat_key_t key; __compat_uid32_t uid; diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index b5f08621fa29..190b494e22ab 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -430,17 +430,6 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi __actu_ret; \ }) -extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n); -#define raw_copy_in_user(to, from, n) \ -({ \ - unsigned long __aciu_ret; \ - uaccess_ttbr0_enable(); \ - __aciu_ret = __arch_copy_in_user(__uaccess_mask_ptr(to), \ - __uaccess_mask_ptr(from), (n)); \ - uaccess_ttbr0_disable(); \ - __aciu_ret; \ -}) - #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 6dd56a49790a..0941180a86d3 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 lib-y := clear_user.o delay.o copy_from_user.o \ - copy_to_user.o copy_in_user.o copy_page.o \ + copy_to_user.o copy_page.o \ clear_page.o csum.o insn.o memchr.o memcpy.o \ memset.o memcmp.o strcmp.o strncmp.o strlen.o \ strnlen.o strchr.o strrchr.o tishift.o diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S deleted file mode 100644 index dbea3799c3ef..000000000000 --- a/arch/arm64/lib/copy_in_user.S +++ /dev/null @@ -1,77 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copy from user space to user space - * - * Copyright (C) 2012 ARM Ltd. 
- */ - -#include - -#include -#include -#include - -/* - * Copy from user space to user space (alignment handled by the hardware) - * - * Parameters: - * x0 - to - * x1 - from - * x2 - n - * Returns: - * x0 - bytes not copied - */ - .macro ldrb1 reg, ptr, val - user_ldst 9998f, ldtrb, \reg, \ptr, \val - .endm - - .macro strb1 reg, ptr, val - user_ldst 9998f, sttrb, \reg, \ptr, \val - .endm - - .macro ldrh1 reg, ptr, val - user_ldst 9997f, ldtrh, \reg, \ptr, \val - .endm - - .macro strh1 reg, ptr, val - user_ldst 9997f, sttrh, \reg, \ptr, \val - .endm - - .macro ldr1 reg, ptr, val - user_ldst 9997f, ldtr, \reg, \ptr, \val - .endm - - .macro str1 reg, ptr, val - user_ldst 9997f, sttr, \reg, \ptr, \val - .endm - - .macro ldp1 reg1, reg2, ptr, val - user_ldp 9997f, \reg1, \reg2, \ptr, \val - .endm - - .macro stp1 reg1, reg2, ptr, val - user_stp 9997f, \reg1, \reg2, \ptr, \val - .endm - -end .req x5 -srcin .req x15 -SYM_FUNC_START(__arch_copy_in_user) - add end, x0, x2 - mov srcin, x1 -#include "copy_template.S" - mov x0, #0 - ret -SYM_FUNC_END(__arch_copy_in_user) -EXPORT_SYMBOL(__arch_copy_in_user) - - .section .fixup,"ax" - .align 2 -9997: cmp dst, dstin - b.ne 9998f - // Before being absolutely sure we couldn't copy anything, try harder -USER(9998f, ldtrb tmp1w, [srcin]) -USER(9998f, sttrb tmp1w, [dst]) - add dst, dst, #1 -9998: sub x0, end, dst // bytes not copied - ret - .previous diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S index 600d018cf354..0a515cde1c18 100644 --- a/arch/mips/cavium-octeon/octeon-memcpy.S +++ b/arch/mips/cavium-octeon/octeon-memcpy.S @@ -154,8 +154,6 @@ FEXPORT(__raw_copy_from_user) EXPORT_SYMBOL(__raw_copy_from_user) FEXPORT(__raw_copy_to_user) EXPORT_SYMBOL(__raw_copy_to_user) -FEXPORT(__raw_copy_in_user) -EXPORT_SYMBOL(__raw_copy_in_user) /* * Note: dst & src may be unaligned, len may be 0 * Temps diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 53f015a1b0a7..bbb3bc5a42fd 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -96,14 +96,6 @@ struct compat_statfs { #define COMPAT_OFF_T_MAX 0x7fffffff -static inline void __user *arch_compat_alloc_user_space(long len) -{ - struct pt_regs *regs = (struct pt_regs *) - ((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1; - - return (void __user *) (regs->regs[29] - len); -} - struct compat_ipc64_perm { compat_key_t key; __compat_uid32_t uid; diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 783fecce65c8..f8f74f9f5883 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -428,7 +428,6 @@ do { \ extern size_t __raw_copy_from_user(void *__to, const void *__from, size_t __n); extern size_t __raw_copy_to_user(void *__to, const void *__from, size_t __n); -extern size_t __raw_copy_in_user(void *__to, const void *__from, size_t __n); static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) @@ -480,31 +479,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) #define INLINE_COPY_FROM_USER #define INLINE_COPY_TO_USER -static inline unsigned long -raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) -{ - register void __user *__cu_to_r __asm__("$4"); - register const void __user *__cu_from_r __asm__("$5"); - register long __cu_len_r __asm__("$6"); - - __cu_to_r = to; - __cu_from_r = from; - __cu_len_r = n; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - 
__MODULE_JAL(__raw_copy_in_user) - ".set\tnoat\n\t" - __UA_ADDU "\t$1, %1, %2\n\t" - ".set\tat\n\t" - ".set\treorder" - : "+r" (__cu_to_r), "+r" (__cu_from_r), "+r" (__cu_len_r) - : - : "$8", "$9", "$10", "$11", "$12", "$14", "$15", "$24", "$31", - DADDI_SCRATCH, "memory"); - return __cu_len_r; -} - extern __kernel_size_t __bzero(void __user *addr, __kernel_size_t size); /* diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index e19fb98b5d38..277c32296636 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -666,8 +666,6 @@ FEXPORT(__raw_copy_from_user) EXPORT_SYMBOL(__raw_copy_from_user) FEXPORT(__raw_copy_to_user) EXPORT_SYMBOL(__raw_copy_to_user) -FEXPORT(__raw_copy_in_user) -EXPORT_SYMBOL(__raw_copy_in_user) #endif /* Legacy Mode, user <-> user */ __BUILD_COPY_USER LEGACY_MODE USEROP USEROP @@ -703,13 +701,4 @@ EXPORT_SYMBOL(__raw_copy_to_user) __BUILD_COPY_USER EVA_MODE KERNELOP USEROP END(__raw_copy_to_user) -/* - * __copy_in_user (EVA) - */ - -LEAF(__raw_copy_in_user) -EXPORT_SYMBOL(__raw_copy_in_user) -__BUILD_COPY_USER EVA_MODE USEROP USEROP -END(__raw_copy_in_user) - #endif diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index b5d90e82b65d..c04f5a637c39 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -163,12 +163,6 @@ struct compat_shmid64_ds { #define COMPAT_ELF_NGREG 80 typedef compat_ulong_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; -static __inline__ void __user *arch_compat_alloc_user_space(long len) -{ - struct pt_regs *regs = ¤t->thread.regs; - return (void __user *)regs->gr[30]; -} - static inline int __is_compat_task(struct task_struct *t) { return test_tsk_thread_flag(t, TIF_32BIT); diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index ed2cd4fb479b..7c13314aae4a 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -215,8 +215,6 @@ unsigned long __must_check raw_copy_to_user(void __user *dst, const void *src, unsigned long len); unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src, unsigned long len); -unsigned long __must_check raw_copy_in_user(void __user *dst, const void __user *src, - unsigned long len); #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index 4b75388190b4..ea70a0e08321 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -38,14 +38,6 @@ unsigned long raw_copy_from_user(void *dst, const void __user *src, } EXPORT_SYMBOL(raw_copy_from_user); -unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigned long len) -{ - mtsp(get_user_space(), 1); - mtsp(get_user_space(), 2); - return pa_memcpy((void __force *)dst, (void __force *)src, len); -} - - void * memcpy(void * dst,const void *src, size_t count) { mtsp(get_kernel_space(), 1); @@ -54,7 +46,6 @@ void * memcpy(void * dst,const void *src, size_t count) return dst; } -EXPORT_SYMBOL(raw_copy_in_user); EXPORT_SYMBOL(memcpy); bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index e33dcf134cdd..7afc96fb6524 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -83,22 +83,6 @@ struct compat_statfs { #define COMPAT_OFF_T_MAX 0x7fffffff -static inline void __user *arch_compat_alloc_user_space(long len) -{ - struct pt_regs *regs = current->thread.regs; - unsigned 
long usp = regs->gpr[1]; - - /* - * We can't access below the stack pointer in the 32bit ABI and - * can access 288 bytes in the 64bit big-endian ABI, - * or 512 bytes with the new ELFv2 little-endian ABI. - */ - if (!is_32bit_task()) - usp -= USER_REDZONE_SIZE; - - return (void __user *) (usp - len); -} - /* * ipc64_perm is actually 32/64bit clean but since the compat layer refers to * it we may as well define it. diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 8d49505b4a43..cdc7ae72529d 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -176,16 +176,6 @@ static inline int is_compat_task(void) return test_thread_flag(TIF_31BIT); } -static inline void __user *arch_compat_alloc_user_space(long len) -{ - unsigned long stack; - - stack = KSTK_ESP(current); - if (is_compat_task()) - stack &= 0x7fffffffUL; - return (void __user *) (stack - len); -} - #endif struct compat_ipc64_perm { diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 9ed9aa37e836..ce550d06abc3 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -227,9 +227,6 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s __get_user(x, ptr); \ }) -unsigned long __must_check -raw_copy_in_user(void __user *to, const void __user *from, unsigned long n); - /* * Copy a null terminated string from userspace. */ diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 94ca99bde59d..a596e69d3c47 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -204,69 +204,6 @@ unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long } EXPORT_SYMBOL(raw_copy_to_user); -static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, - unsigned long size) -{ - unsigned long tmp1, tmp2; - - tmp1 = -4096UL; - /* FIXME: copy with reduced length. 
*/ - asm volatile( - " lgr 0,%[spec]\n" - "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" - " jz 2f\n" - "1: algr %0,%3\n" - " slgr %1,%3\n" - " slgr %2,%3\n" - " j 0b\n" - "2:slgr %0,%0\n" - "3: \n" - EX_TABLE(0b,3b) - : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) - : [spec] "d" (0x810081UL) - : "cc", "memory", "0"); - return size; -} - -static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from, - unsigned long size) -{ - unsigned long tmp1; - - asm volatile( - " sacf 256\n" - " aghi %0,-1\n" - " jo 5f\n" - " bras %3,3f\n" - "0: aghi %0,257\n" - "1: mvc 0(1,%1),0(%2)\n" - " la %1,1(%1)\n" - " la %2,1(%2)\n" - " aghi %0,-1\n" - " jnz 1b\n" - " j 5f\n" - "2: mvc 0(256,%1),0(%2)\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "3: aghi %0,-256\n" - " jnm 2b\n" - "4: ex %0,1b-0b(%3)\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) - : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) - : : "cc", "memory"); - return size; -} - -unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) -{ - if (copy_with_mvcos()) - return copy_in_user_mvcos(to, from, n); - return copy_in_user_mvc(to, from, n); -} -EXPORT_SYMBOL(raw_copy_in_user); - static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { unsigned long tmp1, tmp2; diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index 8b63410e830f..bd949fcf9d63 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -116,25 +116,6 @@ struct compat_statfs { #define COMPAT_OFF_T_MAX 0x7fffffff -#ifdef CONFIG_COMPAT -static inline void __user *arch_compat_alloc_user_space(long len) -{ - struct pt_regs *regs = current_thread_info()->kregs; - unsigned long usp = regs->u_regs[UREG_I6]; - - if (test_thread_64bit_stack(usp)) - usp += STACK_BIAS; - - if (test_thread_flag(TIF_32BIT)) - usp &= 0xffffffffUL; - - usp -= len; - usp &= ~0x7UL; - - return (void __user *) usp; -} -#endif - struct compat_ipc64_perm { compat_key_t key; __compat_uid32_t uid; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 093849bfda50..d1cc410d2f64 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -455,7 +455,7 @@ static unsigned long clone_stackframe(unsigned long csp, unsigned long psp) distance = fp - psp; rval = (csp - distance); - if (copy_in_user((void __user *) rval, (void __user *) psp, distance)) + if (raw_copy_in_user((void __user *)rval, (void __user *)psp, distance)) rval = 0; else if (!stack_64bit) { if (put_user(((u32)csp), diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 4276b9e003ca..6cc124a3bb98 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -435,9 +435,9 @@ static int setup_frame32(struct ksignal *ksig, struct pt_regs *regs, (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int)); if (!wsaved) { - err |= copy_in_user((u32 __user *)sf, - (u32 __user *)(regs->u_regs[UREG_FP]), - sizeof(struct reg_window32)); + err |= raw_copy_in_user((u32 __user *)sf, + (u32 __user *)(regs->u_regs[UREG_FP]), + sizeof(struct reg_window32)); } else { struct reg_window *rp; @@ -567,9 +567,9 @@ static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs, err |= put_compat_sigset(&sf->mask, oldset, sizeof(compat_sigset_t)); if (!wsaved) { - err |= copy_in_user((u32 __user *)sf, - (u32 __user *)(regs->u_regs[UREG_FP]), - sizeof(struct reg_window32)); + err |= 
raw_copy_in_user((u32 __user *)sf, + (u32 __user *)(regs->u_regs[UREG_FP]), + sizeof(struct reg_window32)); } else { struct reg_window *rp; diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index cea23cf95600..2a78d2af1265 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -406,10 +406,10 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) err |= copy_to_user(&sf->mask, sigmask_to_save(), sizeof(sigset_t)); if (!wsaved) { - err |= copy_in_user((u64 __user *)sf, - (u64 __user *)(regs->u_regs[UREG_FP] + - STACK_BIAS), - sizeof(struct reg_window)); + err |= raw_copy_in_user((u64 __user *)sf, + (u64 __user *)(regs->u_regs[UREG_FP] + + STACK_BIAS), + sizeof(struct reg_window)); } else { struct reg_window *rp; diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 4ae01cdb99de..7516e4199b3c 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -156,19 +156,6 @@ struct compat_shmid64_ds { (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) #endif -static inline void __user *arch_compat_alloc_user_space(long len) -{ - compat_uptr_t sp = task_pt_regs(current)->sp; - - /* - * -128 for the x32 ABI redzone. For IA32, it is not strictly - * necessary, but not harmful. - */ - sp -= 128; - - return (void __user *)round_down(sp - len, 16); -} - static inline bool in_x32_syscall(void) { #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index e7265a552f4f..45697e04d771 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -58,13 +58,6 @@ raw_copy_to_user(void __user *dst, const void *src, unsigned long size) return copy_user_generic((__force void *)dst, src, size); } -static __always_inline __must_check -unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigned long size) -{ - return copy_user_generic((__force void *)dst, - (__force void *)src, size); -} - extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); -- cgit From ddb13122aa7e988e15283701afb086e0950c405f Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 8 Sep 2021 18:10:23 -0700 Subject: nds32/setup: remove unused memblock_region variable in setup_memory() kernel test robot reports unused variable warning: arch/nds32/kernel/setup.c:247:26: warning: Unused variable: region [unusedVariable] struct memblock_region *region; ^ Remove the unused variable. 
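(The [unusedVariable] tag suggests the report comes from cppcheck rather than gcc; assuming cppcheck is available, a run along the lines of "cppcheck --enable=style arch/nds32/kernel/setup.c" should reproduce the warning on the pre-patch tree.)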
Link: https://lkml.kernel.org/r/20210712125218.28951-1-rppt@kernel.org Signed-off-by: Mike Rapoport Reported-by: kernel test robot Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Cc: Greentime Hu Cc: Nick Hu Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/nds32/kernel/setup.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index 41725eaf8bac..b3d34d646652 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -244,7 +244,6 @@ static void __init setup_memory(void) unsigned long ram_start_pfn; unsigned long free_ram_start_pfn; phys_addr_t memory_start, memory_end; - struct memblock_region *region; memory_end = memory_start = 0; -- cgit From 3da6379a6d865ef4faa369eebe2019dac75cda4b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 5 Sep 2021 11:50:56 +0200 Subject: parisc: Add missing FORCE prerequisite in Makefile Signed-off-by: Helge Deller --- arch/parisc/boot/compressed/Makefile | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile index dff453687530..9fe54878167d 100644 --- a/arch/parisc/boot/compressed/Makefile +++ b/arch/parisc/boot/compressed/Makefile @@ -26,7 +26,7 @@ endif OBJECTS += $(obj)/head.o $(obj)/real2.o $(obj)/firmware.o $(obj)/misc.o $(obj)/piggy.o LDFLAGS_vmlinux := -X -e startup --as-needed -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC) +$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC) FORCE $(call if_changed,ld) sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\|parisc_kernel_start\)$$/\#define SZ\2 0x\1/p' @@ -34,7 +34,7 @@ sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\|parisc_kernel_start\ quiet_cmd_sizes = GEN $@ cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@ -$(obj)/sizes.h: vmlinux +$(obj)/sizes.h: vmlinux FORCE $(call if_changed,sizes) AFLAGS_head.o += -I$(objtree)/$(obj) -DBOOTLOADER @@ -70,19 +70,19 @@ suffix-$(CONFIG_KERNEL_LZMA) := lzma suffix-$(CONFIG_KERNEL_LZO) := lzo suffix-$(CONFIG_KERNEL_XZ) := xz -$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE $(call if_changed,gzip) -$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE $(call if_changed,bzip2) -$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE $(call if_changed,lz4) -$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzma) -$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzo) -$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE $(call if_changed,xzkern) LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) +$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE $(call if_changed,ld) -- cgit From 1260dea6d2eb75706c978da828a36f0def590d3a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 4 Sep 2021 23:49:26 +0200 Subject: parisc: Drop strnlen_user() in favour of generic version As suggested by Arnd Bergmann, drop the parisc version of strnlen_user() and switch to the generic version. 
Suggested-by: Arnd Bergmann Acked-by: Arnd Bergmann Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 1 - arch/parisc/include/asm/uaccess.h | 3 +-- arch/parisc/kernel/parisc_ksyms.c | 1 - arch/parisc/lib/lusercopy.S | 34 ---------------------------------- 4 files changed, 1 insertion(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 3001a7d8fc76..86e879608e2a 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -10,7 +10,6 @@ config PARISC select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_HAS_STRNLEN_USER select ARCH_NO_SG_CHAIN select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 7c13314aae4a..cfb4ca7e39fe 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -201,13 +201,12 @@ struct exception_table_entry { extern long strncpy_from_user(char *, const char __user *, long); extern unsigned lclear_user(void __user *, unsigned long); -extern long lstrnlen_user(const char __user *, long); +extern __must_check long strnlen_user(const char __user *src, long n); /* * Complex access routines -- macros */ #define user_addr_max() (~0UL) -#define strnlen_user lstrnlen_user #define clear_user lclear_user #define __clear_user lclear_user diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index e8a6a751dfd8..00297e8e1c88 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -32,7 +32,6 @@ EXPORT_SYMBOL(__xchg64); #include EXPORT_SYMBOL(lclear_user); -EXPORT_SYMBOL(lstrnlen_user); #ifndef CONFIG_64BIT /* Needed so insmod can set dp value */ diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S index 36d6a8638ead..0aad5ce89f4d 100644 --- a/arch/parisc/lib/lusercopy.S +++ b/arch/parisc/lib/lusercopy.S @@ -67,40 +67,6 @@ $lclu_done: ENDPROC_CFI(lclear_user) - /* - * long lstrnlen_user(char *s, long n) - * - * Returns 0 if exception before zero byte or reaching N, - * N+1 if N would be exceeded, - * else strlen + 1 (i.e. includes zero byte). 
- */ - -ENTRY_CFI(lstrnlen_user) - comib,= 0,%r25,$lslen_nzero - copy %r26,%r24 - get_sr -1: ldbs,ma 1(%sr1,%r26),%r1 -$lslen_loop: - comib,=,n 0,%r1,$lslen_done - addib,<> -1,%r25,$lslen_loop -2: ldbs,ma 1(%sr1,%r26),%r1 -$lslen_done: - bv %r0(%r2) - sub %r26,%r24,%r28 - -$lslen_nzero: - b $lslen_done - ldo 1(%r26),%r26 /* special case for N == 0 */ - -3: b $lslen_done - copy %r24,%r26 /* reset r26 so 0 is returned on fault */ - - ASM_EXCEPTIONTABLE_ENTRY(1b,3b) - ASM_EXCEPTIONTABLE_ENTRY(2b,3b) - -ENDPROC_CFI(lstrnlen_user) - - /* * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) * -- cgit From ea4b3fca18adc81e6a965b2ad0668f38d5d3485b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 6 Sep 2021 22:45:16 +0200 Subject: parisc: Drop useless debug info and comments from signal.c Signed-off-by: Helge Deller --- arch/parisc/kernel/signal.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index db1a47cf424d..77db707ce391 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -293,16 +293,6 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]); err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]); -#if DEBUG_SIG - /* Assert that we're flushing in the correct space... */ - { - unsigned long sid; - asm ("mfsp %%sr3,%0" : "=r" (sid)); - DBG(1,"setup_rt_frame: Flushing 64 bytes at space %#x offset %p\n", - sid, frame->tramp); - } -#endif - start = (unsigned long) &frame->tramp[0]; end = (unsigned long) &frame->tramp[TRAMP_SIZE]; flush_user_dcache_range_asm(start, end); @@ -501,7 +491,6 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) DBG(1,"ERESTARTNOHAND: returning -EINTR\n"); regs->gr[28] = -EINTR; break; - case -ERESTARTSYS: if (!(ka->sa.sa_flags & SA_RESTART)) { DBG(1,"ERESTARTSYS: putting -EINTR\n"); @@ -569,10 +558,6 @@ insert_restart_trampoline(struct pt_regs *regs) } /* - * Note that 'init' is a special process: it doesn't get signals it doesn't - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. - * * We need to be able to restore the syscall arguments (r21-r26) to * restart syscalls. Thus, the syscall path should save them in the * pt_regs structure (it's okay to do so since they are caller-save -- cgit From 3e4a1aff2a97cb4fd7f0268e4b69e8c9d3641277 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 5 Sep 2021 11:53:32 +0200 Subject: parisc: Check user signal stack trampoline is inside TASK_SIZE Add some additional checks to ensure the signal stack is inside userspace bounds. Signed-off-by: Helge Deller --- arch/parisc/kernel/signal.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 77db707ce391..46b1050640b8 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -237,18 +237,22 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, #endif usp = (regs->gr[30] & ~(0x01UL)); + sigframe_size = PARISC_RT_SIGFRAME_SIZE; #ifdef CONFIG_64BIT if (is_compat_task()) { /* The gcc alloca implementation leaves garbage in the upper 32 bits of sp */ usp = (compat_uint_t)usp; + sigframe_size = PARISC_RT_SIGFRAME_SIZE32; } #endif - /*FIXME: frame_size parameter is unused, remove it. 
*/ - frame = get_sigframe(&ksig->ka, usp, sizeof(*frame)); + frame = get_sigframe(&ksig->ka, usp, sigframe_size); DBG(1,"SETUP_RT_FRAME: START\n"); DBG(1,"setup_rt_frame: frame %p info %p\n", frame, ksig->info); + start = (unsigned long) frame; + if (start >= user_addr_max() - sigframe_size) + return -EFAULT; #ifdef CONFIG_64BIT @@ -343,11 +347,6 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, /* The syscall return path will create IAOQ values from r31. */ - sigframe_size = PARISC_RT_SIGFRAME_SIZE; -#ifdef CONFIG_64BIT - if (is_compat_task()) - sigframe_size = PARISC_RT_SIGFRAME_SIZE32; -#endif if (in_syscall) { regs->gr[31] = haddr; #ifdef CONFIG_64BIT @@ -518,6 +517,10 @@ insert_restart_trampoline(struct pt_regs *regs) unsigned long end = (unsigned long) &usp[5]; long err = 0; + /* check that we don't exceed the stack */ + if (A(&usp[0]) >= user_addr_max() - 5 * sizeof(int)) + return; + /* Setup a trampoline to restart the syscall * with __NR_restart_syscall * -- cgit From e4f2006f1287e7ea17660490569cff323772dac4 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 7 Sep 2021 05:03:29 +0200 Subject: parisc: Reduce sigreturn trampoline to 3 instructions We can move the INSN_LDI_R20 instruction into the branch delay slot. Signed-off-by: Helge Deller --- arch/parisc/include/asm/rt_sigframe.h | 2 +- arch/parisc/kernel/signal.c | 13 ++++++------- arch/parisc/kernel/signal32.h | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h index 2b3010ade00e..4b9e3d707571 100644 --- a/arch/parisc/include/asm/rt_sigframe.h +++ b/arch/parisc/include/asm/rt_sigframe.h @@ -2,7 +2,7 @@ #ifndef _ASM_PARISC_RT_SIGFRAME_H #define _ASM_PARISC_RT_SIGFRAME_H -#define SIGRETURN_TRAMP 4 +#define SIGRETURN_TRAMP 3 #define SIGRESTARTBLOCK_TRAMP 5 #define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP) diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 46b1050640b8..bbfe23c40c01 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -288,22 +288,21 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, already in userspace. The first words of tramp are used to save the previous sigrestartblock trampoline that might be on the stack. We start the sigreturn trampoline at - SIGRESTARTBLOCK_TRAMP+X. */ + SIGRESTARTBLOCK_TRAMP. */ err |= __put_user(in_syscall ? 
INSN_LDI_R25_1 : INSN_LDI_R25_0,
 &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
- err |= __put_user(INSN_LDI_R20,
- &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
 err |= __put_user(INSN_BLE_SR2_R0,
+ &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
+ err |= __put_user(INSN_LDI_R20,
 &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
- err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
- start = (unsigned long) &frame->tramp[0];
- end = (unsigned long) &frame->tramp[TRAMP_SIZE];
+ start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0];
+ end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3];
 flush_user_dcache_range_asm(start, end);
 flush_user_icache_range_asm(start, end);

 /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
- * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
+ * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP
 * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
 */
 rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index f166250f2d06..a5bdbb5678b7 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -36,7 +36,7 @@ struct compat_regfile {
 compat_int_t rf_sar;
 };

-#define COMPAT_SIGRETURN_TRAMP 4
+#define COMPAT_SIGRETURN_TRAMP 3
 #define COMPAT_SIGRESTARTBLOCK_TRAMP 5
 #define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
 COMPAT_SIGRESTARTBLOCK_TRAMP)
-- cgit

From d97180ad68bdb7ee10f327205a649bc2f558741d Mon Sep 17 00:00:00 2001
From: Helge Deller
Date: Wed, 8 Sep 2021 23:27:00 +0200
Subject: parisc: Mark sched_clock unstable only if clocks are not synchronized

We check at runtime if the cr16 clocks are stable across CPUs. Only mark
the sched_clock unstable by calling clear_sched_clock_stable() if we know
that we run on a system which isn't synchronized across CPUs.

Signed-off-by: Helge Deller
---
 arch/parisc/kernel/setup.c | 2 --
 arch/parisc/kernel/time.c | 7 +++----
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 3fb86ee507dd..cceb09855e03 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -150,8 +150,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_PA11
 dma_ops_init();
 #endif
-
- clear_sched_clock_stable();
 }

 /*
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 08e4d480abe1..9fb1e794831b 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -265,6 +265,9 @@ static int __init init_cr16_clocksource(void)
 (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc))
 continue;

+ /* mark sched_clock unstable */
+ clear_sched_clock_stable();
+
 clocksource_cr16.name = "cr16_unstable";
 clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
 clocksource_cr16.rating = 0;
@@ -272,10 +275,6 @@ static int __init init_cr16_clocksource(void)
 }
 }

- /* XXX: We may want to mark sched_clock stable here if cr16 clocks are
- * in sync:
- * (clocksource_cr16.flags == CLOCK_SOURCE_IS_CONTINUOUS) */
-
 /* register at clocksource framework */
 clocksource_register_hz(&clocksource_cr16,
 100 * PAGE0->mem_10msec);
-- cgit

From 88053ec8cb1b91df566353cd3116470193797e00 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Thu, 26 Aug 2021 18:56:13 +0200
Subject: arm64: mm: limit linear region to 51 bits for KVM in nVHE mode

KVM in nVHE mode divides up its VA space into two equal halves, and
picks the half that does not conflict with the HYP ID map to map its
linear region.
This worked fine when the kernel's linear map itself was guaranteed to cover precisely as many bits of VA space, but this was changed by commit f4693c2716b35d08 ("arm64: mm: extend linear region for 52-bit VA configurations"). The result is that, depending on the placement of the ID map, kernel-VA to hyp-VA translations may produce addresses that either conflict with other HYP mappings (including the ID map itself) or generate addresses outside of the 52-bit addressable range, neither of which is likely to lead to anything useful. Given that 52-bit capable cores are guaranteed to implement VHE, this only affects configurations such as pKVM where we opt into non-VHE mode even if the hardware is VHE capable. So just for these configurations, let's limit the kernel linear map to 51 bits and work around the problem. Fixes: f4693c2716b3 ("arm64: mm: extend linear region for 52-bit VA configurations") Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210826165613.60774-1-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8490ed2917ff..2b28d77e29a0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -282,7 +282,21 @@ static void __init fdt_enforce_memory_region(void) void __init arm64_memblock_init(void) { - const s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual); + s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual); + + /* + * Corner case: 52-bit VA capable systems running KVM in nVHE mode may + * be limited in their ability to support a linear map that exceeds 51 + * bits of VA space, depending on the placement of the ID map. Given + * that the placement of the ID map may be randomized, let's simply + * limit the kernel's linear map to 51 bits as well if we detect this + * configuration. + */ + if (IS_ENABLED(CONFIG_KVM) && vabits_actual == 52 && + is_hyp_mode_available() && !is_kernel_in_hyp_mode()) { + pr_info("Capping linear region to 51 bits for KVM in nVHE mode on LVA capable hardware.\n"); + linear_region_size = min_t(u64, linear_region_size, BIT(51)); + } /* Handle linux,usable-memory-range property */ fdt_enforce_memory_region(); -- cgit From 671028728083e856e9919221b109e3b2cd2ccc49 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 9 Sep 2021 12:47:00 +0200 Subject: parisc: Implement __get/put_kernel_nofault() Remove CONFIG_SET_FS from parisc, so we need to add __get_kernel_nofault() and __put_kernel_nofault(), define HAVE_GET_KERNEL_NOFAULT and remove set_fs(), get_fs(), load_sr2(), thread_info->addr_limit, KERNEL_DS and USER_DS. The nice side-effect of this patch is that we now can directly access userspace via sr3 without the need to use a temporary sr2 which is either copied from sr3 or set to zero (for kernel space). 
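For context on how these hooks get consumed: once HAVE_GET_KERNEL_NOFAULT is
defined, the generic copy_from_kernel_nofault() in mm/maccess.c expands
__get_kernel_nofault() once per access size. The following is a hedged,
simplified sketch of that pattern, not the upstream code; the function name is
invented, and the real version additionally checks
copy_from_kernel_nofault_allowed():

    /* Illustrative sketch: copy 'size' bytes from a kernel address that
     * may fault, returning -EFAULT instead of oopsing. */
    static long copy_from_kernel_nofault_sketch(void *dst, const void *src,
    					    size_t size)
    {
    	unsigned long align = (unsigned long)src | (unsigned long)dst;

    	pagefault_disable();
    	if (!(align & (sizeof(long) - 1))) {
    		while (size >= sizeof(long)) {
    			/* jumps to 'fault' if the access faults */
    			__get_kernel_nofault(dst, src, unsigned long, fault);
    			dst += sizeof(long);
    			src += sizeof(long);
    			size -= sizeof(long);
    		}
    	}
    	while (size) {
    		__get_kernel_nofault(dst, src, u8, fault);
    		dst++;
    		src++;
    		size--;
    	}
    	pagefault_enable();
    	return 0;
    fault:
    	pagefault_enable();
    	return -EFAULT;
    }

On parisc the kernel case simply selects space register %sr0 instead of %sr3,
which is what makes the set_fs()/load_sr2() machinery removable below.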
Signed-off-by: Helge Deller Suggested-by: Arnd Bergmann --- arch/parisc/Kconfig | 1 - arch/parisc/include/asm/processor.h | 4 -- arch/parisc/include/asm/thread_info.h | 2 - arch/parisc/include/asm/uaccess.h | 122 +++++++++++++++++----------------- arch/parisc/kernel/asm-offsets.c | 1 - arch/parisc/lib/lusercopy.S | 18 +---- 6 files changed, 62 insertions(+), 86 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 86e879608e2a..4742b6f169b7 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -64,7 +64,6 @@ config PARISC select HAVE_KPROBES_ON_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS - select SET_FS select TRACE_IRQFLAGS_SUPPORT help diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index b5fbcd2c1780..eeb7da064289 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -101,10 +101,6 @@ DECLARE_PER_CPU(struct cpuinfo_parisc, cpu_data); #define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF) -typedef struct { - int seg; -} mm_segment_t; - #define ARCH_MIN_TASKALIGN 8 struct thread_struct { diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 0bd38a972cea..00ad50fef769 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -11,7 +11,6 @@ struct thread_info { struct task_struct *task; /* main task structure */ unsigned long flags; /* thread_info flags (see TIF_*) */ - mm_segment_t addr_limit; /* user-level address space limit */ __u32 cpu; /* current CPU */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ }; @@ -21,7 +20,6 @@ struct thread_info { .task = &tsk, \ .flags = 0, \ .cpu = 0, \ - .addr_limit = KERNEL_DS, \ .preempt_count = INIT_PREEMPT_COUNT, \ } diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index cfb4ca7e39fe..192ad9e11b25 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -11,14 +11,6 @@ #include #include -#define KERNEL_DS ((mm_segment_t){0}) -#define USER_DS ((mm_segment_t){1}) - -#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) - -#define get_fs() (current_thread_info()->addr_limit) -#define set_fs(x) (current_thread_info()->addr_limit = (x)) - /* * Note that since kernel addresses are in a separate address space on * parisc, we don't need to do anything for access_ok(). @@ -33,11 +25,11 @@ #define get_user __get_user #if !defined(CONFIG_64BIT) -#define LDD_USER(val, ptr) __get_user_asm64(val, ptr) -#define STD_USER(x, ptr) __put_user_asm64(x, ptr) +#define LDD_USER(sr, val, ptr) __get_user_asm64(sr, val, ptr) +#define STD_USER(sr, x, ptr) __put_user_asm64(sr, x, ptr) #else -#define LDD_USER(val, ptr) __get_user_asm(val, "ldd", ptr) -#define STD_USER(x, ptr) __put_user_asm("std", x, ptr) +#define LDD_USER(sr, val, ptr) __get_user_asm(sr, val, "ldd", ptr) +#define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr) #endif /* @@ -67,28 +59,15 @@ struct exception_table_entry { #define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) -/* - * load_sr2() preloads the space register %%sr2 - based on the value of - * get_fs() - with either a value of 0 to access kernel space (KERNEL_DS which - * is 0), or with the current value of %%sr3 to access user space (USER_DS) - * memory. 
The following __get_user_asm() and __put_user_asm() functions have - * %%sr2 hard-coded to access the requested memory. - */ -#define load_sr2() \ - __asm__(" or,= %0,%%r0,%%r0\n\t" \ - " mfsp %%sr3,%0\n\t" \ - " mtsp %0,%%sr2\n\t" \ - : : "r"(get_fs()) : ) - -#define __get_user_internal(val, ptr) \ +#define __get_user_internal(sr, val, ptr) \ ({ \ register long __gu_err __asm__ ("r8") = 0; \ \ switch (sizeof(*(ptr))) { \ - case 1: __get_user_asm(val, "ldb", ptr); break; \ - case 2: __get_user_asm(val, "ldh", ptr); break; \ - case 4: __get_user_asm(val, "ldw", ptr); break; \ - case 8: LDD_USER(val, ptr); break; \ + case 1: __get_user_asm(sr, val, "ldb", ptr); break; \ + case 2: __get_user_asm(sr, val, "ldh", ptr); break; \ + case 4: __get_user_asm(sr, val, "ldw", ptr); break; \ + case 8: LDD_USER(sr, val, ptr); break; \ default: BUILD_BUG(); \ } \ \ @@ -97,15 +76,14 @@ struct exception_table_entry { #define __get_user(val, ptr) \ ({ \ - load_sr2(); \ - __get_user_internal(val, ptr); \ + __get_user_internal("%%sr3,", val, ptr); \ }) -#define __get_user_asm(val, ldx, ptr) \ +#define __get_user_asm(sr, val, ldx, ptr) \ { \ register long __gu_val; \ \ - __asm__("1: " ldx " 0(%%sr2,%2),%0\n" \ + __asm__("1: " ldx " 0(" sr "%2),%0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ : "=r"(__gu_val), "=r"(__gu_err) \ @@ -114,9 +92,22 @@ struct exception_table_entry { (val) = (__force __typeof__(*(ptr))) __gu_val; \ } +#define HAVE_GET_KERNEL_NOFAULT +#define __get_kernel_nofault(dst, src, type, err_label) \ +{ \ + type __z; \ + long __err; \ + __err = __get_user_internal("%%sr0,", __z, (type *)(src)); \ + if (unlikely(__err)) \ + goto err_label; \ + else \ + *(type *)(dst) = __z; \ +} + + #if !defined(CONFIG_64BIT) -#define __get_user_asm64(val, ptr) \ +#define __get_user_asm64(sr, val, ptr) \ { \ union { \ unsigned long long l; \ @@ -124,8 +115,8 @@ struct exception_table_entry { } __gu_tmp; \ \ __asm__(" copy %%r0,%R0\n" \ - "1: ldw 0(%%sr2,%2),%0\n" \ - "2: ldw 4(%%sr2,%2),%R0\n" \ + "1: ldw 0(" sr "%2),%0\n" \ + "2: ldw 4(" sr "%2),%R0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ @@ -138,16 +129,16 @@ struct exception_table_entry { #endif /* !defined(CONFIG_64BIT) */ -#define __put_user_internal(x, ptr) \ +#define __put_user_internal(sr, x, ptr) \ ({ \ register long __pu_err __asm__ ("r8") = 0; \ __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x); \ \ switch (sizeof(*(ptr))) { \ - case 1: __put_user_asm("stb", __x, ptr); break; \ - case 2: __put_user_asm("sth", __x, ptr); break; \ - case 4: __put_user_asm("stw", __x, ptr); break; \ - case 8: STD_USER(__x, ptr); break; \ + case 1: __put_user_asm(sr, "stb", __x, ptr); break; \ + case 2: __put_user_asm(sr, "sth", __x, ptr); break; \ + case 4: __put_user_asm(sr, "stw", __x, ptr); break; \ + case 8: STD_USER(sr, __x, ptr); break; \ default: BUILD_BUG(); \ } \ \ @@ -156,10 +147,20 @@ struct exception_table_entry { #define __put_user(x, ptr) \ ({ \ - load_sr2(); \ - __put_user_internal(x, ptr); \ + __put_user_internal("%%sr3,", x, ptr); \ }) +#define __put_kernel_nofault(dst, src, type, err_label) \ +{ \ + type __z = *(type *)(src); \ + long __err; \ + __err = __put_user_internal("%%sr0,", __z, (type *)(dst)); \ + if (unlikely(__err)) \ + goto err_label; \ +} + + + /* * The "__put_user/kernel_asm()" macros tell gcc they read from memory @@ -170,26 +171,26 @@ struct exception_table_entry { * r8 is already listed as err. 
*/ -#define __put_user_asm(stx, x, ptr) \ - __asm__ __volatile__ ( \ - "1: " stx " %2,0(%%sr2,%1)\n" \ - "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - : "=r"(__pu_err) \ +#define __put_user_asm(sr, stx, x, ptr) \ + __asm__ __volatile__ ( \ + "1: " stx " %2,0(" sr "%1)\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + : "=r"(__pu_err) \ : "r"(ptr), "r"(x), "0"(__pu_err)) #if !defined(CONFIG_64BIT) -#define __put_user_asm64(__val, ptr) do { \ - __asm__ __volatile__ ( \ - "1: stw %2,0(%%sr2,%1)\n" \ - "2: stw %R2,4(%%sr2,%1)\n" \ - "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ - : "=r"(__pu_err) \ - : "r"(ptr), "r"(__val), "0"(__pu_err)); \ +#define __put_user_asm64(sr, __val, ptr) do { \ + __asm__ __volatile__ ( \ + "1: stw %2,0(" sr "%1)\n" \ + "2: stw %R2,4(" sr "%1)\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + : "=r"(__pu_err) \ + : "r"(ptr), "r"(__val), "0"(__pu_err)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ @@ -200,12 +201,11 @@ struct exception_table_entry { */ extern long strncpy_from_user(char *, const char __user *, long); -extern unsigned lclear_user(void __user *, unsigned long); +extern __must_check unsigned lclear_user(void __user *, unsigned long); extern __must_check long strnlen_user(const char __user *src, long n); /* * Complex access routines -- macros */ -#define user_addr_max() (~0UL) #define clear_user lclear_user #define __clear_user lclear_user diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 33113ba24054..22924a3f1728 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -230,7 +230,6 @@ int main(void) DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); - DEFINE(TI_SEGMENT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); DEFINE(THREAD_SZ, sizeof(struct thread_info)); /* THREAD_SZ_ALGN includes space for a stack frame. */ diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S index 0aad5ce89f4d..b428d29e45fb 100644 --- a/arch/parisc/lib/lusercopy.S +++ b/arch/parisc/lib/lusercopy.S @@ -27,21 +27,6 @@ #include #include - /* - * get_sr gets the appropriate space value into - * sr1 for kernel/user space access, depending - * on the flag stored in the task structure. 
- */
-
- .macro get_sr
- mfctl %cr30,%r1
- ldw TI_SEGMENT(%r1),%r22
- mfsp %sr3,%r1
- or,<> %r22,%r0,%r0
- copy %r0,%r1
- mtsp %r1,%sr1
- .endm
-
 /*
 * unsigned long lclear_user(void *to, unsigned long n)
 *
@@ -51,10 +36,9 @@ ENTRY_CFI(lclear_user)
 comib,=,n 0,%r25,$lclu_done
- get_sr
$lclu_loop:
 addib,<> -1,%r25,$lclu_loop
-1: stbs,ma %r0,1(%sr1,%r26)
+1: stbs,ma %r0,1(%sr3,%r26)
$lclu_done:
 bv %r0(%r2)
-- cgit

From 85f58eb1889826b9745737718723a80b639e0fbd Mon Sep 17 00:00:00 2001
From: Chen Wandun
Date: Fri, 10 Sep 2021 14:48:44 +0800
Subject: arm64: kdump: Skip kmemleak scan of reserved memory for kdump

When booting with kdump + kmemleak, the following command results in a
crash:

"echo scan > /sys/kernel/debug/kmemleak"

crashkernel reserved: 0x0000000007c00000 - 0x0000000027c00000 (512 MB)
Kernel command line: BOOT_IMAGE=(hd1,gpt2)/vmlinuz-5.14.0-rc5-next-20210809+ root=/dev/mapper/ao-root ro rd.lvm.lv=ao/root rd.lvm.lv=ao/swap crashkernel=512M
Unable to handle kernel paging request at virtual address ffff000007c00000
Mem abort info:
 ESR = 0x96000007
 EC = 0x25: DABT (current EL), IL = 32 bits
 SET = 0, FnV = 0
 EA = 0, S1PTW = 0
 FSC = 0x07: level 3 translation fault
Data abort info:
 ISV = 0, ISS = 0x00000007
 CM = 0, WnR = 0
swapper pgtable: 64k pages, 48-bit VAs, pgdp=00002024f0d80000
[ffff000007c00000] pgd=1800205ffffd0003, p4d=1800205ffffd0003, pud=1800205ffffd0003, pmd=1800205ffffc0003, pte=0068000007c00f06
Internal error: Oops: 96000007 [#1] SMP
pstate: 804000c9 (Nzcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : scan_block+0x98/0x230
lr : scan_block+0x94/0x230
sp : ffff80008d6cfb70
x29: ffff80008d6cfb70 x28: 0000000000000000 x27: 0000000000000000
x26: 00000000000000c0 x25: 0000000000000001 x24: 0000000000000000
x23: ffffa88a6b18b398 x22: ffff000007c00ff9 x21: ffffa88a6ac7fc40
x20: ffffa88a6af6a830 x19: ffff000007c00000 x18: 0000000000000000
x17: 0000000000000000 x16: 0000000000000000 x15: ffffffffffffffff
x14: ffffffff00000000 x13: ffffffffffffffff x12: 0000000000000020
x11: 0000000000000000 x10: 0000000001080000 x9 : ffffa88a6951c77c
x8 : ffffa88a6a893988 x7 : ffff203ff6cfb3c0 x6 : ffffa88a6a52b3c0
x5 : ffff203ff6cfb3c0 x4 : 0000000000000000 x3 : 0000000000000000
x2 : 0000000000000001 x1 : ffff20226cb56a40 x0 : 0000000000000000
Call trace:
 scan_block+0x98/0x230
 scan_gray_list+0x120/0x270
 kmemleak_scan+0x3a0/0x648
 kmemleak_write+0x3ac/0x4c8
 full_proxy_write+0x6c/0xa0
 vfs_write+0xc8/0x2b8
 ksys_write+0x70/0xf8
 __arm64_sys_write+0x24/0x30
 invoke_syscall+0x4c/0x110
 el0_svc_common+0x9c/0x190
 do_el0_svc+0x30/0x98
 el0_svc+0x28/0xd8
 el0t_64_sync_handler+0x90/0xb8
 el0t_64_sync+0x180/0x184

The memory reserved for kdump is looked up by kmemleak, but this area is
invalidated when the kdump service is brought up, so a subsequent
kmemleak scan of it crashes the kernel.
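The fix, shown in full in the diff below, pairs the crashkernel
reservation with a kmemleak opt-out. Condensed to its essence (a sketch
only; 'search_limit' is a placeholder for the real upper bound used by
the surrounding arm64 code):

    /* Reserve the crashkernel window, then tell kmemleak to stay away,
     * since this range is later removed from the kernel linear map. */
    crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
    				       0, search_limit);
    if (crash_base)
    	kmemleak_ignore_phys(crash_base);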
Fixes: a7259df76702 ("memblock: make memblock_find_in_range method private")
Signed-off-by: Chen Wandun
Reviewed-by: Kefeng Wang
Reviewed-by: Mike Rapoport
Reviewed-by: Catalin Marinas
Link: https://lore.kernel.org/r/20210910064844.3827813-1-chenwandun@huawei.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/init.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 2b28d77e29a0..5a16f5715716 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -115,6 +116,11 @@ static void __init reserve_crashkernel(void)
 pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
 crash_base, crash_base + crash_size, crash_size >> 20);

+ /*
+ * The crashkernel memory will be removed from the kernel linear
+ * map. Inform kmemleak so that it won't try to access it.
+ */
+ kmemleak_ignore_phys(crash_base);
 crashk_res.start = crash_base;
 crashk_res.end = crash_base + crash_size - 1;
 }
-- cgit

From 3a87ff891290e1b9ef3f03396c22fd3b2f6eb955 Mon Sep 17 00:00:00 2001
From: Heinrich Schuchardt
Date: Thu, 12 Aug 2021 10:10:26 +0200
Subject: riscv: defconfig: enable BLK_DEV_NVME

NVMe is a non-volatile storage medium attached via PCIe. As NVMe has
much higher throughput than other block devices like SATA, it is a
must-have for RISC-V. Enable CONFIG_BLK_DEV_NVME.

The HiFive Unmatched is a board providing M.2 slots for NVMe drives.
Enable CONFIG_PCIE_FU740.

Signed-off-by: Heinrich Schuchardt
Reviewed-by: Anup Patel
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/configs/defconfig | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index bc68231a8fb7..a7d915a5932e 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -39,10 +39,12 @@ CONFIG_PCI=y
 CONFIG_PCIEPORTBUS=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCIE_XILINX=y
+CONFIG_PCIE_FU740=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_NVME=m
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_SCSI_VIRTIO=y
-- cgit

From efe1e08bca9a1fab2b0ad886be4fb5335dcbf29c Mon Sep 17 00:00:00 2001
From: Heinrich Schuchardt
Date: Thu, 12 Aug 2021 10:10:27 +0200
Subject: riscv: defconfig: enable NLS_CODEPAGE_437, NLS_ISO8859_1

The EFI system partition uses the FAT file system. Many distributions
add an entry in /etc/fstab for the ESP. We must ensure that mounting
does not fail.

The default code page for FAT is 437 (cf. CONFIG_FAT_DEFAULT_CODEPAGE).
The default IO character set is "iso8859-1" (cf. CONFIG_NLS_ISO8859_1).

So let's enable NLS_CODEPAGE_437 and NLS_ISO8859_1 in defconfig.
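To illustrate the /etc/fstab point, an ESP entry of the kind this change
keeps working might look like the following (a hedged example: the UUID
and mount point are made up, and the codepage/iocharset options merely
spell out the defaults named above):

    UUID=ABCD-1234  /boot/efi  vfat  umask=0077,codepage=437,iocharset=iso8859-1  0  2

Without these NLS options available in the kernel, such a mount fails at
boot time.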
Signed-off-by: Heinrich Schuchardt
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/configs/defconfig | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index a7d915a5932e..4ebc80315f01 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -110,6 +110,8 @@ CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
 CONFIG_9P_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=m
 CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_PRINTK_TIME=y
-- cgit

From d5935537c8256fc63c77d5f4914dfd6e3ef43241 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Thu, 12 Aug 2021 19:47:02 +0800
Subject: riscv: Improve stack randomisation on RV64

This enlarges the bits available for stack randomisation on RV64 from
the default of 8MiB to 1GiB, to match arm64 and x86. Also, update the
documentation to reflect our support for stack randomisation.

Signed-off-by: Kefeng Wang
[Palmer: commit text]
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/include/asm/elf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index f4b490cd0e5d..f53c40026c7a 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -42,6 +42,9 @@
 */
 #define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2)

+#ifdef CONFIG_64BIT
+#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12))
+#endif
 /*
 * This yields a mask that user programs can use to figure out what
 * instruction set this CPU supports. This could be done in user space,
-- cgit

From 399c1ec8467c563ae9db4c19a40a9d5e728b1e72 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Thu, 29 Jul 2021 23:21:47 +0900
Subject: riscv: move the (z)install rules to arch/riscv/Makefile

Currently, the (z)install targets in arch/riscv/Makefile descend into
arch/riscv/boot/Makefile to invoke the shell script, but there is no
good reason to do so. arch/riscv/Makefile can run the shell script
directly.
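The user-visible interface is unchanged by this move; an example
invocation (install path and cross-compiler prefix are illustrative):

    $ make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- install INSTALL_PATH=/mnt/boot
    $ make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- zinstall INSTALL_PATH=/mnt/boot

Both targets now run boot/install.sh straight from arch/riscv/Makefile,
passing the kernel release, Image or Image.gz, System.map and the
install path, exactly as the script was invoked before.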
Signed-off-by: Masahiro Yamada Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 7 +++++-- arch/riscv/boot/Makefile | 8 -------- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 01906a90c501..0eb4568fbd29 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -132,8 +132,11 @@ $(BOOT_TARGETS): vmlinux Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -zinstall install: - $(Q)$(MAKE) $(build)=$(boot) $@ +install: install-image = Image +zinstall: install-image = Image.gz +install zinstall: + $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \ + $(boot)/$(install-image) System.map "$(INSTALL_PATH)" archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 6bf299f70c27..becd0621071c 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -58,11 +58,3 @@ $(obj)/Image.lzo: $(obj)/Image FORCE $(obj)/loader.bin: $(obj)/loader FORCE $(call if_changed,objcopy) - -install: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ - $(obj)/Image System.map "$(INSTALL_PATH)" - -zinstall: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ - $(obj)/Image.gz System.map "$(INSTALL_PATH)" -- cgit From cbba17870881cd17bca24673ccb72859431da5bd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 26 Aug 2021 15:19:39 +0200 Subject: riscv: dts: microchip: mpfs-icicle: Fix serial console Currently, nothing is output on the serial console, unless "console=ttyS0,115200n8" or "earlycon" are appended to the kernel command line. Enable automatic console selection using chosen/stdout-path by adding a proper alias, and configure the expected serial rate. While at it, add aliases for the other three serial ports, which are provided on the same micro-USB connector as the first one. Fixes: 0fa6107eca4186ad ("RISC-V: Initial DTS for Microchip ICICLE board") Signed-off-by: Geert Uytterhoeven Reviewed-by: Bin Meng Reviewed-by: Conor Dooley Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index baea7d204639..b254c60589a1 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -16,10 +16,14 @@ aliases { ethernet0 = &emac1; + serial0 = &serial0; + serial1 = &serial1; + serial2 = &serial2; + serial3 = &serial3; }; chosen { - stdout-path = &serial0; + stdout-path = "serial0:115200n8"; }; cpus { -- cgit From 54fed35fd3939398be292e4090b0b1c5ff2238b4 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 26 Aug 2021 22:10:29 +0800 Subject: riscv: Enable BUILDTIME_TABLE_SORT Enable BUILDTIME_TABLE_SORT to sort the exception table at build time rather than during boot. 
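The point of sorting ahead of time is that exception-table lookups at
fault time can binary-search the table directly. A simplified sketch of
such a lookup follows (illustrative only: real entries in lib/extable.c
use relative offsets via ex_to_insn(), which this ignores, and the
function name is invented):

    struct exception_table_entry {
    	unsigned long insn;	/* address of faulting instruction */
    	unsigned long fixup;	/* address of fixup code */
    };

    /* Binary search; valid only because the table is sorted by 'insn',
     * which BUILDTIME_TABLE_SORT now guarantees before boot. */
    static const struct exception_table_entry *
    search_extable_sketch(const struct exception_table_entry *base,
    		      unsigned int num, unsigned long addr)
    {
    	unsigned int lo = 0, hi = num;

    	while (lo < hi) {
    		unsigned int mid = lo + (hi - lo) / 2;

    		if (base[mid].insn < addr)
    			lo = mid + 1;
    		else
    			hi = mid;
    	}
    	return (lo < num && base[lo].insn == addr) ? &base[lo] : NULL;
    }

Without a pre-sorted table, sort_main_extable() has to perform the sort
during early boot instead.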
Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index baf60fc350a4..c1abbc876e5b 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -41,6 +41,7 @@ config RISCV select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU + select BUILDTIME_TABLE_SORT if MMU select CLONE_BACKWARDS select CLINT_TIMER if !MMU select COMMON_CLK -- cgit From 6f55ab36bef505b449723300a5a445ddc76a94d7 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 26 Aug 2021 22:11:18 +0800 Subject: riscv: Move EXCEPTION_TABLE to RO_DATA segment _ex_table section is read-only, so move it to RO_DATA. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vmlinux-xip.lds.S | 1 - arch/riscv/kernel/vmlinux.lds.S | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index af776555ded9..9c9f35091ef0 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -121,7 +121,6 @@ SECTIONS } BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) - EXCEPTION_TABLE(0x10) .rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) { *(.rel.dyn*) diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 502d0826ecb1..5104f3a871e3 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -4,6 +4,8 @@ * Copyright (C) 2017 SiFive */ +#define RO_EXCEPTION_TABLE_ALIGN 16 + #ifdef CONFIG_XIP_KERNEL #include "vmlinux-xip.lds.S" #else @@ -112,8 +114,6 @@ SECTIONS *(.srodata*) } - EXCEPTION_TABLE(0x10) - . = ALIGN(SECTION_ALIGN); _data = .; -- cgit
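A closing note on the mechanism: defining RO_EXCEPTION_TABLE_ALIGN
before including the generic linker script is what moves the table,
since include/asm-generic/vmlinux.lds.h emits the exception table inside
RO_DATA when that macro is defined. Roughly (abbreviated from the
generic header; consult it for the exact form):

    #ifdef RO_EXCEPTION_TABLE_ALIGN
    #define RO_EXCEPTION_TABLE	EXCEPTION_TABLE(RO_EXCEPTION_TABLE_ALIGN)
    #else
    #define RO_EXCEPTION_TABLE
    #endif

    /* RO_DATA() then expands RO_EXCEPTION_TABLE within the read-only
     * segment, so __ex_table lands alongside .rodata. */

This is why the standalone EXCEPTION_TABLE(0x10) invocations can simply
be deleted from both riscv linker scripts: the define sits ahead of the
CONFIG_XIP_KERNEL include, so the XIP layout picks it up as well.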