From cb8a2ef0848ca80d67d6d56e2df757cfdf6b3355 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Mon, 11 Mar 2024 22:23:47 +0800
Subject: LoongArch: Add ORC stack unwinder support

The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
similar in concept to a DWARF unwinder. The difference is that the format
of the ORC data is much simpler than DWARF, which in turn allows the ORC
unwinder to be much simpler and faster.

The ORC data consists of unwind tables which are generated by objtool.
After analyzing all the code paths of a .o file, it determines information
about the stack state at each instruction address in the file and outputs
that information to the .orc_unwind and .orc_unwind_ip sections.

The per-object ORC sections are combined at link time and are sorted and
post-processed at boot time. The unwinder uses the resulting data to
correlate instruction addresses with their stack states at run time.

Most of the logic are similar with x86, in order to get ra info before ra
is saved into stack, add ra_reg and ra_offset into orc_entry. At the same
time, modify some arch-specific code to silence the objtool warnings.

Co-developed-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Jinyang He <hejinyang@loongson.cn>
Co-developed-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/Kbuild         |  2 ++
 arch/loongarch/include/asm/bug.h          |  1 +
 arch/loongarch/include/asm/exception.h    |  2 ++
 arch/loongarch/include/asm/module.h       |  7 ++++
 arch/loongarch/include/asm/orc_header.h   | 18 ++++++++++
 arch/loongarch/include/asm/orc_lookup.h   | 31 +++++++++++++++++
 arch/loongarch/include/asm/orc_types.h    | 58 +++++++++++++++++++++++++++++++
 arch/loongarch/include/asm/stackframe.h   |  3 ++
 arch/loongarch/include/asm/unwind.h       | 20 +++++++++--
 arch/loongarch/include/asm/unwind_hints.h | 28 +++++++++++++++
 10 files changed, 168 insertions(+), 2 deletions(-)
 create mode 100644 arch/loongarch/include/asm/orc_header.h
 create mode 100644 arch/loongarch/include/asm/orc_lookup.h
 create mode 100644 arch/loongarch/include/asm/orc_types.h
 create mode 100644 arch/loongarch/include/asm/unwind_hints.h

(limited to 'arch/loongarch/include/asm')

diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 93783fa24f6e..a97c0edbb866 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+generated-y += orc_hash.h
+
 generic-y += dma-contiguous.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h
index d4ca3ba25418..08388876ade4 100644
--- a/arch/loongarch/include/asm/bug.h
+++ b/arch/loongarch/include/asm/bug.h
@@ -44,6 +44,7 @@
 do {								\
 	instrumentation_begin();				\
 	__BUG_FLAGS(BUGFLAG_WARNING|(flags));			\
+	annotate_reachable();					\
 	instrumentation_end();					\
 } while (0)
 
diff --git a/arch/loongarch/include/asm/exception.h b/arch/loongarch/include/asm/exception.h
index af74a3fdcad1..c6d20736fd92 100644
--- a/arch/loongarch/include/asm/exception.h
+++ b/arch/loongarch/include/asm/exception.h
@@ -6,6 +6,8 @@
 #include <asm/ptrace.h>
 #include <linux/kprobes.h>
 
+extern void *exception_table[];
+
 void show_registers(struct pt_regs *regs);
 
 asmlinkage void cache_parity_error(void);
diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h
index 2ecd82bb64e1..f33f3fd32ecc 100644
--- a/arch/loongarch/include/asm/module.h
+++ b/arch/loongarch/include/asm/module.h
@@ -6,6 +6,7 @@
 #define _ASM_MODULE_H
 
 #include <asm/inst.h>
+#include <asm/orc_types.h>
 #include <asm-generic/module.h>
 
 #define RELA_STACK_DEPTH 16
@@ -21,6 +22,12 @@ struct mod_arch_specific {
 	struct mod_section plt;
 	struct mod_section plt_idx;
 
+#ifdef CONFIG_UNWINDER_ORC
+	unsigned int num_orcs;
+	int *orc_unwind_ip;
+	struct orc_entry *orc_unwind;
+#endif
+
 	/* For CONFIG_DYNAMIC_FTRACE */
 	struct plt_entry *ftrace_trampolines;
 };
diff --git a/arch/loongarch/include/asm/orc_header.h b/arch/loongarch/include/asm/orc_header.h
new file mode 100644
index 000000000000..f9d509c3fd70
--- /dev/null
+++ b/arch/loongarch/include/asm/orc_header.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _ORC_HEADER_H
+#define _ORC_HEADER_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/orc_hash.h>
+
+/*
+ * The header is currently a 20-byte hash of the ORC entry definition; see
+ * scripts/orc_hash.sh.
+ */
+#define ORC_HEADER					\
+	__used __section(".orc_header") __aligned(4)	\
+	static const u8 orc_header[] = { ORC_HASH }
+
+#endif /* _ORC_HEADER_H */
diff --git a/arch/loongarch/include/asm/orc_lookup.h b/arch/loongarch/include/asm/orc_lookup.h
new file mode 100644
index 000000000000..b02e6357def4
--- /dev/null
+++ b/arch/loongarch/include/asm/orc_lookup.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ORC_LOOKUP_H
+#define _ORC_LOOKUP_H
+
+/*
+ * This is a lookup table for speeding up access to the .orc_unwind table.
+ * Given an input address offset, the corresponding lookup table entry
+ * specifies a subset of the .orc_unwind table to search.
+ *
+ * Each block represents the end of the previous range and the start of the
+ * next range.  An extra block is added to give the last range an end.
+ *
+ * The block size should be a power of 2 to avoid a costly 'div' instruction.
+ *
+ * A block size of 256 was chosen because it roughly doubles unwinder
+ * performance while only adding ~5% to the ORC data footprint.
+ */
+#define LOOKUP_BLOCK_ORDER	8
+#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
+
+#ifndef LINKER_SCRIPT
+
+extern unsigned int orc_lookup[];
+extern unsigned int orc_lookup_end[];
+
+#define LOOKUP_START_IP		(unsigned long)_stext
+#define LOOKUP_STOP_IP		(unsigned long)_etext
+
+#endif /* LINKER_SCRIPT */
+
+#endif /* _ORC_LOOKUP_H */
diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h
new file mode 100644
index 000000000000..caf1f71a1057
--- /dev/null
+++ b/arch/loongarch/include/asm/orc_types.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ORC_TYPES_H
+#define _ORC_TYPES_H
+
+#include <linux/types.h>
+
+/*
+ * The ORC_REG_* registers are base registers which are used to find other
+ * registers on the stack.
+ *
+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
+ * address of the previous frame: the caller's SP before it called the current
+ * function.
+ *
+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
+ * the current frame.
+ *
+ * The most commonly used base registers are SP and FP -- which the previous SP
+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous FP is
+ * usually based on.
+ *
+ * The rest of the base registers are needed for special cases like entry code
+ * and GCC realigned stacks.
+ */
+#define ORC_REG_UNDEFINED		0
+#define ORC_REG_PREV_SP			1
+#define ORC_REG_SP			2
+#define ORC_REG_FP			3
+#define ORC_REG_MAX			4
+
+#define ORC_TYPE_UNDEFINED		0
+#define ORC_TYPE_END_OF_STACK		1
+#define ORC_TYPE_CALL			2
+#define ORC_TYPE_REGS			3
+#define ORC_TYPE_REGS_PARTIAL		4
+
+#ifndef __ASSEMBLY__
+/*
+ * This struct is more or less a vastly simplified version of the DWARF Call
+ * Frame Information standard.  It contains only the necessary parts of DWARF
+ * CFI, simplified for ease of access by the in-kernel unwinder.  It tells the
+ * unwinder how to find the previous SP and FP (and sometimes entry regs) on
+ * the stack for a given code address.  Each instance of the struct corresponds
+ * to one or more code locations.
+ */
+struct orc_entry {
+	s16		sp_offset;
+	s16		fp_offset;
+	s16		ra_offset;
+	unsigned int	sp_reg:4;
+	unsigned int	fp_reg:4;
+	unsigned int	ra_reg:4;
+	unsigned int	type:3;
+	unsigned int	signal:1;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ORC_TYPES_H */
diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
index 4fb1e6408b98..45b507a7b06f 100644
--- a/arch/loongarch/include/asm/stackframe.h
+++ b/arch/loongarch/include/asm/stackframe.h
@@ -13,6 +13,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/loongarch.h>
 #include <asm/thread_info.h>
+#include <asm/unwind_hints.h>
 
 /* Make the addition of cfi info a little easier. */
 	.macro cfi_rel_offset reg offset=0 docfi=0
@@ -162,6 +163,7 @@
 	li.w	t0, CSR_CRMD_WE
 	csrxchg	t0, t0, LOONGARCH_CSR_CRMD
 #endif
+	UNWIND_HINT_REGS
 	.endm
 
 	.macro	SAVE_ALL docfi=0
@@ -219,6 +221,7 @@
 
 	.macro	RESTORE_SP_AND_RET docfi=0
 	cfi_ld	sp, PT_R3, \docfi
+	UNWIND_HINT_FUNC
 	ertn
 	.endm
 
diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h
index b9dce87afd2e..40a6763c5aec 100644
--- a/arch/loongarch/include/asm/unwind.h
+++ b/arch/loongarch/include/asm/unwind.h
@@ -16,6 +16,7 @@
 enum unwinder_type {
 	UNWINDER_GUESS,
 	UNWINDER_PROLOGUE,
+	UNWINDER_ORC,
 };
 
 struct unwind_state {
@@ -24,7 +25,7 @@ struct unwind_state {
 	struct task_struct *task;
 	bool first, error, reset;
 	int graph_idx;
-	unsigned long sp, pc, ra;
+	unsigned long sp, fp, pc, ra;
 };
 
 bool default_next_frame(struct unwind_state *state);
@@ -61,14 +62,17 @@ static __always_inline void __unwind_start(struct unwind_state *state,
 		state->sp = regs->regs[3];
 		state->pc = regs->csr_era;
 		state->ra = regs->regs[1];
+		state->fp = regs->regs[22];
 	} else if (task && task != current) {
 		state->sp = thread_saved_fp(task);
 		state->pc = thread_saved_ra(task);
 		state->ra = 0;
+		state->fp = 0;
 	} else {
 		state->sp = (unsigned long)__builtin_frame_address(0);
 		state->pc = (unsigned long)__builtin_return_address(0);
 		state->ra = 0;
+		state->fp = 0;
 	}
 	state->task = task;
 	get_stack_info(state->sp, state->task, &state->stack_info);
@@ -77,6 +81,18 @@ static __always_inline void __unwind_start(struct unwind_state *state,
 
 static __always_inline unsigned long __unwind_get_return_address(struct unwind_state *state)
 {
-	return unwind_done(state) ? 0 : state->pc;
+	if (unwind_done(state))
+		return 0;
+
+	return __kernel_text_address(state->pc) ? state->pc : 0;
 }
+
+#ifdef CONFIG_UNWINDER_ORC
+void unwind_init(void);
+void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size);
+#else
+static inline void unwind_init(void) {}
+static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {}
+#endif
+
 #endif /* _ASM_UNWIND_H */
diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h
new file mode 100644
index 000000000000..a01086ad9dde
--- /dev/null
+++ b/arch/loongarch/include/asm/unwind_hints.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_UNWIND_HINTS_H
+#define _ASM_LOONGARCH_UNWIND_HINTS_H
+
+#include <linux/objtool.h>
+#include <asm/orc_types.h>
+
+#ifdef __ASSEMBLY__
+
+.macro UNWIND_HINT_UNDEFINED
+	UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED
+.endm
+
+.macro UNWIND_HINT_END_OF_STACK
+	UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK
+.endm
+
+.macro UNWIND_HINT_REGS
+	UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_REGS
+.endm
+
+.macro UNWIND_HINT_FUNC
+	UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */
-- 
cgit 


From 199cc14cb4f1cb8668be45f67af41755ed5f0175 Mon Sep 17 00:00:00 2001
From: Jinyang He <hejinyang@loongson.cn>
Date: Mon, 11 Mar 2024 22:23:47 +0800
Subject: LoongArch: Add kernel livepatching support

The arch-specified function ftrace_regs_set_instruction_pointer() has
been implemented in arch/loongarch/include/asm/ftrace.h, so here only
implement arch_stack_walk_reliable() function.

Here are the test logs:

[root@linux fedora]# cat /proc/cmdline
BOOT_IMAGE=/vmlinuz-6.8.0-rc2 root=/dev/sda3

[root@linux fedora]# modprobe livepatch-sample
[root@linux fedora]# cat /proc/cmdline
this has been live patched

[root@linux fedora]# echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled
[root@linux fedora]# rmmod livepatch_sample
[root@linux fedora]# cat /proc/cmdline
BOOT_IMAGE=/vmlinuz-6.8.0-rc2 root=/dev/sda3

[root@linux fedora]# dmesg -t | tail -5
livepatch: enabling patch 'livepatch_sample'
livepatch: 'livepatch_sample': starting patching transition
livepatch: 'livepatch_sample': patching complete
livepatch: 'livepatch_sample': starting unpatching transition
livepatch: 'livepatch_sample': unpatching complete

Signed-off-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/thread_info.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/loongarch/include/asm')

diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h
index 8cb653d49a54..8bf0e6f51546 100644
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -86,6 +86,7 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define TIF_LASX_CTX_LIVE	18	/* LASX context must be preserved */
 #define TIF_USEDLBT		19	/* LBT was used by this task this quantum (SMP) */
 #define TIF_LBT_CTX_LIVE	20	/* LBT context must be preserved */
+#define TIF_PATCH_PENDING	21	/* pending live patching update */
 
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
@@ -105,6 +106,7 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define _TIF_LASX_CTX_LIVE	(1<<TIF_LASX_CTX_LIVE)
 #define _TIF_USEDLBT		(1<<TIF_USEDLBT)
 #define _TIF_LBT_CTX_LIVE	(1<<TIF_LBT_CTX_LIVE)
+#define _TIF_PATCH_PENDING	(1<<TIF_PATCH_PENDING)
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_THREAD_INFO_H */
-- 
cgit 


From c87e12e0e8c1241410e758e181ca6bf23efa5b5b Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Tue, 19 Mar 2024 15:50:34 +0800
Subject: LoongArch: Change __my_cpu_offset definition to avoid
 mis-optimization

From GCC commit 3f13154553f8546a ("df-scan: remove ad-hoc handling of
global regs in asms"), global registers will no longer be forced to add
to the def-use chain. Then current_thread_info(), current_stack_pointer
and __my_cpu_offset may be lifted out of the loop because they are no
longer treated as "volatile variables".

This optimization is still correct for the current_thread_info() and
current_stack_pointer usages because they are associated to a thread.
However it is wrong for __my_cpu_offset because it is associated to a
CPU rather than a thread: if the thread migrates to a different CPU in
the loop, __my_cpu_offset should be changed.

Change __my_cpu_offset definition to treat it as a "volatile variable",
in order to avoid such a mis-optimization.

Cc: stable@vger.kernel.org
Reported-by: Xiaotian Wu <wuxiaotian@loongson.cn>
Reported-by: Miao Wang <shankerwangmiao@gmail.com>
Signed-off-by: Xing Li <lixing@loongson.cn>
Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
Signed-off-by: Rui Wang <wangrui@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/percpu.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch/loongarch/include/asm')

diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index 9b36ac003f89..8f290e5546cf 100644
--- a/arch/loongarch/include/asm/percpu.h
+++ b/arch/loongarch/include/asm/percpu.h
@@ -29,7 +29,12 @@ static inline void set_my_cpu_offset(unsigned long off)
 	__my_cpu_offset = off;
 	csr_write64(off, PERCPU_BASE_KS);
 }
-#define __my_cpu_offset __my_cpu_offset
+
+#define __my_cpu_offset					\
+({							\
+	__asm__ __volatile__("":"+r"(__my_cpu_offset));	\
+	__my_cpu_offset;				\
+})
 
 #define PERCPU_OP(op, asm_op, c_op)					\
 static __always_inline unsigned long __percpu_##op(void *ptr,		\
-- 
cgit 


From d42ab9af605ee406ec339e5e80a1c3a708637fd6 Mon Sep 17 00:00:00 2001
From: Max Kellermann <max.kellermann@ionos.com>
Date: Tue, 19 Mar 2024 15:50:34 +0800
Subject: LoongArch: Move {dmw,tlb}_virt_to_page() definition to page.h

These two functions are implemented in pgtable.c, and they are needed
only by the virt_to_page() macro in page.h. Having the prototypes in
pgtable.h causes a circular dependency between page.h and pgtable.h,
because the virt_to_page() macro in page.h needs pgtable.h for these
two functions, while pgtable.h needs various definitions from page.h
(e.g. pte_t and pgt_t).

Let's avoid this circular dependency by moving the function prototypes
to page.h.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/page.h    | 3 +++
 arch/loongarch/include/asm/pgtable.h | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/loongarch/include/asm')

diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index 63f137ce82a4..2391fcd18908 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -83,6 +83,9 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
 #define sym_to_pfn(x)		__phys_to_pfn(__pa_symbol(x))
 
+struct page *dmw_virt_to_page(unsigned long kaddr);
+struct page *tlb_virt_to_page(unsigned long kaddr);
+
 #define virt_to_pfn(kaddr)	PFN_DOWN(PHYSADDR(kaddr))
 
 #define virt_to_page(kaddr)								\
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 8b5df1bbf9e9..af3acdf3481a 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -363,9 +363,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
 extern pgd_t swapper_pg_dir[];
 extern pgd_t invalid_pg_dir[];
 
-struct page *dmw_virt_to_page(unsigned long kaddr);
-struct page *tlb_virt_to_page(unsigned long kaddr);
-
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
-- 
cgit 


From 82bf60a6fed806d57e284a1fb40dbc1ad5097611 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Tue, 19 Mar 2024 15:50:34 +0800
Subject: LoongArch: Remove superfluous flush_dcache_page() definition

LoongArch doesn't have cache aliases, so flush_dcache_page() is a no-op.
There is a generic implementation for this case in include/asm-generic/
cacheflush.h. So remove the superfluous flush_dcache_page() definition,
which also silences such build warnings:

   In file included from crypto/scompress.c:12:
   include/crypto/scatterwalk.h: In function 'scatterwalk_pagedone':
   include/crypto/scatterwalk.h:76:30: warning: variable 'page' set but not used [-Wunused-but-set-variable]
      76 |                 struct page *page;
         |                              ^~~~
   crypto/scompress.c: In function 'scomp_acomp_comp_decomp':
>> crypto/scompress.c:174:38: warning: unused variable 'dst_page' [-Wunused-variable]
     174 |                         struct page *dst_page = sg_page(req->dst);
         |

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202403091614.NeUw5zcv-lkp@intel.com/
Suggested-by: Barry Song <baohua@kernel.org>
Acked-by: Barry Song <baohua@kernel.org>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/cacheflush.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/loongarch/include/asm')

diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
index 80bd74106985..f8754d08a31a 100644
--- a/arch/loongarch/include/asm/cacheflush.h
+++ b/arch/loongarch/include/asm/cacheflush.h
@@ -37,8 +37,6 @@ void local_flush_icache_range(unsigned long start, unsigned long end);
 #define flush_icache_range	local_flush_icache_range
 #define flush_icache_user_range	local_flush_icache_range
 
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-
 #define flush_cache_all()				do { } while (0)
 #define flush_cache_mm(mm)				do { } while (0)
 #define flush_cache_dup_mm(mm)				do { } while (0)
@@ -47,7 +45,6 @@ void local_flush_icache_range(unsigned long start, unsigned long end);
 #define flush_cache_vmap(start, end)			do { } while (0)
 #define flush_cache_vunmap(start, end)			do { } while (0)
 #define flush_icache_user_page(vma, page, addr, len)	do { } while (0)
-#define flush_dcache_page(page)				do { } while (0)
 #define flush_dcache_mmap_lock(mapping)			do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)		do { } while (0)
 
-- 
cgit 


From 9c68ece8b2a5c5ff9b2fcaea923dd73efeb174cd Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Tue, 19 Mar 2024 15:50:34 +0800
Subject: LoongArch: Define the __io_aw() hook as mmiowb()

Commit fb24ea52f78e0d595852e ("drivers: Remove explicit invocations of
mmiowb()") remove all mmiowb() in drivers, but it says:

"NOTE: mmiowb() has only ever guaranteed ordering in conjunction with
spin_unlock(). However, pairing each mmiowb() removal in this patch with
the corresponding call to spin_unlock() is not at all trivial, so there
is a small chance that this change may regress any drivers incorrectly
relying on mmiowb() to order MMIO writes between CPUs using lock-free
synchronisation."

The mmio in radeon_ring_commit() is protected by a mutex rather than a
spinlock, but in the mutex fastpath it behaves similar to spinlock. We
can add mmiowb() calls in the radeon driver but the maintainer says he
doesn't like such a workaround, and radeon is not the only example of
mutex protected mmio.

So we should extend the mmiowb tracking system from spinlock to mutex,
and maybe other locking primitives. This is not easy and error prone, so
we solve it in the architectural code, by simply defining the __io_aw()
hook as mmiowb(). And we no longer need to override queued_spin_unlock()
so use the generic definition.

Without this, we get such an error when run 'glxgears' on weak ordering
architectures such as LoongArch:

radeon 0000:04:00.0: ring 0 stalled for more than 10324msec
radeon 0000:04:00.0: ring 3 stalled for more than 10240msec
radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000001f412 last fence id 0x000000000001f414 on ring 3)
radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000000f940 last fence id 0x000000000000f941 on ring 0)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)

Link: https://lore.kernel.org/dri-devel/29df7e26-d7a8-4f67-b988-44353c4270ac@amd.com/T/#t
Link: https://lore.kernel.org/linux-arch/20240301130532.3953167-1-chenhuacai@loongson.cn/T/#t
Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/Kbuild      |  1 +
 arch/loongarch/include/asm/io.h        |  2 ++
 arch/loongarch/include/asm/qspinlock.h | 18 ------------------
 3 files changed, 3 insertions(+), 18 deletions(-)
 delete mode 100644 arch/loongarch/include/asm/qspinlock.h

(limited to 'arch/loongarch/include/asm')

diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index a97c0edbb866..2dbec7853ae8 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += early_ioremap.h
 generic-y += qrwlock.h
+generic-y += qspinlock.h
 generic-y += rwsem.h
 generic-y += segment.h
 generic-y += user.h
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index c486c2341b66..4a8adcca329b 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -71,6 +71,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
 #define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
 #define memcpy_toio(c, a, l)   __memcpy_toio((c), (a), (l))
 
+#define __io_aw() mmiowb()
+
 #include <asm-generic/io.h>
 
 #define ARCH_HAS_VALID_PHYS_ADDR_RANGE
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
deleted file mode 100644
index 34f43f8ad591..000000000000
--- a/arch/loongarch/include/asm/qspinlock.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_QSPINLOCK_H
-#define _ASM_QSPINLOCK_H
-
-#include <asm-generic/qspinlock_types.h>
-
-#define queued_spin_unlock queued_spin_unlock
-
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
-	compiletime_assert_atomic_type(lock->locked);
-	c_sync();
-	WRITE_ONCE(lock->locked, 0);
-}
-
-#include <asm-generic/qspinlock.h>
-
-#endif /* _ASM_QSPINLOCK_H */
-- 
cgit