Rebase locking/kcsan to locking/urgent

Merge the state of the locking kcsan branch before the read/write_once() and the atomics modifications got merged. Squash the fallout of the rebase on top of the read/write once and atomic fallback work into the merge. The history of the original branch is preserved in tag locking-kcsan-2020-06-02. Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
author: Thomas Gleixner <tglx@linutronix.de> 2020-06-11 20:02:46 +0200
committer: Thomas Gleixner <tglx@linutronix.de> 2020-06-11 20:02:46 +0200
commit: 37d1a04b13a6d2fec91a6813fc034947a27db034 (patch)
tree: c6a8d7d0df96a6eb1ddb53a12885761cb72e0e92 /kernel
parent: 37f8173dd84936ea78000ed1cad24f8b18d48ebb (diff)
parent: 97a9474aeb789183a1d0712e66a4283860279ac9 (diff)
12 files changed, 2210 insertions, 0 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index c332eb9d4841..ce8716a04d0e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -23,6 +23,9 @@ endif
 # Prevents flicker of uninteresting __do_softirq()/__local_bh_disable_ip()
 # in coverage traces.
 KCOV_INSTRUMENT_softirq.o := n
+# Avoid KCSAN instrumentation in softirq ("No shared variables, all the data
+# are CPU local" => assume no data races), to reduce overhead in interrupts.
+KCSAN_SANITIZE_softirq.o = n
 # These are called from save_stack_trace() on slub debug path,
 # and produce insane amounts of uninteresting coverage.
 KCOV_INSTRUMENT_module.o := n
@@ -31,6 +34,7 @@ KCOV_INSTRUMENT_stacktrace.o := n
 # Don't self-instrument.
 KCOV_INSTRUMENT_kcov.o := n
 KASAN_SANITIZE_kcov.o := n
+KCSAN_SANITIZE_kcov.o := n
 CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
 
 # cond_syscall is currently not LTO compatible
@@ -103,6 +107,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_CPU_PM) += cpu_pm.o
 obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_KCSAN) += kcsan/
 obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
 
 obj-$(CONFIG_PERF_EVENTS) += events/
@@ -121,6 +126,7 @@ obj-$(CONFIG_SYSCTL_KUNIT_TEST) += sysctl-test.o
 
 obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
 KASAN_SANITIZE_stackleak.o := n
+KCSAN_SANITIZE_stackleak.o := n
 KCOV_INSTRUMENT_stackleak.o := n
 
 $(obj)/configs.o: $(obj)/config_data.gz
diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile
new file mode 100644
index 000000000000..d4999b38d1be
--- /dev/null
+++ b/kernel/kcsan/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+KCSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+UBSAN_SANITIZE := n
+
+CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_debugfs.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE)
+
+CFLAGS_core.o := $(call cc-option,-fno-conserve-stack,) \
+	$(call cc-option,-fno-stack-protector,)
+
+obj-y := core.o debugfs.o report.o
+obj-$(CONFIG_KCSAN_SELFTEST) += test.o
diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h
new file mode 100644
index 000000000000..be9e625227f3
--- /dev/null
+++ b/kernel/kcsan/atomic.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _KERNEL_KCSAN_ATOMIC_H
+#define _KERNEL_KCSAN_ATOMIC_H
+
+#include <linux/jiffies.h>
+#include <linux/sched.h>
+
+/*
+ * Special rules for certain memory where concurrent conflicting accesses are
+ * common, however, the current convention is to not mark them; returns true if
+ * access to @ptr should be considered atomic. Called from slow-path.
+ */
+static bool kcsan_is_atomic_special(const volatile void *ptr)
+{
+	/* volatile globals that have been observed in data races. */
+	return ptr == &jiffies || ptr == &current->state;
+}
+
+#endif /* _KERNEL_KCSAN_ATOMIC_H */
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
new file mode 100644
index 000000000000..a73a66cf79df
--- /dev/null
+++ b/kernel/kcsan/core.c
@@ -0,0 +1,807 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+
+#include "atomic.h"
+#include "encoding.h"
+#include "kcsan.h"
+
+static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE);
+unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
+unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT;
+static long kcsan_skip_watch = CONFIG_KCSAN_SKIP_WATCH;
+static bool kcsan_interrupt_watcher = IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER);
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "kcsan."
+module_param_named(early_enable, kcsan_early_enable, bool, 0);
+module_param_named(udelay_task, kcsan_udelay_task, uint, 0644);
+module_param_named(udelay_interrupt, kcsan_udelay_interrupt, uint, 0644);
+module_param_named(skip_watch, kcsan_skip_watch, long, 0644);
+module_param_named(interrupt_watcher, kcsan_interrupt_watcher, bool, 0444);
+
+bool kcsan_enabled;
+
+/* Per-CPU kcsan_ctx for interrupts */
+static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = {
+	.disable_count		= 0,
+	.atomic_next		= 0,
+	.atomic_nest_count	= 0,
+	.in_flat_atomic		= false,
+	.access_mask		= 0,
+	.scoped_accesses	= {LIST_POISON1, NULL},
+};
+
+/*
+ * Helper macros to index into adjacent slots, starting from address slot
+ * itself, followed by the right and left slots.
+ *
+ * The purpose is 2-fold:
+ *
+ *	1. if during insertion the address slot is already occupied, check if
+ *	   any adjacent slots are free;
+ *	2. accesses that straddle a slot boundary due to size that exceeds a
+ *	   slot's range may check adjacent slots if any watchpoint matches.
+ *
+ * Note that accesses with very large size may still miss a watchpoint; however,
+ * given this should be rare, this is a reasonable trade-off to make, since this
+ * will avoid:
+ *
+ *	1. excessive contention between watchpoint checks and setup;
+ *	2. larger number of simultaneous watchpoints without sacrificing
+ *	   performance.
+ *
+ * Example: SLOT_IDX values for KCSAN_CHECK_ADJACENT=1, where i is [0, 1, 2]:
+ *
+ *   slot=0:  [ 1,  2,  0]
+ *   slot=9:  [10, 11,  9]
+ *   slot=63: [64, 65, 63]
+ */
+#define SLOT_IDX(slot, i) (slot + ((i + KCSAN_CHECK_ADJACENT) % NUM_SLOTS))
+
+/*
+ * SLOT_IDX_FAST is used in the fast-path. Not first checking the address's primary
+ * slot (middle) is fine if we assume that races occur rarely. The set of
+ * indices {SLOT_IDX(slot, i) | i in [0, NUM_SLOTS)} is equivalent to
+ * {SLOT_IDX_FAST(slot, i) | i in [0, NUM_SLOTS)}.
+ */
+#define SLOT_IDX_FAST(slot, i) (slot + i)
+
+/*
+ * Watchpoints, with each entry encoded as defined in encoding.h: in order to be
+ * able to safely update and access a watchpoint without introducing locking
+ * overhead, we encode each watchpoint as a single atomic long. The initial
+ * zero-initialized state matches INVALID_WATCHPOINT.
+ *
+ * Add NUM_SLOTS-1 entries to account for overflow; this helps avoid having to
+ * use more complicated SLOT_IDX_FAST calculation with modulo in the fast-path.
+ */
+static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
+
+/*
+ * Instructions to skip watching counter, used in should_watch(). We use a
+ * per-CPU counter to avoid excessive contention.
+ */
+static DEFINE_PER_CPU(long, kcsan_skip);
+
+static __always_inline atomic_long_t *find_watchpoint(unsigned long addr,
+						      size_t size,
+						      bool expect_write,
+						      long *encoded_watchpoint)
+{
+	const int slot = watchpoint_slot(addr);
+	const unsigned long addr_masked = addr & WATCHPOINT_ADDR_MASK;
+	atomic_long_t *watchpoint;
+	unsigned long wp_addr_masked;
+	size_t wp_size;
+	bool is_write;
+	int i;
+
+	BUILD_BUG_ON(CONFIG_KCSAN_NUM_WATCHPOINTS < NUM_SLOTS);
+
+	for (i = 0; i < NUM_SLOTS; ++i) {
+		watchpoint = &watchpoints[SLOT_IDX_FAST(slot, i)];
+		*encoded_watchpoint = atomic_long_read(watchpoint);
+		if (!decode_watchpoint(*encoded_watchpoint, &wp_addr_masked,
+				       &wp_size, &is_write))
+			continue;
+
+		if (expect_write && !is_write)
+			continue;
+
+		/* Check if the watchpoint matches the access. */
+		if (matching_access(wp_addr_masked, wp_size, addr_masked, size))
+			return watchpoint;
+	}
+
+	return NULL;
+}
+
+static inline atomic_long_t *
+insert_watchpoint(unsigned long addr, size_t size, bool is_write)
+{
+	const int slot = watchpoint_slot(addr);
+	const long encoded_watchpoint = encode_watchpoint(addr, size, is_write);
+	atomic_long_t *watchpoint;
+	int i;
+
+	/* Check slot index logic, ensuring we stay within array bounds. */
+	BUILD_BUG_ON(SLOT_IDX(0, 0) != KCSAN_CHECK_ADJACENT);
+	BUILD_BUG_ON(SLOT_IDX(0, KCSAN_CHECK_ADJACENT+1) != 0);
+	BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT) != ARRAY_SIZE(watchpoints)-1);
+	BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT+1) != ARRAY_SIZE(watchpoints) - NUM_SLOTS);
+
+	for (i = 0; i < NUM_SLOTS; ++i) {
+		long expect_val = INVALID_WATCHPOINT;
+
+		/* Try to acquire this slot. */
+		watchpoint = &watchpoints[SLOT_IDX(slot, i)];
+		if (atomic_long_try_cmpxchg_relaxed(watchpoint, &expect_val, encoded_watchpoint))
+			return watchpoint;
+	}
+
+	return NULL;
+}
+
+/*
+ * Return true if watchpoint was successfully consumed, false otherwise.
+ *
+ * This may return false if:
+ *
+ *	1. another thread already consumed the watchpoint;
+ *	2. the thread that set up the watchpoint already removed it;
+ *	3. the watchpoint was removed and then re-used.
+ */
+static __always_inline bool
+try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint)
+{
+	return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT);
+}
+
+/* Return true if watchpoint was not touched, false if already consumed. */
+static inline bool consume_watchpoint(atomic_long_t *watchpoint)
+{
+	return atomic_long_xchg_relaxed(watchpoint, CONSUMED_WATCHPOINT) != CONSUMED_WATCHPOINT;
+}
+
+/* Remove the watchpoint -- its slot may be reused after. */
+static inline void remove_watchpoint(atomic_long_t *watchpoint)
+{
+	atomic_long_set(watchpoint, INVALID_WATCHPOINT);
+}
+
+static __always_inline struct kcsan_ctx *get_ctx(void)
+{
+	/*
+	 * In interrupts, use raw_cpu_ptr to avoid unnecessary checks, that would
+	 * also result in calls that generate warnings in uaccess regions.
+	 */
+	return in_task() ? &current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx);
+}
+
+/* Check scoped accesses; never inline because this is a slow-path! */
+static noinline void kcsan_check_scoped_accesses(void)
+{
+	struct kcsan_ctx *ctx = get_ctx();
+	struct list_head *prev_save = ctx->scoped_accesses.prev;
+	struct kcsan_scoped_access *scoped_access;
+
+	ctx->scoped_accesses.prev = NULL;  /* Avoid recursion. */
+	list_for_each_entry(scoped_access, &ctx->scoped_accesses, list)
+		__kcsan_check_access(scoped_access->ptr, scoped_access->size, scoped_access->type);
+	ctx->scoped_accesses.prev = prev_save;
+}
+
+/* Rules for generic atomic accesses. Called from fast-path. */
+static __always_inline bool
+is_atomic(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx)
+{
+	if (type & KCSAN_ACCESS_ATOMIC)
+		return true;
+
+	/*
+	 * Unless explicitly declared atomic, never consider an assertion access
+	 * as atomic. This allows using them also in atomic regions, such as
+	 * seqlocks, without implicitly changing their semantics.
+	 */
+	if (type & KCSAN_ACCESS_ASSERT)
+		return false;
+
+	if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) &&
+	    (type & KCSAN_ACCESS_WRITE) && size <= sizeof(long) &&
+	    IS_ALIGNED((unsigned long)ptr, size))
+		return true; /* Assume aligned writes up to word size are atomic. */
+
+	if (ctx->atomic_next > 0) {
+		/*
+		 * Because we do not have separate contexts for nested
+		 * interrupts, in case atomic_next is set, we simply assume that
+		 * the outer interrupt set atomic_next. In the worst case, we
+		 * will conservatively consider operations as atomic. This is a
+		 * reasonable trade-off to make, since this case should be
+		 * extremely rare; however, even if extremely rare, it could
+		 * lead to false positives otherwise.
+		 */
+		if ((hardirq_count() >> HARDIRQ_SHIFT) < 2)
+			--ctx->atomic_next; /* in task, or outer interrupt */
+		return true;
+	}
+
+	return ctx->atomic_nest_count > 0 || ctx->in_flat_atomic;
+}
+
+static __always_inline bool
+should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx)
+{
+	/*
+	 * Never set up watchpoints when memory operations are atomic.
+	 *
+	 * Need to check this first, before kcsan_skip check below: (1) atomics
+	 * should not count towards skipped instructions, and (2) to actually
+	 * decrement kcsan_atomic_next for consecutive instruction stream.
+	 */
+	if (is_atomic(ptr, size, type, ctx))
+		return false;
+
+	if (this_cpu_dec_return(kcsan_skip) >= 0)
+		return false;
+
+	/*
+	 * NOTE: If we get here, kcsan_skip must always be reset in slow path
+	 * via reset_kcsan_skip() to avoid underflow.
+	 */
+
+	/* this operation should be watched */
+	return true;
+}
+
+static inline void reset_kcsan_skip(void)
+{
+	long skip_count = kcsan_skip_watch -
+			  (IS_ENABLED(CONFIG_KCSAN_SKIP_WATCH_RANDOMIZE) ?
+				   prandom_u32_max(kcsan_skip_watch) :
+				   0);
+	this_cpu_write(kcsan_skip, skip_count);
+}
+
+static __always_inline bool kcsan_is_enabled(void)
+{
+	return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0;
+}
+
+static inline unsigned int get_delay(void)
+{
+	unsigned int delay = in_task() ? kcsan_udelay_task : kcsan_udelay_interrupt;
+	return delay - (IS_ENABLED(CONFIG_KCSAN_DELAY_RANDOMIZE) ?
+				prandom_u32_max(delay) :
+				0);
+}
+
+/*
+ * Pull everything together: check_access() below contains the performance
+ * critical operations; the fast-path (including check_access) functions should
+ * all be inlinable by the instrumentation functions.
+ *
+ * The slow-path (kcsan_found_watchpoint, kcsan_setup_watchpoint) are
+ * non-inlinable -- note that, we prefix these with "kcsan_" to ensure they can
+ * be filtered from the stacktrace, as well as give them unique names for the
+ * UACCESS whitelist of objtool. Each function uses user_access_save/restore(),
+ * since they do not access any user memory, but instrumentation is still
+ * emitted in UACCESS regions.
+ */
+
+static noinline void kcsan_found_watchpoint(const volatile void *ptr,
+					    size_t size,
+					    int type,
+					    atomic_long_t *watchpoint,
+					    long encoded_watchpoint)
+{
+	unsigned long flags;
+	bool consumed;
+
+	if (!kcsan_is_enabled())
+		return;
+
+	/*
+	 * The access_mask check relies on value-change comparison. To avoid
+	 * reporting a race where e.g. the writer set up the watchpoint, but the
+	 * reader has access_mask!=0, we have to ignore the found watchpoint.
+	 */
+	if (get_ctx()->access_mask != 0)
+		return;
+
+	/*
+	 * Consume the watchpoint as soon as possible, to minimize the chances
+	 * of !consumed. Consuming the watchpoint must always be guarded by
+	 * kcsan_is_enabled() check, as otherwise we might erroneously
+	 * triggering reports when disabled.
+	 */
+	consumed = try_consume_watchpoint(watchpoint, encoded_watchpoint);
+
+	/* keep this after try_consume_watchpoint */
+	flags = user_access_save();
+
+	if (consumed) {
+		kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_MAYBE,
+			     KCSAN_REPORT_CONSUMED_WATCHPOINT,
+			     watchpoint - watchpoints);
+	} else {
+		/*
+		 * The other thread may not print any diagnostics, as it has
+		 * already removed the watchpoint, or another thread consumed
+		 * the watchpoint before this thread.
+		 */
+		kcsan_counter_inc(KCSAN_COUNTER_REPORT_RACES);
+	}
+
+	if ((type & KCSAN_ACCESS_ASSERT) != 0)
+		kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES);
+	else
+		kcsan_counter_inc(KCSAN_COUNTER_DATA_RACES);
+
+	user_access_restore(flags);
+}
+
+static noinline void
+kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
+{
+	const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0;
+	const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0;
+	atomic_long_t *watchpoint;
+	union {
+		u8 _1;
+		u16 _2;
+		u32 _4;
+		u64 _8;
+	} expect_value;
+	unsigned long access_mask;
+	enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE;
+	unsigned long ua_flags = user_access_save();
+	unsigned long irq_flags = 0;
+
+	/*
+	 * Always reset kcsan_skip counter in slow-path to avoid underflow; see
+	 * should_watch().
+	 */
+	reset_kcsan_skip();
+
+	if (!kcsan_is_enabled())
+		goto out;
+
+	/*
+	 * Special atomic rules: unlikely to be true, so we check them here in
+	 * the slow-path, and not in the fast-path in is_atomic(). Call after
+	 * kcsan_is_enabled(), as we may access memory that is not yet
+	 * initialized during early boot.
+	 */
+	if (!is_assert && kcsan_is_atomic_special(ptr))
+		goto out;
+
+	if (!check_encodable((unsigned long)ptr, size)) {
+		kcsan_counter_inc(KCSAN_COUNTER_UNENCODABLE_ACCESSES);
+		goto out;
+	}
+
+	if (!kcsan_interrupt_watcher)
+		/* Use raw to avoid lockdep recursion via IRQ flags tracing. */
+		raw_local_irq_save(irq_flags);
+
+	watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
+	if (watchpoint == NULL) {
+		/*
+		 * Out of capacity: the size of 'watchpoints', and the frequency
+		 * with which should_watch() returns true should be tweaked so
+		 * that this case happens very rarely.
+		 */
+		kcsan_counter_inc(KCSAN_COUNTER_NO_CAPACITY);
+		goto out_unlock;
+	}
+
+	kcsan_counter_inc(KCSAN_COUNTER_SETUP_WATCHPOINTS);
+	kcsan_counter_inc(KCSAN_COUNTER_USED_WATCHPOINTS);
+
+	/*
+	 * Read the current value, to later check and infer a race if the data
+	 * was modified via a non-instrumented access, e.g. from a device.
+	 */
+	expect_value._8 = 0;
+	switch (size) {
+	case 1:
+		expect_value._1 = READ_ONCE(*(const u8 *)ptr);
+		break;
+	case 2:
+		expect_value._2 = READ_ONCE(*(const u16 *)ptr);
+		break;
+	case 4:
+		expect_value._4 = READ_ONCE(*(const u32 *)ptr);
+		break;
+	case 8:
+		expect_value._8 = READ_ONCE(*(const u64 *)ptr);
+		break;
+	default:
+		break; /* ignore; we do not diff the values */
+	}
+
+	if (IS_ENABLED(CONFIG_KCSAN_DEBUG)) {
+		kcsan_disable_current();
+		pr_err("KCSAN: watching %s, size: %zu, addr: %px [slot: %d, encoded: %lx]\n",
+		       is_write ? "write" : "read", size, ptr,
+		       watchpoint_slot((unsigned long)ptr),
+		       encode_watchpoint((unsigned long)ptr, size, is_write));
+		kcsan_enable_current();
+	}
+
+	/*
+	 * Delay this thread, to increase probability of observing a racy
+	 * conflicting access.
+	 */
+	udelay(get_delay());
+
+	/*
+	 * Re-read value, and check if it is as expected; if not, we infer a
+	 * racy access.
+	 */
+	access_mask = get_ctx()->access_mask;
+	switch (size) {
+	case 1:
+		expect_value._1 ^= READ_ONCE(*(const u8 *)ptr);
+		if (access_mask)
+			expect_value._1 &= (u8)access_mask;
+		break;
+	case 2:
+		expect_value._2 ^= READ_ONCE(*(const u16 *)ptr);
+		if (access_mask)
+			expect_value._2 &= (u16)access_mask;
+		break;
+	case 4:
+		expect_value._4 ^= READ_ONCE(*(const u32 *)ptr);
+		if (access_mask)
+			expect_value._4 &= (u32)access_mask;
+		break;
+	case 8:
+		expect_value._8 ^= READ_ONCE(*(const u64 *)ptr);
+		if (access_mask)
+			expect_value._8 &= (u64)access_mask;
+		break;
+	default:
+		break; /* ignore; we do not diff the values */
+	}
+
+	/* Were we able to observe a value-change? */
+	if (expect_value._8 != 0)
+		value_change = KCSAN_VALUE_CHANGE_TRUE;
+
+	/* Check if this access raced with another. */
+	if (!consume_watchpoint(watchpoint)) {
+		/*
+		 * Depending on the access type, map a value_change of MAYBE to
+		 * TRUE (always report) or FALSE (never report).
+		 */
+		if (value_change == KCSAN_VALUE_CHANGE_MAYBE) {
+			if (access_mask != 0) {
+				/*
+				 * For access with access_mask, we require a
+				 * value-change, as it is likely that races on
+				 * ~access_mask bits are expected.
+				 */
+				value_change = KCSAN_VALUE_CHANGE_FALSE;
+			} else if (size > 8 || is_assert) {
+				/* Always assume a value-change. */
+				value_change = KCSAN_VALUE_CHANGE_TRUE;
+			}
+		}
+
+		/*
+		 * No need to increment 'data_races' counter, as the racing
+		 * thread already did.
+		 *
+		 * Count 'assert_failures' for each failed ASSERT access,
+		 * therefore both this thread and the racing thread may
+		 * increment this counter.
+		 */
+		if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE)
+			kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES);
+
+		kcsan_report(ptr, size, type, value_change, KCSAN_REPORT_RACE_SIGNAL,
+			     watchpoint - watchpoints);
+	} else if (value_change == KCSAN_VALUE_CHANGE_TRUE) {
+		/* Inferring a race, since the value should not have changed. */
+
+		kcsan_counter_inc(KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN);
+		if (is_assert)
+			kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES);
+
+		if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert)
+			kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE,
+				     KCSAN_REPORT_RACE_UNKNOWN_ORIGIN,
+				     watchpoint - watchpoints);
+	}
+
+	/*
+	 * Remove watchpoint; must be after reporting, since the slot may be
+	 * reused after this point.
+	 */
+	remove_watchpoint(watchpoint);
+	kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
+out_unlock:
+	if (!kcsan_interrupt_watcher)
+		raw_local_irq_restore(irq_flags);
+out:
+	user_access_restore(ua_flags);
+}
+
+static __always_inline void check_access(const volatile void *ptr, size_t size,
+					 int type)
+{
+	const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0;
+	atomic_long_t *watchpoint;
+	long encoded_watchpoint;
+
+	/*
+	 * Do nothing for 0 sized check; this comparison will be optimized out
+	 * for constant sized instrumentation (__tsan_{read,write}N).
+	 */
+	if (unlikely(size == 0))
+		return;
+
+	/*
+	 * Avoid user_access_save in fast-path: find_watchpoint is safe without
+	 * user_access_save, as the address that ptr points to is only used to
+	 * check if a watchpoint exists; ptr is never dereferenced.
+	 */
+	watchpoint = find_watchpoint((unsigned long)ptr, size, !is_write,
+				     &encoded_watchpoint);
+	/*
+	 * It is safe to check kcsan_is_enabled() after find_watchpoint in the
+	 * slow-path, as long as no state changes that cause a race to be
+	 * detected and reported have occurred until kcsan_is_enabled() is
+	 * checked.
+	 */
+
+	if (unlikely(watchpoint != NULL))
+		kcsan_found_watchpoint(ptr, size, type, watchpoint,
+				       encoded_watchpoint);
+	else {
+		struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. */
+
+		if (unlikely(should_watch(ptr, size, type, ctx)))
+			kcsan_setup_watchpoint(ptr, size, type);
+		else if (unlikely(ctx->scoped_accesses.prev))
+			kcsan_check_scoped_accesses();
+	}
+}
+
+/* === Public interface ===================================================== */
+
+void __init kcsan_init(void)
+{
+	BUG_ON(!in_task());
+
+	kcsan_debugfs_init();
+
+	/*
+	 * We are in the init task, and no other tasks should be running;
+	 * WRITE_ONCE without memory barrier is sufficient.
+	 */
+	if (kcsan_early_enable)
+		WRITE_ONCE(kcsan_enabled, true);
+}
+
+/* === Exported interface =================================================== */
+
+void kcsan_disable_current(void)
+{
+	++get_ctx()->disable_count;
+}
+EXPORT_SYMBOL(kcsan_disable_current);
+
+void kcsan_enable_current(void)
+{
+	if (get_ctx()->disable_count-- == 0) {
+		/*
+		 * Warn if kcsan_enable_current() calls are unbalanced with
+		 * kcsan_disable_current() calls, which causes disable_count to
+		 * become negative and should not happen.
+		 */
+		kcsan_disable_current(); /* restore to 0, KCSAN still enabled */
+		kcsan_disable_current(); /* disable to generate warning */
+		WARN(1, "Unbalanced %s()", __func__);
+		kcsan_enable_current();
+	}
+}
+EXPORT_SYMBOL(kcsan_enable_current);
+
+void kcsan_enable_current_nowarn(void)
+{
+	if (get_ctx()->disable_count-- == 0)
+		kcsan_disable_current();
+}
+EXPORT_SYMBOL(kcsan_enable_current_nowarn);
+
+void kcsan_nestable_atomic_begin(void)
+{
+	/*
+	 * Do *not* check and warn if we are in a flat atomic region: nestable
+	 * and flat atomic regions are independent from each other.
+	 * See include/linux/kcsan.h: struct kcsan_ctx comments for more
+	 * comments.
+	 */
+
+	++get_ctx()->atomic_nest_count;
+}
+EXPORT_SYMBOL(kcsan_nestable_atomic_begin);
+
+void kcsan_nestable_atomic_end(void)
+{
+	if (get_ctx()->atomic_nest_count-- == 0) {
+		/*
+		 * Warn if kcsan_nestable_atomic_end() calls are unbalanced with
+		 * kcsan_nestable_atomic_begin() calls, which causes
+		 * atomic_nest_count to become negative and should not happen.
+		 */
+		kcsan_nestable_atomic_begin(); /* restore to 0 */
+		kcsan_disable_current(); /* disable to generate warning */
+		WARN(1, "Unbalanced %s()", __func__);
+		kcsan_enable_current();
+	}
+}
+EXPORT_SYMBOL(kcsan_nestable_atomic_end);
+
+void kcsan_flat_atomic_begin(void)
+{
+	get_ctx()->in_flat_atomic = true;
+}
+EXPORT_SYMBOL(kcsan_flat_atomic_begin);
+
+void kcsan_flat_atomic_end(void)
+{
+	get_ctx()->in_flat_atomic = false;
+}
+EXPORT_SYMBOL(kcsan_flat_atomic_end);
+
+void kcsan_atomic_next(int n)
+{
+	get_ctx()->atomic_next = n;
+}
+EXPORT_SYMBOL(kcsan_atomic_next);
+
+void kcsan_set_access_mask(unsigned long mask)
+{
+	get_ctx()->access_mask = mask;
+}
+EXPORT_SYMBOL(kcsan_set_access_mask);
+
+struct kcsan_scoped_access *
+kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type,
+			  struct kcsan_scoped_access *sa)
+{
+	struct kcsan_ctx *ctx = get_ctx();
+
+	__kcsan_check_access(ptr, size, type);
+
+	ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */
+
+	INIT_LIST_HEAD(&sa->list);
+	sa->ptr = ptr;
+	sa->size = size;
+	sa->type = type;
+
+	if (!ctx->scoped_accesses.prev) /* Lazy initialize list head. */
+		INIT_LIST_HEAD(&ctx->scoped_accesses);
+	list_add(&sa->list, &ctx->scoped_accesses);
+
+	ctx->disable_count--;
+	return sa;
+}
+EXPORT_SYMBOL(kcsan_begin_scoped_access);
+
+void kcsan_end_scoped_access(struct kcsan_scoped_access *sa)
+{
+	struct kcsan_ctx *ctx = get_ctx();
+
+	if (WARN(!ctx->scoped_accesses.prev, "Unbalanced %s()?", __func__))
+		return;
+
+	ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */
+
+	list_del(&sa->list);
+	if (list_empty(&ctx->scoped_accesses))
+		/*
+		 * Ensure we do not enter kcsan_check_scoped_accesses()
+		 * slow-path if unnecessary, and avoids requiring list_empty()
+		 * in the fast-path (to avoid a READ_ONCE() and potential
+		 * uaccess warning).
+		 */
+		ctx->scoped_accesses.prev = NULL;
+
+	ctx->disable_count--;
+
+	__kcsan_check_access(sa->ptr, sa->size, sa->type);
+}
+EXPORT_SYMBOL(kcsan_end_scoped_access);
+
+void __kcsan_check_access(const volatile void *ptr, size_t size, int type)
+{
+	check_access(ptr, size, type);
+}
+EXPORT_SYMBOL(__kcsan_check_access);
+
+/*
+ * KCSAN uses the same instrumentation that is emitted by supported compilers
+ * for ThreadSanitizer (TSAN).
+ *
+ * When enabled, the compiler emits instrumentation calls (the functions
+ * prefixed with "__tsan" below) for all loads and stores that it generated;
+ * inline asm is not instrumented.
+ *
+ * Note that, not all supported compiler versions distinguish aligned/unaligned
+ * accesses, but e.g. recent versions of Clang do. We simply alias the unaligned
+ * version to the generic version, which can handle both.
+ */
+
+#define DEFINE_TSAN_READ_WRITE(size)                                           \
+	void __tsan_read##size(void *ptr)                                      \
+	{                                                                      \
+		check_access(ptr, size, 0);                                    \
+	}                                                                      \
+	EXPORT_SYMBOL(__tsan_read##size);                                      \
+	void __tsan_unaligned_read##size(void *ptr)                            \
+		__alias(__tsan_read##size);                                    \
+	EXPORT_SYMBOL(__tsan_unaligned_read##size);                            \
+	void __tsan_write##size(void *ptr)                                     \
+	{                                                                      \
+		check_access(ptr, size, KCSAN_ACCESS_WRITE);                   \
+	}                                                                      \
+	EXPORT_SYMBOL(__tsan_write##size);                                     \
+	void __tsan_unaligned_write##size(void *ptr)                           \
+		__alias(__tsan_write##size);                                   \
+	EXPORT_SYMBOL(__tsan_unaligned_write##size)
+
+DEFINE_TSAN_READ_WRITE(1);
+DEFINE_TSAN_READ_WRITE(2);
+DEFINE_TSAN_READ_WRITE(4);
+DEFINE_TSAN_READ_WRITE(8);
+DEFINE_TSAN_READ_WRITE(16);
+
+void __tsan_read_range(void *ptr, size_t size)
+{
+	check_access(ptr, size, 0);
+}
+EXPORT_SYMBOL(__tsan_read_range);
+
+void __tsan_write_range(void *ptr, size_t size)
+{
+	check_access(ptr, size, KCSAN_ACCESS_WRITE);
+}
+EXPORT_SYMBOL(__tsan_write_range);
+
+/*
+ * The below are not required by KCSAN, but can still be emitted by the
+ * compiler.
+ */
+void __tsan_func_entry(void *call_pc)
+{
+}
+EXPORT_SYMBOL(__tsan_func_entry);
+void __tsan_func_exit(void)
+{
+}
+EXPORT_SYMBOL(__tsan_func_exit);
+void __tsan_init(void)
+{
+}
+EXPORT_SYMBOL(__tsan_init);
diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c
new file mode 100644
index 000000000000..023e49c58d55
--- /dev/null
+++ b/kernel/kcsan/debugfs.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/atomic.h>
+#include <linux/bsearch.h>
+#include <linux/bug.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
+#include <linux/kallsyms.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+
+#include "kcsan.h"
+
+/*
+ * Statistics counters.
+ */
+static atomic_long_t counters[KCSAN_COUNTER_COUNT];
+
+/*
+ * Addresses for filtering functions from reporting. This list can be used as a
+ * whitelist or blacklist.
+ */
+static struct {
+	unsigned long	*addrs;		/* array of addresses */
+	size_t		size;		/* current size */
+	int		used;		/* number of elements used */
+	bool		sorted;		/* if elements are sorted */
+	bool		whitelist;	/* if list is a blacklist or whitelist */
+} report_filterlist = {
+	.addrs		= NULL,
+	.size		= 8,		/* small initial size */
+	.used		= 0,
+	.sorted		= false,
+	.whitelist	= false,	/* default is blacklist */
+};
+static DEFINE_SPINLOCK(report_filterlist_lock);
+
+static const char *counter_to_name(enum kcsan_counter_id id)
+{
+	switch (id) {
+	case KCSAN_COUNTER_USED_WATCHPOINTS:		return "used_watchpoints";
+	case KCSAN_COUNTER_SETUP_WATCHPOINTS:		return "setup_watchpoints";
+	case KCSAN_COUNTER_DATA_RACES:			return "data_races";
+	case KCSAN_COUNTER_ASSERT_FAILURES:		return "assert_failures";
+	case KCSAN_COUNTER_NO_CAPACITY:			return "no_capacity";
+	case KCSAN_COUNTER_REPORT_RACES:		return "report_races";
+	case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN:	return "races_unknown_origin";
+	case KCSAN_COUNTER_UNENCODABLE_ACCESSES:	return "unencodable_accesses";
+	case KCSAN_COUNTER_ENCODING_FALSE_POSITIVES:	return "encoding_false_positives";
+	case KCSAN_COUNTER_COUNT:
+		BUG();
+	}
+	return NULL;
+}
+
+void kcsan_counter_inc(enum kcsan_counter_id id)
+{
+	atomic_long_inc(&counters[id]);
+}
+
+void kcsan_counter_dec(enum kcsan_counter_id id)
+{
+	atomic_long_dec(&counters[id]);
+}
+
+/*
+ * The microbenchmark allows benchmarking KCSAN core runtime only. To run
+ * multiple threads, pipe 'microbench=<iters>' from multiple tasks into the
+ * debugfs file. This will not generate any conflicts, and tests fast-path only.
+ */
+static noinline void microbenchmark(unsigned long iters)
+{
+	const struct kcsan_ctx ctx_save = current->kcsan_ctx;
+	const bool was_enabled = READ_ONCE(kcsan_enabled);
+	cycles_t cycles;
+
+	/* We may have been called from an atomic region; reset context. */
+	memset(&current->kcsan_ctx, 0, sizeof(current->kcsan_ctx));
+	/*
+	 * Disable to benchmark fast-path for all accesses, and (expected
+	 * negligible) call into slow-path, but never set up watchpoints.
+	 */
+	WRITE_ONCE(kcsan_enabled, false);
+
+	pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters);
+
+	cycles = get_cycles();
+	while (iters--) {
+		unsigned long addr = iters & ((PAGE_SIZE << 8) - 1);
+		int type = !(iters & 0x7f) ? KCSAN_ACCESS_ATOMIC :
+				(!(iters & 0xf) ? KCSAN_ACCESS_WRITE : 0);
+		__kcsan_check_access((void *)addr, sizeof(long), type);
+	}
+	cycles = get_cycles() - cycles;
+
+	pr_info("KCSAN: %s end   | cycles: %llu\n", __func__, cycles);
+
+	WRITE_ONCE(kcsan_enabled, was_enabled);
+	/* restore context */
+	current->kcsan_ctx = ctx_save;
+}
+
+/*
+ * Simple test to create conflicting accesses. Write 'test=<iters>' to KCSAN's
+ * debugfs file from multiple tasks to generate real conflicts and show reports.
+ */
+static long test_dummy;
+static long test_flags;
+static long test_scoped;
+static noinline void test_thread(unsigned long iters)
+{
+	const long CHANGE_BITS = 0xff00ff00ff00ff00L;
+	const struct kcsan_ctx ctx_save = current->kcsan_ctx;
+	cycles_t cycles;
+
+	/* We may have been called from an atomic region; reset context. */
+	memset(&current->kcsan_ctx, 0, sizeof(current->kcsan_ctx));
+
+	pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters);
+	pr_info("test_dummy@%px, test_flags@%px, test_scoped@%px,\n",
+		&test_dummy, &test_flags, &test_scoped);
+
+	cycles = get_cycles();
+	while (iters--) {
+		/* These all should generate reports. */
+		__kcsan_check_read(&test_dummy, sizeof(test_dummy));
+		ASSERT_EXCLUSIVE_WRITER(test_dummy);
+		ASSERT_EXCLUSIVE_ACCESS(test_dummy);
+
+		ASSERT_EXCLUSIVE_BITS(test_flags, ~CHANGE_BITS); /* no report */
+		__kcsan_check_read(&test_flags, sizeof(test_flags)); /* no report */
+
+		ASSERT_EXCLUSIVE_BITS(test_flags, CHANGE_BITS); /* report */
+		__kcsan_check_read(&test_flags, sizeof(test_flags)); /* no report */
+
+		/* not actually instrumented */
+		WRITE_ONCE(test_dummy, iters);  /* to observe value-change */
+		__kcsan_check_write(&test_dummy, sizeof(test_dummy));
+
+		test_flags ^= CHANGE_BITS; /* generate value-change */
+		__kcsan_check_write(&test_flags, sizeof(test_flags));
+
+		BUG_ON(current->kcsan_ctx.scoped_accesses.prev);
+		{
+			/* Should generate reports anywhere in this block. */
+			ASSERT_EXCLUSIVE_WRITER_SCOPED(test_scoped);
+			ASSERT_EXCLUSIVE_ACCESS_SCOPED(test_scoped);
+			BUG_ON(!current->kcsan_ctx.scoped_accesses.prev);
+			/* Unrelated accesses. */
+			__kcsan_check_access(&cycles, sizeof(cycles), 0);
+			__kcsan_check_access(&cycles, sizeof(cycles), KCSAN_ACCESS_ATOMIC);
+		}
+		BUG_ON(current->kcsan_ctx.scoped_accesses.prev);
+	}
+	cycles = get_cycles() - cycles;
+
+	pr_info("KCSAN: %s end   | cycles: %llu\n", __func__, cycles);
+
+	/* restore context */
+	current->kcsan_ctx = ctx_save;
+}
+
+static int cmp_filterlist_addrs(const void *rhs, const void *lhs)
+{
+	const unsigned long a = *(const unsigned long *)rhs;
+	const unsigned long b = *(const unsigned long *)lhs;
+
+	return a < b ? -1 : a == b ? 0 : 1;
+}
+
+bool kcsan_skip_report_debugfs(unsigned long func_addr)
+{
+	unsigned long symbolsize, offset;
+	unsigned long flags;
+	bool ret = false;
+
+	if (!kallsyms_lookup_size_offset(func_addr, &symbolsize, &offset))
+		return false;
+	func_addr -= offset; /* Get function start */
+
+	spin_lock_irqsave(&report_filterlist_lock, flags);
+	if (report_filterlist.used == 0)
+		goto out;
+
+	/* Sort array if it is unsorted, and then do a binary search. */
+	if (!report_filterlist.sorted) {
+		sort(report_filterlist.addrs, report_filterlist.used,
+		     sizeof(unsigned long), cmp_filterlist_addrs, NULL);
+		report_filterlist.sorted = true;
+	}
+	ret = !!bsearch(&func_addr, report_filterlist.addrs,
+			report_filterlist.used, sizeof(unsigned long),
+			cmp_filterlist_addrs);
+	if (report_filterlist.whitelist)
+		ret = !ret;
+
+out:
+	spin_unlock_irqrestore(&report_filterlist_lock, flags);
+	return ret;
+}
+
+static void set_report_filterlist_whitelist(bool whitelist)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&report_filterlist_lock, flags);
+	report_filterlist.whitelist = whitelist;
+	spin_unlock_irqrestore(&report_filterlist_lock, flags);
+}
+
+/* Returns 0 on success, error-code otherwise. */
+static ssize_t insert_report_filterlist(const char *func)
+{
+	unsigned long flags;
+	unsigned long addr = kallsyms_lookup_name(func);
+	ssize_t ret = 0;
+
+	if (!addr) {
+		pr_err("KCSAN: could not find function: '%s'\n", func);
+		return -ENOENT;
+	}
+
+	spin_lock_irqsave(&report_filterlist_lock, flags);
+
+	if (report_filterlist.addrs == NULL) {
+		/* initial allocation */
+		report_filterlist.addrs =
+			kmalloc_array(report_filterlist.size,
+				      sizeof(unsigned long), GFP_ATOMIC);
+		if (report_filterlist.addrs == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	} else if (report_filterlist.used == report_filterlist.size) {
+		/* resize filterlist */
+		size_t new_size = report_filterlist.size * 2;
+		unsigned long *new_addrs =
+			krealloc(report_filterlist.addrs,
+				 new_size * sizeof(unsigned long), GFP_ATOMIC);
+
+		if (new_addrs == NULL) {
+			/* leave filterlist itself untouched */
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		report_filterlist.size = new_size;
+		report_filterlist.addrs = new_addrs;
+	}
+
+	/* Note: deduplicating should be done in userspace. */
+	report_filterlist.addrs[report_filterlist.used++] =
+		kallsyms_lookup_name(func);
+	report_filterlist.sorted = false;
+
+out:
+	spin_unlock_irqrestore(&report_filterlist_lock, flags);
+
+	return ret;
+}
+
+static int show_info(struct seq_file *file, void *v)
+{
+	int i;
+	unsigned long flags;
+
+	/* show stats */
+	seq_printf(file, "enabled: %i\n", READ_ONCE(kcsan_enabled));
+	for (i = 0; i < KCSAN_COUNTER_COUNT; ++i)
+		seq_printf(file, "%s: %ld\n", counter_to_name(i),
+			   atomic_long_read(&counters[i]));
+
+	/* show filter functions, and filter type */
+	spin_lock_irqsave(&report_filterlist_lock, flags);
+	seq_printf(file, "\n%s functions: %s\n",
+		   report_filterlist.whitelist ? "whitelisted" : "blacklisted",
+		   report_filterlist.used == 0 ? "none" : "");
+	for (i = 0; i < report_filterlist.used; ++i)
+		seq_printf(file, " %ps\n", (void *)report_filterlist.addrs[i]);
+	spin_unlock_irqrestore(&report_filterlist_lock, flags);
+
+	return 0;
+}
+
+static int debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_info, NULL);
+}
+
+static ssize_t
+debugfs_write(struct file *file, const char __user *buf, size_t count, loff_t *off)
+{
+	char kbuf[KSYM_NAME_LEN];
+	char *arg;
+	int read_len = count < (sizeof(kbuf) - 1) ? count : (sizeof(kbuf) - 1);
+
+	if (copy_from_user(kbuf, buf, read_len))
+		return -EFAULT;
+	kbuf[read_len] = '\0';
+	arg = strstrip(kbuf);
+
+	if (!strcmp(arg, "on")) {
+		WRITE_ONCE(kcsan_enabled, true);
+	} else if (!strcmp(arg, "off")) {
+		WRITE_ONCE(kcsan_enabled, false);
+	} else if (!strncmp(arg, "microbench=", sizeof("microbench=") - 1)) {
+		unsigned long iters;
+
+		if (kstrtoul(&arg[sizeof("microbench=") - 1], 0, &iters))
+			return -EINVAL;
+		microbenchmark(iters);
+	} else if (!strncmp(arg, "test=", sizeof("test=") - 1)) {
+		unsigned long iters;
+
+		if (kstrtoul(&arg[sizeof("test=") - 1], 0, &iters))
+			return -EINVAL;
+		test_thread(iters);
+	} else if (!strcmp(arg, "whitelist")) {
+		set_report_filterlist_whitelist(true);
+	} else if (!strcmp(arg, "blacklist")) {
+		set_report_filterlist_whitelist(false);
+	} else if (arg[0] == '!') {
+		ssize_t ret = insert_report_filterlist(&arg[1]);
+
+		if (ret < 0)
+			return ret;
+	} else {
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+static const struct file_operations debugfs_ops =
+{
+	.read	 = seq_read,
+	.open	 = debugfs_open,
+	.write	 = debugfs_write,
+	.release = single_release
+};
+
+void __init kcsan_debugfs_init(void)
+{
+	debugfs_create_file("kcsan", 0644, NULL, NULL, &debugfs_ops);
+}
diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h
new file mode 100644
index 000000000000..f03562aaf2eb
--- /dev/null
+++ b/kernel/kcsan/encoding.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _KERNEL_KCSAN_ENCODING_H
+#define _KERNEL_KCSAN_ENCODING_H
+
+#include <linux/bits.h>
+#include <linux/log2.h>
+#include <linux/mm.h>
+
+#include "kcsan.h"
+
+#define SLOT_RANGE PAGE_SIZE
+
+#define INVALID_WATCHPOINT  0
+#define CONSUMED_WATCHPOINT 1
+
+/*
+ * The maximum useful size of accesses for which we set up watchpoints is the
+ * max range of slots we check on an access.
+ */
+#define MAX_ENCODABLE_SIZE (SLOT_RANGE * (1 + KCSAN_CHECK_ADJACENT))
+
+/*
+ * Number of bits we use to store size info.
+ */
+#define WATCHPOINT_SIZE_BITS bits_per(MAX_ENCODABLE_SIZE)
+/*
+ * This encoding for addresses discards the upper (1 for is-write + SIZE_BITS);
+ * however, most 64-bit architectures do not use the full 64-bit address space.
+ * Also, in order for a false positive to be observable 2 things need to happen:
+ *
+ *	1. different addresses but with the same encoded address race;
+ *	2. and both map onto the same watchpoint slots;
+ *
+ * Both these are assumed to be very unlikely. However, in case it still happens
+ * happens, the report logic will filter out the false positive (see report.c).
+ */
+#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG-1 - WATCHPOINT_SIZE_BITS)
+
+/*
+ * Masks to set/retrieve the encoded data.
+ */
+#define WATCHPOINT_WRITE_MASK BIT(BITS_PER_LONG-1)
+#define WATCHPOINT_SIZE_MASK                                                   \
+	GENMASK(BITS_PER_LONG-2, BITS_PER_LONG-2 - WATCHPOINT_SIZE_BITS)
+#define WATCHPOINT_ADDR_MASK                                                   \
+	GENMASK(BITS_PER_LONG-3 - WATCHPOINT_SIZE_BITS, 0)
+
+static inline bool check_encodable(unsigned long addr, size_t size)
+{
+	return size <= MAX_ENCODABLE_SIZE;
+}
+
+static inline long
+encode_watchpoint(unsigned long addr, size_t size, bool is_write)
+{
+	return (long)((is_write ? WATCHPOINT_WRITE_MASK : 0) |
+		      (size << WATCHPOINT_ADDR_BITS) |
+		      (addr & WATCHPOINT_ADDR_MASK));
+}
+
+static __always_inline bool decode_watchpoint(long watchpoint,
+					      unsigned long *addr_masked,
+					      size_t *size,
+					      bool *is_write)
+{
+	if (watchpoint == INVALID_WATCHPOINT ||
+	    watchpoint == CONSUMED_WATCHPOINT)
+		return false;
+
+	*addr_masked =    (unsigned long)watchpoint & WATCHPOINT_ADDR_MASK;
+	*size	     =   ((unsigned long)watchpoint & WATCHPOINT_SIZE_MASK) >> WATCHPOINT_ADDR_BITS;
+	*is_write    = !!((unsigned long)watchpoint & WATCHPOINT_WRITE_MASK);
+
+	return true;
+}
+
+/*
+ * Return watchpoint slot for an address.
+ */
+static __always_inline int watchpoint_slot(unsigned long addr)
+{
+	return (addr / PAGE_SIZE) % CONFIG_KCSAN_NUM_WATCHPOINTS;
+}
+
+static __always_inline bool matching_access(unsigned long addr1, size_t size1,
+					    unsigned long addr2, size_t size2)
+{
+	unsigned long end_range1 = addr1 + size1 - 1;
+	unsigned long end_range2 = addr2 + size2 - 1;
+
+	return addr1 <= end_range2 && addr2 <= end_range1;
+}
+
+#endif /* _KERNEL_KCSAN_ENCODING_H */
diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h
new file mode 100644
index 000000000000..763d6d08d94b
--- /dev/null
+++ b/kernel/kcsan/kcsan.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * The Kernel Concurrency Sanitizer (KCSAN) infrastructure. For more info please
+ * see Documentation/dev-tools/kcsan.rst.
+ */
+
+#ifndef _KERNEL_KCSAN_KCSAN_H
+#define _KERNEL_KCSAN_KCSAN_H
+
+#include <linux/kcsan.h>
+
+/* The number of adjacent watchpoints to check. */
+#define KCSAN_CHECK_ADJACENT 1
+#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT)
+
+extern unsigned int kcsan_udelay_task;
+extern unsigned int kcsan_udelay_interrupt;
+
+/*
+ * Globally enable and disable KCSAN.
+ */
+extern bool kcsan_enabled;
+
+/*
+ * Initialize debugfs file.
+ */
+void kcsan_debugfs_init(void);
+
+enum kcsan_counter_id {
+	/*
+	 * Number of watchpoints currently in use.
+	 */
+	KCSAN_COUNTER_USED_WATCHPOINTS,
+
+	/*
+	 * Total number of watchpoints set up.
+	 */
+	KCSAN_COUNTER_SETUP_WATCHPOINTS,
+
+	/*
+	 * Total number of data races.
+	 */
+	KCSAN_COUNTER_DATA_RACES,
+
+	/*
+	 * Total number of ASSERT failures due to races. If the observed race is
+	 * due to two conflicting ASSERT type accesses, then both will be
+	 * counted.
+	 */
+	KCSAN_COUNTER_ASSERT_FAILURES,
+
+	/*
+	 * Number of times no watchpoints were available.
+	 */
+	KCSAN_COUNTER_NO_CAPACITY,
+
+	/*
+	 * A thread checking a watchpoint raced with another checking thread;
+	 * only one will be reported.
+	 */
+	KCSAN_COUNTER_REPORT_RACES,
+
+	/*
+	 * Observed data value change, but writer thread unknown.
+	 */
+	KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN,
+
+	/*
+	 * The access cannot be encoded to a valid watchpoint.
+	 */
+	KCSAN_COUNTER_UNENCODABLE_ACCESSES,
+
+	/*
+	 * Watchpoint encoding caused a watchpoint to fire on mismatching
+	 * accesses.
+	 */
+	KCSAN_COUNTER_ENCODING_FALSE_POSITIVES,
+
+	KCSAN_COUNTER_COUNT, /* number of counters */
+};
+
+/*
+ * Increment/decrement counter with given id; avoid calling these in fast-path.
+ */
+extern void kcsan_counter_inc(enum kcsan_counter_id id);
+extern void kcsan_counter_dec(enum kcsan_counter_id id);
+
+/*
+ * Returns true if data races in the function symbol that maps to func_addr
+ * (offsets are ignored) should *not* be reported.
+ */
+extern bool kcsan_skip_report_debugfs(unsigned long func_addr);
+
+/*
+ * Value-change states.
+ */
+enum kcsan_value_change {
+	/*
+	 * Did not observe a value-change, however, it is valid to report the
+	 * race, depending on preferences.
+	 */
+	KCSAN_VALUE_CHANGE_MAYBE,
+
+	/*
+	 * Did not observe a value-change, and it is invalid to report the race.
+	 */
+	KCSAN_VALUE_CHANGE_FALSE,
+
+	/*
+	 * The value was observed to change, and the race should be reported.
+	 */
+	KCSAN_VALUE_CHANGE_TRUE,
+};
+
+enum kcsan_report_type {
+	/*
+	 * The thread that set up the watchpoint and briefly stalled was
+	 * signalled that another thread triggered the watchpoint.
+	 */
+	KCSAN_REPORT_RACE_SIGNAL,
+
+	/*
+	 * A thread found and consumed a matching watchpoint.
+	 */
+	KCSAN_REPORT_CONSUMED_WATCHPOINT,
+
+	/*
+	 * No other thread was observed to race with the access, but the data
+	 * value before and after the stall differs.
+	 */
+	KCSAN_REPORT_RACE_UNKNOWN_ORIGIN,
+};
+
+/*
+ * Print a race report from thread that encountered the race.
+ */
+extern void kcsan_report(const volatile void *ptr, size_t size, int access_type,
+			 enum kcsan_value_change value_change,
+			 enum kcsan_report_type type, int watchpoint_idx);
+
+#endif /* _KERNEL_KCSAN_KCSAN_H */
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
new file mode 100644
index 000000000000..ac5f8345bae9
--- /dev/null
+++ b/kernel/kcsan/report.c
@@ -0,0 +1,634 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/debug_locks.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/lockdep.h>
+#include <linux/preempt.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/stacktrace.h>
+
+#include "kcsan.h"
+#include "encoding.h"
+
+/*
+ * Max. number of stack entries to show in the report.
+ */
+#define NUM_STACK_ENTRIES 64
+
+/* Common access info. */
+struct access_info {
+	const volatile void	*ptr;
+	size_t			size;
+	int			access_type;
+	int			task_pid;
+	int			cpu_id;
+};
+
+/*
+ * Other thread info: communicated from other racing thread to thread that set
+ * up the watchpoint, which then prints the complete report atomically.
+ */
+struct other_info {
+	struct access_info	ai;
+	unsigned long		stack_entries[NUM_STACK_ENTRIES];
+	int			num_stack_entries;
+
+	/*
+	 * Optionally pass @current. Typically we do not need to pass @current
+	 * via @other_info since just @task_pid is sufficient. Passing @current
+	 * has additional overhead.
+	 *
+	 * To safely pass @current, we must either use get_task_struct/
+	 * put_task_struct, or stall the thread that populated @other_info.
+	 *
+	 * We cannot rely on get_task_struct/put_task_struct in case
+	 * release_report() races with a task being released, and would have to
+	 * free it in release_report(). This may result in deadlock if we want
+	 * to use KCSAN on the allocators.
+	 *
+	 * Since we also want to reliably print held locks for
+	 * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread
+	 * that populated @other_info until it has been consumed.
+	 */
+	struct task_struct	*task;
+};
+
+/*
+ * To never block any producers of struct other_info, we need as many elements
+ * as we have watchpoints (upper bound on concurrent races to report).
+ */
+static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
+
+/*
+ * Information about reported races; used to rate limit reporting.
+ */
+struct report_time {
+	/*
+	 * The last time the race was reported.
+	 */
+	unsigned long time;
+
+	/*
+	 * The frames of the 2 threads; if only 1 thread is known, one frame
+	 * will be 0.
+	 */
+	unsigned long frame1;
+	unsigned long frame2;
+};
+
+/*
+ * Since we also want to be able to debug allocators with KCSAN, to avoid
+ * deadlock, report_times cannot be dynamically resized with krealloc in
+ * rate_limit_report.
+ *
+ * Therefore, we use a fixed-size array, which at most will occupy a page. This
+ * still adequately rate limits reports, assuming that a) number of unique data
+ * races is not excessive, and b) occurrence of unique races within the
+ * same time window is limited.
+ */
+#define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
+#define REPORT_TIMES_SIZE                                                      \
+	(CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ?                   \
+		 REPORT_TIMES_MAX :                                            \
+		 CONFIG_KCSAN_REPORT_ONCE_IN_MS)
+static struct report_time report_times[REPORT_TIMES_SIZE];
+
+/*
+ * Spinlock serializing report generation, and access to @other_infos. Although
+ * it could make sense to have a finer-grained locking story for @other_infos,
+ * report generation needs to be serialized either way, so not much is gained.
+ */
+static DEFINE_RAW_SPINLOCK(report_lock);
+
+/*
+ * Checks if the race identified by thread frames frame1 and frame2 has
+ * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
+ */
+static bool rate_limit_report(unsigned long frame1, unsigned long frame2)
+{
+	struct report_time *use_entry = &report_times[0];
+	unsigned long invalid_before;
+	int i;
+
+	BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0);
+
+	if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0)
+		return false;
+
+	invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS);
+
+	/* Check if a matching race report exists. */
+	for (i = 0; i < REPORT_TIMES_SIZE; ++i) {
+		struct report_time *rt = &report_times[i];
+
+		/*
+		 * Must always select an entry for use to store info as we
+		 * cannot resize report_times; at the end of the scan, use_entry
+		 * will be the oldest entry, which ideally also happened before
+		 * KCSAN_REPORT_ONCE_IN_MS ago.
+		 */
+		if (time_before(rt->time, use_entry->time))
+			use_entry = rt;
+
+		/*
+		 * Initially, no need to check any further as this entry as well
+		 * as following entries have never been used.
+		 */
+		if (rt->time == 0)
+			break;
+
+		/* Check if entry expired. */
+		if (time_before(rt->time, invalid_before))
+			continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */
+
+		/* Reported recently, check if race matches. */
+		if ((rt->frame1 == frame1 && rt->frame2 == frame2) ||
+		    (rt->frame1 == frame2 && rt->frame2 == frame1))
+			return true;
+	}
+
+	use_entry->time = jiffies;
+	use_entry->frame1 = frame1;
+	use_entry->frame2 = frame2;
+	return false;
+}
+
+/*
+ * Special rules to skip reporting.
+ */
+static bool
+skip_report(enum kcsan_value_change value_change, unsigned long top_frame)
+{
+	/* Should never get here if value_change==FALSE. */
+	WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE);
+
+	/*
+	 * The first call to skip_report always has value_change==TRUE, since we
+	 * cannot know the value written of an instrumented access. For the 2nd
+	 * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY:
+	 *
+	 * 1. read watchpoint, conflicting write (value_change==TRUE): report;
+	 * 2. read watchpoint, conflicting write (value_change==MAYBE): skip;
+	 * 3. write watchpoint, conflicting write (value_change==TRUE): report;
+	 * 4. write watchpoint, conflicting write (value_change==MAYBE): skip;
+	 * 5. write watchpoint, conflicting read (value_change==MAYBE): skip;
+	 * 6. write watchpoint, conflicting read (value_change==TRUE): report;
+	 *
+	 * Cases 1-4 are intuitive and expected; case 5 ensures we do not report
+	 * data races where the write may have rewritten the same value; case 6
+	 * is possible either if the size is larger than what we check value
+	 * changes for or the access type is KCSAN_ACCESS_ASSERT.
+	 */
+	if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) &&
+	    value_change == KCSAN_VALUE_CHANGE_MAYBE) {
+		/*
+		 * The access is a write, but the data value did not change.
+		 *
+		 * We opt-out of this filter for certain functions at request of
+		 * maintainers.
+		 */
+		char buf[64];
+		int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame);
+
+		if (!strnstr(buf, "rcu_", len) &&
+		    !strnstr(buf, "_rcu", len) &&
+		    !strnstr(buf, "_srcu", len))
+			return true;
+	}
+
+	return kcsan_skip_report_debugfs(top_frame);
+}
+
+static const char *get_access_type(int type)
+{
+	if (type & KCSAN_ACCESS_ASSERT) {
+		if (type & KCSAN_ACCESS_SCOPED) {
+			if (type & KCSAN_ACCESS_WRITE)
+				return "assert no accesses (scoped)";
+			else
+				return "assert no writes (scoped)";
+		} else {
+			if (type & KCSAN_ACCESS_WRITE)
+				return "assert no accesses";
+			else
+				return "assert no writes";
+		}
+	}
+
+	switch (type) {
+	case 0:
+		return "read";
+	case KCSAN_ACCESS_ATOMIC:
+		return "read (marked)";
+	case KCSAN_ACCESS_WRITE:
+		return "write";
+	case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
+		return "write (marked)";
+	case KCSAN_ACCESS_SCOPED:
+		return "read (scoped)";
+	case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC:
+		return "read (marked, scoped)";
+	case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE:
+		return "write (scoped)";
+	case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
+		return "write (marked, scoped)";
+	default:
+		BUG();
+	}
+}
+
+static const char *get_bug_type(int type)
+{
+	return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race";
+}
+
+/* Return thread description: in task or interrupt. */
+static const char *get_thread_desc(int task_id)
+{
+	if (task_id != -1) {
+		static char buf[32]; /* safe: protected by report_lock */
+
+		snprintf(buf, sizeof(buf), "task %i", task_id);
+		return buf;
+	}
+	return "interrupt";
+}
+
+/* Helper to skip KCSAN-related functions in stack-trace. */
+static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries)
+{
+	char buf[64];
+	char *cur;
+	int len, skip;
+
+	for (skip = 0; skip < num_entries; ++skip) {
+		len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]);
+
+		/* Never show tsan_* or {read,write}_once_size. */
+		if (strnstr(buf, "tsan_", len) ||
+		    strnstr(buf, "_once_size", len))
+			continue;
+
+		cur = strnstr(buf, "kcsan_", len);
+		if (cur) {
+			cur += sizeof("kcsan_") - 1;
+			if (strncmp(cur, "test", sizeof("test") - 1))
+				continue; /* KCSAN runtime function. */
+			/* KCSAN related test. */
+		}
+
+		/*
+		 * No match for runtime functions -- @skip entries to skip to
+		 * get to first frame of interest.
+		 */
+		break;
+	}
+
+	return skip;
+}
+
+/* Compares symbolized strings of addr1 and addr2. */
+static int sym_strcmp(void *addr1, void *addr2)
+{
+	char buf1[64];
+	char buf2[64];
+
+	snprintf(buf1, sizeof(buf1), "%pS", addr1);
+	snprintf(buf2, sizeof(buf2), "%pS", addr2);
+
+	return strncmp(buf1, buf2, sizeof(buf1));
+}
+
+static void print_verbose_info(struct task_struct *task)
+{
+	if (!task)
+		return;
+
+	pr_err("\n");
+	debug_show_held_locks(task);
+	print_irqtrace_events(task);
+}
+
+/*
+ * Returns true if a report was generated, false otherwise.
+ */
+static bool print_report(enum kcsan_value_change value_change,
+			 enum kcsan_report_type type,
+			 const struct access_info *ai,
+			 const struct other_info *other_info)
+{
+	unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 };
+	int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1);
+	int skipnr = get_stack_skipnr(stack_entries, num_stack_entries);
+	unsigned long this_frame = stack_entries[skipnr];
+	unsigned long other_frame = 0;
+	int other_skipnr = 0; /* silence uninit warnings */
+
+	/*
+	 * Must check report filter rules before starting to print.
+	 */
+	if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr]))
+		return false;
+
+	if (type == KCSAN_REPORT_RACE_SIGNAL) {
+		other_skipnr = get_stack_skipnr(other_info->stack_entries,
+						other_info->num_stack_entries);
+		other_frame = other_info->stack_entries[other_skipnr];
+
+		/* @value_change is only known for the other thread */
+		if (skip_report(value_change, other_frame))
+			return false;
+	}
+
+	if (rate_limit_report(this_frame, other_frame))
+		return false;
+
+	/* Print report header. */
+	pr_err("==================================================================\n");
+	switch (type) {
+	case KCSAN_REPORT_RACE_SIGNAL: {
+		int cmp;
+
+		/*
+		 * Order functions lexographically for consistent bug titles.
+		 * Do not print offset of functions to keep title short.
+		 */
+		cmp = sym_strcmp((void *)other_frame, (void *)this_frame);
+		pr_err("BUG: KCSAN: %s in %ps / %ps\n",
+		       get_bug_type(ai->access_type | other_info->ai.access_type),
+		       (void *)(cmp < 0 ? other_frame : this_frame),
+		       (void *)(cmp < 0 ? this_frame : other_frame));
+	} break;
+
+	case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
+		pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type),
+		       (void *)this_frame);
+		break;
+
+	default:
+		BUG();
+	}
+
+	pr_err("\n");
+
+	/* Print information about the racing accesses. */
+	switch (type) {
+	case KCSAN_REPORT_RACE_SIGNAL:
+		pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
+		       get_access_type(other_info->ai.access_type), other_info->ai.ptr,
+		       other_info->ai.size, get_thread_desc(other_info->ai.task_pid),
+		       other_info->ai.cpu_id);
+
+		/* Print the other thread's stack trace. */
+		stack_trace_print(other_info->stack_entries + other_skipnr,
+				  other_info->num_stack_entries - other_skipnr,
+				  0);
+
+		if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
+			print_verbose_info(other_info->task);
+
+		pr_err("\n");
+		pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
+		       get_access_type(ai->access_type), ai->ptr, ai->size,
+		       get_thread_desc(ai->task_pid), ai->cpu_id);
+		break;
+
+	case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
+		pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n",
+		       get_access_type(ai->access_type), ai->ptr, ai->size,
+		       get_thread_desc(ai->task_pid), ai->cpu_id);
+		break;
+
+	default:
+		BUG();
+	}
+	/* Print stack trace of this thread. */
+	stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr,
+			  0);
+
+	if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
+		print_verbose_info(current);
+
+	/* Print report footer. */
+	pr_err("\n");
+	pr_err("Reported by Kernel Concurrency Sanitizer on:\n");
+	dump_stack_print_info(KERN_DEFAULT);
+	pr_err("==================================================================\n");
+
+	return true;
+}
+
+static void release_report(unsigned long *flags, struct other_info *other_info)
+{
+	if (other_info)
+		/*
+		 * Use size to denote valid/invalid, since KCSAN entirely
+		 * ignores 0-sized accesses.
+		 */
+		other_info->ai.size = 0;
+
+	raw_spin_unlock_irqrestore(&report_lock, *flags);
+}
+
+/*
+ * Sets @other_info->task and awaits consumption of @other_info.
+ *
+ * Precondition: report_lock is held.
+ * Postcondition: report_lock is held.
+ */
+static void set_other_info_task_blocking(unsigned long *flags,
+					 const struct access_info *ai,
+					 struct other_info *other_info)
+{
+	/*
+	 * We may be instrumenting a code-path where current->state is already
+	 * something other than TASK_RUNNING.
+	 */
+	const bool is_running = current->state == TASK_RUNNING;
+	/*
+	 * To avoid deadlock in case we are in an interrupt here and this is a
+	 * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a
+	 * timeout to ensure this works in all contexts.
+	 *
+	 * Await approximately the worst case delay of the reporting thread (if
+	 * we are not interrupted).
+	 */
+	int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt);
+
+	other_info->task = current;
+	do {
+		if (is_running) {
+			/*
+			 * Let lockdep know the real task is sleeping, to print
+			 * the held locks (recall we turned lockdep off, so
+			 * locking/unlocking @report_lock won't be recorded).
+			 */
+			set_current_state(TASK_UNINTERRUPTIBLE);
+		}
+		raw_spin_unlock_irqrestore(&report_lock, *flags);
+		/*
+		 * We cannot call schedule() since we also cannot reliably
+		 * determine if sleeping here is permitted -- see in_atomic().
+		 */
+
+		udelay(1);
+		raw_spin_lock_irqsave(&report_lock, *flags);
+		if (timeout-- < 0) {
+			/*
+			 * Abort. Reset @other_info->task to NULL, since it
+			 * appears the other thread is still going to consume
+			 * it. It will result in no verbose info printed for
+			 * this task.
+			 */
+			other_info->task = NULL;
+			break;
+		}
+		/*
+		 * If invalid, or @ptr nor @current matches, then @other_info
+		 * has been consumed and we may continue. If not, retry.
+		 */
+	} while (other_info->ai.size && other_info->ai.ptr == ai->ptr &&
+		 other_info->task == current);
+	if (is_running)
+		set_current_state(TASK_RUNNING);
+}
+
+/* Populate @other_info; requires that the provided @other_info not in use. */
+static void prepare_report_producer(unsigned long *flags,
+				    const struct access_info *ai,
+				    struct other_info *other_info)
+{
+	raw_spin_lock_irqsave(&report_lock, *flags);
+
+	/*
+	 * The same @other_infos entry cannot be used concurrently, because
+	 * there is a one-to-one mapping to watchpoint slots (@watchpoints in
+	 * core.c), and a watchpoint is only released for reuse after reporting
+	 * is done by the consumer of @other_info. Therefore, it is impossible
+	 * for another concurrent prepare_report_producer() to set the same
+	 * @other_info, and are guaranteed exclusivity for the @other_infos
+	 * entry pointed to by @other_info.
+	 *
+	 * To check this property holds, size should never be non-zero here,
+	 * because every consumer of struct other_info resets size to 0 in
+	 * release_report().
+	 */
+	WARN_ON(other_info->ai.size);
+
+	other_info->ai = *ai;
+	other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2);
+
+	if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
+		set_other_info_task_blocking(flags, ai, other_info);
+
+	raw_spin_unlock_irqrestore(&report_lock, *flags);
+}
+
+/* Awaits producer to fill @other_info and then returns. */
+static bool prepare_report_consumer(unsigned long *flags,
+				    const struct access_info *ai,
+				    struct other_info *other_info)
+{
+
+	raw_spin_lock_irqsave(&report_lock, *flags);
+	while (!other_info->ai.size) { /* Await valid @other_info. */
+		raw_spin_unlock_irqrestore(&report_lock, *flags);
+		cpu_relax();
+		raw_spin_lock_irqsave(&report_lock, *flags);
+	}
+
+	/* Should always have a matching access based on watchpoint encoding. */
+	if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size,
+				     (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)))
+		goto discard;
+
+	if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size,
+			     (unsigned long)ai->ptr, ai->size)) {
+		/*
+		 * If the actual accesses to not match, this was a false
+		 * positive due to watchpoint encoding.
+		 */
+		kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES);
+		goto discard;
+	}
+
+	return true;
+
+discard:
+	release_report(flags, other_info);
+	return false;
+}
+
+/*
+ * Depending on the report type either sets @other_info and returns false, or
+ * awaits @other_info and returns true. If @other_info is not required for the
+ * report type, simply acquires @report_lock and returns true.
+ */
+static noinline bool prepare_report(unsigned long *flags,
+				    enum kcsan_report_type type,
+				    const struct access_info *ai,
+				    struct other_info *other_info)
+{
+	switch (type) {
+	case KCSAN_REPORT_CONSUMED_WATCHPOINT:
+		prepare_report_producer(flags, ai, other_info);
+		return false;
+	case KCSAN_REPORT_RACE_SIGNAL:
+		return prepare_report_consumer(flags, ai, other_info);
+	default:
+		/* @other_info not required; just acquire @report_lock. */
+		raw_spin_lock_irqsave(&report_lock, *flags);
+		return true;
+	}
+}
+
+void kcsan_report(const volatile void *ptr, size_t size, int access_type,
+		  enum kcsan_value_change value_change,
+		  enum kcsan_report_type type, int watchpoint_idx)
+{
+	unsigned long flags = 0;
+	const struct access_info ai = {
+		.ptr		= ptr,
+		.size		= size,
+		.access_type	= access_type,
+		.task_pid	= in_task() ? task_pid_nr(current) : -1,
+		.cpu_id		= raw_smp_processor_id()
+	};
+	struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
+					? NULL : &other_infos[watchpoint_idx];
+
+	kcsan_disable_current();
+	if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos)))
+		goto out;
+
+	/*
+	 * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
+	 * we do not turn off lockdep here; this could happen due to recursion
+	 * into lockdep via KCSAN if we detect a race in utilities used by
+	 * lockdep.
+	 */
+	lockdep_off();
+
+	if (prepare_report(&flags, type, &ai, other_info)) {
+		/*
+		 * Never report if value_change is FALSE, only if we it is
+		 * either TRUE or MAYBE. In case of MAYBE, further filtering may
+		 * be done once we know the full stack trace in print_report().
+		 */
+		bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE &&
+				print_report(value_change, type, &ai, other_info);
+
+		if (reported && panic_on_warn)
+			panic("panic_on_warn set ...\n");
+
+		release_report(&flags, other_info);
+	}
+
+	lockdep_on();
+out:
+	kcsan_enable_current();
+}
diff --git a/kernel/kcsan/test.c b/kernel/kcsan/test.c
new file mode 100644
index 000000000000..d26a052d3383
--- /dev/null
+++ b/kernel/kcsan/test.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/types.h>
+
+#include "encoding.h"
+
+#define ITERS_PER_TEST 2000
+
+/* Test requirements. */
+static bool test_requires(void)
+{
+	/* random should be initialized for the below tests */
+	return prandom_u32() + prandom_u32() != 0;
+}
+
+/*
+ * Test watchpoint encode and decode: check that encoding some access's info,
+ * and then subsequent decode preserves the access's info.
+ */
+static bool test_encode_decode(void)
+{
+	int i;
+
+	for (i = 0; i < ITERS_PER_TEST; ++i) {
+		size_t size = prandom_u32_max(MAX_ENCODABLE_SIZE) + 1;
+		bool is_write = !!prandom_u32_max(2);
+		unsigned long addr;
+
+		prandom_bytes(&addr, sizeof(addr));
+		if (WARN_ON(!check_encodable(addr, size)))
+			return false;
+
+		/* Encode and decode */
+		{
+			const long encoded_watchpoint =
+				encode_watchpoint(addr, size, is_write);
+			unsigned long verif_masked_addr;
+			size_t verif_size;
+			bool verif_is_write;
+
+			/* Check special watchpoints */
+			if (WARN_ON(decode_watchpoint(
+				    INVALID_WATCHPOINT, &verif_masked_addr,
+				    &verif_size, &verif_is_write)))
+				return false;
+			if (WARN_ON(decode_watchpoint(
+				    CONSUMED_WATCHPOINT, &verif_masked_addr,
+				    &verif_size, &verif_is_write)))
+				return false;
+
+			/* Check decoding watchpoint returns same data */
+			if (WARN_ON(!decode_watchpoint(
+				    encoded_watchpoint, &verif_masked_addr,
+				    &verif_size, &verif_is_write)))
+				return false;
+			if (WARN_ON(verif_masked_addr !=
+				    (addr & WATCHPOINT_ADDR_MASK)))
+				goto fail;
+			if (WARN_ON(verif_size != size))
+				goto fail;
+			if (WARN_ON(is_write != verif_is_write))
+				goto fail;
+
+			continue;
+fail:
+			pr_err("%s fail: %s %zu bytes @ %lx -> encoded: %lx -> %s %zu bytes @ %lx\n",
+			       __func__, is_write ? "write" : "read", size,
+			       addr, encoded_watchpoint,
+			       verif_is_write ? "write" : "read", verif_size,
+			       verif_masked_addr);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Test access matching function. */
+static bool test_matching_access(void)
+{
+	if (WARN_ON(!matching_access(10, 1, 10, 1)))
+		return false;
+	if (WARN_ON(!matching_access(10, 2, 11, 1)))
+		return false;
+	if (WARN_ON(!matching_access(10, 1, 9, 2)))
+		return false;
+	if (WARN_ON(matching_access(10, 1, 11, 1)))
+		return false;
+	if (WARN_ON(matching_access(9, 1, 10, 1)))
+		return false;
+
+	/*
+	 * An access of size 0 could match another access, as demonstrated here.
+	 * Rather than add more comparisons to 'matching_access()', which would
+	 * end up in the fast-path for *all* checks, check_access() simply
+	 * returns for all accesses of size 0.
+	 */
+	if (WARN_ON(!matching_access(8, 8, 12, 0)))
+		return false;
+
+	return true;
+}
+
+static int __init kcsan_selftest(void)
+{
+	int passed = 0;
+	int total = 0;
+
+#define RUN_TEST(do_test)                                                      \
+	do {                                                                   \
+		++total;                                                       \
+		if (do_test())                                                 \
+			++passed;                                              \
+		else                                                           \
+			pr_err("KCSAN selftest: " #do_test " failed");         \
+	} while (0)
+
+	RUN_TEST(test_requires);
+	RUN_TEST(test_encode_decode);
+	RUN_TEST(test_matching_access);
+
+	pr_info("KCSAN selftest: %d/%d tests passed\n", passed, total);
+	if (passed != total)
+		panic("KCSAN selftests failed");
+	return 0;
+}
+postcore_initcall(kcsan_selftest);
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 45452facff3b..6d11cfb9b41f 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -5,6 +5,9 @@ KCOV_INSTRUMENT		:= n
 
 obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
 
+# Avoid recursion lockdep -> KCSAN -> ... -> lockdep.
+KCSAN_SANITIZE_lockdep.o := n
+
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_lockdep_proc.o = $(CC_FLAGS_FTRACE)
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 21fb5a5662b5..5fc9c9b70862 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -7,6 +7,12 @@ endif
 # that is not a function of syscall inputs. E.g. involuntary context switches.
 KCOV_INSTRUMENT := n
 
+# There are numerous data races here, however, most of them are due to plain accesses.
+# This would make it even harder for syzbot to find reproducers, because these
+# bugs trigger without specific input. Disable by default, but should re-enable
+# eventually.
+KCSAN_SANITIZE := n
+
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
 # needed for x86 only.  Why this used to be enabled for all architectures is beyond
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 1d8aaa546412..6575bb0a0434 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -6,6 +6,9 @@ ifdef CONFIG_FUNCTION_TRACER
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
 
+# Avoid recursion due to instrumentation.
+KCSAN_SANITIZE := n
+
 ifdef CONFIG_FTRACE_SELFTEST
 # selftest needs instrumentation
 CFLAGS_trace_selftest_dynamic.o = $(CC_FLAGS_FTRACE)
author	Thomas Gleixner <tglx@linutronix.de>	2020-06-11 20:02:46 +0200
committer	Thomas Gleixner <tglx@linutronix.de>	2020-06-11 20:02:46 +0200
commit	37d1a04b13a6d2fec91a6813fc034947a27db034 (patch)
tree	c6a8d7d0df96a6eb1ddb53a12885761cb72e0e92 /kernel
parent	37f8173dd84936ea78000ed1cad24f8b18d48ebb (diff)
parent	97a9474aeb789183a1d0712e66a4283860279ac9 (diff)