summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/compiler-clang.h11
-rw-r--r--include/linux/compiler-gcc.h6
-rw-r--r--include/linux/compiler.h53
-rw-r--r--include/linux/compiler_types.h32
-rw-r--r--include/linux/cpu_cooling.h12
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/idle_inject.h4
-rw-r--r--include/linux/instrumented.h109
-rw-r--r--include/linux/kcsan-checks.h430
-rw-r--r--include/linux/kcsan.h59
-rw-r--r--include/linux/key.h33
-rw-r--r--include/linux/kthread.h6
-rw-r--r--include/linux/kvm_host.h8
-rw-r--r--include/linux/lsm_audit.h1
-rw-r--r--include/linux/lsm_hook_defs.h9
-rw-r--r--include/linux/lsm_hooks.h14
-rw-r--r--include/linux/mmu_context.h5
-rw-r--r--include/linux/mmzone.h14
-rw-r--r--include/linux/nfs4.h4
-rw-r--r--include/linux/nfs_fs.h1
-rw-r--r--include/linux/nfs_xdr.h2
-rw-r--r--include/linux/pipe_fs_i.h27
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/security.h30
-rw-r--r--include/linux/seqlock.h51
-rw-r--r--include/linux/stacktrace.h2
-rw-r--r--include/linux/sunrpc/auth.h5
-rw-r--r--include/linux/sunrpc/gss_api.h1
-rw-r--r--include/linux/sunrpc/svc.h2
-rw-r--r--include/linux/sunrpc/svc_rdma.h6
-rw-r--r--include/linux/sunrpc/svc_xprt.h6
-rw-r--r--include/linux/sunrpc/svcauth_gss.h3
-rw-r--r--include/linux/sunrpc/svcsock.h6
-rw-r--r--include/linux/thermal.h84
-rw-r--r--include/linux/uaccess.h14
-rw-r--r--include/linux/watch_queue.h127
37 files changed, 1018 insertions, 165 deletions
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 790c0c6b8552..ee37256ec8bd 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -16,7 +16,7 @@
#define KASAN_ABI_VERSION 5
#if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer)
-/* emulate gcc's __SANITIZE_ADDRESS__ flag */
+/* Emulate GCC's __SANITIZE_ADDRESS__ flag */
#define __SANITIZE_ADDRESS__
#define __no_sanitize_address \
__attribute__((no_sanitize("address", "hwaddress")))
@@ -24,6 +24,15 @@
#define __no_sanitize_address
#endif
+#if __has_feature(thread_sanitizer)
+/* emulate gcc's __SANITIZE_THREAD__ flag */
+#define __SANITIZE_THREAD__
+#define __no_sanitize_thread \
+ __attribute__((no_sanitize("thread")))
+#else
+#define __no_sanitize_thread
+#endif
+
/*
* Not all versions of clang implement the the type-generic versions
* of the builtin overflow checkers. Fortunately, clang implements
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index e2f725273261..7dd4e0349ef3 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -144,6 +144,12 @@
#define __no_sanitize_address
#endif
+#if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__)
+#define __no_sanitize_thread __attribute__((no_sanitize_thread))
+#else
+#define __no_sanitize_thread
+#endif
+
#if GCC_VERSION >= 50100
#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
#endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 33d3a2e5abab..30827f82ad62 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -250,6 +250,27 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
*/
#include <asm/barrier.h>
#include <linux/kasan-checks.h>
+#include <linux/kcsan-checks.h>
+
+/**
+ * data_race - mark an expression as containing intentional data races
+ *
+ * This data_race() macro is useful for situations in which data races
+ * should be forgiven. One example is diagnostic code that accesses
+ * shared variables but is not a part of the core synchronization design.
+ *
+ * This macro *does not* affect normal code generation, but is a hint
+ * to tooling that data races here are to be ignored.
+ */
+#define data_race(expr) \
+({ \
+ __unqual_scalar_typeof(({ expr; })) __v = ({ \
+ __kcsan_disable_current(); \
+ expr; \
+ }); \
+ __kcsan_enable_current(); \
+ __v; \
+})
/*
* Use __READ_ONCE() instead of READ_ONCE() if you do not require any
@@ -271,30 +292,18 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
__READ_ONCE_SCALAR(x); \
})
-#define __WRITE_ONCE(x, val) \
-do { \
- *(volatile typeof(x) *)&(x) = (val); \
+#define __WRITE_ONCE(x, val) \
+do { \
+ *(volatile typeof(x) *)&(x) = (val); \
} while (0)
-#define WRITE_ONCE(x, val) \
-do { \
- compiletime_assert_rwonce_type(x); \
- __WRITE_ONCE(x, val); \
+#define WRITE_ONCE(x, val) \
+do { \
+ compiletime_assert_rwonce_type(x); \
+ __WRITE_ONCE(x, val); \
} while (0)
-#ifdef CONFIG_KASAN
-/*
- * We can't declare function 'inline' because __no_sanitize_address conflicts
- * with inlining. Attempt to inline it may cause a build failure.
- * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
- * '__maybe_unused' allows us to avoid defined-but-not-used warnings.
- */
-# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused
-#else
-# define __no_kasan_or_inline __always_inline
-#endif
-
-static __no_kasan_or_inline
+static __no_sanitize_or_inline
unsigned long __read_once_word_nocheck(const void *addr)
{
return __READ_ONCE(*(unsigned long *)addr);
@@ -302,8 +311,8 @@ unsigned long __read_once_word_nocheck(const void *addr)
/*
* Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need to load a
- * word from memory atomically but without telling KASAN. This is usually
- * used by unwinding code when walking the stack of a running process.
+ * word from memory atomically but without telling KASAN/KCSAN. This is
+ * usually used by unwinding code when walking the stack of a running process.
*/
#define READ_ONCE_NOCHECK(x) \
({ \
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index d4e1956f903f..21aed0981edf 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -171,6 +171,38 @@ struct ftrace_likely_data {
*/
#define noinline_for_stack noinline
+/*
+ * Sanitizer helper attributes: Because using __always_inline and
+ * __no_sanitize_* conflict, provide helper attributes that will either expand
+ * to __no_sanitize_* in compilation units where instrumentation is enabled
+ * (__SANITIZE_*__), or __always_inline in compilation units without
+ * instrumentation (__SANITIZE_*__ undefined).
+ */
+#ifdef __SANITIZE_ADDRESS__
+/*
+ * We can't declare function 'inline' because __no_sanitize_address conflicts
+ * with inlining. Attempt to inline it may cause a build failure.
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
+ * '__maybe_unused' allows us to avoid defined-but-not-used warnings.
+ */
+# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused
+# define __no_sanitize_or_inline __no_kasan_or_inline
+#else
+# define __no_kasan_or_inline __always_inline
+#endif
+
+#define __no_kcsan __no_sanitize_thread
+#ifdef __SANITIZE_THREAD__
+# define __no_kcsan_or_inline __no_kcsan notrace __maybe_unused
+# define __no_sanitize_or_inline __no_kcsan_or_inline
+#else
+# define __no_kcsan_or_inline __always_inline
+#endif
+
+#ifndef __no_sanitize_or_inline
+#define __no_sanitize_or_inline __always_inline
+#endif
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index 65501d8f9778..a3bdc8a98f2c 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -63,18 +63,10 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy)
struct cpuidle_driver;
#ifdef CONFIG_CPU_IDLE_THERMAL
-int cpuidle_cooling_register(struct cpuidle_driver *drv);
-int cpuidle_of_cooling_register(struct device_node *np,
- struct cpuidle_driver *drv);
+void cpuidle_cooling_register(struct cpuidle_driver *drv);
#else /* CONFIG_CPU_IDLE_THERMAL */
-static inline int cpuidle_cooling_register(struct cpuidle_driver *drv)
+static inline void cpuidle_cooling_register(struct cpuidle_driver *drv)
{
- return 0;
-}
-static inline int cpuidle_of_cooling_register(struct device_node *np,
- struct cpuidle_driver *drv)
-{
- return 0;
}
#endif /* CONFIG_CPU_IDLE_THERMAL */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8377afef8806..191772d4a4d7 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -102,6 +102,7 @@ enum cpuhp_state {
CPUHP_AP_IRQ_ARMADA_XP_STARTING,
CPUHP_AP_IRQ_BCM2836_STARTING,
CPUHP_AP_IRQ_MIPS_GIC_STARTING,
+ CPUHP_AP_IRQ_RISCV_STARTING,
CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
CPUHP_AP_ARM_MVEBU_COHERENCY,
CPUHP_AP_MICROCODE_LOADER,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 19ef6c88c152..8e1f8f93108f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1048,6 +1048,7 @@ struct lock_manager_operations {
bool (*lm_break)(struct file_lock *);
int (*lm_change)(struct file_lock *, int, struct list_head *);
void (*lm_setup)(struct file_lock *, void **);
+ bool (*lm_breaker_owns_lease)(struct file_lock *);
};
struct lock_manager {
diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h
index a445cd1a36c5..91a8612b8bf9 100644
--- a/include/linux/idle_inject.h
+++ b/include/linux/idle_inject.h
@@ -26,4 +26,8 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev,
void idle_inject_get_duration(struct idle_inject_device *ii_dev,
unsigned int *run_duration_us,
unsigned int *idle_duration_us);
+
+void idle_inject_set_latency(struct idle_inject_device *ii_dev,
+ unsigned int latency_ns);
+
#endif /* __IDLE_INJECT_H__ */
diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h
new file mode 100644
index 000000000000..43e6ea591975
--- /dev/null
+++ b/include/linux/instrumented.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * This header provides generic wrappers for memory access instrumentation that
+ * the compiler cannot emit for: KASAN, KCSAN.
+ */
+#ifndef _LINUX_INSTRUMENTED_H
+#define _LINUX_INSTRUMENTED_H
+
+#include <linux/compiler.h>
+#include <linux/kasan-checks.h>
+#include <linux/kcsan-checks.h>
+#include <linux/types.h>
+
+/**
+ * instrument_read - instrument regular read access
+ *
+ * Instrument a regular read access. The instrumentation should be inserted
+ * before the actual read happens.
+ *
+ * @ptr address of access
+ * @size size of access
+ */
+static __always_inline void instrument_read(const volatile void *v, size_t size)
+{
+ kasan_check_read(v, size);
+ kcsan_check_read(v, size);
+}
+
+/**
+ * instrument_write - instrument regular write access
+ *
+ * Instrument a regular write access. The instrumentation should be inserted
+ * before the actual write happens.
+ *
+ * @ptr address of access
+ * @size size of access
+ */
+static __always_inline void instrument_write(const volatile void *v, size_t size)
+{
+ kasan_check_write(v, size);
+ kcsan_check_write(v, size);
+}
+
+/**
+ * instrument_atomic_read - instrument atomic read access
+ *
+ * Instrument an atomic read access. The instrumentation should be inserted
+ * before the actual read happens.
+ *
+ * @ptr address of access
+ * @size size of access
+ */
+static __always_inline void instrument_atomic_read(const volatile void *v, size_t size)
+{
+ kasan_check_read(v, size);
+ kcsan_check_atomic_read(v, size);
+}
+
+/**
+ * instrument_atomic_write - instrument atomic write access
+ *
+ * Instrument an atomic write access. The instrumentation should be inserted
+ * before the actual write happens.
+ *
+ * @ptr address of access
+ * @size size of access
+ */
+static __always_inline void instrument_atomic_write(const volatile void *v, size_t size)
+{
+ kasan_check_write(v, size);
+ kcsan_check_atomic_write(v, size);
+}
+
+/**
+ * instrument_copy_to_user - instrument reads of copy_to_user
+ *
+ * Instrument reads from kernel memory, that are due to copy_to_user (and
+ * variants). The instrumentation must be inserted before the accesses.
+ *
+ * @to destination address
+ * @from source address
+ * @n number of bytes to copy
+ */
+static __always_inline void
+instrument_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ kasan_check_read(from, n);
+ kcsan_check_read(from, n);
+}
+
+/**
+ * instrument_copy_from_user - instrument writes of copy_from_user
+ *
+ * Instrument writes to kernel memory, that are due to copy_from_user (and
+ * variants). The instrumentation should be inserted before the accesses.
+ *
+ * @to destination address
+ * @from source address
+ * @n number of bytes to copy
+ */
+static __always_inline void
+instrument_copy_from_user(const void *to, const void __user *from, unsigned long n)
+{
+ kasan_check_write(to, n);
+ kcsan_check_write(to, n);
+}
+
+#endif /* _LINUX_INSTRUMENTED_H */
diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h
new file mode 100644
index 000000000000..7b0b9c44f5f3
--- /dev/null
+++ b/include/linux/kcsan-checks.h
@@ -0,0 +1,430 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_KCSAN_CHECKS_H
+#define _LINUX_KCSAN_CHECKS_H
+
+/* Note: Only include what is already included by compiler.h. */
+#include <linux/compiler_attributes.h>
+#include <linux/types.h>
+
+/*
+ * ACCESS TYPE MODIFIERS
+ *
+ * <none>: normal read access;
+ * WRITE : write access;
+ * ATOMIC: access is atomic;
+ * ASSERT: access is not a regular access, but an assertion;
+ * SCOPED: access is a scoped access;
+ */
+#define KCSAN_ACCESS_WRITE 0x1
+#define KCSAN_ACCESS_ATOMIC 0x2
+#define KCSAN_ACCESS_ASSERT 0x4
+#define KCSAN_ACCESS_SCOPED 0x8
+
+/*
+ * __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used
+ * even in compilation units that selectively disable KCSAN, but must use KCSAN
+ * to validate access to an address. Never use these in header files!
+ */
+#ifdef CONFIG_KCSAN
+/**
+ * __kcsan_check_access - check generic access for races
+ *
+ * @ptr: address of access
+ * @size: size of access
+ * @type: access type modifier
+ */
+void __kcsan_check_access(const volatile void *ptr, size_t size, int type);
+
+/**
+ * kcsan_disable_current - disable KCSAN for the current context
+ *
+ * Supports nesting.
+ */
+void kcsan_disable_current(void);
+
+/**
+ * kcsan_enable_current - re-enable KCSAN for the current context
+ *
+ * Supports nesting.
+ */
+void kcsan_enable_current(void);
+void kcsan_enable_current_nowarn(void); /* Safe in uaccess regions. */
+
+/**
+ * kcsan_nestable_atomic_begin - begin nestable atomic region
+ *
+ * Accesses within the atomic region may appear to race with other accesses but
+ * should be considered atomic.
+ */
+void kcsan_nestable_atomic_begin(void);
+
+/**
+ * kcsan_nestable_atomic_end - end nestable atomic region
+ */
+void kcsan_nestable_atomic_end(void);
+
+/**
+ * kcsan_flat_atomic_begin - begin flat atomic region
+ *
+ * Accesses within the atomic region may appear to race with other accesses but
+ * should be considered atomic.
+ */
+void kcsan_flat_atomic_begin(void);
+
+/**
+ * kcsan_flat_atomic_end - end flat atomic region
+ */
+void kcsan_flat_atomic_end(void);
+
+/**
+ * kcsan_atomic_next - consider following accesses as atomic
+ *
+ * Force treating the next n memory accesses for the current context as atomic
+ * operations.
+ *
+ * @n: number of following memory accesses to treat as atomic.
+ */
+void kcsan_atomic_next(int n);
+
+/**
+ * kcsan_set_access_mask - set access mask
+ *
+ * Set the access mask for all accesses for the current context if non-zero.
+ * Only value changes to bits set in the mask will be reported.
+ *
+ * @mask: bitmask
+ */
+void kcsan_set_access_mask(unsigned long mask);
+
+/* Scoped access information. */
+struct kcsan_scoped_access {
+ struct list_head list;
+ const volatile void *ptr;
+ size_t size;
+ int type;
+};
+/*
+ * Automatically call kcsan_end_scoped_access() when kcsan_scoped_access goes
+ * out of scope; relies on attribute "cleanup", which is supported by all
+ * compilers that support KCSAN.
+ */
+#define __kcsan_cleanup_scoped \
+ __maybe_unused __attribute__((__cleanup__(kcsan_end_scoped_access)))
+
+/**
+ * kcsan_begin_scoped_access - begin scoped access
+ *
+ * Begin scoped access and initialize @sa, which will cause KCSAN to
+ * continuously check the memory range in the current thread until
+ * kcsan_end_scoped_access() is called for @sa.
+ *
+ * Scoped accesses are implemented by appending @sa to an internal list for the
+ * current execution context, and then checked on every call into the KCSAN
+ * runtime.
+ *
+ * @ptr: address of access
+ * @size: size of access
+ * @type: access type modifier
+ * @sa: struct kcsan_scoped_access to use for the scope of the access
+ */
+struct kcsan_scoped_access *
+kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type,
+ struct kcsan_scoped_access *sa);
+
+/**
+ * kcsan_end_scoped_access - end scoped access
+ *
+ * End a scoped access, which will stop KCSAN checking the memory range.
+ * Requires that kcsan_begin_scoped_access() was previously called once for @sa.
+ *
+ * @sa: a previously initialized struct kcsan_scoped_access
+ */
+void kcsan_end_scoped_access(struct kcsan_scoped_access *sa);
+
+
+#else /* CONFIG_KCSAN */
+
+static inline void __kcsan_check_access(const volatile void *ptr, size_t size,
+ int type) { }
+
+static inline void kcsan_disable_current(void) { }
+static inline void kcsan_enable_current(void) { }
+static inline void kcsan_enable_current_nowarn(void) { }
+static inline void kcsan_nestable_atomic_begin(void) { }
+static inline void kcsan_nestable_atomic_end(void) { }
+static inline void kcsan_flat_atomic_begin(void) { }
+static inline void kcsan_flat_atomic_end(void) { }
+static inline void kcsan_atomic_next(int n) { }
+static inline void kcsan_set_access_mask(unsigned long mask) { }
+
+struct kcsan_scoped_access { };
+#define __kcsan_cleanup_scoped __maybe_unused
+static inline struct kcsan_scoped_access *
+kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type,
+ struct kcsan_scoped_access *sa) { return sa; }
+static inline void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) { }
+
+#endif /* CONFIG_KCSAN */
+
+#ifdef __SANITIZE_THREAD__
+/*
+ * Only calls into the runtime when the particular compilation unit has KCSAN
+ * instrumentation enabled. May be used in header files.
+ */
+#define kcsan_check_access __kcsan_check_access
+
+/*
+ * Only use these to disable KCSAN for accesses in the current compilation unit;
+ * calls into libraries may still perform KCSAN checks.
+ */
+#define __kcsan_disable_current kcsan_disable_current
+#define __kcsan_enable_current kcsan_enable_current_nowarn
+#else
+static inline void kcsan_check_access(const volatile void *ptr, size_t size,
+ int type) { }
+static inline void __kcsan_enable_current(void) { }
+static inline void __kcsan_disable_current(void) { }
+#endif
+
+/**
+ * __kcsan_check_read - check regular read access for races
+ *
+ * @ptr: address of access
+ * @size: size of access
+ */
+#define __kcsan_check_read(ptr, size) __kcsan_check_access(ptr, size, 0)
+
+/**
+ * __kcsan_check_write - check regular write access for races
+ *
+ * @ptr: address of access
+ * @size: size of access
+ */
+#define __kcsan_check_write(ptr, size) \
+ __kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE)
+
+/**
+ * kcsan_check_read - check regular read access for races
+ *
+ * @ptr: address of access
+ * @size: size of access
+ */
+#define kcsan_check_read(ptr, size) kcsan_check_access(ptr, size, 0)
+
+/**
+ * kcsan_check_write - check regular write access for races
+ *
+ * @ptr: address of access
+ * @size: size of access
+ */
+#define kcsan_check_write(ptr, size) \
+ kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE)
+
+/*
+ * Check for atomic accesses: if atomic accesses are not ignored, this simply
+ * aliases to kcsan_check_access(), otherwise becomes a no-op.
+ */
+#ifdef CONFIG_KCSAN_IGNORE_ATOMICS
+#define kcsan_check_atomic_read(...) do { } while (0)
+#define kcsan_check_atomic_write(...) do { } while (0)
+#else
+#define kcsan_check_atomic_read(ptr, size) \
+ kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC)
+#define kcsan_check_atomic_write(ptr, size) \
+ kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC | KCSAN_ACCESS_WRITE)
+#endif
+
+/**
+ * ASSERT_EXCLUSIVE_WRITER - assert no concurrent writes to @var
+ *
+ * Assert that there are no concurrent writes to @var; other readers are
+ * allowed. This assertion can be used to specify properties of concurrent code,
+ * where violation cannot be detected as a normal data race.
+ *
+ * For example, if we only have a single writer, but multiple concurrent
+ * readers, to avoid data races, all these accesses must be marked; even
+ * concurrent marked writes racing with the single writer are bugs.
+ * Unfortunately, due to being marked, they are no longer data races. For cases
+ * like these, we can use the macro as follows:
+ *
+ * .. code-block:: c
+ *
+ * void writer(void) {
+ * spin_lock(&update_foo_lock);
+ * ASSERT_EXCLUSIVE_WRITER(shared_foo);
+ * WRITE_ONCE(shared_foo, ...);
+ * spin_unlock(&update_foo_lock);
+ * }
+ * void reader(void) {
+ * // update_foo_lock does not need to be held!
+ * ... = READ_ONCE(shared_foo);
+ * }
+ *
+ * Note: ASSERT_EXCLUSIVE_WRITER_SCOPED(), if applicable, performs more thorough
+ * checking if a clear scope where no concurrent writes are expected exists.
+ *
+ * @var: variable to assert on
+ */
+#define ASSERT_EXCLUSIVE_WRITER(var) \
+ __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_ASSERT)
+
+/*
+ * Helper macros for implementation of for ASSERT_EXCLUSIVE_*_SCOPED(). @id is
+ * expected to be unique for the scope in which instances of kcsan_scoped_access
+ * are declared.
+ */
+#define __kcsan_scoped_name(c, suffix) __kcsan_scoped_##c##suffix
+#define __ASSERT_EXCLUSIVE_SCOPED(var, type, id) \
+ struct kcsan_scoped_access __kcsan_scoped_name(id, _) \
+ __kcsan_cleanup_scoped; \
+ struct kcsan_scoped_access *__kcsan_scoped_name(id, _dummy_p) \
+ __maybe_unused = kcsan_begin_scoped_access( \
+ &(var), sizeof(var), KCSAN_ACCESS_SCOPED | (type), \
+ &__kcsan_scoped_name(id, _))
+
+/**
+ * ASSERT_EXCLUSIVE_WRITER_SCOPED - assert no concurrent writes to @var in scope
+ *
+ * Scoped variant of ASSERT_EXCLUSIVE_WRITER().
+ *
+ * Assert that there are no concurrent writes to @var for the duration of the
+ * scope in which it is introduced. This provides a better way to fully cover
+ * the enclosing scope, compared to multiple ASSERT_EXCLUSIVE_WRITER(), and
+ * increases the likelihood for KCSAN to detect racing accesses.
+ *
+ * For example, it allows finding race-condition bugs that only occur due to
+ * state changes within the scope itself:
+ *
+ * .. code-block:: c
+ *
+ * void writer(void) {
+ * spin_lock(&update_foo_lock);
+ * {
+ * ASSERT_EXCLUSIVE_WRITER_SCOPED(shared_foo);
+ * WRITE_ONCE(shared_foo, 42);
+ * ...
+ * // shared_foo should still be 42 here!
+ * }
+ * spin_unlock(&update_foo_lock);
+ * }
+ * void buggy(void) {
+ * if (READ_ONCE(shared_foo) == 42)
+ * WRITE_ONCE(shared_foo, 1); // bug!
+ * }
+ *
+ * @var: variable to assert on
+ */
+#define ASSERT_EXCLUSIVE_WRITER_SCOPED(var) \
+ __ASSERT_EXCLUSIVE_SCOPED(var, KCSAN_ACCESS_ASSERT, __COUNTER__)
+
+/**
+ * ASSERT_EXCLUSIVE_ACCESS - assert no concurrent accesses to @var
+ *
+ * Assert that there are no concurrent accesses to @var (no readers nor
+ * writers). This assertion can be used to specify properties of concurrent
+ * code, where violation cannot be detected as a normal data race.
+ *
+ * For example, where exclusive access is expected after determining no other
+ * users of an object are left, but the object is not actually freed. We can
+ * check that this property actually holds as follows:
+ *
+ * .. code-block:: c
+ *
+ * if (refcount_dec_and_test(&obj->refcnt)) {
+ * ASSERT_EXCLUSIVE_ACCESS(*obj);
+ * do_some_cleanup(obj);
+ * release_for_reuse(obj);
+ * }
+ *
+ * Note: ASSERT_EXCLUSIVE_ACCESS_SCOPED(), if applicable, performs more thorough
+ * checking if a clear scope where no concurrent accesses are expected exists.
+ *
+ * Note: For cases where the object is freed, `KASAN <kasan.html>`_ is a better
+ * fit to detect use-after-free bugs.
+ *
+ * @var: variable to assert on
+ */
+#define ASSERT_EXCLUSIVE_ACCESS(var) \
+ __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT)
+
+/**
+ * ASSERT_EXCLUSIVE_ACCESS_SCOPED - assert no concurrent accesses to @var in scope
+ *
+ * Scoped variant of ASSERT_EXCLUSIVE_ACCESS().
+ *
+ * Assert that there are no concurrent accesses to @var (no readers nor writers)
+ * for the entire duration of the scope in which it is introduced. This provides
+ * a better way to fully cover the enclosing scope, compared to multiple
+ * ASSERT_EXCLUSIVE_ACCESS(), and increases the likelihood for KCSAN to detect
+ * racing accesses.
+ *
+ * @var: variable to assert on
+ */
+#define ASSERT_EXCLUSIVE_ACCESS_SCOPED(var) \
+ __ASSERT_EXCLUSIVE_SCOPED(var, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT, __COUNTER__)
+
+/**
+ * ASSERT_EXCLUSIVE_BITS - assert no concurrent writes to subset of bits in @var
+ *
+ * Bit-granular variant of ASSERT_EXCLUSIVE_WRITER().
+ *
+ * Assert that there are no concurrent writes to a subset of bits in @var;
+ * concurrent readers are permitted. This assertion captures more detailed
+ * bit-level properties, compared to the other (word granularity) assertions.
+ * Only the bits set in @mask are checked for concurrent modifications, while
+ * ignoring the remaining bits, i.e. concurrent writes (or reads) to ~mask bits
+ * are ignored.
+ *
+ * Use this for variables, where some bits must not be modified concurrently,
+ * yet other bits are expected to be modified concurrently.
+ *
+ * For example, variables where, after initialization, some bits are read-only,
+ * but other bits may still be modified concurrently. A reader may wish to
+ * assert that this is true as follows:
+ *
+ * .. code-block:: c
+ *
+ * ASSERT_EXCLUSIVE_BITS(flags, READ_ONLY_MASK);
+ * foo = (READ_ONCE(flags) & READ_ONLY_MASK) >> READ_ONLY_SHIFT;
+ *
+ * Note: The access that immediately follows ASSERT_EXCLUSIVE_BITS() is assumed
+ * to access the masked bits only, and KCSAN optimistically assumes it is
+ * therefore safe, even in the presence of data races, and marking it with
+ * READ_ONCE() is optional from KCSAN's point-of-view. We caution, however, that
+ * it may still be advisable to do so, since we cannot reason about all compiler
+ * optimizations when it comes to bit manipulations (on the reader and writer
+ * side). If you are sure nothing can go wrong, we can write the above simply
+ * as:
+ *
+ * .. code-block:: c
+ *
+ * ASSERT_EXCLUSIVE_BITS(flags, READ_ONLY_MASK);
+ * foo = (flags & READ_ONLY_MASK) >> READ_ONLY_SHIFT;
+ *
+ * Another example, where this may be used, is when certain bits of @var may
+ * only be modified when holding the appropriate lock, but other bits may still
+ * be modified concurrently. Writers, where other bits may change concurrently,
+ * could use the assertion as follows:
+ *
+ * .. code-block:: c
+ *
+ * spin_lock(&foo_lock);
+ * ASSERT_EXCLUSIVE_BITS(flags, FOO_MASK);
+ * old_flags = flags;
+ * new_flags = (old_flags & ~FOO_MASK) | (new_foo << FOO_SHIFT);
+ * if (cmpxchg(&flags, old_flags, new_flags) != old_flags) { ... }
+ * spin_unlock(&foo_lock);
+ *
+ * @var: variable to assert on
+ * @mask: only check for modifications to bits set in @mask
+ */
+#define ASSERT_EXCLUSIVE_BITS(var, mask) \
+ do { \
+ kcsan_set_access_mask(mask); \
+ __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_ASSERT);\
+ kcsan_set_access_mask(0); \
+ kcsan_atomic_next(1); \
+ } while (0)
+
+#endif /* _LINUX_KCSAN_CHECKS_H */
diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h
new file mode 100644
index 000000000000..53340d8789f9
--- /dev/null
+++ b/include/linux/kcsan.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_KCSAN_H
+#define _LINUX_KCSAN_H
+
+#include <linux/kcsan-checks.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_KCSAN
+
+/*
+ * Context for each thread of execution: for tasks, this is stored in
+ * task_struct, and interrupts access internal per-CPU storage.
+ */
+struct kcsan_ctx {
+ int disable_count; /* disable counter */
+ int atomic_next; /* number of following atomic ops */
+
+ /*
+ * We distinguish between: (a) nestable atomic regions that may contain
+ * other nestable regions; and (b) flat atomic regions that do not keep
+ * track of nesting. Both (a) and (b) are entirely independent of each
+ * other, and a flat region may be started in a nestable region or
+ * vice-versa.
+ *
+ * This is required because, for example, in the annotations for
+ * seqlocks, we declare seqlock writer critical sections as (a) nestable
+ * atomic regions, but reader critical sections as (b) flat atomic
+ * regions, but have encountered cases where seqlock reader critical
+ * sections are contained within writer critical sections (the opposite
+ * may be possible, too).
+ *
+ * To support these cases, we independently track the depth of nesting
+ * for (a), and whether the leaf level is flat for (b).
+ */
+ int atomic_nest_count;
+ bool in_flat_atomic;
+
+ /*
+ * Access mask for all accesses if non-zero.
+ */
+ unsigned long access_mask;
+
+ /* List of scoped accesses. */
+ struct list_head scoped_accesses;
+};
+
+/**
+ * kcsan_init - initialize KCSAN runtime
+ */
+void kcsan_init(void);
+
+#else /* CONFIG_KCSAN */
+
+static inline void kcsan_init(void) { }
+
+#endif /* CONFIG_KCSAN */
+
+#endif /* _LINUX_KCSAN_H */
diff --git a/include/linux/key.h b/include/linux/key.h
index 6cf8e71cf8b7..0f2e24f13c2b 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -71,6 +71,23 @@ struct net;
#define KEY_PERM_UNDEF 0xffffffff
+/*
+ * The permissions required on a key that we're looking up.
+ */
+enum key_need_perm {
+ KEY_NEED_UNSPECIFIED, /* Needed permission unspecified */
+ KEY_NEED_VIEW, /* Require permission to view attributes */
+ KEY_NEED_READ, /* Require permission to read content */
+ KEY_NEED_WRITE, /* Require permission to update / modify */
+ KEY_NEED_SEARCH, /* Require permission to search (keyring) or find (key) */
+ KEY_NEED_LINK, /* Require permission to link */
+ KEY_NEED_SETATTR, /* Require permission to change attributes */
+ KEY_NEED_UNLINK, /* Require permission to unlink key */
+ KEY_SYSADMIN_OVERRIDE, /* Special: override by CAP_SYS_ADMIN */
+ KEY_AUTHTOKEN_OVERRIDE, /* Special: override by possession of auth token */
+ KEY_DEFER_PERM_CHECK, /* Special: permission check is deferred */
+};
+
struct seq_file;
struct user_struct;
struct signal_struct;
@@ -176,6 +193,9 @@ struct key {
struct list_head graveyard_link;
struct rb_node serial_node;
};
+#ifdef CONFIG_KEY_NOTIFICATIONS
+ struct watch_list *watchers; /* Entities watching this key for changes */
+#endif
struct rw_semaphore sem; /* change vs change sem */
struct key_user *user; /* owner of this key */
void *security; /* security data for this key */
@@ -417,20 +437,9 @@ static inline key_serial_t key_serial(const struct key *key)
extern void key_set_timeout(struct key *, unsigned);
extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags,
- key_perm_t perm);
+ enum key_need_perm need_perm);
extern void key_free_user_ns(struct user_namespace *);
-/*
- * The permissions required on a key that we're looking up.
- */
-#define KEY_NEED_VIEW 0x01 /* Require permission to view attributes */
-#define KEY_NEED_READ 0x02 /* Require permission to read content */
-#define KEY_NEED_WRITE 0x04 /* Require permission to update / modify */
-#define KEY_NEED_SEARCH 0x08 /* Require permission to search (keyring) or find (key) */
-#define KEY_NEED_LINK 0x10 /* Require permission to link */
-#define KEY_NEED_SETATTR 0x20 /* Require permission to change attributes */
-#define KEY_NEED_ALL 0x3f /* All the above permissions */
-
static inline short key_read_state(const struct key *key)
{
/* Barrier versus mark_key_instantiated(). */
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 8bbcaad7ef0f..65b81e0c494d 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -5,6 +5,8 @@
#include <linux/err.h>
#include <linux/sched.h>
+struct mm_struct;
+
__printf(4, 5)
struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
void *data,
@@ -57,6 +59,7 @@ bool kthread_should_stop(void);
bool kthread_should_park(void);
bool __kthread_should_park(struct task_struct *k);
bool kthread_freezable_should_stop(bool *was_frozen);
+void *kthread_func(struct task_struct *k);
void *kthread_data(struct task_struct *k);
void *kthread_probe_data(struct task_struct *k);
int kthread_park(struct task_struct *k);
@@ -198,6 +201,9 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work);
void kthread_destroy_worker(struct kthread_worker *worker);
+void kthread_use_mm(struct mm_struct *mm);
+void kthread_unuse_mm(struct mm_struct *mm);
+
struct cgroup_subsys_state;
#ifdef CONFIG_BLK_CGROUP
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f43b59b1294c..62ec926c78a0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -206,6 +206,7 @@ struct kvm_async_pf {
unsigned long addr;
struct kvm_arch_async_pf arch;
bool wakeup_all;
+ bool notpresent_injected;
};
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
@@ -318,7 +319,6 @@ struct kvm_vcpu {
bool preempted;
bool ready;
struct kvm_vcpu_arch arch;
- struct dentry *debugfs_dentry;
};
static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
@@ -888,7 +888,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
-void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu);
+void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
#endif
int kvm_arch_hardware_enable(void);
@@ -1421,8 +1421,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
}
#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
-int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end, bool blockable);
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 99d629fd9944..28f23b341c1c 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -75,6 +75,7 @@ struct common_audit_data {
#define LSM_AUDIT_DATA_IBPKEY 13
#define LSM_AUDIT_DATA_IBENDPORT 14
#define LSM_AUDIT_DATA_LOCKDOWN 15
+#define LSM_AUDIT_DATA_NOTIFICATION 16
union {
struct path path;
struct dentry *dentry;
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index fb3ce6cec997..4a3d70baa7f1 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -254,6 +254,15 @@ LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen)
LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx,
u32 *ctxlen)
+#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE)
+LSM_HOOK(int, 0, post_notification, const struct cred *w_cred,
+ const struct cred *cred, struct watch_notification *n)
+#endif /* CONFIG_SECURITY && CONFIG_WATCH_QUEUE */
+
+#if defined(CONFIG_SECURITY) && defined(CONFIG_KEY_NOTIFICATIONS)
+LSM_HOOK(int, 0, watch_key, struct key *key)
+#endif /* CONFIG_SECURITY && CONFIG_KEY_NOTIFICATIONS */
+
#ifdef CONFIG_SECURITY_NETWORK
LSM_HOOK(int, 0, unix_stream_connect, struct sock *sock, struct sock *other,
struct sock *newsk)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 3e62dab77699..73d87955e26b 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1445,6 +1445,20 @@
* @ctx is a pointer in which to place the allocated security context.
* @ctxlen points to the place to put the length of @ctx.
*
+ * Security hooks for the general notification queue:
+ *
+ * @post_notification:
+ * Check to see if a watch notification can be posted to a particular
+ * queue.
+ * @w_cred: The credentials of the whoever set the watch.
+ * @cred: The event-triggerer's credentials
+ * @n: The notification being posted
+ *
+ * @watch_key:
+ * Check to see if a process is allowed to watch for event notifications
+ * from a key or keyring.
+ * @key: The key to watch.
+ *
* Security hooks for using the eBPF maps and programs functionalities through
* eBPF syscalls.
*
diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h
index d9a543a9e1cc..c51a84132d7c 100644
--- a/include/linux/mmu_context.h
+++ b/include/linux/mmu_context.h
@@ -4,11 +4,6 @@
#include <asm/mmu_context.h>
-struct mm_struct;
-
-void use_mm(struct mm_struct *mm);
-void unuse_mm(struct mm_struct *mm);
-
/* Architectures that care about IRQ state in switch_mm can override this. */
#ifndef switch_mm_irqs_off
# define switch_mm_irqs_off switch_mm
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index df1f08486d81..c4c37fd12104 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -660,9 +660,21 @@ struct deferred_split {
* per-zone basis.
*/
typedef struct pglist_data {
+ /*
+ * node_zones contains just the zones for THIS node. Not all of the
+ * zones may be populated, but it is the full list. It is referenced by
+ * this node's node_zonelists as well as other node's node_zonelists.
+ */
struct zone node_zones[MAX_NR_ZONES];
+
+ /*
+ * node_zonelists contains references to all zones in all nodes.
+ * Generally the first zones will be references to this node's
+ * node_zones.
+ */
struct zonelist node_zonelists[MAX_ZONELISTS];
- int nr_zones;
+
+ int nr_zones; /* number of populated zones in this node */
#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
struct page *node_mem_map;
#ifdef CONFIG_PAGE_EXTENSION
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 82d8fb422092..4dba3c948932 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -38,7 +38,7 @@ struct nfs4_ace {
struct nfs4_acl {
uint32_t naces;
- struct nfs4_ace aces[0];
+ struct nfs4_ace aces[];
};
#define NFS4_MAXLABELLEN 2048
@@ -295,7 +295,7 @@ static inline bool seqid_mutating_err(u32 err)
case NFS4ERR_NOFILEHANDLE:
case NFS4ERR_MOVED:
return false;
- };
+ }
return true;
}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 73eda45f1cfd..6ee9119acc5d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -230,6 +230,7 @@ struct nfs4_copy_state {
#define NFS_INO_INVALID_OTHER BIT(12) /* other attrs are invalid */
#define NFS_INO_DATA_INVAL_DEFER \
BIT(13) /* Deferred cache invalidation */
+#define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */
#define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \
| NFS_INO_INVALID_CTIME \
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index e5f3e7d8d3d5..5fd0a9ef425f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1227,7 +1227,7 @@ struct nfs4_secinfo4 {
struct nfs4_secinfo_flavors {
unsigned int num_flavors;
- struct nfs4_secinfo4 flavors[0];
+ struct nfs4_secinfo4 flavors[];
};
struct nfs4_secinfo_arg {
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 0c31b9461262..50afd0d0084c 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -9,6 +9,10 @@
#define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */
#define PIPE_BUF_FLAG_PACKET 0x08 /* read() as a packet */
#define PIPE_BUF_FLAG_CAN_MERGE 0x10 /* can merge buffers */
+#define PIPE_BUF_FLAG_WHOLE 0x20 /* read() must return entire buffer or error */
+#ifdef CONFIG_WATCH_QUEUE
+#define PIPE_BUF_FLAG_LOSS 0x40 /* Message loss happened after this buffer */
+#endif
/**
* struct pipe_buffer - a linux kernel pipe buffer
@@ -34,8 +38,10 @@ struct pipe_buffer {
* @wr_wait: writer wait point in case of full pipe
* @head: The point of buffer production
* @tail: The point of buffer consumption
+ * @note_loss: The next read() should insert a data-lost message
* @max_usage: The maximum number of slots that may be used in the ring
* @ring_size: total number of buffers (should be a power of 2)
+ * @nr_accounted: The amount this pipe accounts for in user->pipe_bufs
* @tmp_page: cached released page
* @readers: number of current readers of this pipe
* @writers: number of current writers of this pipe
@@ -46,6 +52,7 @@ struct pipe_buffer {
* @fasync_writers: writer side fasync
* @bufs: the circular array of pipe buffers
* @user: the user who created this pipe
+ * @watch_queue: If this pipe is a watch_queue, this is the stuff for that
**/
struct pipe_inode_info {
struct mutex mutex;
@@ -54,6 +61,10 @@ struct pipe_inode_info {
unsigned int tail;
unsigned int max_usage;
unsigned int ring_size;
+#ifdef CONFIG_WATCH_QUEUE
+ bool note_loss;
+#endif
+ unsigned int nr_accounted;
unsigned int readers;
unsigned int writers;
unsigned int files;
@@ -64,6 +75,9 @@ struct pipe_inode_info {
struct fasync_struct *fasync_writers;
struct pipe_buffer *bufs;
struct user_struct *user;
+#ifdef CONFIG_WATCH_QUEUE
+ struct watch_queue *watch_queue;
+#endif
};
/*
@@ -239,9 +253,20 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
extern const struct pipe_buf_operations nosteal_pipe_buf_ops;
+#ifdef CONFIG_WATCH_QUEUE
+unsigned long account_pipe_buffers(struct user_struct *user,
+ unsigned long old, unsigned long new);
+bool too_many_pipe_buffers_soft(unsigned long user_bufs);
+bool too_many_pipe_buffers_hard(unsigned long user_bufs);
+bool pipe_is_unprivileged_user(void);
+#endif
+
/* for F_SETPIPE_SZ and F_GETPIPE_SZ */
+#ifdef CONFIG_WATCH_QUEUE
+int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots);
+#endif
long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
-struct pipe_inode_info *get_pipe_info(struct file *file);
+struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice);
int create_pipe_files(struct file **, int);
unsigned int round_pipe_size(unsigned long size);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c5d96e3e7fff..4ea612e9ad27 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -31,6 +31,7 @@
#include <linux/task_io_accounting.h>
#include <linux/posix-timers.h>
#include <linux/rseq.h>
+#include <linux/kcsan.h>
/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
@@ -1197,6 +1198,9 @@ struct task_struct {
#ifdef CONFIG_KASAN
unsigned int kasan_depth;
#endif
+#ifdef CONFIG_KCSAN
+ struct kcsan_ctx kcsan_ctx;
+#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* Index of current stored address in ret_stack: */
diff --git a/include/linux/security.h b/include/linux/security.h
index b3f2cb21b4f2..469fa91f8cf8 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -56,6 +56,8 @@ struct mm_struct;
struct fs_context;
struct fs_parameter;
enum fs_value_type;
+struct watch;
+struct watch_notification;
/* Default (no) options for the capable function */
#define CAP_OPT_NONE 0x0
@@ -1282,6 +1284,28 @@ static inline int security_locked_down(enum lockdown_reason what)
}
#endif /* CONFIG_SECURITY */
+#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE)
+int security_post_notification(const struct cred *w_cred,
+ const struct cred *cred,
+ struct watch_notification *n);
+#else
+static inline int security_post_notification(const struct cred *w_cred,
+ const struct cred *cred,
+ struct watch_notification *n)
+{
+ return 0;
+}
+#endif
+
+#if defined(CONFIG_SECURITY) && defined(CONFIG_KEY_NOTIFICATIONS)
+int security_watch_key(struct key *key);
+#else
+static inline int security_watch_key(struct key *key)
+{
+ return 0;
+}
+#endif
+
#ifdef CONFIG_SECURITY_NETWORK
int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk);
@@ -1750,8 +1774,8 @@ static inline int security_path_chroot(const struct path *path)
int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags);
void security_key_free(struct key *key);
-int security_key_permission(key_ref_t key_ref,
- const struct cred *cred, unsigned perm);
+int security_key_permission(key_ref_t key_ref, const struct cred *cred,
+ enum key_need_perm need_perm);
int security_key_getsecurity(struct key *key, char **_buffer);
#else
@@ -1769,7 +1793,7 @@ static inline void security_key_free(struct key *key)
static inline int security_key_permission(key_ref_t key_ref,
const struct cred *cred,
- unsigned perm)
+ enum key_need_perm need_perm)
{
return 0;
}
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 0491d963d47e..8b97204f35a7 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -37,9 +37,25 @@
#include <linux/preempt.h>
#include <linux/lockdep.h>
#include <linux/compiler.h>
+#include <linux/kcsan-checks.h>
#include <asm/processor.h>
/*
+ * The seqlock interface does not prescribe a precise sequence of read
+ * begin/retry/end. For readers, typically there is a call to
+ * read_seqcount_begin() and read_seqcount_retry(), however, there are more
+ * esoteric cases which do not follow this pattern.
+ *
+ * As a consequence, we take the following best-effort approach for raw usage
+ * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
+ * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
+ * atomics; if there is a matching read_seqcount_retry() call, no following
+ * memory operations are considered atomic. Usage of seqlocks via seqlock_t
+ * interface is not affected.
+ */
+#define KCSAN_SEQLOCK_REGION_MAX 1000
+
+/*
* Version using sequence counter only.
* This can be used when code has its own mutex protecting the
* updating starting before the write_seqcountbeqin() and ending
@@ -115,6 +131,7 @@ repeat:
cpu_relax();
goto repeat;
}
+ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
return ret;
}
@@ -131,6 +148,7 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s)
{
unsigned ret = READ_ONCE(s->sequence);
smp_rmb();
+ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
return ret;
}
@@ -183,6 +201,7 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
{
unsigned ret = READ_ONCE(s->sequence);
smp_rmb();
+ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
return ret & ~1;
}
@@ -202,7 +221,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
*/
static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
{
- return unlikely(s->sequence != start);
+ kcsan_atomic_next(0);
+ return unlikely(READ_ONCE(s->sequence) != start);
}
/**
@@ -225,6 +245,7 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
static inline void raw_write_seqcount_begin(seqcount_t *s)
{
+ kcsan_nestable_atomic_begin();
s->sequence++;
smp_wmb();
}
@@ -233,6 +254,7 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
{
smp_wmb();
s->sequence++;
+ kcsan_nestable_atomic_end();
}
/**
@@ -243,6 +265,13 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
* usual consistency guarantee. It is one wmb cheaper, because we can
* collapse the two back-to-back wmb()s.
*
+ * Note that writes surrounding the barrier should be declared atomic (e.g.
+ * via WRITE_ONCE): a) to ensure the writes become visible to other threads
+ * atomically, avoiding compiler optimizations; b) to document which writes are
+ * meant to propagate to the reader critical section. This is necessary because
+ * neither writes before and after the barrier are enclosed in a seq-writer
+ * critical section that would ensure readers are aware of ongoing writes.
+ *
* seqcount_t seq;
* bool X = true, Y = false;
*
@@ -262,18 +291,20 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
*
* void write(void)
* {
- * Y = true;
+ * WRITE_ONCE(Y, true);
*
* raw_write_seqcount_barrier(seq);
*
- * X = false;
+ * WRITE_ONCE(X, false);
* }
*/
static inline void raw_write_seqcount_barrier(seqcount_t *s)
{
+ kcsan_nestable_atomic_begin();
s->sequence++;
smp_wmb();
s->sequence++;
+ kcsan_nestable_atomic_end();
}
static inline int raw_read_seqcount_latch(seqcount_t *s)
@@ -398,7 +429,9 @@ static inline void write_seqcount_end(seqcount_t *s)
static inline void write_seqcount_invalidate(seqcount_t *s)
{
smp_wmb();
+ kcsan_nestable_atomic_begin();
s->sequence+=2;
+ kcsan_nestable_atomic_end();
}
typedef struct {
@@ -430,11 +463,21 @@ typedef struct {
*/
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
- return read_seqcount_begin(&sl->seqcount);
+ unsigned ret = read_seqcount_begin(&sl->seqcount);
+
+ kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry() */
+ kcsan_flat_atomic_begin();
+ return ret;
}
static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
+ /*
+ * Assume not nested: read_seqretry() may be called multiple times when
+ * completing read critical section.
+ */
+ kcsan_flat_atomic_end();
+
return read_seqcount_retry(&sl->seqcount, start);
}
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 83bd8cb475d7..b7af8cc13eda 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -64,7 +64,7 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
struct stack_trace {
unsigned int nr_entries, max_entries;
unsigned long *entries;
- int skip; /* input argument: How many entries to skip */
+ unsigned int skip; /* input argument: How many entries to skip */
};
extern void save_stack_trace(struct stack_trace *trace);
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 4f6b28487f28..98da816b5fc2 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -76,7 +76,7 @@ struct rpc_auth {
unsigned int au_verfsize; /* size of reply verifier */
unsigned int au_ralign; /* words before UL header */
- unsigned int au_flags;
+ unsigned long au_flags;
const struct rpc_authops *au_ops;
rpc_authflavor_t au_flavor; /* pseudoflavor (note may
* differ from the flavor in
@@ -89,7 +89,8 @@ struct rpc_auth {
};
/* rpc_auth au_flags */
-#define RPCAUTH_AUTH_DATATOUCH 0x00000002
+#define RPCAUTH_AUTH_DATATOUCH (1)
+#define RPCAUTH_AUTH_UPDATE_SLACK (2)
struct rpc_auth_create_args {
rpc_authflavor_t pseudoflavor;
diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
index bc07e51f20d1..bf4ac8a0268c 100644
--- a/include/linux/sunrpc/gss_api.h
+++ b/include/linux/sunrpc/gss_api.h
@@ -84,6 +84,7 @@ struct pf_desc {
u32 service;
char *name;
char *auth_domain_name;
+ struct auth_domain *domain;
bool datatouch;
};
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index fd390894a584..386628b36bc7 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -254,6 +254,7 @@ struct svc_rqst {
struct page * *rq_page_end; /* one past the last page */
struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
+ struct bio_vec rq_bvec[RPCSVC_MAXPAGES];
__be32 rq_xid; /* transmission id */
u32 rq_prog; /* program number */
@@ -299,6 +300,7 @@ struct svc_rqst {
struct net *rq_bc_net; /* pointer to backchannel's
* net namespace
*/
+ void ** rq_lease_breaker; /* The v4 client breaking a lease */
};
#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index cbcfbd0521e3..7ed82625dc0b 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -48,7 +48,6 @@
#include <linux/sunrpc/rpc_rdma.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
-#define SVCRDMA_DEBUG
/* Default and maximum inline threshold sizes */
enum {
@@ -160,9 +159,8 @@ struct svc_rdma_send_ctxt {
};
/* svc_rdma_backchannel.c */
-extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
- __be32 *rdma_resp,
- struct xdr_buf *rcvbuf);
+extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *rctxt);
/* svc_rdma_recvfrom.c */
extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma);
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 9e1e046de176..aca35ab5cff2 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -117,6 +117,12 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u
return 0;
}
+static inline bool svc_xprt_is_dead(const struct svc_xprt *xprt)
+{
+ return (test_bit(XPT_DEAD, &xprt->xpt_flags) != 0) ||
+ (test_bit(XPT_CLOSE, &xprt->xpt_flags) != 0);
+}
+
int svc_reg_xprt_class(struct svc_xprt_class *);
void svc_unreg_xprt_class(struct svc_xprt_class *);
void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h
index ca39a388dc22..f09c82b0a7ae 100644
--- a/include/linux/sunrpc/svcauth_gss.h
+++ b/include/linux/sunrpc/svcauth_gss.h
@@ -20,7 +20,8 @@ int gss_svc_init(void);
void gss_svc_shutdown(void);
int gss_svc_init_net(struct net *net);
void gss_svc_shutdown_net(struct net *net);
-int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name);
+struct auth_domain *svcauth_gss_register_pseudoflavor(u32 pseudoflavor,
+ char *name);
u32 svcauth_gss_flavor(struct auth_domain *dom);
#endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 771baadaee9d..b7ac7fe68306 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -28,7 +28,7 @@ struct svc_sock {
/* private TCP part */
/* On-the-wire fragment header: */
- __be32 sk_reclen;
+ __be32 sk_marker;
/* As we receive a record, this includes the length received so
* far (including the fragment header): */
u32 sk_tcplen;
@@ -41,12 +41,12 @@ struct svc_sock {
static inline u32 svc_sock_reclen(struct svc_sock *svsk)
{
- return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK;
+ return be32_to_cpu(svsk->sk_marker) & RPC_FRAGMENT_SIZE_MASK;
}
static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
{
- return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT;
+ return be32_to_cpu(svsk->sk_marker) & RPC_LAST_STREAM_FRAGMENT;
}
/*
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index c91b1e344d56..216185bb3014 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -32,20 +32,10 @@
/* use value, which < 0K, to indicate an invalid/uninitialized temperature */
#define THERMAL_TEMP_INVALID -274000
-/* Default Thermal Governor */
-#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE)
-#define DEFAULT_THERMAL_GOVERNOR "step_wise"
-#elif defined(CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE)
-#define DEFAULT_THERMAL_GOVERNOR "fair_share"
-#elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE)
-#define DEFAULT_THERMAL_GOVERNOR "user_space"
-#elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR)
-#define DEFAULT_THERMAL_GOVERNOR "power_allocator"
-#endif
-
struct thermal_zone_device;
struct thermal_cooling_device;
struct thermal_instance;
+struct thermal_attr;
enum thermal_device_mode {
THERMAL_DEVICE_DISABLED = 0,
@@ -130,11 +120,6 @@ struct thermal_cooling_device {
struct list_head node;
};
-struct thermal_attr {
- struct device_attribute attr;
- char name[THERMAL_NAME_LENGTH];
-};
-
/**
* struct thermal_zone_device - structure for a thermal zone
* @id: unique id number for each thermal zone
@@ -347,21 +332,6 @@ struct thermal_zone_of_device_ops {
int (*set_trip_temp)(void *, int, int);
};
-/**
- * struct thermal_trip - representation of a point in temperature domain
- * @np: pointer to struct device_node that this trip point was created from
- * @temperature: temperature value in miliCelsius
- * @hysteresis: relative hysteresis in miliCelsius
- * @type: trip point type
- */
-
-struct thermal_trip {
- struct device_node *np;
- int temperature;
- int hysteresis;
- enum thermal_trip_type type;
-};
-
/* Function declarations */
#ifdef CONFIG_THERMAL_OF
int thermal_zone_of_get_sensor_id(struct device_node *tz_np,
@@ -413,19 +383,7 @@ void devm_thermal_zone_of_sensor_unregister(struct device *dev,
#endif
-#if IS_ENABLED(CONFIG_THERMAL)
-static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
-{
- return cdev->ops->get_requested_power && cdev->ops->state2power &&
- cdev->ops->power2state;
-}
-
-int power_actor_get_max_power(struct thermal_cooling_device *,
- struct thermal_zone_device *tz, u32 *max_power);
-int power_actor_get_min_power(struct thermal_cooling_device *,
- struct thermal_zone_device *tz, u32 *min_power);
-int power_actor_set_power(struct thermal_cooling_device *,
- struct thermal_instance *, u32);
+#ifdef CONFIG_THERMAL
struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
void *, struct thermal_zone_device_ops *,
struct thermal_zone_params *, int, int);
@@ -439,7 +397,6 @@ int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int,
struct thermal_cooling_device *);
void thermal_zone_device_update(struct thermal_zone_device *,
enum thermal_notify_event);
-void thermal_zone_set_trips(struct thermal_zone_device *);
struct thermal_cooling_device *thermal_cooling_device_register(const char *,
void *, const struct thermal_cooling_device_ops *);
@@ -457,24 +414,9 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp);
int thermal_zone_get_slope(struct thermal_zone_device *tz);
int thermal_zone_get_offset(struct thermal_zone_device *tz);
-int get_tz_trend(struct thermal_zone_device *, int);
-struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
- struct thermal_cooling_device *, int);
void thermal_cdev_update(struct thermal_cooling_device *);
void thermal_notify_framework(struct thermal_zone_device *, int);
#else
-static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
-{ return false; }
-static inline int power_actor_get_max_power(struct thermal_cooling_device *cdev,
- struct thermal_zone_device *tz, u32 *max_power)
-{ return 0; }
-static inline int power_actor_get_min_power(struct thermal_cooling_device *cdev,
- struct thermal_zone_device *tz,
- u32 *min_power)
-{ return -ENODEV; }
-static inline int power_actor_set_power(struct thermal_cooling_device *cdev,
- struct thermal_instance *tz, u32 power)
-{ return 0; }
static inline struct thermal_zone_device *thermal_zone_device_register(
const char *type, int trips, int mask, void *devdata,
struct thermal_zone_device_ops *ops,
@@ -484,21 +426,6 @@ static inline struct thermal_zone_device *thermal_zone_device_register(
static inline void thermal_zone_device_unregister(
struct thermal_zone_device *tz)
{ }
-static inline int thermal_zone_bind_cooling_device(
- struct thermal_zone_device *tz, int trip,
- struct thermal_cooling_device *cdev,
- unsigned long upper, unsigned long lower,
- unsigned int weight)
-{ return -ENODEV; }
-static inline int thermal_zone_unbind_cooling_device(
- struct thermal_zone_device *tz, int trip,
- struct thermal_cooling_device *cdev)
-{ return -ENODEV; }
-static inline void thermal_zone_device_update(struct thermal_zone_device *tz,
- enum thermal_notify_event event)
-{ }
-static inline void thermal_zone_set_trips(struct thermal_zone_device *tz)
-{ }
static inline struct thermal_cooling_device *
thermal_cooling_device_register(char *type, void *devdata,
const struct thermal_cooling_device_ops *ops)
@@ -530,12 +457,7 @@ static inline int thermal_zone_get_slope(
static inline int thermal_zone_get_offset(
struct thermal_zone_device *tz)
{ return -ENODEV; }
-static inline int get_tz_trend(struct thermal_zone_device *tz, int trip)
-{ return -ENODEV; }
-static inline struct thermal_instance *
-get_thermal_instance(struct thermal_zone_device *tz,
- struct thermal_cooling_device *cdev, int trip)
-{ return ERR_PTR(-ENODEV); }
+
static inline void thermal_cdev_update(struct thermal_cooling_device *cdev)
{ }
static inline void thermal_notify_framework(struct thermal_zone_device *tz,
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index dac1db05bf7e..7bcadca22100 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -2,9 +2,9 @@
#ifndef __LINUX_UACCESS_H__
#define __LINUX_UACCESS_H__
+#include <linux/instrumented.h>
#include <linux/sched.h>
#include <linux/thread_info.h>
-#include <linux/kasan-checks.h>
#define uaccess_kernel() segment_eq(get_fs(), KERNEL_DS)
@@ -58,7 +58,7 @@
static __always_inline __must_check unsigned long
__copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
{
- kasan_check_write(to, n);
+ instrument_copy_from_user(to, from, n);
check_object_size(to, n, false);
return raw_copy_from_user(to, from, n);
}
@@ -67,7 +67,7 @@ static __always_inline __must_check unsigned long
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
might_fault();
- kasan_check_write(to, n);
+ instrument_copy_from_user(to, from, n);
check_object_size(to, n, false);
return raw_copy_from_user(to, from, n);
}
@@ -88,7 +88,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
static __always_inline __must_check unsigned long
__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
{
- kasan_check_read(from, n);
+ instrument_copy_to_user(to, from, n);
check_object_size(from, n, true);
return raw_copy_to_user(to, from, n);
}
@@ -97,7 +97,7 @@ static __always_inline __must_check unsigned long
__copy_to_user(void __user *to, const void *from, unsigned long n)
{
might_fault();
- kasan_check_read(from, n);
+ instrument_copy_to_user(to, from, n);
check_object_size(from, n, true);
return raw_copy_to_user(to, from, n);
}
@@ -109,7 +109,7 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
unsigned long res = n;
might_fault();
if (likely(access_ok(from, n))) {
- kasan_check_write(to, n);
+ instrument_copy_from_user(to, from, n);
res = raw_copy_from_user(to, from, n);
}
if (unlikely(res))
@@ -127,7 +127,7 @@ _copy_to_user(void __user *to, const void *from, unsigned long n)
{
might_fault();
if (access_ok(to, n)) {
- kasan_check_read(from, n);
+ instrument_copy_to_user(to, from, n);
n = raw_copy_to_user(to, from, n);
}
return n;
diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h
new file mode 100644
index 000000000000..5e08db2adc31
--- /dev/null
+++ b/include/linux/watch_queue.h
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/* User-mappable watch queue
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * See Documentation/watch_queue.rst
+ */
+
+#ifndef _LINUX_WATCH_QUEUE_H
+#define _LINUX_WATCH_QUEUE_H
+
+#include <uapi/linux/watch_queue.h>
+#include <linux/kref.h>
+#include <linux/rcupdate.h>
+
+#ifdef CONFIG_WATCH_QUEUE
+
+struct cred;
+
+struct watch_type_filter {
+ enum watch_notification_type type;
+ __u32 subtype_filter[1]; /* Bitmask of subtypes to filter on */
+ __u32 info_filter; /* Filter on watch_notification::info */
+ __u32 info_mask; /* Mask of relevant bits in info_filter */
+};
+
+struct watch_filter {
+ union {
+ struct rcu_head rcu;
+ unsigned long type_filter[2]; /* Bitmask of accepted types */
+ };
+ u32 nr_filters; /* Number of filters */
+ struct watch_type_filter filters[];
+};
+
+struct watch_queue {
+ struct rcu_head rcu;
+ struct watch_filter __rcu *filter;
+ struct pipe_inode_info *pipe; /* The pipe we're using as a buffer */
+ struct hlist_head watches; /* Contributory watches */
+ struct page **notes; /* Preallocated notifications */
+ unsigned long *notes_bitmap; /* Allocation bitmap for notes */
+ struct kref usage; /* Object usage count */
+ spinlock_t lock;
+ unsigned int nr_notes; /* Number of notes */
+ unsigned int nr_pages; /* Number of pages in notes[] */
+ bool defunct; /* T when queues closed */
+};
+
+/*
+ * Representation of a watch on an object.
+ */
+struct watch {
+ union {
+ struct rcu_head rcu;
+ u32 info_id; /* ID to be OR'd in to info field */
+ };
+ struct watch_queue __rcu *queue; /* Queue to post events to */
+ struct hlist_node queue_node; /* Link in queue->watches */
+ struct watch_list __rcu *watch_list;
+ struct hlist_node list_node; /* Link in watch_list->watchers */
+ const struct cred *cred; /* Creds of the owner of the watch */
+ void *private; /* Private data for the watched object */
+ u64 id; /* Internal identifier */
+ struct kref usage; /* Object usage count */
+};
+
+/*
+ * List of watches on an object.
+ */
+struct watch_list {
+ struct rcu_head rcu;
+ struct hlist_head watchers;
+ void (*release_watch)(struct watch *);
+ spinlock_t lock;
+};
+
+extern void __post_watch_notification(struct watch_list *,
+ struct watch_notification *,
+ const struct cred *,
+ u64);
+extern struct watch_queue *get_watch_queue(int);
+extern void put_watch_queue(struct watch_queue *);
+extern void init_watch(struct watch *, struct watch_queue *);
+extern int add_watch_to_object(struct watch *, struct watch_list *);
+extern int remove_watch_from_object(struct watch_list *, struct watch_queue *, u64, bool);
+extern long watch_queue_set_size(struct pipe_inode_info *, unsigned int);
+extern long watch_queue_set_filter(struct pipe_inode_info *,
+ struct watch_notification_filter __user *);
+extern int watch_queue_init(struct pipe_inode_info *);
+extern void watch_queue_clear(struct watch_queue *);
+
+static inline void init_watch_list(struct watch_list *wlist,
+ void (*release_watch)(struct watch *))
+{
+ INIT_HLIST_HEAD(&wlist->watchers);
+ spin_lock_init(&wlist->lock);
+ wlist->release_watch = release_watch;
+}
+
+static inline void post_watch_notification(struct watch_list *wlist,
+ struct watch_notification *n,
+ const struct cred *cred,
+ u64 id)
+{
+ if (unlikely(wlist))
+ __post_watch_notification(wlist, n, cred, id);
+}
+
+static inline void remove_watch_list(struct watch_list *wlist, u64 id)
+{
+ if (wlist) {
+ remove_watch_from_object(wlist, NULL, id, true);
+ kfree_rcu(wlist, rcu);
+ }
+}
+
+/**
+ * watch_sizeof - Calculate the information part of the size of a watch record,
+ * given the structure size.
+ */
+#define watch_sizeof(STRUCT) (sizeof(STRUCT) << WATCH_INFO_LENGTH__SHIFT)
+
+#endif
+
+#endif /* _LINUX_WATCH_QUEUE_H */