From afaef01c001537fa97a25092d7f54d764dc7d8c1 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Fri, 17 Aug 2018 01:16:58 +0300 Subject: x86/entry: Add STACKLEAK erasing the kernel stack at the end of syscalls The STACKLEAK feature (initially developed by PaX Team) has the following benefits: 1. Reduces the information that can be revealed through kernel stack leak bugs. The idea of erasing the thread stack at the end of syscalls is similar to CONFIG_PAGE_POISONING and memzero_explicit() in kernel crypto, which all comply with FDP_RIP.2 (Full Residual Information Protection) of the Common Criteria standard. 2. Blocks some uninitialized stack variable attacks (e.g. CVE-2017-17712, CVE-2010-2963). That kind of bugs should be killed by improving C compilers in future, which might take a long time. This commit introduces the code filling the used part of the kernel stack with a poison value before returning to userspace. Full STACKLEAK feature also contains the gcc plugin which comes in a separate commit. The STACKLEAK feature is ported from grsecurity/PaX. More information at: https://grsecurity.net/ https://pax.grsecurity.net/ This code is modified from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on our understanding of the code. Changes or omissions from the original code are ours and don't reflect the original grsecurity/PaX code. Performance impact: Hardware: Intel Core i7-4770, 16 GB RAM Test #1: building the Linux kernel on a single core 0.91% slowdown Test #2: hackbench -s 4096 -l 2000 -g 15 -f 25 -P 4.2% slowdown So the STACKLEAK description in Kconfig includes: "The tradeoff is the performance impact: on a single CPU system kernel compilation sees a 1% slowdown, other systems and workloads may vary and you are advised to test this feature on your expected workload before deploying it". Signed-off-by: Alexander Popov Acked-by: Thomas Gleixner Reviewed-by: Dave Hansen Acked-by: Ingo Molnar Signed-off-by: Kees Cook --- include/linux/sched.h | 4 ++++ include/linux/stackleak.h | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 include/linux/stackleak.h (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 977cb57d7bc9..c1a23acd24e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1192,6 +1192,10 @@ struct task_struct { void *security; #endif +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + unsigned long lowest_stack; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h new file mode 100644 index 000000000000..628c2b947b89 --- /dev/null +++ b/include/linux/stackleak.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_STACKLEAK_H +#define _LINUX_STACKLEAK_H + +#include +#include + +/* + * Check that the poison value points to the unused hole in the + * virtual memory map for your platform. + */ +#define STACKLEAK_POISON -0xBEEF +#define STACKLEAK_SEARCH_DEPTH 128 + +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#include + +static inline void stackleak_task_init(struct task_struct *t) +{ + t->lowest_stack = (unsigned long)end_of_stack(t) + sizeof(unsigned long); +} +#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ +static inline void stackleak_task_init(struct task_struct *t) { } +#endif + +#endif -- cgit From c8d126275a5fa59394fe17109bdb9812fed296b8 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Fri, 17 Aug 2018 01:17:01 +0300 Subject: fs/proc: Show STACKLEAK metrics in the /proc file system Introduce CONFIG_STACKLEAK_METRICS providing STACKLEAK information about tasks via the /proc file system. In particular, /proc//stack_depth shows the maximum kernel stack consumption for the current and previous syscalls. Although this information is not precise, it can be useful for estimating the STACKLEAK performance impact for your workloads. Suggested-by: Ingo Molnar Signed-off-by: Alexander Popov Tested-by: Laura Abbott Signed-off-by: Kees Cook --- include/linux/sched.h | 1 + include/linux/stackleak.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c1a23acd24e7..ae9d10e14b82 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1194,6 +1194,7 @@ struct task_struct { #ifdef CONFIG_GCC_PLUGIN_STACKLEAK unsigned long lowest_stack; + unsigned long prev_lowest_stack; #endif /* diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h index 628c2b947b89..b911b973d328 100644 --- a/include/linux/stackleak.h +++ b/include/linux/stackleak.h @@ -18,6 +18,9 @@ static inline void stackleak_task_init(struct task_struct *t) { t->lowest_stack = (unsigned long)end_of_stack(t) + sizeof(unsigned long); +# ifdef CONFIG_STACKLEAK_METRICS + t->prev_lowest_stack = t->lowest_stack; +# endif } #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ static inline void stackleak_task_init(struct task_struct *t) { } -- cgit From 964c9dff0091893a9a74a88edf984c6da0b779f7 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Fri, 17 Aug 2018 01:17:03 +0300 Subject: stackleak: Allow runtime disabling of kernel stack erasing Introduce CONFIG_STACKLEAK_RUNTIME_DISABLE option, which provides 'stack_erasing' sysctl. It can be used in runtime to control kernel stack erasing for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK. Suggested-by: Ingo Molnar Signed-off-by: Alexander Popov Tested-by: Laura Abbott Signed-off-by: Kees Cook --- include/linux/stackleak.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h index b911b973d328..3d5c3271a9a8 100644 --- a/include/linux/stackleak.h +++ b/include/linux/stackleak.h @@ -22,6 +22,12 @@ static inline void stackleak_task_init(struct task_struct *t) t->prev_lowest_stack = t->lowest_stack; # endif } + +#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE +int stack_erasing_sysctl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +#endif + #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ static inline void stackleak_task_init(struct task_struct *t) { } #endif -- cgit From 29efbc6aea9d9bd9aa9870a9afc1882046303cf9 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 30 Aug 2018 19:07:27 +0200 Subject: Compiler Attributes: remove unused attributes __optimize and __deprecate_for_modules are unused in the whole kernel tree. Simply drop them. Tested-by: Sedat Dilek # on top of v4.19-rc5, clang 7 Reviewed-by: Nick Desaulniers Reviewed-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler-gcc.h | 2 -- include/linux/compiler.h | 4 ---- include/linux/compiler_types.h | 1 - 3 files changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 4d36b27214fd..1302b425e625 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -81,8 +81,6 @@ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) -#define __optimize(level) __attribute__((__optimize__(level))) - #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) #ifndef __CHECKER__ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 681d866efb1e..7c0157d50964 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -301,10 +301,6 @@ static inline void *offset_to_ptr(const int *off) #endif /* __ASSEMBLY__ */ -#ifndef __optimize -# define __optimize(level) -#endif - /* Compile time object size, -1 for unknown */ #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index db192becfec4..a19562cb047c 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -108,7 +108,6 @@ struct ftrace_likely_data { /* Don't. Just don't. */ #define __deprecated -#define __deprecated_for_modules #endif /* __KERNEL__ */ -- cgit From 5c67a52f3da0f0d22764f2daec417702695a8112 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 30 Aug 2018 19:13:37 +0200 Subject: Compiler Attributes: always use the extra-underscores syntax The attribute syntax optionally allows to surround attribute names with "__" in order to avoid collisions with macros of the same name (see https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html). This homogenizes all attributes to use the syntax with underscores. While there are currently only a handful of cases of some TUs defining macros like "error" which may collide with the attributes, this should prevent futures surprises. This has been done only for "standard" attributes supported by the major compilers. In other words, those of third-party tools (e.g. sparse, plugins...) have not been changed for the moment. Tested-by: Sedat Dilek # on top of v4.19-rc5, clang 7 Reviewed-by: Nick Desaulniers Reviewed-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler-clang.h | 2 +- include/linux/compiler-gcc.h | 12 ++++++------ include/linux/compiler-intel.h | 2 +- include/linux/compiler.h | 8 ++++---- include/linux/compiler_types.h | 42 +++++++++++++++++++++--------------------- 5 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index b1ce500fe8b3..d11cad61ba5c 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -21,7 +21,7 @@ #define __SANITIZE_ADDRESS__ #endif -#define __no_sanitize_address __attribute__((no_sanitize("address"))) +#define __no_sanitize_address __attribute__((__no_sanitize__("address"))) /* * Not all versions of clang implement the the type-generic versions diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 1302b425e625..7a1de7683df5 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -76,7 +76,7 @@ #endif #ifdef RETPOLINE -#define __noretpoline __attribute__((indirect_branch("keep"))) +#define __noretpoline __attribute__((__indirect_branch__("keep"))) #endif #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) @@ -84,8 +84,8 @@ #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) #ifndef __CHECKER__ -#define __compiletime_warning(message) __attribute__((warning(message))) -#define __compiletime_error(message) __attribute__((error(message))) +#define __compiletime_warning(message) __attribute__((__warning__(message))) +#define __compiletime_error(message) __attribute__((__error__(message))) #ifdef LATENT_ENTROPY_PLUGIN #define __latent_entropy __attribute__((latent_entropy)) @@ -134,7 +134,7 @@ * optimizer that something else uses this function or variable, thus preventing * this. */ -#define __visible __attribute__((externally_visible)) +#define __visible __attribute__((__externally_visible__)) /* gcc version specific checks */ @@ -191,7 +191,7 @@ * should not be applied to that function. * Conflicts with inlining: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 */ -#define __no_sanitize_address __attribute__((no_sanitize_address)) +#define __no_sanitize_address __attribute__((__no_sanitize_address__)) #endif #if GCC_VERSION >= 50100 @@ -199,7 +199,7 @@ * Mark structures as requiring designated initializers. * https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html */ -#define __designated_init __attribute__((designated_init)) +#define __designated_init __attribute__((__designated_init__)) #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index 4c7f9befa9f6..fef8bb3e53ef 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -42,4 +42,4 @@ * and may be redefined here because they should not be shared with other * compilers, like clang. */ -#define __visible __attribute__((externally_visible)) +#define __visible __attribute__((__externally_visible__)) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7c0157d50964..ec4a28bad2c6 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -24,7 +24,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, long ______r; \ static struct ftrace_likely_data \ __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_annotated_branch"))) \ + __attribute__((__section__("_ftrace_annotated_branch"))) \ ______f = { \ .data.func = __func__, \ .data.file = __FILE__, \ @@ -60,7 +60,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, int ______r; \ static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_branch"))) \ + __attribute__((__section__("_ftrace_branch"))) \ ______f = { \ .func = __func__, \ .file = __FILE__, \ @@ -146,7 +146,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, extern typeof(sym) sym; \ static const unsigned long __kentry_##sym \ __used \ - __attribute__((section("___kentry" "+" #sym ), used)) \ + __attribute__((__section__("___kentry" "+" #sym ), used)) \ = (unsigned long)&sym; #endif @@ -287,7 +287,7 @@ unsigned long read_word_at_a_time(const void *addr) * visible to the compiler. */ #define __ADDRESSABLE(sym) \ - static void * __attribute__((section(".discard.addressable"), used)) \ + static void * __attribute__((__section__(".discard.addressable"), used)) \ __PASTE(__addressable_##sym, __LINE__) = (void *)&sym; /** diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index a19562cb047c..8fbdd47dd3d0 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -195,26 +195,26 @@ struct ftrace_likely_data { * would be. * [...] */ -#define __pure __attribute__((pure)) -#define __aligned(x) __attribute__((aligned(x))) -#define __aligned_largest __attribute__((aligned)) -#define __printf(a, b) __attribute__((format(printf, a, b))) -#define __scanf(a, b) __attribute__((format(scanf, a, b))) -#define __maybe_unused __attribute__((unused)) -#define __always_unused __attribute__((unused)) -#define __mode(x) __attribute__((mode(x))) +#define __pure __attribute__((__pure__)) +#define __aligned(x) __attribute__((__aligned__(x))) +#define __aligned_largest __attribute__((__aligned__)) +#define __printf(a, b) __attribute__((__format__(printf, a, b))) +#define __scanf(a, b) __attribute__((__format__(scanf, a, b))) +#define __maybe_unused __attribute__((__unused__)) +#define __always_unused __attribute__((__unused__)) +#define __mode(x) __attribute__((__mode__(x))) #define __malloc __attribute__((__malloc__)) #define __used __attribute__((__used__)) -#define __noreturn __attribute__((noreturn)) -#define __packed __attribute__((packed)) -#define __weak __attribute__((weak)) -#define __alias(symbol) __attribute__((alias(#symbol))) -#define __cold __attribute__((cold)) +#define __noreturn __attribute__((__noreturn__)) +#define __packed __attribute__((__packed__)) +#define __weak __attribute__((__weak__)) +#define __alias(symbol) __attribute__((__alias__(#symbol))) +#define __cold __attribute__((__cold__)) #define __section(S) __attribute__((__section__(#S))) #ifdef CONFIG_ENABLE_MUST_CHECK -#define __must_check __attribute__((warn_unused_result)) +#define __must_check __attribute__((__warn_unused_result__)) #else #define __must_check #endif @@ -222,7 +222,7 @@ struct ftrace_likely_data { #if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__) #define notrace __attribute__((hotpatch(0, 0))) #else -#define notrace __attribute__((no_instrument_function)) +#define notrace __attribute__((__no_instrument_function__)) #endif /* @@ -231,7 +231,7 @@ struct ftrace_likely_data { * stack and frame pointer being set up and there is no chance to * restore the lr register to the value before mcount was called. */ -#define __naked __attribute__((naked)) notrace +#define __naked __attribute__((__naked__)) notrace #define __compiler_offsetof(a, b) __builtin_offsetof(a, b) @@ -242,7 +242,7 @@ struct ftrace_likely_data { * defined so the gnu89 semantics are the default. */ #ifdef __GNUC_STDC_INLINE__ -# define __gnu_inline __attribute__((gnu_inline)) +# define __gnu_inline __attribute__((__gnu_inline__)) #else # define __gnu_inline #endif @@ -262,17 +262,17 @@ struct ftrace_likely_data { #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) #define inline \ - inline __attribute__((always_inline, unused)) notrace __gnu_inline + inline __attribute__((__always_inline__, __unused__)) notrace __gnu_inline #else -#define inline inline __attribute__((unused)) notrace __gnu_inline +#define inline inline __attribute__((__unused__)) notrace __gnu_inline #endif #define __inline__ inline #define __inline inline -#define noinline __attribute__((noinline)) +#define noinline __attribute__((__noinline__)) #ifndef __always_inline -#define __always_inline inline __attribute__((always_inline)) +#define __always_inline inline __attribute__((__always_inline__)) #endif /* -- cgit From c2c640aa04cc4e6caf0ff17ff18b3784e0c99566 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 30 Aug 2018 19:45:01 +0200 Subject: Compiler Attributes: remove unneeded tests Attributes const and always_inline have tests around them which are unneeded, since they are supported by gcc >= 4.6, clang >= 3 and icc >= 13. https://godbolt.org/z/DFPq37 In the case of gnu_inline, we do not need to test for __GNUC_STDC_INLINE__ because, regardless of the current inlining behavior, we can simply always force the old GCC inlining behavior by using the attribute in all cases. Tested-by: Sedat Dilek # on top of v4.19-rc5, clang 7 Reviewed-by: Nick Desaulniers Reviewed-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler_types.h | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 8fbdd47dd3d0..5ff9cda893f4 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -158,10 +158,6 @@ struct ftrace_likely_data { (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \ sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) -#ifndef __attribute_const__ -#define __attribute_const__ __attribute__((__const__)) -#endif - #ifndef __noclone #define __noclone #endif @@ -196,6 +192,7 @@ struct ftrace_likely_data { * [...] */ #define __pure __attribute__((__pure__)) +#define __attribute_const__ __attribute__((__const__)) #define __aligned(x) __attribute__((__aligned__(x))) #define __aligned_largest __attribute__((__aligned__)) #define __printf(a, b) __attribute__((__format__(printf, a, b))) @@ -211,6 +208,8 @@ struct ftrace_likely_data { #define __alias(symbol) __attribute__((__alias__(#symbol))) #define __cold __attribute__((__cold__)) #define __section(S) __attribute__((__section__(#S))) +#define __always_inline inline __attribute__((__always_inline__)) +#define __gnu_inline __attribute__((__gnu_inline__)) #ifdef CONFIG_ENABLE_MUST_CHECK @@ -235,18 +234,6 @@ struct ftrace_likely_data { #define __compiler_offsetof(a, b) __builtin_offsetof(a, b) -/* - * Feature detection for gnu_inline (gnu89 extern inline semantics). Either - * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics, - * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not - * defined so the gnu89 semantics are the default. - */ -#ifdef __GNUC_STDC_INLINE__ -# define __gnu_inline __attribute__((__gnu_inline__)) -#else -# define __gnu_inline -#endif - /* * Force always-inline if the user requests it so via the .config. * GCC does not warn about unused static inline functions for @@ -271,10 +258,6 @@ struct ftrace_likely_data { #define __inline inline #define noinline __attribute__((__noinline__)) -#ifndef __always_inline -#define __always_inline inline __attribute__((__always_inline__)) -#endif - /* * Rather then using noinline to prevent stack consumption, use * noinline_for_stack instead. For documentation reasons. -- cgit From ec0bbef66f867854691d5af18c2231d746958e0e Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 30 Aug 2018 19:25:14 +0200 Subject: Compiler Attributes: homogenize __must_be_array Different definitions of __must_be_array: * gcc: disabled for __CHECKER__ * clang: same definition as gcc's, but without __CHECKER__ * intel: the comment claims __builtin_types_compatible_p() is unsupported; but icc seems to support it since 13.0.1 (released in 2012). See https://godbolt.org/z/S0l6QQ Therefore, we can remove all of them and have a single definition in compiler.h Tested-by: Sedat Dilek # on top of v4.19-rc5, clang 7 Reviewed-by: Nick Desaulniers Reviewed-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler-clang.h | 1 - include/linux/compiler-gcc.h | 7 ------- include/linux/compiler-intel.h | 3 --- include/linux/compiler.h | 7 +++++++ 4 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index d11cad61ba5c..fa9532f8d885 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -41,6 +41,5 @@ * compilers, like ICC. */ #define barrier() __asm__ __volatile__("" : : : "memory") -#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) #define __assume_aligned(a, ...) \ __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7a1de7683df5..3b32bbfa5a49 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -68,13 +68,6 @@ */ #define uninitialized_var(x) x = x -#ifdef __CHECKER__ -#define __must_be_array(a) 0 -#else -/* &a[0] degrades to a pointer: a different type from an array */ -#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) -#endif - #ifdef RETPOLINE #define __noretpoline __attribute__((__indirect_branch__("keep"))) #endif diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index fef8bb3e53ef..6004b4588bd4 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -29,9 +29,6 @@ */ #define OPTIMIZER_HIDE_VAR(var) barrier() -/* Intel ECC compiler doesn't support __builtin_types_compatible_p() */ -#define __must_be_array(a) 0 - #endif /* icc has this, but it's called _bswap16 */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ec4a28bad2c6..165b1d5683ed 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -357,4 +357,11 @@ static inline void *offset_to_ptr(const int *off) compiletime_assert(__native_word(t), \ "Need native word sized stores/loads for atomicity.") +#ifdef __CHECKER__ +#define __must_be_array(a) 0 +#else +/* &a[0] degrades to a pointer: a different type from an array */ +#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#endif + #endif /* __LINUX_COMPILER_H */ -- cgit From 989bd5000f36052df604888ed12bb6ef390786b7 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 31 Aug 2018 18:00:16 +0200 Subject: Compiler Attributes: remove unneeded sparse (__CHECKER__) tests Sparse knows about a few more attributes now, so we can remove the __CHECKER__ conditions from them (which, in turn, allow us to move some of them later on to compiler_attributes.h). * assume_aligned: since sparse's commit ffc860b ("sparse: ignore __assume_aligned__ attribute"), included in 0.5.1 * error: since sparse's commit 0a04210 ("sparse: Add 'error' to ignored attributes"), included in 0.5.0 * hotpatch: since sparse's commit 6043210 ("sparse/parse.c: ignore hotpatch attribute"), included in 0.5.1 * warning: since sparse's commit 977365d ("Avoid "attribute 'warning': unknown attribute" warning"), included in 0.4.2 On top of that, __must_be_array does not need it either because: * Even ancient versions of sparse do not have a problem * BUILD_BUG_ON_ZERO() is currently disabled for __CHECKER__ Tested-by: Sedat Dilek # on top of v4.19-rc5, clang 7 Reviewed-by: Nick Desaulniers Reviewed-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler-gcc.h | 6 ++---- include/linux/compiler.h | 4 ---- include/linux/compiler_types.h | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 3b32bbfa5a49..1ca6a51cfaa9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -76,14 +76,12 @@ #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) -#ifndef __CHECKER__ #define __compiletime_warning(message) __attribute__((__warning__(message))) #define __compiletime_error(message) __attribute__((__error__(message))) -#ifdef LATENT_ENTROPY_PLUGIN +#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif -#endif /* __CHECKER__ */ /* * calling noreturn functions, __builtin_unreachable() and __builtin_trap() @@ -131,7 +129,7 @@ /* gcc version specific checks */ -#if GCC_VERSION >= 40900 && !defined(__CHECKER__) +#if GCC_VERSION >= 40900 /* * __assume_aligned(n, k): Tell the optimizer that the returned * pointer can be assumed to be k modulo n. The second argument is diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 165b1d5683ed..4030a2940d6b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -357,11 +357,7 @@ static inline void *offset_to_ptr(const int *off) compiletime_assert(__native_word(t), \ "Need native word sized stores/loads for atomicity.") -#ifdef __CHECKER__ -#define __must_be_array(a) 0 -#else /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) -#endif #endif /* __LINUX_COMPILER_H */ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 5ff9cda893f4..a3eceb3ad1b3 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -218,7 +218,7 @@ struct ftrace_likely_data { #define __must_check #endif -#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__) +#if defined(CC_USING_HOTPATCH) #define notrace __attribute__((hotpatch(0, 0))) #else #define notrace __attribute__((__no_instrument_function__)) -- cgit From 66dbeef915f275dad6c8656b31667ef9640f5639 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 3 Sep 2018 18:34:18 +0200 Subject: Compiler Attributes: add missing SPDX ID in compiler_types.h Tested-by: Sedat Dilek # on top of v4.19-rc5, clang 7 Reviewed-by: Nick Desaulniers Reviewed-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index a3eceb3ad1b3..ed7c0e4a180e 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_COMPILER_TYPES_H #define __LINUX_COMPILER_TYPES_H -- cgit From a3f8a30f3f0079c7edfc72e329eee8594fb3e3cb Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 30 Aug 2018 20:36:59 +0200 Subject: Compiler Attributes: use feature checks instead of version checks Instead of using version checks per-compiler to define (or not) each attribute, use __has_attribute to test for them, following the cleanup started with commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive"), which is supported on gcc >= 5, clang >= 2.9 and icc >= 17. In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute by hand. All the attributes that can be unconditionally defined and directly map to compiler attribute(s) (even if optional) have been moved to a new file include/linux/compiler_attributes.h In an effort to make the file as regular as possible, comments stating the purpose of attributes have been removed. Instead, links to the compiler docs have been added (i.e. to gcc and, if available, to clang as well). In addition, they have been sorted. Finally, if an attribute is optional (i.e. if it is guarded by __has_attribute), the reason has been stated for future reference. Tested-by: Sedat Dilek # on top of v4.19-rc5, clang 7 Reviewed-by: Nick Desaulniers Reviewed-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler-clang.h | 4 - include/linux/compiler-gcc.h | 51 -------- include/linux/compiler-intel.h | 6 - include/linux/compiler_attributes.h | 244 ++++++++++++++++++++++++++++++++++++ include/linux/compiler_types.h | 74 ++--------- 5 files changed, 254 insertions(+), 125 deletions(-) create mode 100644 include/linux/compiler_attributes.h (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index fa9532f8d885..3e7dafb3ea80 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -21,8 +21,6 @@ #define __SANITIZE_ADDRESS__ #endif -#define __no_sanitize_address __attribute__((__no_sanitize__("address"))) - /* * Not all versions of clang implement the the type-generic versions * of the builtin overflow checkers. Fortunately, clang implements @@ -41,5 +39,3 @@ * compilers, like ICC. */ #define barrier() __asm__ __volatile__("" : : : "memory") -#define __assume_aligned(a, ...) \ - __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 1ca6a51cfaa9..cfac027e1625 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -108,9 +108,6 @@ __builtin_unreachable(); \ } while (0) -/* Mark a function definition as prohibited from being cloned. */ -#define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) - #if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__) #define __randomize_layout __attribute__((randomize_layout)) #define __no_randomize_layout __attribute__((no_randomize_layout)) @@ -119,32 +116,6 @@ #define randomized_struct_fields_end } __randomize_layout; #endif -/* - * When used with Link Time Optimization, gcc can optimize away C functions or - * variables which are referenced only from assembly code. __visible tells the - * optimizer that something else uses this function or variable, thus preventing - * this. - */ -#define __visible __attribute__((__externally_visible__)) - -/* gcc version specific checks */ - -#if GCC_VERSION >= 40900 -/* - * __assume_aligned(n, k): Tell the optimizer that the returned - * pointer can be assumed to be k modulo n. The second argument is - * optional (default 0), so we use a variadic macro to make the - * shorthand. - * - * Beware: Do not apply this to functions which may return - * ERR_PTRs. Also, it is probably unwise to apply it to functions - * returning extra information in the low bits (but in that case the - * compiler should see some alignment anyway, when the return value is - * massaged by 'flags = ptr & 3; ptr &= ~3;'). - */ -#define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) -#endif - /* * GCC 'asm goto' miscompiles certain code sequences: * @@ -176,32 +147,10 @@ #define KASAN_ABI_VERSION 3 #endif -#if GCC_VERSION >= 40902 -/* - * Tell the compiler that address safety instrumentation (KASAN) - * should not be applied to that function. - * Conflicts with inlining: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 - */ -#define __no_sanitize_address __attribute__((__no_sanitize_address__)) -#endif - #if GCC_VERSION >= 50100 -/* - * Mark structures as requiring designated initializers. - * https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html - */ -#define __designated_init __attribute__((__designated_init__)) #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif -#if !defined(__noclone) -#define __noclone /* not needed */ -#endif - -#if !defined(__no_sanitize_address) -#define __no_sanitize_address -#endif - /* * Turn individual warnings and errors on and off locally, depending * on version. diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index 6004b4588bd4..517bd14e1222 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -34,9 +34,3 @@ /* icc has this, but it's called _bswap16 */ #define __HAVE_BUILTIN_BSWAP16__ #define __builtin_bswap16 _bswap16 - -/* The following are for compatibility with GCC, from compiler-gcc.h, - * and may be redefined here because they should not be shared with other - * compilers, like clang. - */ -#define __visible __attribute__((__externally_visible__)) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h new file mode 100644 index 000000000000..f0f9fc398440 --- /dev/null +++ b/include/linux/compiler_attributes.h @@ -0,0 +1,244 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_COMPILER_ATTRIBUTES_H +#define __LINUX_COMPILER_ATTRIBUTES_H + +/* + * The attributes in this file are unconditionally defined and they directly + * map to compiler attribute(s) -- except those that are optional. + * + * Any other "attributes" (i.e. those that depend on a configuration option, + * on a compiler, on an architecture, on plugins, on other attributes...) + * should be defined elsewhere (e.g. compiler_types.h or compiler-*.h). + * + * This file is meant to be sorted (by actual attribute name, + * not by #define identifier). Use the __attribute__((__name__)) syntax + * (i.e. with underscores) to avoid future collisions with other macros. + * If an attribute is optional, state the reason in the comment. + */ + +/* + * To check for optional attributes, we use __has_attribute, which is supported + * on gcc >= 5, clang >= 2.9 and icc >= 17. In the meantime, to support + * 4.6 <= gcc < 5, we implement __has_attribute by hand. + * + * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__ + * depending on the compiler used to build it; however, these attributes have + * no semantic effects for sparse, so it does not matter. Also note that, + * in order to avoid sparse's warnings, even the unsupported ones must be + * defined to 0. + */ +#ifndef __has_attribute +# define __has_attribute(x) __GCC4_has_attribute_##x +# define __GCC4_has_attribute___assume_aligned__ (__GNUC_MINOR__ >= 9) +# define __GCC4_has_attribute___designated_init__ 0 +# define __GCC4_has_attribute___externally_visible__ 1 +# define __GCC4_has_attribute___noclone__ 1 +# define __GCC4_has_attribute___optimize__ 1 +# define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8) +#endif + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alias-function-attribute + */ +#define __alias(symbol) __attribute__((__alias__(#symbol))) + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-aligned-function-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-aligned-type-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-aligned-variable-attribute + */ +#define __aligned(x) __attribute__((__aligned__(x))) +#define __aligned_largest __attribute__((__aligned__)) + +/* + * Note: users of __always_inline currently do not write "inline" themselves, + * which seems to be required by gcc to apply the attribute according + * to its docs (and also "warning: always_inline function might not be + * inlinable [-Wattributes]" is emitted). + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-always_005finline-function-attribute + * clang: mentioned + */ +#define __always_inline inline __attribute__((__always_inline__)) + +/* + * The second argument is optional (default 0), so we use a variadic macro + * to make the shorthand. + * + * Beware: Do not apply this to functions which may return + * ERR_PTRs. Also, it is probably unwise to apply it to functions + * returning extra information in the low bits (but in that case the + * compiler should see some alignment anyway, when the return value is + * massaged by 'flags = ptr & 3; ptr &= ~3;'). + * + * Optional: only supported since gcc >= 4.9 + * Optional: not supported by icc + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#assume-aligned + */ +#if __has_attribute(__assume_aligned__) +# define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) +#else +# define __assume_aligned(a, ...) +#endif + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute + */ +#define __cold __attribute__((__cold__)) + +/* + * Note the long name. + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-const-function-attribute + */ +#define __attribute_const__ __attribute__((__const__)) + +/* + * Don't. Just don't. See commit 771c035372a0 ("deprecate the '__deprecated' + * attribute warnings entirely and for good") for more information. + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-deprecated-function-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-deprecated-type-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-deprecated-variable-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Enumerator-Attributes.html#index-deprecated-enumerator-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#deprecated + */ +#define __deprecated + +/* + * Optional: only supported since gcc >= 5.1 + * Optional: not supported by clang + * Optional: not supported by icc + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-designated_005finit-type-attribute + */ +#if __has_attribute(__designated_init__) +# define __designated_init __attribute__((__designated_init__)) +#else +# define __designated_init +#endif + +/* + * Optional: not supported by clang + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-externally_005fvisible-function-attribute + */ +#if __has_attribute(__externally_visible__) +# define __visible __attribute__((__externally_visible__)) +#else +# define __visible +#endif + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-format-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#format + */ +#define __printf(a, b) __attribute__((__format__(printf, a, b))) +#define __scanf(a, b) __attribute__((__format__(scanf, a, b))) + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-gnu_005finline-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#gnu-inline + */ +#define __gnu_inline __attribute__((__gnu_inline__)) + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-malloc-function-attribute + */ +#define __malloc __attribute__((__malloc__)) + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-mode-type-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-mode-variable-attribute + */ +#define __mode(x) __attribute__((__mode__(x))) + +/* + * Optional: not supported by clang + * Note: icc does not recognize gcc's no-tracer + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noclone-function-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-optimize-function-attribute + */ +#if __has_attribute(__noclone__) +# if __has_attribute(__optimize__) +# define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) +# else +# define __noclone __attribute__((__noclone__)) +# endif +#else +# define __noclone +#endif + +/* + * Note the missing underscores. + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noinline-function-attribute + * clang: mentioned + */ +#define noinline __attribute__((__noinline__)) + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noreturn-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#noreturn + * clang: https://clang.llvm.org/docs/AttributeReference.html#id1 + */ +#define __noreturn __attribute__((__noreturn__)) + +/* + * Optional: only supported since gcc >= 4.8 + * Optional: not supported by icc + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-no_005fsanitize_005faddress-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#no-sanitize-address-no-address-safety-analysis + */ +#if __has_attribute(__no_sanitize_address__) +# define __no_sanitize_address __attribute__((__no_sanitize_address__)) +#else +# define __no_sanitize_address +#endif + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-packed-type-attribute + * clang: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-packed-variable-attribute + */ +#define __packed __attribute__((__packed__)) + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-pure-function-attribute + */ +#define __pure __attribute__((__pure__)) + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-section-function-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-section-variable-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate + */ +#define __section(S) __attribute__((__section__(#S))) + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-unused-type-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-unused-variable-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-unused-label-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#maybe-unused-unused + */ +#define __always_unused __attribute__((__unused__)) +#define __maybe_unused __attribute__((__unused__)) + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-used-function-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-used-variable-attribute + */ +#define __used __attribute__((__used__)) + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute + */ +#define __weak __attribute__((__weak__)) + +#endif /* __LINUX_COMPILER_ATTRIBUTES_H */ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index ed7c0e4a180e..3439d7d0249a 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -55,6 +55,9 @@ extern void __chk_io_ptr(const volatile void __iomem *); #ifdef __KERNEL__ +/* Attributes */ +#include + /* Compiler specific macros. */ #ifdef __clang__ #include @@ -79,12 +82,6 @@ extern void __chk_io_ptr(const volatile void __iomem *); #include #endif -/* - * Generic compiler-independent macros required for kernel - * build go below this comment. Actual compiler/compiler version - * specific implementations come from the above header files - */ - struct ftrace_branch_data { const char *func; const char *file; @@ -107,9 +104,6 @@ struct ftrace_likely_data { unsigned long constant; }; -/* Don't. Just don't. */ -#define __deprecated - #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ @@ -119,10 +113,6 @@ struct ftrace_likely_data { * compilers. We don't consider that to be an error, so set them to nothing. * For example, some of them are for compiler specific plugins. */ -#ifndef __designated_init -# define __designated_init -#endif - #ifndef __latent_entropy # define __latent_entropy #endif @@ -140,17 +130,6 @@ struct ftrace_likely_data { # define randomized_struct_fields_end #endif -#ifndef __visible -#define __visible -#endif - -/* - * Assume alignment of return value. - */ -#ifndef __assume_aligned -#define __assume_aligned(a, ...) -#endif - /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) @@ -159,10 +138,6 @@ struct ftrace_likely_data { (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \ sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) -#ifndef __noclone -#define __noclone -#endif - /* Helpers for emitting diagnostics in pragmas. */ #ifndef __diag #define __diag(string) @@ -182,37 +157,6 @@ struct ftrace_likely_data { #define __diag_error(compiler, version, option, comment) \ __diag_ ## compiler(version, error, option) -/* - * From the GCC manual: - * - * Many functions have no effects except the return value and their - * return value depends only on the parameters and/or global - * variables. Such a function can be subject to common subexpression - * elimination and loop optimization just as an arithmetic operator - * would be. - * [...] - */ -#define __pure __attribute__((__pure__)) -#define __attribute_const__ __attribute__((__const__)) -#define __aligned(x) __attribute__((__aligned__(x))) -#define __aligned_largest __attribute__((__aligned__)) -#define __printf(a, b) __attribute__((__format__(printf, a, b))) -#define __scanf(a, b) __attribute__((__format__(scanf, a, b))) -#define __maybe_unused __attribute__((__unused__)) -#define __always_unused __attribute__((__unused__)) -#define __mode(x) __attribute__((__mode__(x))) -#define __malloc __attribute__((__malloc__)) -#define __used __attribute__((__used__)) -#define __noreturn __attribute__((__noreturn__)) -#define __packed __attribute__((__packed__)) -#define __weak __attribute__((__weak__)) -#define __alias(symbol) __attribute__((__alias__(#symbol))) -#define __cold __attribute__((__cold__)) -#define __section(S) __attribute__((__section__(#S))) -#define __always_inline inline __attribute__((__always_inline__)) -#define __gnu_inline __attribute__((__gnu_inline__)) - - #ifdef CONFIG_ENABLE_MUST_CHECK #define __must_check __attribute__((__warn_unused_result__)) #else @@ -246,18 +190,20 @@ struct ftrace_likely_data { * semantics rather than c99. This prevents multiple symbol definition errors * of extern inline functions at link time. * A lot of inline functions can cause havoc with function tracing. + * Do not use __always_inline here, since currently it expands to inline again + * (which would break users of __always_inline). */ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) -#define inline \ - inline __attribute__((__always_inline__, __unused__)) notrace __gnu_inline +#define inline inline __attribute__((__always_inline__)) __gnu_inline \ + __maybe_unused notrace #else -#define inline inline __attribute__((__unused__)) notrace __gnu_inline +#define inline inline __gnu_inline \ + __maybe_unused notrace #endif #define __inline__ inline -#define __inline inline -#define noinline __attribute__((__noinline__)) +#define __inline inline /* * Rather then using noinline to prevent stack consumption, use -- cgit From 06e3727e02f9ee9cf571692cd5c74fc5a8a2af52 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 3 Sep 2018 19:22:13 +0200 Subject: Compiler Attributes: KENTRY used twice the "used" attribute Tested-by: Sedat Dilek # on top of v4.19-rc5, clang 7 Reviewed-by: Nick Desaulniers Reviewed-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 4030a2940d6b..17ee9165ca51 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -146,7 +146,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, extern typeof(sym) sym; \ static const unsigned long __kentry_##sym \ __used \ - __attribute__((__section__("___kentry" "+" #sym ), used)) \ + __attribute__((__section__("___kentry" "+" #sym ))) \ = (unsigned long)&sym; #endif -- cgit From e04462fb82f8dd98288c0e7ab1eec79c92537d25 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 3 Sep 2018 19:17:50 +0200 Subject: Compiler Attributes: remove uses of __attribute__ from compiler.h Suggested-by: Nick Desaulniers Tested-by: Sedat Dilek # on top of v4.19-rc5, clang 7 Reviewed-by: Nick Desaulniers Reviewed-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 17ee9165ca51..b5fb034fa6fa 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -23,8 +23,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define __branch_check__(x, expect, is_constant) ({ \ long ______r; \ static struct ftrace_likely_data \ - __attribute__((__aligned__(4))) \ - __attribute__((__section__("_ftrace_annotated_branch"))) \ + __aligned(4) \ + __section("_ftrace_annotated_branch") \ ______f = { \ .data.func = __func__, \ .data.file = __FILE__, \ @@ -59,8 +59,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, ({ \ int ______r; \ static struct ftrace_branch_data \ - __attribute__((__aligned__(4))) \ - __attribute__((__section__("_ftrace_branch"))) \ + __aligned(4) \ + __section("_ftrace_branch") \ ______f = { \ .func = __func__, \ .file = __FILE__, \ @@ -146,7 +146,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, extern typeof(sym) sym; \ static const unsigned long __kentry_##sym \ __used \ - __attribute__((__section__("___kentry" "+" #sym ))) \ + __section("___kentry" "+" #sym ) \ = (unsigned long)&sym; #endif @@ -287,7 +287,7 @@ unsigned long read_word_at_a_time(const void *addr) * visible to the compiler. */ #define __ADDRESSABLE(sym) \ - static void * __attribute__((__section__(".discard.addressable"), used)) \ + static void * __section(".discard.addressable") __used \ __PASTE(__addressable_##sym, __LINE__) = (void *)&sym; /** -- cgit From 92676236917d8e2e0de1d8612686d3d04a6a245b Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 19 Sep 2018 01:06:24 +0200 Subject: Compiler Attributes: add support for __nonstring (gcc >= 8) From the GCC manual: nonstring The nonstring variable attribute specifies that an object or member declaration with type array of char, signed char, or unsigned char, or pointer to such a type is intended to store character arrays that do not necessarily contain a terminating NUL. This is useful in detecting uses of such arrays or pointers with functions that expect NUL-terminated strings, and to avoid warnings when such an array or pointer is used as an argument to a bounded string manipulation function such as strncpy. https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html This attribute can be used for documentation purposes (i.e. replacing comments), but it is most helpful when the following warnings are enabled: -Wstringop-overflow Warn for calls to string manipulation functions such as memcpy and strcpy that are determined to overflow the destination buffer. [...] -Wstringop-truncation Warn for calls to bounded string manipulation functions such as strncat, strncpy, and stpncpy that may either truncate the copied string or leave the destination unchanged. [...] In situations where a character array is intended to store a sequence of bytes with no terminating NUL such an array may be annotated with attribute nonstring to avoid this warning. Such arrays, however, are not suitable arguments to functions that expect NUL-terminated strings. To help detect accidental misuses of such arrays GCC issues warnings unless it can prove that the use is safe. https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html Tested-by: Sedat Dilek # on top of v4.19-rc5, clang 7 Reviewed-by: Kees Cook Reviewed-by: Nick Desaulniers Reviewed-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index f0f9fc398440..6b28c1b7310c 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -34,6 +34,7 @@ # define __GCC4_has_attribute___externally_visible__ 1 # define __GCC4_has_attribute___noclone__ 1 # define __GCC4_has_attribute___optimize__ 1 +# define __GCC4_has_attribute___nonstring__ 0 # define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8) #endif @@ -181,6 +182,19 @@ */ #define noinline __attribute__((__noinline__)) +/* + * Optional: only supported since gcc >= 8 + * Optional: not supported by clang + * Optional: not supported by icc + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-nonstring-variable-attribute + */ +#if __has_attribute(__nonstring__) +# define __nonstring __attribute__((__nonstring__)) +#else +# define __nonstring +#endif + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noreturn-function-attribute * clang: https://clang.llvm.org/docs/AttributeReference.html#noreturn -- cgit From fe0640eb30b7da261ae84d252ed9ed3c7e68dfd8 Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Mon, 15 Oct 2018 10:22:21 -0700 Subject: compiler.h: update definition of unreachable() Fixes the objtool warning seen with Clang: arch/x86/mm/fault.o: warning: objtool: no_context()+0x220: unreachable instruction Fixes commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") Josh noted that the fallback definition was meant to work around a pre-gcc-4.6 bug. GCC still needs to work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365, so compiler-gcc.h defines its own version of unreachable(). Clang and ICC can use this shared definition. Link: https://github.com/ClangBuiltLinux/linux/issues/204 Suggested-by: Andy Lutomirski Suggested-by: Josh Poimboeuf Tested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Miguel Ojeda --- include/linux/compiler.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index b5fb034fa6fa..2e0b6322588b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -124,7 +124,10 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, # define ASM_UNREACHABLE #endif #ifndef unreachable -# define unreachable() do { annotate_reachable(); do { } while (1); } while (0) +# define unreachable() do { \ + annotate_unreachable(); \ + __builtin_unreachable(); \ +} while (0) #endif /* -- cgit From 1ff2fea5e30ca15752777441ecb64a169fe22e9e Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Mon, 15 Oct 2018 14:24:27 -0700 Subject: compiler-gcc: remove comment about gcc 4.5 from unreachable() Remove the comment about being unable to detect __builtin_unreachable. __builtin_unreachable was implemented in the GCC 4.5 timeframe. The kernel's minimum supported version of GCC is 4.6 since commit cafa0010cd51 ("Raise the minimum required gcc version to 4.6"). Commit cb984d101b30 ("compiler-gcc: integrate the various compiler-gcc[345].h files") shows that unreachable() had different guards based on GCC version. Suggested-by: Miguel Ojeda Signed-off-by: Nick Desaulniers Signed-off-by: Miguel Ojeda --- include/linux/compiler-gcc.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cfac027e1625..2010493e1040 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -96,10 +96,6 @@ * Mark a position in code as unreachable. This can be used to * suppress control flow warnings after asm blocks that transfer * control elsewhere. - * - * Early snapshots of gcc 4.5 don't support this and we can't detect - * this in the preprocessor, but we can live with this because they're - * unreleased. Really, we need to have autoconf for the kernel. */ #define unreachable() \ do { \ -- cgit From 00e23707442a75b404392cef1405ab4fd498de6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 22 Oct 2018 13:07:28 +0100 Subject: iov_iter: Use accessor function Use accessor functions to access an iterator's type and direction. This allows for the possibility of using some other method of determining the type of iterator than if-chains with bitwise-AND conditions. Signed-off-by: David Howells --- include/linux/uio.h | 48 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 422b1c01ee0d..fcabc959c794 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -21,7 +21,7 @@ struct kvec { size_t iov_len; }; -enum { +enum iter_type { ITER_IOVEC = 0, ITER_KVEC = 2, ITER_BVEC = 4, @@ -47,6 +47,36 @@ struct iov_iter { }; }; +static inline enum iter_type iov_iter_type(const struct iov_iter *i) +{ + return i->type & ~(READ | WRITE); +} + +static inline bool iter_is_iovec(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_IOVEC; +} + +static inline bool iov_iter_is_kvec(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_KVEC; +} + +static inline bool iov_iter_is_bvec(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_BVEC; +} + +static inline bool iov_iter_is_pipe(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_PIPE; +} + +static inline unsigned char iov_iter_rw(const struct iov_iter *i) +{ + return i->type & (READ | WRITE); +} + /* * Total number of bytes covered by an iovec. * @@ -74,7 +104,8 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) } #define iov_for_each(iov, iter, start) \ - if (!((start).type & (ITER_BVEC | ITER_PIPE))) \ + if (iov_iter_type(start) == ITER_IOVEC || \ + iov_iter_type(start) == ITER_KVEC) \ for (iter = (start); \ (iter).count && \ ((iov = iov_iter_iovec(&(iter))), 1); \ @@ -202,19 +233,6 @@ static inline size_t iov_iter_count(const struct iov_iter *i) return i->count; } -static inline bool iter_is_iovec(const struct iov_iter *i) -{ - return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE)); -} - -/* - * Get one of READ or WRITE out of iter->type without any other flags OR'd in - * with it. - * - * The ?: is just for type safety. - */ -#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE)) - /* * Cap the iov_iter by given limit; note that the second argument is * *not* the new size - it's upper limit for such. Passing it a value -- cgit From aa563d7bca6e882ec2bdae24603c8f016401a144 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:56 +0100 Subject: iov_iter: Separate type from direction and use accessor functions In the iov_iter struct, separate the iterator type from the iterator direction and use accessor functions to access them in most places. Convert a bunch of places to use switch-statements to access them rather then chains of bitwise-AND statements. This makes it easier to add further iterator types. Also, this can be more efficient as to implement a switch of small contiguous integers, the compiler can use ~50% fewer compare instructions than it has to use bitwise-and instructions. Further, cease passing the iterator type into the iterator setup function. The iterator function can set that itself. Only the direction is required. Signed-off-by: David Howells --- include/linux/uio.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index fcabc959c794..3d8f1acc142c 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -29,7 +29,7 @@ enum iter_type { }; struct iov_iter { - int type; + unsigned int type; size_t iov_offset; size_t count; union { @@ -212,13 +212,13 @@ size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i) size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); -void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, +void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); -void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec, +void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec, unsigned long nr_segs, size_t count); -void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec, +void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count); -void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe, +void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); -- cgit From 9ea9ce0427aab02a2fd88fc608267cf6952119f1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:56 +0100 Subject: iov_iter: Add I/O discard iterator Add a new iterator, ITER_DISCARD, that can only be used in READ mode and just discards any data copied to it. This is useful in a network filesystem for discarding any unwanted data sent by a server. Signed-off-by: David Howells --- include/linux/uio.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 3d8f1acc142c..55ce99ddb912 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -26,6 +26,7 @@ enum iter_type { ITER_KVEC = 2, ITER_BVEC = 4, ITER_PIPE = 8, + ITER_DISCARD = 16, }; struct iov_iter { @@ -72,6 +73,11 @@ static inline bool iov_iter_is_pipe(const struct iov_iter *i) return iov_iter_type(i) == ITER_PIPE; } +static inline bool iov_iter_is_discard(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_DISCARD; +} + static inline unsigned char iov_iter_rw(const struct iov_iter *i) { return i->type & (READ | WRITE); @@ -220,6 +226,7 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_ unsigned long nr_segs, size_t count); void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe, size_t count); +void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, -- cgit From 70025f84e5b79627a6739533c4fe7cef5b605886 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2018 17:46:51 +0100 Subject: KEYS: Provide key type operations for asymmetric key ops [ver #2] Provide five new operations in the key_type struct that can be used to provide access to asymmetric key operations. These will be implemented for the asymmetric key type in a later patch and may refer to a key retained in RAM by the kernel or a key retained in crypto hardware. int (*asym_query)(const struct kernel_pkey_params *params, struct kernel_pkey_query *info); int (*asym_eds_op)(struct kernel_pkey_params *params, const void *in, void *out); int (*asym_verify_signature)(struct kernel_pkey_params *params, const void *in, const void *in2); Since encrypt, decrypt and sign are identical in their interfaces, they're rolled together in the asym_eds_op() operation and there's an operation ID in the params argument to distinguish them. Verify is different in that we supply the data and the signature instead and get an error value (or 0) as the only result on the expectation that this may well be how a hardware crypto device may work. Signed-off-by: David Howells Tested-by: Marcel Holtmann Reviewed-by: Marcel Holtmann Reviewed-by: Denis Kenzior Tested-by: Denis Kenzior Signed-off-by: James Morris --- include/linux/key-type.h | 11 +++++++++++ include/linux/keyctl.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 include/linux/keyctl.h (limited to 'include/linux') diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 05d8fb5a06c4..bc9af551fc83 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -17,6 +17,9 @@ #ifdef CONFIG_KEYS +struct kernel_pkey_query; +struct kernel_pkey_params; + /* * key under-construction record * - passed to the request_key actor if supplied @@ -155,6 +158,14 @@ struct key_type { */ struct key_restriction *(*lookup_restriction)(const char *params); + /* Asymmetric key accessor functions. */ + int (*asym_query)(const struct kernel_pkey_params *params, + struct kernel_pkey_query *info); + int (*asym_eds_op)(struct kernel_pkey_params *params, + const void *in, void *out); + int (*asym_verify_signature)(struct kernel_pkey_params *params, + const void *in, const void *in2); + /* internal fields */ struct list_head link; /* link in types list */ struct lock_class_key lock_class; /* key->sem lock class */ diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h new file mode 100644 index 000000000000..c7c48c79ce0e --- /dev/null +++ b/include/linux/keyctl.h @@ -0,0 +1,46 @@ +/* keyctl kernel bits + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef __LINUX_KEYCTL_H +#define __LINUX_KEYCTL_H + +#include + +struct kernel_pkey_query { + __u32 supported_ops; /* Which ops are supported */ + __u32 key_size; /* Size of the key in bits */ + __u16 max_data_size; /* Maximum size of raw data to sign in bytes */ + __u16 max_sig_size; /* Maximum size of signature in bytes */ + __u16 max_enc_size; /* Maximum size of encrypted blob in bytes */ + __u16 max_dec_size; /* Maximum size of decrypted blob in bytes */ +}; + +enum kernel_pkey_operation { + kernel_pkey_encrypt, + kernel_pkey_decrypt, + kernel_pkey_sign, + kernel_pkey_verify, +}; + +struct kernel_pkey_params { + struct key *key; + const char *encoding; /* Encoding (eg. "oaep" or "raw" for none) */ + const char *hash_algo; /* Digest algorithm used (eg. "sha1") or NULL if N/A */ + char *info; /* Modified info string to be released later */ + __u32 in_len; /* Input data size */ + union { + __u32 out_len; /* Output buffer size (enc/dec/sign) */ + __u32 in2_len; /* 2nd input data size (verify) */ + }; + enum kernel_pkey_operation op : 8; +}; + +#endif /* __LINUX_KEYCTL_H */ -- cgit From 1383a7ed67490fb00d793e36c7a4d599ff88a64d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:40:31 +1100 Subject: vfs: check file ranges before cloning files Move the file range checks from vfs_clone_file_prep into a separate generic_remap_checks function so that all the checks are collected in a central location. This forms the basis for adding more checks from generic_write_checks that will make cloning's input checking more consistent with write input checking. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- include/linux/fs.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 897eae8faee1..ba93a6e7dac4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1825,9 +1825,9 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); -extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, - struct inode *inode_out, loff_t pos_out, - u64 *len, bool is_dedupe); +extern int vfs_clone_file_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 *count, bool is_dedupe); extern int do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, @@ -2967,6 +2967,9 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); +extern int generic_remap_checks(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + uint64_t *count, bool is_dedupe); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); -- cgit From a83ab01a62e61616ebb8b97f90f568c1214dc10d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:08 +1100 Subject: vfs: rename vfs_clone_file_prep to be more descriptive The vfs_clone_file_prep is a generic function to be called by filesystem implementations only. Rename the prefix to generic_ and make it more clear that it applies to remap operations, not just clones. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ba93a6e7dac4..55729e1c2e75 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1825,9 +1825,9 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); -extern int vfs_clone_file_prep(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 *count, bool is_dedupe); +extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 *count, bool is_dedupe); extern int do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, -- cgit From 2e5dfc99f2e61c42083ba742395e7a7b353513d1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:21 +1100 Subject: vfs: combine the clone and dedupe into a single remap_file_range Combine the clone_file_range and dedupe_file_range operations into a single remap_file_range file operation dispatch since they're fundamentally the same operation. The differences between the two can be made in the prep functions. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- include/linux/fs.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 55729e1c2e75..888cef35c7d7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1721,6 +1721,24 @@ struct block_device_operations; #define NOMMU_VMFLAGS \ (NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC) +/* + * These flags control the behavior of the remap_file_range function pointer. + * If it is called with len == 0 that means "remap to end of source file". + * See Documentation/filesystems/vfs.txt for more details about this call. + * + * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate) + */ +#define REMAP_FILE_DEDUP (1 << 0) + +/* + * These flags signal that the caller is ok with altering various aspects of + * the behavior of the remap operation. The changes must be made by the + * implementation; the vfs remap helper functions can take advantage of them. + * Flags in this category exist to preserve the quirky behavior of the hoisted + * btrfs clone/dedupe ioctls. + * There are no flags yet, but subsequent commits will add some. + */ +#define REMAP_FILE_ADVISORY (0) struct iov_iter; @@ -1759,10 +1777,9 @@ struct file_operations { #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, - u64); - int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, - u64); + int (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); } __randomize_layout; -- cgit From a91ae49bbaf43910edb09e03fedf26b23875bd52 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:28 +1100 Subject: vfs: pass remap flags to generic_remap_file_range_prep Plumb the remap flags through the filesystem from the vfs function dispatcher all the way to the prep function to prepare for behavior changes in subsequent patches. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 888cef35c7d7..631c28ce1436 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1844,7 +1844,7 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *count, bool is_dedupe); + u64 *count, unsigned int remap_flags); extern int do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, -- cgit From 3d28193e1df043764deb7abdaba5e3a6660bc393 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:34 +1100 Subject: vfs: pass remap flags to generic_remap_checks Pass the same remap flags to generic_remap_checks for consistency. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 631c28ce1436..c5435ca81132 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2986,7 +2986,7 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - uint64_t *count, bool is_dedupe); + uint64_t *count, unsigned int remap_flags); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); -- cgit From 42ec3d4c02187a18e27ff94b409ec27234bf2ffd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:49 +1100 Subject: vfs: make remap_file_range functions take and return bytes completed Change the remap_file_range functions to take a number of bytes to operate upon and return the number of bytes they operated on. This is a requirement for allowing fs implementations to return short clone/dedupe results to the user, which will enable us to obey resource limits in a graceful manner. A subsequent patch will enable copy_file_range to signal to the ->clone_file_range implementation that it can handle a short length, which will be returned in the function's return value. For now the short return is not implemented anywhere so the behavior won't change -- either copy_file_range manages to clone the entire range or it tries an alternative. Neither clone ioctl can take advantage of this, alas. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- include/linux/fs.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c5435ca81132..c72d8c3c065a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1777,9 +1777,9 @@ struct file_operations { #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*remap_file_range)(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len, unsigned int remap_flags); + loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); } __randomize_layout; @@ -1844,19 +1844,22 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *count, unsigned int remap_flags); -extern int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); -extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); + loff_t *count, + unsigned int remap_flags); +extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len); +extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len); extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same); extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, - struct file *dst_file, loff_t dst_pos, - u64 len); +extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, + struct file *dst_file, loff_t dst_pos, + loff_t len); struct super_operations { @@ -2986,7 +2989,7 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - uint64_t *count, unsigned int remap_flags); + loff_t *count, unsigned int remap_flags); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); -- cgit From 452ce65951a2f0719e4e119ecca134c06cfe22ee Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:56 +1100 Subject: vfs: plumb remap flags through the vfs clone functions Plumb a remap_flags argument through the {do,vfs}_clone_file_range functions so that clone can take advantage of it. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c72d8c3c065a..1c5e55d2a67d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1848,10 +1848,10 @@ extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, unsigned int remap_flags); extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - loff_t len); + loff_t len, unsigned int remap_flags); extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - loff_t len); + loff_t len, unsigned int remap_flags); extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same); -- cgit From df3658361951e17364f1e1c3fa92862a990ad8bd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:42:03 +1100 Subject: vfs: plumb remap flags through the vfs dedupe functions Plumb a remap_flags argument through the vfs_dedupe_file_range_one functions so that dedupe can take advantage of it. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1c5e55d2a67d..544ab5083b48 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1859,7 +1859,7 @@ extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, - loff_t len); + loff_t len, unsigned int remap_flags); struct super_operations { -- cgit From eca3654e3cc7d93e9734d0fa96cfb15c7f356244 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:42:10 +1100 Subject: vfs: enable remap callers that can handle short operations Plumb in a remap flag that enables the filesystem remap handler to shorten remapping requests for callers that can handle it. Now copy_file_range can report partial success (in case we run up against alignment problems, resource limits, etc.). We also enable CAN_SHORTEN for fideduperange to maintain existing userspace-visible behavior where xfs/btrfs shorten the dedupe range to avoid stale post-eof data exposure. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- include/linux/fs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 544ab5083b48..34c22d695011 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1727,8 +1727,10 @@ struct block_device_operations; * See Documentation/filesystems/vfs.txt for more details about this call. * * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate) + * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request */ #define REMAP_FILE_DEDUP (1 << 0) +#define REMAP_FILE_CAN_SHORTEN (1 << 1) /* * These flags signal that the caller is ok with altering various aspects of @@ -1736,9 +1738,8 @@ struct block_device_operations; * implementation; the vfs remap helper functions can take advantage of them. * Flags in this category exist to preserve the quirky behavior of the hoisted * btrfs clone/dedupe ioctls. - * There are no flags yet, but subsequent commits will add some. */ -#define REMAP_FILE_ADVISORY (0) +#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) struct iov_iter; -- cgit From c32e5f39953fa6bbff35c655bdcb7b3128f1e79f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:42:17 +1100 Subject: vfs: hide file range comparison function There are no callers of vfs_dedupe_file_range_compare, so we might as well make it a static helper and remove the export. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 34c22d695011..346036a84f18 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1853,9 +1853,6 @@ extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); -extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, - struct inode *dest, loff_t destoff, - loff_t len, bool *is_same); extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, -- cgit From 966c37f2d77eb44d47af8e919267b1ba675b2eca Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 26 Oct 2018 11:30:35 +0800 Subject: ipv4/igmp: fix v1/v2 switchback timeout based on rfc3376, 8.12 Similiar with ipv6 mcast commit 89225d1ce6af3 ("net: ipv6: mld: fix v1/v2 switchback timeout to rfc3810, 9.12.") i) RFC3376 8.12. Older Version Querier Present Timeout says: The Older Version Querier Interval is the time-out for transitioning a host back to IGMPv3 mode once an older version query is heard. When an older version query is received, hosts set their Older Version Querier Present Timer to Older Version Querier Interval. This value MUST be ((the Robustness Variable) times (the Query Interval in the last Query received)) plus (one Query Response Interval). Currently we only use a hardcode value IGMP_V1/v2_ROUTER_PRESENT_TIMEOUT. Fix it by adding two new items mr_qi(Query Interval) and mr_qri(Query Response Interval) in struct in_device. Now we can calculate the switchback time via (mr_qrv * mr_qi) + mr_qri. We need update these values when receive IGMPv3 queries. Reported-by: Ying Xu Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index c759d1cbcedd..a64f21a97369 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -37,7 +37,9 @@ struct in_device { unsigned long mr_v1_seen; unsigned long mr_v2_seen; unsigned long mr_maxdelay; - unsigned char mr_qrv; + unsigned long mr_qi; /* Query Interval */ + unsigned long mr_qri; /* Query Response Interval */ + unsigned char mr_qrv; /* Query Robustness Variable */ unsigned char mr_gq_running; unsigned char mr_ifc_count; struct timer_list mr_gq_timer; /* general query timer */ -- cgit From bb58fd7eeffc9bd5d6e2a16cbf0e9e259f8d09f2 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Fri, 19 Oct 2018 14:11:03 -0700 Subject: i40e: Update status codes Add a few new status code which will be used by the ice driver, and rename a few to make them more consistent. Error code are mapped to similar values as in i40e_status.h, so as to be compatible with older VF drivers not using this status enum. Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 2c9756bd9c4c..b2488055fd1d 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -62,13 +62,19 @@ /* Error Codes */ enum virtchnl_status_code { VIRTCHNL_STATUS_SUCCESS = 0, - VIRTCHNL_ERR_PARAM = -5, + VIRTCHNL_STATUS_ERR_PARAM = -5, + VIRTCHNL_STATUS_ERR_NO_MEMORY = -18, VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH = -38, VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR = -39, VIRTCHNL_STATUS_ERR_INVALID_VF_ID = -40, - VIRTCHNL_STATUS_NOT_SUPPORTED = -64, + VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR = -53, + VIRTCHNL_STATUS_ERR_NOT_SUPPORTED = -64, }; +/* Backward compatibility */ +#define VIRTCHNL_ERR_PARAM VIRTCHNL_STATUS_ERR_PARAM +#define VIRTCHNL_STATUS_NOT_SUPPORTED VIRTCHNL_STATUS_ERR_NOT_SUPPORTED + #define VIRTCHNL_LINK_SPEED_100MB_SHIFT 0x1 #define VIRTCHNL_LINK_SPEED_1000MB_SHIFT 0x2 #define VIRTCHNL_LINK_SPEED_10GB_SHIFT 0x3 @@ -831,7 +837,7 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: default: - return VIRTCHNL_ERR_PARAM; + return VIRTCHNL_STATUS_ERR_PARAM; } /* few more checks */ if (err_msg_format || valid_len != msglen) -- cgit From a324e9396ca3d00e1101476ba067b412e0aba232 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 31 Oct 2018 19:20:20 +0100 Subject: EDAC, skx: Fix randconfig builds The driver depends on the ADXL component glue and selects it. However, ADXL itself implicitly depends on ACPI and in nonsensical randconfig builds like this: # CONFIG_ACPI is not set CONFIG_ACPI_ADXL=y where ACPI is not enabled, the build fails with: drivers/edac/skx_edac.o: In function `skx_mce_check_error': skx_edac.c:(.text+0xab): undefined reference to `adxl_decode' drivers/edac/skx_edac.o: In function `skx_init': skx_edac.c:(.init.text+0x8bf): undefined reference to `adxl_get_component_names' make: *** [vmlinux] Error 1 Add stubs for that case so that the build succeeds. CONFIG_ACPI=n doesn't make any sense for real configurations but this fix will at least silence randconfig builds. Signed-off-by: Borislav Petkov Acked-by: Tony Luck Cc: "Rafael J. Wysocki" --- include/linux/adxl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/adxl.h b/include/linux/adxl.h index 2a629acb4c3f..2d29f55923e3 100644 --- a/include/linux/adxl.h +++ b/include/linux/adxl.h @@ -7,7 +7,12 @@ #ifndef _LINUX_ADXL_H #define _LINUX_ADXL_H +#ifdef CONFIG_ACPI_ADXL const char * const *adxl_get_component_names(void); int adxl_decode(u64 addr, u64 component_values[]); +#else +static inline const char * const *adxl_get_component_names(void) { return NULL; } +static inline int adxl_decode(u64 addr, u64 component_values[]) { return -EOPNOTSUPP; } +#endif #endif /* _LINUX_ADXL_H */ -- cgit From 439cd39ea136d2c026805264d58a91f36b6b64ca Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Sat, 14 Jul 2018 21:59:43 +0200 Subject: netfilter: ipset: list:set: Decrease refcount synchronously on deletion and replace Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel") postponed decreasing set reference counters to the RCU callback. An 'ipset del' command can terminate before the RCU grace period is elapsed, and if sets are listed before then, the reference counter shown in userspace will be wrong: # ipset create h hash:ip; ipset create l list:set; ipset add l # ipset del l h; ipset list h Name: h Type: hash:ip Revision: 4 Header: family inet hashsize 1024 maxelem 65536 Size in memory: 88 References: 1 Number of entries: 0 Members: # sleep 1; ipset list h Name: h Type: hash:ip Revision: 4 Header: family inet hashsize 1024 maxelem 65536 Size in memory: 88 References: 0 Number of entries: 0 Members: Fix this by making the reference count update synchronous again. As a result, when sets are listed, ip_set_name_byindex() might now fetch a set whose reference count is already zero. Instead of relying on the reference count to protect against concurrent set renaming, grab ip_set_ref_lock as reader and copy the name, while holding the same lock in ip_set_rename() as writer instead. Reported-by: Li Shuang Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel") Signed-off-by: Stefano Brivio Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 34fc80f3eb90..1d100efe74ec 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -314,7 +314,7 @@ enum { extern ip_set_id_t ip_set_get_byname(struct net *net, const char *name, struct ip_set **set); extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); -extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index); +extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name); extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index); -- cgit From 0962590e553331db2cc0aef2dc35c57f6300dbbe Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 1 Nov 2018 00:05:52 +0100 Subject: bpf: fix partial copy of map_ptr when dst is scalar ALU operations on pointers such as scalar_reg += map_value_ptr are handled in adjust_ptr_min_max_vals(). Problem is however that map_ptr and range in the register state share a union, so transferring state through dst_reg->range = ptr_reg->range is just buggy as any new map_ptr in the dst_reg is then truncated (or null) for subsequent checks. Fix this by adding a raw member and use it for copying state over to dst_reg. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Daniel Borkmann Cc: Edward Cree Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9e8056ec20fa..d93e89761a8b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -51,6 +51,9 @@ struct bpf_reg_state { * PTR_TO_MAP_VALUE_OR_NULL */ struct bpf_map *map_ptr; + + /* Max size from any of the above. */ + unsigned long raw; }; /* Fixed part of pointer offset, pointer types only */ s32 off; -- cgit From a846446b1914d1e3d996d657754f43fde89bab51 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 12 Oct 2018 14:42:52 +0100 Subject: x86/compat: Adjust in_compat_syscall() to generic code under !COMPAT The result of in_compat_syscall() can be pictured as: x86 platform: --------------------------------------------------- | Arch\syscall | 64-bit | ia32 | x32 | |-------------------------------------------------| | x86_64 | false | true | true | |-------------------------------------------------| | i686 | | | | --------------------------------------------------- Other platforms: ------------------------------------------- | Arch\syscall | 64-bit | compat | |-----------------------------------------| | 64-bit | false | true | |-----------------------------------------| | 32-bit(?) | | | ------------------------------------------- As seen, the result of in_compat_syscall() on generic 32-bit platform differs from i686. There is no reason for in_compat_syscall() == true on native i686. It also easy to misread code if the result on native 32-bit platform differs between arches. Because of that non arch-specific code has many places with: if (IS_ENABLED(CONFIG_COMPAT) && in_compat_syscall()) in different variations. It looks-like the only non-x86 code which uses in_compat_syscall() not under CONFIG_COMPAT guard is in amd/amdkfd. But according to the commit a18069c132cb ("amdkfd: Disable support for 32-bit user processes"), it actually should be disabled on native i686. Rename in_compat_syscall() to in_32bit_syscall() for x86-specific code and make in_compat_syscall() false under !CONFIG_COMPAT. A follow on patch will clean up generic users which were forced to check IS_ENABLED(CONFIG_COMPAT) with in_compat_syscall(). Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Ard Biesheuvel Cc: "David S. Miller" Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: John Stultz Cc: "Kirill A. Shutemov" Cc: Oleg Nesterov Cc: Steffen Klassert Cc: Stephen Boyd Cc: Steven Rostedt Cc: linux-efi@vger.kernel.org Cc: netdev@vger.kernel.org Link: https://lkml.kernel.org/r/20181012134253.23266-2-dima@arista.com --- include/linux/compat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index d30e4dbd4be2..f1ef24c68fcc 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -1029,9 +1029,9 @@ int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz, #else /* !CONFIG_COMPAT */ #define is_compat_task() (0) -#ifndef in_compat_syscall +/* Ensure no one redefines in_compat_syscall() under !CONFIG_COMPAT */ +#define in_compat_syscall in_compat_syscall static inline bool in_compat_syscall(void) { return false; } -#endif #endif /* CONFIG_COMPAT */ -- cgit From c3be6577d82a9f0163eb1e2c37a477414d12a209 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 1 Nov 2018 17:51:34 +0000 Subject: SUNRPC: Use atomic(64)_t for seq_send(64) The seq_send & seq_send64 fields in struct krb5_ctx are used as atomically incrementing counters. This is implemented using cmpxchg() & cmpxchg64() to implement what amount to custom versions of atomic_fetch_inc() & atomic64_fetch_inc(). Besides the duplication, using cmpxchg64() has another major drawback in that some 32 bit architectures don't provide it. As such commit 571ed1fd2390 ("SUNRPC: Replace krb5_seq_lock with a lockless scheme") resulted in build failures for some architectures. Change seq_send to be an atomic_t and seq_send64 to be an atomic64_t, then use atomic(64)_* functions to manipulate the values. The atomic64_t type & associated functions are provided even on architectures which lack real 64 bit atomic memory access via CONFIG_GENERIC_ATOMIC64 which uses spinlocks to serialize access. This fixes the build failures for architectures lacking cmpxchg64(). A potential alternative that was raised would be to provide cmpxchg64() on the 32 bit architectures that currently lack it, using spinlocks. However this would provide a version of cmpxchg64() with semantics a little different to the implementations on architectures with real 64 bit atomics - the spinlock-based implementation would only work if all access to the memory used with cmpxchg64() is *always* performed using cmpxchg64(). That is not currently a requirement for users of cmpxchg64(), and making it one seems questionable. As such avoiding cmpxchg64() outside of architecture-specific code seems best, particularly in cases where atomic64_t seems like a better fit anyway. The CONFIG_GENERIC_ATOMIC64 implementation of atomic64_* functions will use spinlocks & so faces the same issue, but with the key difference that the memory backing an atomic64_t ought to always be accessed via the atomic64_* functions anyway making the issue moot. Signed-off-by: Paul Burton Fixes: 571ed1fd2390 ("SUNRPC: Replace krb5_seq_lock with a lockless scheme") Cc: Trond Myklebust Cc: Anna Schumaker Cc: J. Bruce Fields Cc: Jeff Layton Cc: David S. Miller Cc: linux-nfs@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 69f749afa617..4162de72e95c 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -107,8 +107,8 @@ struct krb5_ctx { u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ u8 cksum[GSS_KRB5_MAX_KEYLEN]; s32 endtime; - u32 seq_send; - u64 seq_send64; + atomic_t seq_send; + atomic64_t seq_send64; struct xdr_netobj mech_used; u8 initiator_sign[GSS_KRB5_MAX_KEYLEN]; u8 acceptor_sign[GSS_KRB5_MAX_KEYLEN]; @@ -118,9 +118,6 @@ struct krb5_ctx { u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; }; -extern u32 gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx); -extern u64 gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx); - /* The length of the Kerberos GSS token header */ #define GSS_KRB5_TOK_HDR_LEN (16) -- cgit From b5f2954d30c77649bce9c27e7a0a94299d9cfdf8 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Thu, 1 Nov 2018 17:24:10 -0400 Subject: blkcg: revert blkcg cleanups series This reverts a series committed earlier due to null pointer exception bug report in [1]. It seems there are edge case interactions that I did not consider and will need some time to understand what causes the adverse interactions. The original series can be found in [2] with a follow up series in [3]. [1] https://www.spinics.net/lists/cgroups/msg20719.html [2] https://lore.kernel.org/lkml/20180911184137.35897-1-dennisszhou@gmail.com/ [3] https://lore.kernel.org/lkml/20181020185612.51587-1-dennis@kernel.org/ This reverts the following commits: d459d853c2ed, b2c3fa546705, 101246ec02b5, b3b9f24f5fcc, e2b0989954ae, f0fcb3ec89f3, c839e7a03f92, bdc2491708c4, 74b7c02a9bc1, 5bf9a1f3b4ef, a7b39b4e961c, 07b05bcc3213, 49f4c2dc2b50, 27e6fa996c53 Signed-off-by: Dennis Zhou Signed-off-by: Jens Axboe --- include/linux/bio.h | 26 +++----- include/linux/blk-cgroup.h | 145 ++++++++++++++------------------------------- include/linux/blk_types.h | 1 + include/linux/cgroup.h | 2 - include/linux/writeback.h | 5 +- 5 files changed, 57 insertions(+), 122 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index b47c7f716731..056fb627edb3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -503,31 +503,23 @@ do { \ disk_devt((bio)->bi_disk) #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -int bio_associate_blkg_from_page(struct bio *bio, struct page *page); +int bio_associate_blkcg_from_page(struct bio *bio, struct page *page); #else -static inline int bio_associate_blkg_from_page(struct bio *bio, - struct page *page) { return 0; } +static inline int bio_associate_blkcg_from_page(struct bio *bio, + struct page *page) { return 0; } #endif #ifdef CONFIG_BLK_CGROUP +int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); -int bio_associate_blkg_from_css(struct bio *bio, - struct cgroup_subsys_state *css); -int bio_associate_create_blkg(struct request_queue *q, struct bio *bio); -int bio_reassociate_blkg(struct request_queue *q, struct bio *bio); void bio_disassociate_task(struct bio *bio); -void bio_clone_blkg_association(struct bio *dst, struct bio *src); +void bio_clone_blkcg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ -static inline int bio_associate_blkg_from_css(struct bio *bio, - struct cgroup_subsys_state *css) -{ return 0; } -static inline int bio_associate_create_blkg(struct request_queue *q, - struct bio *bio) { return 0; } -static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) -{ return 0; } +static inline int bio_associate_blkcg(struct bio *bio, + struct cgroup_subsys_state *blkcg_css) { return 0; } static inline void bio_disassociate_task(struct bio *bio) { } -static inline void bio_clone_blkg_association(struct bio *dst, - struct bio *src) { } +static inline void bio_clone_blkcg_association(struct bio *dst, + struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_HIGHMEM diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 1e76ceebeb5d..6d766a19f2bb 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -126,7 +126,7 @@ struct blkcg_gq { struct request_list rl; /* reference count */ - struct percpu_ref refcnt; + atomic_t refcnt; /* is this blkg online? protected by both blkcg and q locks */ bool online; @@ -184,8 +184,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css; struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, struct request_queue *q, bool update_hint); -struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q); struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q); int blkcg_init_queue(struct request_queue *q); @@ -232,59 +230,22 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); -/** - * blkcg_css - find the current css - * - * Find the css associated with either the kthread or the current task. - * This may return a dying css, so it is up to the caller to use tryget logic - * to confirm it is alive and well. - */ -static inline struct cgroup_subsys_state *blkcg_css(void) -{ - struct cgroup_subsys_state *css; - - css = kthread_blkcg(); - if (css) - return css; - return task_css(current, io_cgrp_id); -} static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } -/** - * __bio_blkcg - internal version of bio_blkcg for bfq and cfq - * - * DO NOT USE. - * There is a flaw using this version of the function. In particular, this was - * used in a broken paradigm where association was called on the given css. It - * is possible though that the returned css from task_css() is in the process - * of dying due to migration of the current task. So it is improper to assume - * *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will - * take additional work to handle more gracefully. - */ -static inline struct blkcg *__bio_blkcg(struct bio *bio) -{ - if (bio && bio->bi_blkg) - return bio->bi_blkg->blkcg; - return css_to_blkcg(blkcg_css()); -} - -/** - * bio_blkcg - grab the blkcg associated with a bio - * @bio: target bio - * - * This returns the blkcg associated with a bio, NULL if not associated. - * Callers are expected to either handle NULL or know association has been - * done prior to calling this. - */ static inline struct blkcg *bio_blkcg(struct bio *bio) { - if (bio && bio->bi_blkg) - return bio->bi_blkg->blkcg; - return NULL; + struct cgroup_subsys_state *css; + + if (bio && bio->bi_css) + return css_to_blkcg(bio->bi_css); + css = kthread_blkcg(); + if (css) + return css_to_blkcg(css); + return css_to_blkcg(task_css(current, io_cgrp_id)); } static inline bool blk_cgroup_congested(void) @@ -490,35 +451,26 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) */ static inline void blkg_get(struct blkcg_gq *blkg) { - percpu_ref_get(&blkg->refcnt); + WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); + atomic_inc(&blkg->refcnt); } /** - * blkg_tryget - try and get a blkg reference + * blkg_try_get - try and get a blkg reference * @blkg: blkg to get * * This is for use when doing an RCU lookup of the blkg. We may be in the midst * of freeing this blkg, so we can only use it if the refcnt is not zero. */ -static inline bool blkg_tryget(struct blkcg_gq *blkg) +static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) { - return percpu_ref_tryget(&blkg->refcnt); + if (atomic_inc_not_zero(&blkg->refcnt)) + return blkg; + return NULL; } -/** - * blkg_tryget_closest - try and get a blkg ref on the closet blkg - * @blkg: blkg to get - * - * This walks up the blkg tree to find the closest non-dying blkg and returns - * the blkg that it did association with as it may not be the passed in blkg. - */ -static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) -{ - while (!percpu_ref_tryget(&blkg->refcnt)) - blkg = blkg->parent; - return blkg; -} +void __blkg_release_rcu(struct rcu_head *rcu); /** * blkg_put - put a blkg reference @@ -526,7 +478,9 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) */ static inline void blkg_put(struct blkcg_gq *blkg) { - percpu_ref_put(&blkg->refcnt); + WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); + if (atomic_dec_and_test(&blkg->refcnt)) + call_rcu(&blkg->rcu_head, __blkg_release_rcu); } /** @@ -579,36 +533,25 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, rcu_read_lock(); - if (bio && bio->bi_blkg) { - blkcg = bio->bi_blkg->blkcg; - if (blkcg == &blkcg_root) - goto rl_use_root; - - blkg_get(bio->bi_blkg); - rcu_read_unlock(); - return &bio->bi_blkg->rl; - } + blkcg = bio_blkcg(bio); - blkcg = css_to_blkcg(blkcg_css()); + /* bypass blkg lookup and use @q->root_rl directly for root */ if (blkcg == &blkcg_root) - goto rl_use_root; + goto root_rl; + /* + * Try to use blkg->rl. blkg lookup may fail under memory pressure + * or if either the blkcg or queue is going away. Fall back to + * root_rl in such cases. + */ blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) - blkg = __blkg_lookup_create(blkcg, q); - - if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg)) - goto rl_use_root; + goto root_rl; + blkg_get(blkg); rcu_read_unlock(); return &blkg->rl; - - /* - * Each blkg has its own request_list, however, the root blkcg - * uses the request_queue's root_rl. This is to avoid most - * overhead for the root blkcg. - */ -rl_use_root: +root_rl: rcu_read_unlock(); return &q->root_rl; } @@ -854,26 +797,32 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg struct bio *bio) { return false; } #endif - -static inline void blkcg_bio_issue_init(struct bio *bio) -{ - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); -} - static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { + struct blkcg *blkcg; struct blkcg_gq *blkg; bool throtl = false; rcu_read_lock(); + blkcg = bio_blkcg(bio); + + /* associate blkcg if bio hasn't attached one */ + bio_associate_blkcg(bio, &blkcg->css); - bio_associate_create_blkg(q, bio); - blkg = bio->bi_blkg; + blkg = blkg_lookup(blkcg, q); + if (unlikely(!blkg)) { + spin_lock_irq(q->queue_lock); + blkg = blkg_lookup_create(blkcg, q); + if (IS_ERR(blkg)) + blkg = NULL; + spin_unlock_irq(q->queue_lock); + } throtl = blk_throtl_bio(q, blkg, bio); if (!throtl) { + blkg = blkg ?: q->root_blkg; /* * If the bio is flagged with BIO_QUEUE_ENTERED it means this * is a split bio and we would have already accounted for the @@ -885,8 +834,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } - blkcg_bio_issue_init(bio); - rcu_read_unlock(); return !throtl; } @@ -983,7 +930,6 @@ static inline int blkcg_activate_policy(struct request_queue *q, static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol) { } -static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, @@ -999,7 +945,6 @@ static inline void blk_put_rl(struct request_list *rl) { } static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } -static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { return true; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 093a818c5b68..1dcf652ba0aa 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -178,6 +178,7 @@ struct bio { * release. Read comment on top of bio_associate_current(). */ struct io_context *bi_ioc; + struct cgroup_subsys_state *bi_css; struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; #endif diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b8bcbdeb2eac..32c553556bbd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -93,8 +93,6 @@ extern struct css_set init_css_set; bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); -struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, - struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 738a0c24874f..fdfd04e348f6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -246,8 +246,7 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, * * @bio is a part of the writeback in progress controlled by @wbc. Perform * writeback specific initialization. This is used to apply the cgroup - * writeback context. Must be called after the bio has been associated with - * a device. + * writeback context. */ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { @@ -258,7 +257,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) * regular writeback instead of writing things out itself. */ if (wbc->wb) - bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); + bio_associate_blkcg(bio, wbc->wb->blkcg_css); } #else /* CONFIG_CGROUP_WRITEBACK */ -- cgit From 6d212db11947ae5464e4717536ed9faf61c01e86 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 15 Oct 2018 10:30:23 +0200 Subject: mm: add mm_pxd_folded checks to pgtable_bytes accounting functions The common mm code calls mm_dec_nr_pmds() and mm_dec_nr_puds() in free_pgtables() if the address range spans a full pud or pmd. If mm_dec_nr_puds/mm_dec_nr_pmds are non-empty due to configuration settings they blindly subtract the size of the pmd or pud table from pgtable_bytes even if the pud or pmd page table layer is folded. Add explicit mm_[pmd|pud]_folded checks to the four pgtable_bytes accounting functions mm_inc_nr_puds, mm_inc_nr_pmds, mm_dec_nr_puds and mm_dec_nr_pmds. As the check for folded page tables can be overwritten by the architecture, this allows to keep a correct pgtable_bytes value for platforms that use a dynamic number of page table levels. Acked-by: Kirill A. Shutemov Signed-off-by: Martin Schwidefsky --- include/linux/mm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index daa2b8f1e9a8..a3701e91bb57 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1742,11 +1742,15 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); static inline void mm_inc_nr_puds(struct mm_struct *mm) { + if (mm_pud_folded(mm)) + return; atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes); } static inline void mm_dec_nr_puds(struct mm_struct *mm) { + if (mm_pud_folded(mm)) + return; atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes); } #endif @@ -1766,11 +1770,15 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); static inline void mm_inc_nr_pmds(struct mm_struct *mm) { + if (mm_pmd_folded(mm)) + return; atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes); } static inline void mm_dec_nr_pmds(struct mm_struct *mm) { + if (mm_pmd_folded(mm)) + return; atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes); } #endif -- cgit From a7ad38b0dd3c1ba8d6e5a55241e875e9db8331ab Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sat, 3 Nov 2018 00:51:28 +0800 Subject: clocksource/drivers/c-sky: Add C-SKY SMP timer The driver is for C-SKY SMP timer. It only supports oneshot event and 32bit overflow for clocksource. Per cpu core has one timer and all timers share one clock-counter-input from the same clocksource. This use mfcr&mtcr instructions to access the regs. Signed-off-by: Guo Ren Cc: Daniel Lezcano Cc: Thomas Gleixner Signed-off-by: Daniel Lezcano --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index caf40ad0bbc6..e0cd2baa8380 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -126,6 +126,7 @@ enum cpuhp_state { CPUHP_AP_MIPS_GIC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_RISCV_TIMER_STARTING, + CPUHP_AP_CSKY_TIMER_STARTING, CPUHP_AP_KVM_STARTING, CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, CPUHP_AP_KVM_ARM_VGIC_STARTING, -- cgit From 17b8b74c0f8dbf9b9e3301f9ca5b65dd1c079951 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 19 Oct 2018 19:35:19 +0200 Subject: netfilter: ipset: Correct rcu_dereference() call in ip_set_put_comment() The function is called when rcu_read_lock() is held and not when rcu_read_lock_bh() is held. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set_comment.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 8e2bab1e8e90..70877f8de7e9 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -43,11 +43,11 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, rcu_assign_pointer(comment->c, c); } -/* Used only when dumping a set, protected by rcu_read_lock_bh() */ +/* Used only when dumping a set, protected by rcu_read_lock() */ static inline int ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) { - struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c); + struct ip_set_comment_rcu *c = rcu_dereference(comment->c); if (!c) return 0; -- cgit From 94e297c50b529f5d01cfd1dbc808d61e95180ab7 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Fri, 2 Nov 2018 15:47:53 -0700 Subject: include/linux/notifier.h: SRCU: fix ctags ctags indexing ("make tags" command) throws this warning: ctags: Warning: include/linux/notifier.h:125: null expansion of name pattern "\1" This is the result of DEFINE_PER_CPU() macro expansion. Fix that by getting rid of line break. Similar fix was already done in commit 25528213fe9f ("tags: Fix DEFINE_PER_CPU expansions"), but this one probably wasn't noticed. Link: http://lkml.kernel.org/r/20181030202808.28027-1-semen.protsenko@linaro.org Fixes: 9c80172b902d ("kernel/SRCU: provide a static initializer") Signed-off-by: Sam Protsenko Cc: Sebastian Andrzej Siewior Cc: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/notifier.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index f35c7bf76143..0096a05395e3 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -122,8 +122,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #ifdef CONFIG_TREE_SRCU #define _SRCU_NOTIFIER_HEAD(name, mod) \ - static DEFINE_PER_CPU(struct srcu_data, \ - name##_head_srcu_data); \ + static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \ mod struct srcu_notifier_head name = \ SRCU_NOTIFIER_INIT(name, name##_head_srcu_data) -- cgit From 89c83fb539f95491be80cdd5158e6f0ce329e317 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 2 Nov 2018 15:48:31 -0700 Subject: mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask THP allocation mode is quite complex and it depends on the defrag mode. This complexity is hidden in alloc_hugepage_direct_gfpmask from a large part currently. The NUMA special casing (namely __GFP_THISNODE) is however independent and placed in alloc_pages_vma currently. This both adds an unnecessary branch to all vma based page allocation requests and it makes the code more complex unnecessarily as well. Not to mention that e.g. shmem THP used to do the node reclaiming unconditionally regardless of the defrag mode until recently. This was not only unexpected behavior but it was also hardly a good default behavior and I strongly suspect it was just a side effect of the code sharing more than a deliberate decision which suggests that such a layering is wrong. Get rid of the thp special casing from alloc_pages_vma and move the logic to alloc_hugepage_direct_gfpmask. __GFP_THISNODE is applied to the resulting gfp mask only when the direct reclaim is not requested and when there is no explicit numa binding to preserve the current logic. Please note that there's also a slight difference wrt MPOL_BIND now. The previous code would avoid using __GFP_THISNODE if the local node was outside of policy_nodemask(). After this patch __GFP_THISNODE is avoided for all MPOL_BIND policies. So there's a difference that if local node is actually allowed by the bind policy's nodemask, previously __GFP_THISNODE would be added, but now it won't be. From the behavior POV this is still correct because the policy nodemask is used. Link: http://lkml.kernel.org/r/20180925120326.24392-3-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Alex Williamson Cc: Andrea Arcangeli Cc: David Rientjes Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Stefan Priebe - Profihost AG Cc: Zi Yan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 12 ++++-------- include/linux/mempolicy.h | 2 ++ 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 24bcc5eec6b4..76f8db0b0e71 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) } extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, - int node, bool hugepage); -#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ - alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) + int node); #else #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) -#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ - alloc_pages(gfp_mask, order) -#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ +#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\ alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) + alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id()) #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) + alloc_pages_vma(gfp_mask, 0, vma, addr, node) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5228c62af416..bac395f1d00a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr); +struct mempolicy *get_vma_policy(struct vm_area_struct *vma, + unsigned long addr); bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); -- cgit From 3e59020abf0f88182730527ee5b862e786eb485a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 31 Oct 2018 08:39:12 -0700 Subject: net: bql: add __netdev_tx_sent_queue() When qdisc_run() tries to use BQL budget to bulk-dequeue a batch of packets, GSO can later transform this list in another list of skbs, and each skb is sent to device ndo_start_xmit(), one at a time, with skb->xmit_more being set to one but for last skb. Problem is that very often, BQL limit is hit in the middle of the packet train, forcing dev_hard_start_xmit() to stop the bulk send and requeue the end of the list. BQL role is to avoid head of line blocking, making sure a qdisc can deliver high priority packets before low priority ones. But there is no way requeued packets can be bypassed by fresh packets in the qdisc. Aborting the bulk send increases TX softirqs, and hot cache lines (after skb_segment()) are wasted. Note that for TSO packets, we never split a packet in the middle because of BQL limit being hit. Drivers should be able to update BQL counters without flipping/caring about BQL status, if the current skb has xmit_more set. Upper layers are ultimately responsible to stop sending another packet train when BQL limit is hit. Code template in a driver might look like the following : send_doorbell = __netdev_tx_sent_queue(tx_queue, nr_bytes, skb->xmit_more); Note that __netdev_tx_sent_queue() use is not mandatory, since following patch will change dev_hard_start_xmit() to not care about BQL status. But it is highly recommended so that xmit_more full benefits can be reached (less doorbells sent, and less atomic operations as well) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dc1d9ed33b31..857f8abf7b91 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3190,6 +3190,26 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, #endif } +/* Variant of netdev_tx_sent_queue() for drivers that are aware + * that they should not test BQL status themselves. + * We do want to change __QUEUE_STATE_STACK_XOFF only for the last + * skb of a batch. + * Returns true if the doorbell must be used to kick the NIC. + */ +static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue, + unsigned int bytes, + bool xmit_more) +{ + if (xmit_more) { +#ifdef CONFIG_BQL + dql_queued(&dev_queue->dql, bytes); +#endif + return netif_tx_queue_stopped(dev_queue); + } + netdev_tx_sent_queue(dev_queue, bytes); + return true; +} + /** * netdev_sent_queue - report the number of bytes queued to hardware * @dev: network device -- cgit From 35b69a420bfb56b7b74cb635ea903db05e357bec Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 4 Nov 2018 03:48:54 +0000 Subject: clockevents/drivers/i8253: Add support for PIT shutdown quirk Add support for platforms where pit_shutdown() doesn't work because of a quirk in the PIT emulation. On these platforms setting the counter register to zero causes the PIT to start running again, negating the shutdown. Provide a global variable that controls whether the counter register is zero'ed, which platform specific code can override. Signed-off-by: Michael Kelley Signed-off-by: Thomas Gleixner Cc: "gregkh@linuxfoundation.org" Cc: "devel@linuxdriverproject.org" Cc: "daniel.lezcano@linaro.org" Cc: "virtualization@lists.linux-foundation.org" Cc: "jgross@suse.com" Cc: "akataria@vmware.com" Cc: "olaf@aepfle.de" Cc: "apw@canonical.com" Cc: vkuznets Cc: "jasowang@redhat.com" Cc: "marcelo.cerri@canonical.com" Cc: KY Srinivasan Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1541303219-11142-2-git-send-email-mikelley@microsoft.com --- include/linux/i8253.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i8253.h b/include/linux/i8253.h index e6bb36a97519..8336b2f6f834 100644 --- a/include/linux/i8253.h +++ b/include/linux/i8253.h @@ -21,6 +21,7 @@ #define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ) extern raw_spinlock_t i8253_lock; +extern bool i8253_clear_counter_on_shutdown; extern struct clock_event_device i8253_clockevent; extern void clockevent_i8253_init(bool oneshot); -- cgit From d098093ba06eb032057d1aca1c2e45889e099d00 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 28 Oct 2018 12:29:55 +0100 Subject: mtd: nand: Fix nanddev_neraseblocks() nanddev_neraseblocks() currently returns the number pages per LUN instead of the total number of eraseblocks. Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Miquel Raynal --- include/linux/mtd/nand.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index abe975c87b90..78b86dea2f29 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -324,9 +324,8 @@ static inline unsigned int nanddev_ntargets(const struct nand_device *nand) */ static inline unsigned int nanddev_neraseblocks(const struct nand_device *nand) { - return (u64)nand->memorg.luns_per_target * - nand->memorg.eraseblocks_per_lun * - nand->memorg.pages_per_eraseblock; + return nand->memorg.ntargets * nand->memorg.luns_per_target * + nand->memorg.eraseblocks_per_lun; } /** -- cgit From 163c8d54a997153ee1a1e07fcac087492ad85b37 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 5 Nov 2018 07:36:28 +0100 Subject: compiler: remove __no_sanitize_address_or_inline again The __no_sanitize_address_or_inline and __no_kasan_or_inline defines are almost identical. The only difference is that __no_kasan_or_inline does not have the 'notrace' attribute. To be able to replace __no_sanitize_address_or_inline with the older definition, add 'notrace' to __no_kasan_or_inline and change to two users of __no_sanitize_address_or_inline in the s390 code. The 'notrace' option is necessary for e.g. the __load_psw_mask function in arch/s390/include/asm/processor.h. Without the option it is possible to trace __load_psw_mask which leads to kernel stack overflow. Signed-off-by: Martin Schwidefsky Pointed-out-by: Andrey Ryabinin Acked-by: Steven Rostedt (VMware) Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 12 ------------ include/linux/compiler.h | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index c0f5db3a9621..2010493e1040 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -143,18 +143,6 @@ #define KASAN_ABI_VERSION 3 #endif -/* - * Because __no_sanitize_address conflicts with inlining: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 - * we do one or the other. - */ -#ifdef CONFIG_KASAN -#define __no_sanitize_address_or_inline \ - __no_sanitize_address __maybe_unused notrace -#else -#define __no_sanitize_address_or_inline inline -#endif - #if GCC_VERSION >= 50100 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 18c80cfa4fc4..06396c1cf127 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -189,7 +189,7 @@ void __read_once_size(const volatile void *p, void *res, int size) * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 * '__maybe_unused' allows us to avoid defined-but-not-used warnings. */ -# define __no_kasan_or_inline __no_sanitize_address __maybe_unused +# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused #else # define __no_kasan_or_inline __always_inline #endif -- cgit From 4c0608f4a0e76dfb82d3accd20081f4bf47ed143 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 30 Oct 2018 09:45:55 -0400 Subject: XArray: Regularise xa_reserve The xa_reserve() function was a little unusual in that it attempted to be callable for all kinds of locking scenarios. Make it look like the other APIs with __xa_reserve, xa_reserve_bh and xa_reserve_irq variants. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 80 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index d9514928ddac..c2cb0426c60c 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -291,7 +291,6 @@ void *xa_load(struct xarray *, unsigned long index); void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *xa_cmpxchg(struct xarray *, unsigned long index, void *old, void *entry, gfp_t); -int xa_reserve(struct xarray *, unsigned long index, gfp_t); void *xa_store_range(struct xarray *, unsigned long first, unsigned long last, void *entry, gfp_t); bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t); @@ -455,6 +454,7 @@ void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old, void *entry, gfp_t); int __xa_alloc(struct xarray *, u32 *id, u32 max, void *entry, gfp_t); +int __xa_reserve(struct xarray *, unsigned long index, gfp_t); void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); @@ -621,6 +621,84 @@ static inline int xa_alloc_irq(struct xarray *xa, u32 *id, u32 max, void *entry, return err; } +/** + * xa_reserve() - Reserve this index in the XArray. + * @xa: XArray. + * @index: Index into array. + * @gfp: Memory allocation flags. + * + * Ensures there is somewhere to store an entry at @index in the array. + * If there is already something stored at @index, this function does + * nothing. If there was nothing there, the entry is marked as reserved. + * Loading from a reserved entry returns a %NULL pointer. + * + * If you do not use the entry that you have reserved, call xa_release() + * or xa_erase() to free any unnecessary memory. + * + * Context: Any context. Takes and releases the xa_lock. + * May sleep if the @gfp flags permit. + * Return: 0 if the reservation succeeded or -ENOMEM if it failed. + */ +static inline +int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp) +{ + int ret; + + xa_lock(xa); + ret = __xa_reserve(xa, index, gfp); + xa_unlock(xa); + + return ret; +} + +/** + * xa_reserve_bh() - Reserve this index in the XArray. + * @xa: XArray. + * @index: Index into array. + * @gfp: Memory allocation flags. + * + * A softirq-disabling version of xa_reserve(). + * + * Context: Any context. Takes and releases the xa_lock while + * disabling softirqs. + * Return: 0 if the reservation succeeded or -ENOMEM if it failed. + */ +static inline +int xa_reserve_bh(struct xarray *xa, unsigned long index, gfp_t gfp) +{ + int ret; + + xa_lock_bh(xa); + ret = __xa_reserve(xa, index, gfp); + xa_unlock_bh(xa); + + return ret; +} + +/** + * xa_reserve_irq() - Reserve this index in the XArray. + * @xa: XArray. + * @index: Index into array. + * @gfp: Memory allocation flags. + * + * An interrupt-disabling version of xa_reserve(). + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. + * Return: 0 if the reservation succeeded or -ENOMEM if it failed. + */ +static inline +int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp) +{ + int ret; + + xa_lock_irq(xa); + ret = __xa_reserve(xa, index, gfp); + xa_unlock_irq(xa); + + return ret; +} + /* Everything below here is the Advanced API. Proceed with caution. */ /* -- cgit From c5beb07e7a06b24f4f27304f6282b5dbd929543b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 31 Oct 2018 14:39:28 -0400 Subject: XArray: Unify xa_cmpxchg and __xa_cmpxchg xa_cmpxchg() was one of the largest functions in the xarray implementation. By turning it into a wrapper and having the callers take the lock (like several other functions), we save 160 bytes on a tinyconfig build and reduce the duplication in xarray.c. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 113 ++++++++++++++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index c2cb0426c60c..8e59d4fbd55e 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -289,8 +289,6 @@ struct xarray { void xa_init_flags(struct xarray *, gfp_t flags); void *xa_load(struct xarray *, unsigned long index); void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); -void *xa_cmpxchg(struct xarray *, unsigned long index, - void *old, void *entry, gfp_t); void *xa_store_range(struct xarray *, unsigned long first, unsigned long last, void *entry, gfp_t); bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t); @@ -359,48 +357,6 @@ static inline void *xa_erase(struct xarray *xa, unsigned long index) return xa_store(xa, index, NULL, 0); } -/** - * xa_insert() - Store this entry in the XArray unless another entry is - * already present. - * @xa: XArray. - * @index: Index into array. - * @entry: New entry. - * @gfp: Memory allocation flags. - * - * If you would rather see the existing entry in the array, use xa_cmpxchg(). - * This function is for users who don't care what the entry is, only that - * one is present. - * - * Context: Process context. Takes and releases the xa_lock. - * May sleep if the @gfp flags permit. - * Return: 0 if the store succeeded. -EEXIST if another entry was present. - * -ENOMEM if memory could not be allocated. - */ -static inline int xa_insert(struct xarray *xa, unsigned long index, - void *entry, gfp_t gfp) -{ - void *curr = xa_cmpxchg(xa, index, NULL, entry, gfp); - if (!curr) - return 0; - if (xa_is_err(curr)) - return xa_err(curr); - return -EEXIST; -} - -/** - * xa_release() - Release a reserved entry. - * @xa: XArray. - * @index: Index of entry. - * - * After calling xa_reserve(), you can call this function to release the - * reservation. If the entry at @index has been stored to, this function - * will do nothing. - */ -static inline void xa_release(struct xarray *xa, unsigned long index) -{ - xa_cmpxchg(xa, index, NULL, NULL, 0); -} - /** * xa_for_each() - Iterate over a portion of an XArray. * @xa: XArray. @@ -534,6 +490,61 @@ static inline void *xa_erase_irq(struct xarray *xa, unsigned long index) return entry; } +/** + * xa_cmpxchg() - Conditionally replace an entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @old: Old value to test against. + * @entry: New value to place in array. + * @gfp: Memory allocation flags. + * + * If the entry at @index is the same as @old, replace it with @entry. + * If the return value is equal to @old, then the exchange was successful. + * + * Context: Any context. Takes and releases the xa_lock. May sleep + * if the @gfp flags permit. + * Return: The old value at this index or xa_err() if an error happened. + */ +static inline void *xa_cmpxchg(struct xarray *xa, unsigned long index, + void *old, void *entry, gfp_t gfp) +{ + void *curr; + + xa_lock(xa); + curr = __xa_cmpxchg(xa, index, old, entry, gfp); + xa_unlock(xa); + + return curr; +} + +/** + * xa_insert() - Store this entry in the XArray unless another entry is + * already present. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * If you would rather see the existing entry in the array, use xa_cmpxchg(). + * This function is for users who don't care what the entry is, only that + * one is present. + * + * Context: Process context. Takes and releases the xa_lock. + * May sleep if the @gfp flags permit. + * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * -ENOMEM if memory could not be allocated. + */ +static inline int xa_insert(struct xarray *xa, unsigned long index, + void *entry, gfp_t gfp) +{ + void *curr = xa_cmpxchg(xa, index, NULL, entry, gfp); + if (!curr) + return 0; + if (xa_is_err(curr)) + return xa_err(curr); + return -EEXIST; +} + /** * xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. @@ -699,6 +710,20 @@ int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp) return ret; } +/** + * xa_release() - Release a reserved entry. + * @xa: XArray. + * @index: Index of entry. + * + * After calling xa_reserve(), you can call this function to release the + * reservation. If the entry at @index has been stored to, this function + * will do nothing. + */ +static inline void xa_release(struct xarray *xa, unsigned long index) +{ + xa_cmpxchg(xa, index, NULL, NULL, 0); +} + /* Everything below here is the Advanced API. Proceed with caution. */ /* -- cgit From 9c16bb88905456a9b1299338041f05fa7699971b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 5 Nov 2018 15:48:49 -0500 Subject: XArray: Turn xa_erase into an exported function Make xa_erase() take the spinlock and then call __xa_erase(), but make it out of line since it's such a common function. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 8e59d4fbd55e..4c839c17a99b 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -289,6 +289,7 @@ struct xarray { void xa_init_flags(struct xarray *, gfp_t flags); void *xa_load(struct xarray *, unsigned long index); void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); +void *xa_erase(struct xarray *, unsigned long index); void *xa_store_range(struct xarray *, unsigned long first, unsigned long last, void *entry, gfp_t); bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t); @@ -340,23 +341,6 @@ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark) return xa->xa_flags & XA_FLAGS_MARK(mark); } -/** - * xa_erase() - Erase this entry from the XArray. - * @xa: XArray. - * @index: Index of entry. - * - * This function is the equivalent of calling xa_store() with %NULL as - * the third argument. The XArray does not need to allocate memory, so - * the user does not need to provide GFP flags. - * - * Context: Process context. Takes and releases the xa_lock. - * Return: The entry which used to be at this index. - */ -static inline void *xa_erase(struct xarray *xa, unsigned long index) -{ - return xa_store(xa, index, NULL, 0); -} - /** * xa_for_each() - Iterate over a portion of an XArray. * @xa: XArray. -- cgit From 84e5acb76dacb8ebd648a86a53907ce0dd616534 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 26 Oct 2018 14:41:29 -0400 Subject: XArray: Add xa_store_bh() and xa_store_irq() These convenience wrappers disable interrupts while taking the spinlock. A number of drivers would otherwise have to open-code these functions. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 4c839c17a99b..52d9732e4ec4 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -426,6 +426,58 @@ static inline int __xa_insert(struct xarray *xa, unsigned long index, return -EEXIST; } +/** + * xa_store_bh() - Store this entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * This function is like calling xa_store() except it disables softirqs + * while holding the array lock. + * + * Context: Any context. Takes and releases the xa_lock while + * disabling softirqs. + * Return: The entry which used to be at this index. + */ +static inline void *xa_store_bh(struct xarray *xa, unsigned long index, + void *entry, gfp_t gfp) +{ + void *curr; + + xa_lock_bh(xa); + curr = __xa_store(xa, index, entry, gfp); + xa_unlock_bh(xa); + + return curr; +} + +/** + * xa_store_irq() - Erase this entry from the XArray. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * This function is like calling xa_store() except it disables interrupts + * while holding the array lock. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. + * Return: The entry which used to be at this index. + */ +static inline void *xa_store_irq(struct xarray *xa, unsigned long index, + void *entry, gfp_t gfp) +{ + void *curr; + + xa_lock_irq(xa); + curr = __xa_store(xa, index, entry, gfp); + xa_unlock_irq(xa); + + return curr; +} + /** * xa_erase_bh() - Erase this entry from the XArray. * @xa: XArray. -- cgit From 804dfaf01bcc9daa4298c608ba9018abf616ec48 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 5 Nov 2018 16:37:15 -0500 Subject: XArray: Fix Documentation Minor fixes. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 52d9732e4ec4..564892e19f8c 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -487,7 +487,7 @@ static inline void *xa_store_irq(struct xarray *xa, unsigned long index, * the third argument. The XArray does not need to allocate memory, so * the user does not need to provide GFP flags. * - * Context: Process context. Takes and releases the xa_lock while + * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. * Return: The entry which used to be at this index. */ @@ -622,7 +622,7 @@ static inline int xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, * Updates the @id pointer with the index, then stores the entry at that * index. A concurrent lookup will not see an uninitialised @id. * - * Context: Process context. Takes and releases the xa_lock while + * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if * there is no more space in the XArray. -- cgit From 8bd66d147c88bd441178c7b4c774ae5a185f19b8 Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Wed, 31 Oct 2018 12:39:01 -0700 Subject: include/linux/compiler*.h: define asm_volatile_goto asm_volatile_goto should also be defined for other compilers that support asm goto. Fixes commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive"). Signed-off-by: Nick Desaulniers Signed-off-by: Miguel Ojeda --- include/linux/compiler_types.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 3439d7d0249a..4a3f9c09c92d 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -130,6 +130,10 @@ struct ftrace_likely_data { # define randomized_struct_fields_end #endif +#ifndef asm_volatile_goto +#define asm_volatile_goto(x...) asm goto(x) +#endif + /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) -- cgit From 98ee3fc7ef8395f8b7a379e6608aee91efc66d48 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 6 Nov 2018 17:25:37 +0100 Subject: mtd: nand: Fix nanddev_pos_next_page() kernel-doc header Function name is wrong in the kernel-doc header. Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices") Signed-off-by: Boris Brezillon Reviewed-by: Miquel Raynal --- include/linux/mtd/nand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 78b86dea2f29..7f53ece2c039 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -568,7 +568,7 @@ static inline void nanddev_pos_next_eraseblock(struct nand_device *nand, } /** - * nanddev_pos_next_eraseblock() - Move a position to the next page + * nanddev_pos_next_page() - Move a position to the next page * @nand: NAND device * @pos: the position to update * -- cgit From 81bd415c91eb966118d773dddf254aebf3022411 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 6 Jun 2018 21:42:32 +0200 Subject: watchdog/core: Add missing prototypes for weak functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The split out of the hard lockup detector exposed two new weak functions, but no prototypes for them, which triggers the build warning: kernel/watchdog.c:109:12: warning: no previous prototype for ‘watchdog_nmi_enable’ [-Wmissing-prototypes] kernel/watchdog.c:115:13: warning: no previous prototype for ‘watchdog_nmi_disable’ [-Wmissing-prototypes] Add the prototypes. Fixes: 73ce0511c436 ("kernel/watchdog.c: move hardlockup detector to separate file") Signed-off-by: Mathieu Malaterre Signed-off-by: Thomas Gleixner Cc: Babu Moger Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180606194232.17653-1-malat@debian.org --- include/linux/nmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 08f9247e9827..9003e29cde46 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -119,6 +119,8 @@ static inline int hardlockup_detector_perf_init(void) { return 0; } void watchdog_nmi_stop(void); void watchdog_nmi_start(void); int watchdog_nmi_probe(void); +int watchdog_nmi_enable(unsigned int cpu); +void watchdog_nmi_disable(unsigned int cpu); /** * touch_nmi_watchdog - restart NMI watchdog timeout. -- cgit From 99b77fef3c6c69bb7664f1ac97a69d5b17968dae Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 31 Oct 2018 12:20:28 +0200 Subject: net/mlx5: Fix XRC SRQ umem valid bits Adapt XRC SRQ to the latest HW specification with fixed definition around umem valid bits. The previous definition relied on a bit which was taken for other purposes in legacy FW. Fixes: bd37197554eb ("net/mlx5: Update mlx5_ifc with DEVX UID bits") Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dbff9ff28f2c..34e17e6f8942 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2473,14 +2473,15 @@ struct mlx5_ifc_xrc_srqc_bits { u8 wq_signature[0x1]; u8 cont_srq[0x1]; - u8 dbr_umem_valid[0x1]; + u8 reserved_at_22[0x1]; u8 rlky[0x1]; u8 basic_cyclic_rcv_wqe[0x1]; u8 log_rq_stride[0x3]; u8 xrcd[0x18]; u8 page_offset[0x6]; - u8 reserved_at_46[0x2]; + u8 reserved_at_46[0x1]; + u8 dbr_umem_valid[0x1]; u8 cqn[0x18]; u8 reserved_at_60[0x20]; @@ -6689,9 +6690,12 @@ struct mlx5_ifc_create_xrc_srq_in_bits { struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; - u8 reserved_at_280[0x40]; + u8 reserved_at_280[0x60]; + u8 xrc_srq_umem_valid[0x1]; - u8 reserved_at_2c1[0x5bf]; + u8 reserved_at_2e1[0x1f]; + + u8 reserved_at_300[0x580]; u8 pas[0][0x40]; }; -- cgit From 781f0766cc41a9dd2e5d118ef4b1d5d89430257b Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 19 Oct 2018 16:14:50 +0800 Subject: USB: Wait for extra delay time after USB_PORT_FEAT_RESET for quirky hub Devices connected under Terminus Technology Inc. Hub (1a40:0101) may fail to work after the system resumes from suspend: [ 206.063325] usb 3-2.4: reset full-speed USB device number 4 using xhci_hcd [ 206.143691] usb 3-2.4: device descriptor read/64, error -32 [ 206.351671] usb 3-2.4: device descriptor read/64, error -32 Info for this hub: T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 2 Spd=480 MxCh= 4 D: Ver= 2.00 Cls=09(hub ) Sub=00 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1a40 ProdID=0101 Rev=01.11 S: Product=USB 2.0 Hub C: #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 1 Cls=09(hub ) Sub=00 Prot=00 Driver=hub Some expirements indicate that the USB devices connected to the hub are innocent, it's the hub itself is to blame. The hub needs extra delay time after it resets its port. Hence wait for extra delay, if the device is connected to this quirky hub. Signed-off-by: Kai-Heng Feng Cc: stable Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/quirks.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index b7a99ce56bc9..a1be64c9940f 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -66,4 +66,7 @@ /* Device needs a pause after every control message. */ #define USB_QUIRK_DELAY_CTRL_MSG BIT(13) +/* Hub needs extra delay after resetting its port. */ +#define USB_QUIRK_HUB_SLOW_RESET BIT(14) + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit From 24efee412c75843755da0ddd7bbc2db2ce9129f5 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 6 Nov 2018 22:52:11 +0100 Subject: Compiler Attributes: improve explanation of header Explain better what "optional" attributes are, and avoid calling them so to avoid confusion. Simply retain "Optional" as a word to look for in the comments. Moreover, add a couple sentences to explain a bit more the intention and the documentation links. Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 6b28c1b7310c..f8c400ba1929 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -4,22 +4,26 @@ /* * The attributes in this file are unconditionally defined and they directly - * map to compiler attribute(s) -- except those that are optional. + * map to compiler attribute(s), unless one of the compilers does not support + * the attribute. In that case, __has_attribute is used to check for support + * and the reason is stated in its comment ("Optional: ..."). * * Any other "attributes" (i.e. those that depend on a configuration option, * on a compiler, on an architecture, on plugins, on other attributes...) * should be defined elsewhere (e.g. compiler_types.h or compiler-*.h). + * The intention is to keep this file as simple as possible, as well as + * compiler- and version-agnostic (e.g. avoiding GCC_VERSION checks). * * This file is meant to be sorted (by actual attribute name, * not by #define identifier). Use the __attribute__((__name__)) syntax * (i.e. with underscores) to avoid future collisions with other macros. - * If an attribute is optional, state the reason in the comment. + * Provide links to the documentation of each supported compiler, if it exists. */ /* - * To check for optional attributes, we use __has_attribute, which is supported - * on gcc >= 5, clang >= 2.9 and icc >= 17. In the meantime, to support - * 4.6 <= gcc < 5, we implement __has_attribute by hand. + * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17. + * In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute + * by hand. * * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__ * depending on the compiler used to build it; however, these attributes have -- cgit From 23c625ce3065e40c933a4239efb9b11f1194a343 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 8 Nov 2018 14:55:21 +0100 Subject: libceph: assume argonaut on the server side No one is running pre-argonaut. In addition one of the argonaut features (NOSRCADDR) has been required since day one (and a half, 2.6.34 vs 2.6.35) of the kernel client. Allow for the possibility of reusing these feature bits later. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/ceph_features.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 6b92b3395fa9..65a38c4a02a1 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -213,12 +213,6 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \ CEPH_FEATURE_CEPHX_V2) -#define CEPH_FEATURES_REQUIRED_DEFAULT \ - (CEPH_FEATURE_NOSRCADDR | \ - CEPH_FEATURE_SUBSCRIBE2 | \ - CEPH_FEATURE_RECONNECT_SEQ | \ - CEPH_FEATURE_PGID64 | \ - CEPH_FEATURE_PGPOOL3 | \ - CEPH_FEATURE_OSDENC) +#define CEPH_FEATURES_REQUIRED_DEFAULT 0 #endif -- cgit From a4310fa2f24687888ce80fdb0e88583561a23700 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 10:37:46 +0100 Subject: can: dev: can_get_echo_skb(): factor out non sending code to __can_get_echo_skb() This patch factors out all non sending parts of can_get_echo_skb() into a seperate function __can_get_echo_skb(), so that it can be re-used in an upcoming patch. Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index a83e1f632eb7..f01623aef2f7 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -169,6 +169,7 @@ void can_change_state(struct net_device *dev, struct can_frame *cf, void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx); +struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); void can_free_echo_skb(struct net_device *dev, unsigned int idx); -- cgit From 55059f2b7f868cd43b3ad30e28e18347e1b46ace Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 18 Sep 2018 11:40:38 +0200 Subject: can: rx-offload: introduce can_rx_offload_get_echo_skb() and can_rx_offload_queue_sorted() functions Current CAN framework can't guarantee proper/chronological order of RX and TX-ECHO messages. To make this possible, drivers should use this functions instead of can_get_echo_skb(). Signed-off-by: Oleksij Rempel Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- include/linux/can/rx-offload.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index cb31683bbe15..01a7c9e5d8d8 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -41,6 +41,10 @@ int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload * int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight); int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg); int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); +int can_rx_offload_queue_sorted(struct can_rx_offload *offload, + struct sk_buff *skb, u32 timestamp); +unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, + unsigned int idx, u32 timestamp); int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb); void can_rx_offload_reset(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); -- cgit From 4530ec36bb1e0d24f41c33229694adacda3d5d89 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 18 Sep 2018 11:40:40 +0200 Subject: can: rx-offload: rename can_rx_offload_irq_queue_err_skb() to can_rx_offload_queue_tail() This function has nothing todo with error. Signed-off-by: Oleksij Rempel Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- include/linux/can/rx-offload.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index 01a7c9e5d8d8..8268811a697e 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -45,7 +45,8 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, struct sk_buff *skb, u32 timestamp); unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, unsigned int idx, u32 timestamp); -int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb); +int can_rx_offload_queue_tail(struct can_rx_offload *offload, + struct sk_buff *skb); void can_rx_offload_reset(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); void can_rx_offload_enable(struct can_rx_offload *offload); -- cgit From d19f9130b814d33c03118493c17454f7d90075d1 Mon Sep 17 00:00:00 2001 From: Elvira Khabirova Date: Sat, 10 Nov 2018 04:22:09 +0100 Subject: x86/ptrace: Fix documentation for tracehook_report_syscall_entry() tracehook_report_syscall_entry() is called not only if %TIF_SYSCALL_TRACE is set, but also if %TIF_SYSCALL_EMU is set, as appears from x86's entry code. Signed-off-by: Elvira Khabirova Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: ldv@altlinux.org Cc: oleg@redhat.com Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/20181110042209.26333972@akathisia Signed-off-by: Ingo Molnar --- include/linux/tracehook.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 40b0b4c1bf7b..df20f8bdbfa3 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -83,8 +83,8 @@ static inline int ptrace_report_syscall(struct pt_regs *regs) * tracehook_report_syscall_entry - task is about to attempt a system call * @regs: user register state of current task * - * This will be called if %TIF_SYSCALL_TRACE has been set, when the - * current task has just entered the kernel for a system call. + * This will be called if %TIF_SYSCALL_TRACE or %TIF_SYSCALL_EMU have been set, + * when the current task has just entered the kernel for a system call. * Full user register state is available here. Changing the values * in @regs can affect the system call number and arguments to be tried. * It is safe to block here, preventing the system call from beginning. -- cgit From eff896288872d687d9662000ec9ae11b6d61766f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Nov 2018 09:55:43 -0800 Subject: efi/arm: Defer persistent reservations until after paging_init() The new memory EFI reservation feature we introduced to allow memory reservations to persist across kexec may trigger an unbounded number of calls to memblock_reserve(). The memblock subsystem can deal with this fine, but not before memblock resizing is enabled, which we can only do after paging_init(), when the memory we reallocate the array into is actually mapped. So break out the memreserve table processing into a separate routine and call it after paging_init() on arm64. On ARM, because of limited reviewing bandwidth of the maintainer, we cannot currently fix this, so instead, disable the EFI persistent memreserve entirely on ARM so we can fix it later. Tested-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181114175544.12860-5-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 845174e113ce..100ce4a4aff6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1167,6 +1167,8 @@ static inline bool efi_enabled(int feature) extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); extern bool efi_is_table_address(unsigned long phys_addr); + +extern int efi_apply_persistent_mem_reservations(void); #else static inline bool efi_enabled(int feature) { @@ -1185,6 +1187,11 @@ static inline bool efi_is_table_address(unsigned long phys_addr) { return false; } + +static inline int efi_apply_persistent_mem_reservations(void) +{ + return 0; +} #endif extern int efi_status_to_err(efi_status_t status); -- cgit From 0145b50566e7de5637e80ecba96c7f0e6fff1aad Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 31 Oct 2018 15:20:05 +0100 Subject: iio/hid-sensors: Fix IIO_CHAN_INFO_RAW returning wrong values for signed numbers Before this commit sensor_hub_input_attr_get_raw_value() failed to take the signedness of 16 and 8 bit values into account, returning e.g. 65436 instead of -100 for the z-axis reading of an accelerometer. This commit adds a new is_signed parameter to the function and makes all callers pass the appropriate value for this. While at it, this commit also fixes up some neighboring lines where statements were needlessly split over 2 lines to improve readability. Signed-off-by: Hans de Goede Acked-by: Srinivas Pandruvada Acked-by: Benjamin Tissoires Cc: Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-hub.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 331dc377c275..dc12f5c4b076 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -177,6 +177,7 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev, * @attr_usage_id: Attribute usage id as per spec * @report_id: Report id to look for * @flag: Synchronous or asynchronous read +* @is_signed: If true then fields < 32 bits will be sign-extended * * Issues a synchronous or asynchronous read request for an input attribute. * Returns data upto 32 bits. @@ -190,7 +191,8 @@ enum sensor_hub_read_flags { int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, u32 usage_id, u32 attr_usage_id, u32 report_id, - enum sensor_hub_read_flags flag + enum sensor_hub_read_flags flag, + bool is_signed ); /** -- cgit From b34087157dd76e8d96e5e52808134a791ac61e57 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 21 Nov 2018 16:00:50 +0000 Subject: dma-direct: Make DIRECT_MAPPING_ERROR viable for SWIOTLB With the overflow buffer removed, we no longer have a unique address which is guaranteed not to be a valid DMA target to use as an error token. The DIRECT_MAPPING_ERROR value of 0 tries to at least represent an unlikely DMA target, but unfortunately there are already SWIOTLB users with DMA-able memory at physical address 0 which now gets falsely treated as a mapping failure and leads to all manner of misbehaviour. The best we can do to mitigate that is flip DIRECT_MAPPING_ERROR to the other commonly-used error value of all-bits-set, since the last single byte of memory is by far the least-likely-valid DMA target. Fixes: dff8d6c1ed58 ("swiotlb: remove the overflow buffer") Reported-by: John Stultz Tested-by: John Stultz Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Robin Murphy Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index bd73e7a91410..9e66bfe369aa 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -5,7 +5,7 @@ #include #include -#define DIRECT_MAPPING_ERROR 0 +#define DIRECT_MAPPING_ERROR (~(dma_addr_t)0) #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include -- cgit From 86de5921a3d5dd246df661e09bdd0a6131b39ae3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Nov 2018 05:53:59 -0800 Subject: tcp: defer SACK compression after DupThresh Jean-Louis reported a TCP regression and bisected to recent SACK compression. After a loss episode (receiver not able to keep up and dropping packets because its backlog is full), linux TCP stack is sending a single SACK (DUPACK). Sender waits a full RTO timer before recovering losses. While RFC 6675 says in section 5, "Algorithm Details", (2) If DupAcks < DupThresh but IsLost (HighACK + 1) returns true -- indicating at least three segments have arrived above the current cumulative acknowledgment point, which is taken to indicate loss -- go to step (4). ... (4) Invoke fast retransmit and enter loss recovery as follows: there are old TCP stacks not implementing this strategy, and still counting the dupacks before starting fast retransmit. While these stacks probably perform poorly when receivers implement LRO/GRO, we should be a little more gentle to them. This patch makes sure we do not enable SACK compression unless 3 dupacks have been sent since last rcv_nxt update. Ideally we should even rearm the timer to send one or two more DUPACK if no more packets are coming, but that will be work aiming for linux-4.21. Many thanks to Jean-Louis for bisecting the issue, providing packet captures and testing this patch. Fixes: 5d9f4262b7ea ("tcp: add SACK compression") Reported-by: Jean-Louis Dupond Tested-by: Jean-Louis Dupond Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8ed77bb4ed86..a9b0280687d5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -196,6 +196,7 @@ struct tcp_sock { u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ + u32 compressed_ack_rcv_nxt; u32 tsoffset; /* timestamp offset */ -- cgit From 0211dda68a4f6531923a2f72d8e8959207f59fba Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Wed, 21 Nov 2018 16:28:23 +0200 Subject: net/dim: Update DIM start sample after each DIM iteration On every iteration of net_dim, the algorithm may choose to check for the system state by comparing current data sample with previous data sample. After each of these comparison, regardless of the action taken, the sample used as baseline is needed to be updated. This patch fixes a bug that causes DIM to take wrong decisions, due to never updating the baseline sample for comparison between iterations. This way, DIM always compares current sample with zeros. Although this is a functional fix, it also improves and stabilizes performance as the algorithm works properly now. Performance: Tested single UDP TX stream with pktgen: samples/pktgen/pktgen_sample03_burst_single_flow.sh -i p4p2 -d 1.1.1.1 -m 24:8a:07:88:26:8b -f 3 -b 128 ConnectX-5 100GbE packet rate improved from 15-19Mpps to 19-20Mpps. Also, toggling between profiles is less frequent with the fix. Fixes: 8115b750dbcb ("net/dim: use struct net_dim_sample as arg to net_dim") Signed-off-by: Tal Gilboa Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- include/linux/net_dim.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h index c79e859408e6..fd458389f7d1 100644 --- a/include/linux/net_dim.h +++ b/include/linux/net_dim.h @@ -406,6 +406,8 @@ static inline void net_dim(struct net_dim *dim, } /* fall through */ case NET_DIM_START_MEASURE: + net_dim_sample(end_sample.event_ctr, end_sample.pkt_ctr, end_sample.byte_ctr, + &dim->start_sample); dim->state = NET_DIM_MEASURE_IN_PROGRESS; break; case NET_DIM_APPLY_NEW_PROFILE: -- cgit From 5cd8d46ea1562be80063f53c7c6a5f40224de623 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 20 Nov 2018 13:00:18 -0500 Subject: packet: copy user buffers before orphan or clone tpacket_snd sends packets with user pages linked into skb frags. It notifies that pages can be reused when the skb is released by setting skb->destructor to tpacket_destruct_skb. This can cause data corruption if the skb is orphaned (e.g., on transmit through veth) or cloned (e.g., on mirror to another psock). Create a kernel-private copy of data in these cases, same as tun/tap zerocopy transmission. Reuse that infrastructure: mark the skb as SKBTX_ZEROCOPY_FRAG, which will trigger copy in skb_orphan_frags(_rx). Unlike other zerocopy packets, do not set shinfo destructor_arg to struct ubuf_info. tpacket_destruct_skb already uses that ptr to notify when the original skb is released and a timestamp is recorded. Do not change this timestamp behavior. The ubuf_info->callback is not needed anyway, as no zerocopy notification is expected. Mark destructor_arg as not-a-uarg by setting the lower bit to 1. The resulting value is not a valid ubuf_info pointer, nor a valid tpacket_snd frame address. Add skb_zcopy_.._nouarg helpers for this. The fix relies on features introduced in commit 52267790ef52 ("sock: add MSG_ZEROCOPY"), so can be backported as is only to 4.14. Tested with from `./in_netns.sh ./txring_overwrite` from http://github.com/wdebruij/kerneltools/tests Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap") Reported-by: Anand H. Krishnan Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0ba687454267..0d1b2c3f127b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1326,6 +1326,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg) } } +static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val) +{ + skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL); + skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; +} + +static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb) +{ + return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL; +} + +static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) +{ + return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL); +} + /* Release a reference on a zerocopy structure */ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) { @@ -1335,7 +1351,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) if (uarg->callback == sock_zerocopy_callback) { uarg->zerocopy = uarg->zerocopy && zerocopy; sock_zerocopy_put(uarg); - } else { + } else if (!skb_zcopy_is_nouarg(skb)) { uarg->callback(uarg, zerocopy); } -- cgit From 89259088c1b7fecb43e8e245dc931909132a4e03 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 17 Nov 2018 11:32:29 +0100 Subject: netfilter: nfnetlink_cttimeout: fetch timeouts for udplite and gre, too syzbot was able to trigger the WARN in cttimeout_default_get() by passing UDPLITE as l4protocol. Alias UDPLITE to UDP, both use same timeout values. Furthermore, also fetch GRE timeouts. GRE is a bit more complicated, as it still can be a module and its netns_proto_gre struct layout isn't visible outside of the gre module. Can't move timeouts around, it appears conntrack sysctl unregister assumes net_generic() returns nf_proto_net, so we get crash. Expose layout of netns_proto_gre instead. A followup nf-next patch could make gre tracker be built-in as well if needed, its not that large. Last, make the WARN() mention the missing protocol value in case anything else is missing. Reported-by: syzbot+2fae8fa157dd92618cae@syzkaller.appspotmail.com Fixes: 8866df9264a3 ("netfilter: nfnetlink_cttimeout: pass default timeout policy to obj_to_nlattr") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_proto_gre.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index b8d95564bd53..14edb795ab43 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -21,6 +21,19 @@ struct nf_ct_gre_keymap { struct nf_conntrack_tuple tuple; }; +enum grep_conntrack { + GRE_CT_UNREPLIED, + GRE_CT_REPLIED, + GRE_CT_MAX +}; + +struct netns_proto_gre { + struct nf_proto_net nf; + rwlock_t keymap_lock; + struct list_head keymap_list; + unsigned int gre_timeouts[GRE_CT_MAX]; +}; + /* add new tuple->key_reply pair to keymap */ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conntrack_tuple *t); -- cgit From 786a9ab1330169f2602238822b4df5d5c4c98f6c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:17 +0100 Subject: gpio: davinci: restore a way to manually specify the GPIO base Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the network support in legacy boot mode for da850-evm since we can no longer request the MDIO clock GPIO. Other boards may be broken too, which I haven't tested. The problem is in the fact that most board files still use the legacy GPIO API where lines are requested by numbers rather than descriptors. While this should be fixed eventually, in order to unbreak the board for now - provide a way to manually specify the GPIO base in platform data. Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Acked-by: Linus Walleij Signed-off-by: Sekhar Nori --- include/linux/platform_data/gpio-davinci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h index f92a47e18034..a93841bfb9f7 100644 --- a/include/linux/platform_data/gpio-davinci.h +++ b/include/linux/platform_data/gpio-davinci.h @@ -17,6 +17,8 @@ #define __DAVINCI_GPIO_PLATFORM_H struct davinci_gpio_platform_data { + bool no_auto_base; + u32 base; u32 ngpio; u32 gpio_unbanked; }; -- cgit From 8114865ff82e200b383e46821c25cb0625b842b5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:10:15 -0500 Subject: function_graph: Create function_graph_enter() to consolidate architecture code Currently all the architectures do basically the same thing in preparing the function graph tracer on entry to a function. This code can be pulled into a generic location and then this will allow the function graph tracer to be fixed, as well as extended. Create a new function graph helper function_graph_enter() that will call the hook function (ftrace_graph_entry) and the shadow stack operation (ftrace_push_return_trace), and remove the need of the architecture code to manage the shadow stack. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a397907e8d72..5717e8f81c59 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -779,6 +779,9 @@ extern void return_to_handler(void); extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, unsigned long frame_pointer, unsigned long *retp); +extern int +function_graph_enter(unsigned long ret, unsigned long func, + unsigned long frame_pointer, unsigned long *retp); unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp); -- cgit From e2c95a61656d29ceaac97b6a975c8a1f26e26f15 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 26 Nov 2018 14:05:38 +0100 Subject: bpf, ppc64: generalize fetching subprog into bpf_jit_get_func_addr Make fetching of the BPF call address from ppc64 JIT generic. ppc64 was using a slightly different variant rather than through the insns' imm field encoding as the target address would not fit into that space. Therefore, the target subprog number was encoded into the insns' offset and fetched through fp->aux->func[off]->bpf_func instead. Given there are other JITs with this issue and the mechanism of fetching the address is JIT-generic, move it into the core as a helper instead. On the JIT side, we get information on whether the retrieved address is a fixed one, that is, not changing through JIT passes, or a dynamic one. For the former, JITs can optimize their imm emission because this doesn't change jump offsets throughout JIT process. Signed-off-by: Daniel Borkmann Reviewed-by: Sandipan Das Tested-by: Sandipan Das Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index de629b706d1d..448dcc448f1f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -866,6 +866,10 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr); void bpf_jit_free(struct bpf_prog *fp); +int bpf_jit_get_func_addr(const struct bpf_prog *prog, + const struct bpf_insn *insn, bool extra_pass, + u64 *func_addr, bool *func_addr_fixed); + struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); -- cgit From d125f3f866df88da5a85df00291f88f0baa89f7c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 19 Nov 2018 07:40:39 -0500 Subject: function_graph: Make ftrace_push_return_trace() static As all architectures now call function_graph_enter() to do the entry work, no architecture should ever call ftrace_push_return_trace(). Make it static. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 5717e8f81c59..dd16e8218db3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -776,9 +776,6 @@ struct ftrace_ret_stack { */ extern void return_to_handler(void); -extern int -ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, - unsigned long frame_pointer, unsigned long *retp); extern int function_graph_enter(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp); -- cgit From 39eb456dacb543de90d3bc6a8e0ac5cf51ac475e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 19 Nov 2018 08:07:12 -0500 Subject: function_graph: Use new curr_ret_depth to manage depth instead of curr_ret_stack Currently, the depth of the ret_stack is determined by curr_ret_stack index. The issue is that there's a race between setting of the curr_ret_stack and calling of the callback attached to the return of the function. Commit 03274a3ffb44 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") moved the calling of the callback to after the setting of the curr_ret_stack, even stating that it was safe to do so, when in fact, it was the reason there was a barrier() there (yes, I should have commented that barrier()). Not only does the curr_ret_stack keep track of the current call graph depth, it also keeps the ret_stack content from being overwritten by new data. The function profiler, uses the "subtime" variable of ret_stack structure and by moving the curr_ret_stack, it allows for interrupts to use the same structure it was using, corrupting the data, and breaking the profiler. To fix this, there needs to be two variables to handle the call stack depth and the pointer to where the ret_stack is being used, as they need to change at two different locations. Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a51c13c2b1a0..d6183a55e8eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1116,6 +1116,7 @@ struct task_struct { #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack: */ int curr_ret_stack; + int curr_ret_depth; /* Stack of return addresses for return function tracing: */ struct ftrace_ret_stack *ret_stack; -- cgit From 321a874a7ef85655e93b3206d0f36b4a6097f948 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:38 +0100 Subject: sched/smt: Expose sched_smt_present static key Make the scheduler's 'sched_smt_present' static key globaly available, so it can be used in the x86 speculation control code. Provide a query function and a stub for the CONFIG_SMP=n case. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.430168326@linutronix.de --- include/linux/sched/smt.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/sched/smt.h (limited to 'include/linux') diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h new file mode 100644 index 000000000000..c9e0be514110 --- /dev/null +++ b/include/linux/sched/smt.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_SMT_H +#define _LINUX_SCHED_SMT_H + +#include + +#ifdef CONFIG_SCHED_SMT +extern struct static_key_false sched_smt_present; + +static __always_inline bool sched_smt_active(void) +{ + return static_branch_likely(&sched_smt_present); +} +#else +static inline bool sched_smt_active(void) { return false; } +#endif + +#endif -- cgit From a74cfffb03b73d41e08f84c2e5c87dec0ce3db9f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:39 +0100 Subject: x86/speculation: Rework SMT state change arch_smt_update() is only called when the sysfs SMT control knob is changed. This means that when SMT is enabled in the sysfs control knob the system is considered to have SMT active even if all siblings are offline. To allow finegrained control of the speculation mitigations, the actual SMT state is more interesting than the fact that siblings could be enabled. Rework the code, so arch_smt_update() is invoked from each individual CPU hotplug function, and simplify the update function while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.521974984@linutronix.de --- include/linux/sched/smt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index c9e0be514110..59d3736c454c 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -15,4 +15,6 @@ static __always_inline bool sched_smt_active(void) static inline bool sched_smt_active(void) { return false; } #endif +void arch_smt_update(void); + #endif -- cgit From 46f7ecb1e7359f183f5bbd1e08b90e10e52164f9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:50 +0100 Subject: ptrace: Remove unused ptrace_may_access_sched() and MODE_IBRS The IBPB control code in x86 removed the usage. Remove the functionality which was introduced for this. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.559149393@linutronix.de --- include/linux/ptrace.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 6c2ffed907f5..de20ede2c5c8 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -64,15 +64,12 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_NOAUDIT 0x04 #define PTRACE_MODE_FSCREDS 0x08 #define PTRACE_MODE_REALCREDS 0x10 -#define PTRACE_MODE_SCHED 0x20 -#define PTRACE_MODE_IBPB 0x40 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS) #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS) -#define PTRACE_MODE_SPEC_IBPB (PTRACE_MODE_ATTACH_REALCREDS | PTRACE_MODE_IBPB) /** * ptrace_may_access - check whether the caller is permitted to access @@ -90,20 +87,6 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); */ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); -/** - * ptrace_may_access - check whether the caller is permitted to access - * a target task. - * @task: target task - * @mode: selects type of access and caller credentials - * - * Returns true on success, false on denial. - * - * Similar to ptrace_may_access(). Only to be called from context switch - * code. Does not call into audit and the regular LSM hooks due to locking - * constraints. - */ -extern bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode); - static inline int ptrace_reparented(struct task_struct *child) { return !same_thread_group(child->real_parent, child->parent); -- cgit From 9137bb27e60e554dab694eafa4cca241fa3a694f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:53 +0100 Subject: x86/speculation: Add prctl() control for indirect branch speculation Add the PR_SPEC_INDIRECT_BRANCH option for the PR_GET_SPECULATION_CTRL and PR_SET_SPECULATION_CTRL prctls to allow fine grained per task control of indirect branch speculation via STIBP and IBPB. Invocations: Check indirect branch speculation status with - prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); Enable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); Disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); Force disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); See Documentation/userspace-api/spec_ctrl.rst. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.866780996@linutronix.de --- include/linux/sched.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a51c13c2b1a0..d607db5fcc6a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1453,6 +1453,8 @@ static inline bool is_percpu_thread(void) #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ #define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ +#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ +#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ @@ -1484,6 +1486,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) +TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) +TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) +TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) + +TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) +TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) + static inline void current_restore_flags(unsigned long orig_flags, unsigned long flags) { -- cgit From 3f2b7b9035107d6096ea438ea3d97dcf0481b6d2 Mon Sep 17 00:00:00 2001 From: "kiran.modukuri" Date: Mon, 26 Nov 2018 15:41:48 +0000 Subject: fscache: Fix race in fscache_op_complete() due to split atomic_sub & read The code in fscache_retrieval_complete is using atomic_sub followed by an atomic_read: atomic_sub(n_pages, &op->n_pages); if (atomic_read(&op->n_pages) <= 0) fscache_op_complete(&op->op, true); This causes two threads doing a decrement of n_pages to race with each other seeing the op->refcount 0 at same time - and they end up calling fscache_op_complete() in both the threads leading to an assertion failure. Fix this by using atomic_sub_return_relaxed() instead of two calls. Note that I'm using 'relaxed' rather than, say, 'release' as there aren't multiple variables that appear to need ordering across the release. The oops looks something like: FS-Cache: Assertion failed FS-Cache: 0 > 0 is false ... kernel BUG at /usr/src/linux-4.4.0/fs/fscache/operation.c:449! ... Workqueue: fscache_operation fscache_op_work_func [fscache] ... RIP: 0010:[] fscache_op_complete+0x10d/0x180 [fscache] ... Call Trace: [] cachefiles_read_copier+0x3a9/0x410 [cachefiles] [] fscache_op_work_func+0x22/0x50 [fscache] [] process_one_work+0x150/0x3f0 [] worker_thread+0x11a/0x470 [] ? __schedule+0x359/0x980 [] ? rescuer_thread+0x310/0x310 [] kthread+0xd6/0xf0 [] ? kthread_park+0x60/0x60 [] ret_from_fork+0x3f/0x70 [] ? kthread_park+0x60/0x60 This seen this in 4.4.x kernels and the same bug affects fscache in latest upstreams kernels. Fixes: 1bb4b7f98f36 ("FS-Cache: The retrieval remaining-pages counter needs to be atomic_t") Signed-off-by: Kiran Kumar Modukuri Signed-off-by: David Howells --- include/linux/fscache-cache.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 34cf0fdd7dc7..610815e3f1aa 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -196,8 +196,7 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) static inline void fscache_retrieval_complete(struct fscache_retrieval *op, int n_pages) { - atomic_sub(n_pages, &op->n_pages); - if (atomic_read(&op->n_pages) <= 0) + if (atomic_sub_return_relaxed(n_pages, &op->n_pages) <= 0) fscache_op_complete(&op->op, false); } -- cgit From 90230968f102acbe103fbf7c03d41addfef5f153 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 29 Nov 2018 12:00:05 +0200 Subject: net: phy: sfp: correct location of SFP standards SFP standards are now available from the SNIA (Storage Networking Industry Association) website. Cc: Andrew Lunn Cc: Florian Fainelli Signed-off-by: Baruch Siach Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/sfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index d37518e89db2..d9d9de3fcf8e 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -224,7 +224,7 @@ struct sfp_eeprom_ext { * * See the SFF-8472 specification and related documents for the definition * of these structure members. This can be obtained from - * ftp://ftp.seagate.com/sff + * https://www.snia.org/technology-communities/sff/specifications */ struct sfp_eeprom_id { struct sfp_eeprom_base base; -- cgit From 89d328f637b9904b6d4c9af73c8a608b8dd4d6f8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Nov 2018 16:17:22 -0700 Subject: pstore/ram: Correctly calculate usable PRZ bytes The actual number of bytes stored in a PRZ is smaller than the bytes requested by platform data, since there is a header on each PRZ. Additionally, if ECC is enabled, there are trailing bytes used as well. Normally this mismatch doesn't matter since PRZs are circular buffers and the leading "overflow" bytes are just thrown away. However, in the case of a compressed record, this rather badly corrupts the results. This corruption was visible with "ramoops.mem_size=204800 ramoops.ecc=1". Any stored crashes would not be uncompressable (producing a pstorefs "dmesg-*.enc.z" file), and triggering errors at boot: [ 2.790759] pstore: crypto_comp_decompress failed, ret = -22! Backporting this depends on commit 70ad35db3321 ("pstore: Convert console write to use ->write_buf") Reported-by: Joel Fernandes Fixes: b0aad7a99c1d ("pstore: Add compression support to pstore") Signed-off-by: Kees Cook Reviewed-by: Joel Fernandes (Google) --- include/linux/pstore.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index a15bc4d48752..30fcec375a3a 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -90,7 +90,10 @@ struct pstore_record { * * @buf_lock: spinlock to serialize access to @buf * @buf: preallocated crash dump buffer - * @bufsize: size of @buf available for crash dump writes + * @bufsize: size of @buf available for crash dump bytes (must match + * smallest number of bytes available for writing to a + * backend entry, since compressed bytes don't take kindly + * to being truncated) * * @read_mutex: serializes @open, @read, @close, and @erase callbacks * @flags: bitfield of frontends the backend can accept writes for -- cgit From 0c7a52e4d4b5c4d35b31f3c3ad32af814f1bf491 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 28 Nov 2018 03:35:23 +0000 Subject: tracepoint: Use __idx instead of idx in DO_TRACE macro to make it unique After enabling KVM event tracing, almost all of trace_kvm_exit()'s printk shows "kvm_exit: IRQ: ..." even if the actual exception_type is NOT IRQ. More specifically, trace_kvm_exit() is defined in virt/kvm/arm/trace.h by TRACE_EVENT. This slight problem may have existed after commit e6753f23d961 ("tracepoint: Make rcuidle tracepoint callers use SRCU"). There are two variables in trace_kvm_exit() and __DO_TRACE() which have the same name, *idx*. Thus the actual value of *idx* will be overwritten when tracing. Fix it by adding a simple prefix. Cc: Joel Fernandes Cc: Wang Haibin Cc: linux-trace-devel@vger.kernel.org Cc: stable@vger.kernel.org Fixes: e6753f23d961 ("tracepoint: Make rcuidle tracepoint callers use SRCU") Reviewed-by: Joel Fernandes (Google) Signed-off-by: Zenghui Yu Signed-off-by: Steven Rostedt (VMware) --- include/linux/tracepoint.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 538ba1a58f5b..e9de8ad0bad7 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -166,7 +166,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) struct tracepoint_func *it_func_ptr; \ void *it_func; \ void *__data; \ - int __maybe_unused idx = 0; \ + int __maybe_unused __idx = 0; \ \ if (!(cond)) \ return; \ @@ -182,7 +182,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * doesn't work from the idle path. \ */ \ if (rcuidle) { \ - idx = srcu_read_lock_notrace(&tracepoint_srcu); \ + __idx = srcu_read_lock_notrace(&tracepoint_srcu);\ rcu_irq_enter_irqson(); \ } \ \ @@ -198,7 +198,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) \ if (rcuidle) { \ rcu_irq_exit_irqson(); \ - srcu_read_unlock_notrace(&tracepoint_srcu, idx);\ + srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\ } \ \ preempt_enable_notrace(); \ -- cgit From e0c274472d5d27f277af722e017525e0b33784cd Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 30 Nov 2018 14:09:58 -0800 Subject: psi: make disabling/enabling easier for vendor kernels Mel Gorman reports a hackbench regression with psi that would prohibit shipping the suse kernel with it default-enabled, but he'd still like users to be able to opt in at little to no cost to others. With the current combination of CONFIG_PSI and the psi_disabled bool set from the commandline, this is a challenge. Do the following things to make it easier: 1. Add a config option CONFIG_PSI_DEFAULT_DISABLED that allows distros to enable CONFIG_PSI in their kernel but leave the feature disabled unless a user requests it at boot-time. To avoid double negatives, rename psi_disabled= to psi=. 2. Make psi_disabled a static branch to eliminate any branch costs when the feature is disabled. In terms of numbers before and after this patch, Mel says: : The following is a comparision using CONFIG_PSI=n as a baseline against : your patch and a vanilla kernel : : 4.20.0-rc4 4.20.0-rc4 4.20.0-rc4 : kconfigdisable-v1r1 vanilla psidisable-v1r1 : Amean 1 1.3100 ( 0.00%) 1.3923 ( -6.28%) 1.3427 ( -2.49%) : Amean 3 3.8860 ( 0.00%) 4.1230 * -6.10%* 3.8860 ( -0.00%) : Amean 5 6.8847 ( 0.00%) 8.0390 * -16.77%* 6.7727 ( 1.63%) : Amean 7 9.9310 ( 0.00%) 10.8367 * -9.12%* 9.9910 ( -0.60%) : Amean 12 16.6577 ( 0.00%) 18.2363 * -9.48%* 17.1083 ( -2.71%) : Amean 18 26.5133 ( 0.00%) 27.8833 * -5.17%* 25.7663 ( 2.82%) : Amean 24 34.3003 ( 0.00%) 34.6830 ( -1.12%) 32.0450 ( 6.58%) : Amean 30 40.0063 ( 0.00%) 40.5800 ( -1.43%) 41.5087 ( -3.76%) : Amean 32 40.1407 ( 0.00%) 41.2273 ( -2.71%) 39.9417 ( 0.50%) : : It's showing that the vanilla kernel takes a hit (as the bisection : indicated it would) and that disabling PSI by default is reasonably : close in terms of performance for this particular workload on this : particular machine so; Link: http://lkml.kernel.org/r/20181127165329.GA29728@cmpxchg.org Signed-off-by: Johannes Weiner Tested-by: Mel Gorman Reported-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/psi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/psi.h b/include/linux/psi.h index 8e0725aac0aa..7006008d5b72 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -1,6 +1,7 @@ #ifndef _LINUX_PSI_H #define _LINUX_PSI_H +#include #include #include @@ -9,7 +10,7 @@ struct css_set; #ifdef CONFIG_PSI -extern bool psi_disabled; +extern struct static_key_false psi_disabled; void psi_init(void); -- cgit From b7df9ada9a7700dbcca1ba53d217c01e3d48179c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 1 Dec 2018 01:18:53 +0100 Subject: bpf: fix pointer offsets in context for 32 bit Currently, pointer offsets in three BPF context structures are broken in two scenarios: i) 32 bit compiled applications running on 64 bit kernels, and ii) LLVM compiled BPF programs running on 32 bit kernels. The latter is due to BPF target machine being strictly 64 bit. So in each of the cases the offsets will mismatch in verifier when checking / rewriting context access. Fix this by providing a helper macro __bpf_md_ptr() that will enforce padding up to 64 bit and proper alignment, and for context access a macro bpf_ctx_range_ptr() which will cover full 64 bit member range on 32 bit archs. For flow_keys, we additionally need to force the size check to sizeof(__u64) as with other pointer types. Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") Fixes: 2dbb9b9e6df6 ("bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT") Reported-by: David S. Miller Signed-off-by: Daniel Borkmann Acked-by: David S. Miller Tested-by: David S. Miller Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 448dcc448f1f..795ff0b869bb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -449,6 +449,13 @@ struct sock_reuseport; offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 #define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \ offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1 +#if BITS_PER_LONG == 64 +# define bpf_ctx_range_ptr(TYPE, MEMBER) \ + offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 +#else +# define bpf_ctx_range_ptr(TYPE, MEMBER) \ + offsetof(TYPE, MEMBER) ... offsetof(TYPE, MEMBER) + 8 - 1 +#endif /* BITS_PER_LONG == 64 */ #define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \ ({ \ -- cgit From 71700bb96047f68a0aae3932466fc7c9ad5ce6c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 30 Nov 2018 16:11:15 -0500 Subject: SUNRPC: Fix a memory leak in call_encode() If we retransmit an RPC request, we currently end up clobbering the value of req->rq_rcv_buf.bvec that was allocated by the initial call to xprt_request_prepare(req). Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 43106ffa6788..2ec128060239 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -72,7 +72,6 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) buf->head[0].iov_base = start; buf->head[0].iov_len = len; buf->tail[0].iov_len = 0; - buf->bvec = NULL; buf->pages = NULL; buf->page_len = 0; buf->flags = 0; -- cgit From 37c2578c0c40e286bc0d30bdc05290b2058cf66e Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 3 Dec 2018 00:54:35 +0000 Subject: Drivers: hv: vmbus: Offload the handling of channels to two workqueues vmbus_process_offer() mustn't call channel->sc_creation_callback() directly for sub-channels, because sc_creation_callback() -> vmbus_open() may never get the host's response to the OPEN_CHANNEL message (the host may rescind a channel at any time, e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind() may not wake up the vmbus_open() as it's blocked due to a non-zero vmbus_connection.offer_in_progress, and finally we have a deadlock. The above is also true for primary channels, if the related device drivers use sync probing mode by default. And, usually the handling of primary channels and sub-channels can depend on each other, so we should offload them to different workqueues to avoid possible deadlock, e.g. in sync-probing mode, NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() -> rtnl_lock(), and causes deadlock: the former gets the rtnl_lock and waits for all the sub-channels to appear, but the latter can't get the rtnl_lock and this blocks the handling of sub-channels. The patch can fix the multiple-NIC deadlock described above for v3.x kernels (e.g. RHEL 7.x) which don't support async-probing of devices, and v4.4, v4.9, v4.14 and v4.18 which support async-probing but don't enable async-probing for Hyper-V drivers (yet). The patch can also fix the hang issue in sub-channel's handling described above for all versions of kernels, including v4.19 and v4.20-rc4. So actually the patch should be applied to all the existing kernels, not only the kernels that have 8195b1396ec8. Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") Cc: stable@vger.kernel.org Cc: Stephen Hemminger Cc: K. Y. Srinivasan Cc: Haiyang Zhang Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b3e24368930a..14131b6fae68 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -905,6 +905,13 @@ struct vmbus_channel { bool probe_done; + /* + * We must offload the handling of the primary/sub channels + * from the single-threaded vmbus_connection.work_queue to + * two different workqueue, otherwise we can block + * vmbus_connection.work_queue and hang: see vmbus_process_offer(). + */ + struct work_struct add_channel_work; }; static inline bool is_hvsock_channel(const struct vmbus_channel *c) -- cgit From 27359fd6e5f3c5db8fe544b63238b6170e8806d8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 30 Nov 2018 11:05:06 -0500 Subject: dax: Fix unlock mismatch with updated API Internal to dax_unlock_mapping_entry(), dax_unlock_entry() is used to store a replacement entry in the Xarray at the given xas-index with the DAX_LOCKED bit clear. When called, dax_unlock_entry() expects the unlocked value of the entry relative to the current Xarray state to be specified. In most contexts dax_unlock_entry() is operating in the same scope as the matched dax_lock_entry(). However, in the dax_unlock_mapping_entry() case the implementation needs to recall the original entry. In the case where the original entry is a 'pmd' entry it is possible that the pfn performed to do the lookup is misaligned to the value retrieved in the Xarray. Change the api to return the unlock cookie from dax_lock_page() and pass it to dax_unlock_page(). This fixes a bug where dax_unlock_page() was assuming that the page was PMD-aligned if the entry was a PMD entry with signatures like: WARNING: CPU: 38 PID: 1396 at fs/dax.c:340 dax_insert_entry+0x2b2/0x2d0 RIP: 0010:dax_insert_entry+0x2b2/0x2d0 [..] Call Trace: dax_iomap_pte_fault.isra.41+0x791/0xde0 ext4_dax_huge_fault+0x16f/0x1f0 ? up_read+0x1c/0xa0 __do_fault+0x1f/0x160 __handle_mm_fault+0x1033/0x1490 handle_mm_fault+0x18b/0x3d0 Link: https://lkml.kernel.org/r/20181130154902.GL10377@bombadil.infradead.org Fixes: 9f32d221301c ("dax: Convert dax_lock_mapping_entry to XArray") Reported-by: Dan Williams Signed-off-by: Matthew Wilcox Tested-by: Dan Williams Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/dax.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 450b28db9533..0dd316a74a29 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -7,6 +7,8 @@ #include #include +typedef unsigned long dax_entry_t; + struct iomap_ops; struct dax_device; struct dax_operations { @@ -88,8 +90,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, struct block_device *bdev, struct writeback_control *wbc); struct page *dax_layout_busy_page(struct address_space *mapping); -bool dax_lock_mapping_entry(struct page *page); -void dax_unlock_mapping_entry(struct page *page); +dax_entry_t dax_lock_page(struct page *page); +void dax_unlock_page(struct page *page, dax_entry_t cookie); #else static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize) @@ -122,14 +124,14 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping, return -EOPNOTSUPP; } -static inline bool dax_lock_mapping_entry(struct page *page) +static inline dax_entry_t dax_lock_page(struct page *page) { if (IS_DAX(page->mapping->host)) - return true; - return false; + return ~0UL; + return 0; } -static inline void dax_unlock_mapping_entry(struct page *page) +static inline void dax_unlock_page(struct page *page, dax_entry_t cookie) { } #endif -- cgit From f51ccf46217c28758b1f3b5bc0ccfc00eca658b2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 4 Dec 2018 17:00:36 +0100 Subject: USB: serial: console: fix reported terminal settings The USB-serial console implementation has never reported the actual terminal settings used. Despite storing the corresponding cflags in its struct console, these were never honoured on later tty open() where the tty termios would be left initialised to the driver defaults. Unlike the serial console implementation, the USB-serial code calls subdriver open() already at console setup. While calling set_termios() and write() before open() looks like it could work for some USB-serial drivers, others definitely do not expect this, so modelling this after serial core is going to be intrusive, if at all possible. Instead, use a (renamed) tty helper to save the termios data used at console setup so that the tty termios reflects the actual terminal settings after a subsequent tty open(). Note that the calls to tty_init_termios() (tty_driver_install()) and tty_save_termios() are serialised using the disconnect mutex. This specifically fixes a regression that was triggered by a recent change adding software flow control to the pl2303 driver: a getty trying to disable flow control while leaving the baud rate unchanged would now also set the baud rate to the driver default (prior to the flow-control change this had been a noop). Fixes: 7041d9c3f01b ("USB: serial: pl2303: add support for tx xon/xoff flow control") Cc: stable # 4.18 Cc: Florian Zumbiehl Reported-by: Jarkko Nikula Tested-by: Jarkko Nikula Acked-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- include/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 414db2bce715..392138fe59b6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -556,6 +556,7 @@ extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); extern void tty_release_struct(struct tty_struct *tty, int idx); extern int tty_release(struct inode *inode, struct file *filp); extern void tty_init_termios(struct tty_struct *tty); +extern void tty_save_termios(struct tty_struct *tty); extern int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty); -- cgit From 2fc00c1e0f9d2abe0df74c33cf9f40d12b9b892f Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Mon, 3 Dec 2018 14:46:20 +0800 Subject: HID: use macros in IS_INPUT_APPLICATION Add missing definition for HID_DG_WHITEBOARD then replace the hid usage hex with macros for better readibility. Signed-off-by: Chris Chiu Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index a355d61940f2..ce5f996c8d3d 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -238,6 +238,7 @@ struct hid_item { #define HID_DG_LIGHTPEN 0x000d0003 #define HID_DG_TOUCHSCREEN 0x000d0004 #define HID_DG_TOUCHPAD 0x000d0005 +#define HID_DG_WHITEBOARD 0x000d0006 #define HID_DG_STYLUS 0x000d0020 #define HID_DG_PUCK 0x000d0021 #define HID_DG_FINGER 0x000d0022 @@ -836,7 +837,10 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, /* Applications from HID Usage Tables 4/8/99 Version 1.1 */ /* We ignore a few input applications that are not widely used */ -#define IS_INPUT_APPLICATION(a) (((a >= 0x00010000) && (a <= 0x00010008)) || (a == 0x00010080) || (a == 0x000c0001) || ((a >= 0x000d0002) && (a <= 0x000d0006))) +#define IS_INPUT_APPLICATION(a) \ + (((a >= HID_UP_GENDESK) && (a <= HID_GD_MULTIAXIS)) \ + || ((a >= HID_DG_PEN) && (a <= HID_DG_WHITEBOARD)) \ + || (a == HID_GD_SYSTEM_CONTROL) || (a == HID_CP_CONSUMER_CONTROL)) /* HID core API */ -- cgit From 7f5592742a429b4de770fc5b796d18de43a15fdc Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Mon, 3 Dec 2018 14:46:21 +0800 Subject: HID: input: support Microsoft wireless radio control hotkey The ASUS laptops start to support the airplane mode radio management to replace the original mechanism of airplane mode toggle hotkey. On the ASUS P5440FF, it presents as a HID device connecting via I2C, named i2c-AMPD0001. When pressing it, the Embedded Controller send hid report via I2C and switch the airplane mode indicator LED based on the status. However, it's not working because it fails to be identified as a hidinput device. It fails in hidinput_connect() due to the macro IS_INPUT_APPLICATION doesn't have HID_GD_WIRELESS_RADIO_CTLS as a legit application code. It's easy to add the HID I2C vendor and product id to the quirk list and apply HID_QUIRK_HIDINPUT_FORCE to make it work. But it makes more sense to support it as a generic input application. Signed-off-by: Chris Chiu Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index ce5f996c8d3d..42079116fb61 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -840,7 +840,8 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, #define IS_INPUT_APPLICATION(a) \ (((a >= HID_UP_GENDESK) && (a <= HID_GD_MULTIAXIS)) \ || ((a >= HID_DG_PEN) && (a <= HID_DG_WHITEBOARD)) \ - || (a == HID_GD_SYSTEM_CONTROL) || (a == HID_CP_CONSUMER_CONTROL)) + || (a == HID_GD_SYSTEM_CONTROL) || (a == HID_CP_CONSUMER_CONTROL) \ + || (a == HID_GD_WIRELESS_RADIO_CTLS)) /* HID core API */ -- cgit From 704620afc70cf47abb9d6a1a57f3825d2bca49cf Mon Sep 17 00:00:00 2001 From: Mathias Payer Date: Wed, 5 Dec 2018 21:19:59 +0100 Subject: USB: check usb_get_extra_descriptor for proper size When reading an extra descriptor, we need to properly check the minimum and maximum size allowed, to prevent from invalid data being sent by a device. Reported-by: Hui Peng Reported-by: Mathias Payer Co-developed-by: Linus Torvalds Signed-off-by: Hui Peng Signed-off-by: Mathias Payer Signed-off-by: Linus Torvalds Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 4cdd515a4385..5e49e82c4368 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -407,11 +407,11 @@ struct usb_host_bos { }; int __usb_get_extra_descriptor(char *buffer, unsigned size, - unsigned char type, void **ptr); + unsigned char type, void **ptr, size_t min); #define usb_get_extra_descriptor(ifpoint, type, ptr) \ __usb_get_extra_descriptor((ifpoint)->extra, \ (ifpoint)->extralen, \ - type, (void **)ptr) + type, (void **)ptr, sizeof(**(ptr))) /* ----------------------------------------------------------------------- */ -- cgit From 2f0799a0ffc033bf3cc82d5032acc3ec633464c2 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 5 Dec 2018 15:45:54 -0800 Subject: mm, thp: restore node-local hugepage allocations This is a full revert of ac5b2c18911f ("mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings") and a partial revert of 89c83fb539f9 ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask"). By not setting __GFP_THISNODE, applications can allocate remote hugepages when the local node is fragmented or low on memory when either the thp defrag setting is "always" or the vma has been madvised with MADV_HUGEPAGE. Remote access to hugepages often has much higher latency than local pages of the native page size. On Haswell, ac5b2c18911f was shown to have a 13.9% access regression after this commit for binaries that remap their text segment to be backed by transparent hugepages. The intent of ac5b2c18911f is to address an issue where a local node is low on memory or fragmented such that a hugepage cannot be allocated. In every scenario where this was described as a fix, there is abundant and unfragmented remote memory available to allocate from, even with a greater access latency. If remote memory is also low or fragmented, not setting __GFP_THISNODE was also measured on Haswell to have a 40% regression in allocation latency. Restore __GFP_THISNODE for thp allocations. Fixes: ac5b2c18911f ("mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings") Fixes: 89c83fb539f9 ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask") Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Vlastimil Babka Cc: Michal Hocko Cc: Andrew Morton Signed-off-by: David Rientjes Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index bac395f1d00a..5228c62af416 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -139,8 +139,6 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr); -struct mempolicy *get_vma_policy(struct vm_area_struct *vma, - unsigned long addr); bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); -- cgit From c53431eb696f3c64c12c00afb81048af54b61532 Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Wed, 5 Dec 2018 10:42:22 +1000 Subject: HID: core: store the collections as a basic tree For each collection parsed, store a pointer to the parent collection (if any). This makes it a lot easier to look up which collection(s) any given item is part of Signed-off-by: Peter Hutterer Verified-by: Harry Cutts Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index a355d61940f2..fdfda898656c 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -427,6 +427,7 @@ struct hid_local { */ struct hid_collection { + struct hid_collection *parent; unsigned type; unsigned usage; unsigned level; @@ -650,6 +651,7 @@ struct hid_parser { unsigned int *collection_stack; unsigned int collection_stack_ptr; unsigned int collection_stack_size; + struct hid_collection *active_collection; struct hid_device *device; unsigned int scan_flags; }; -- cgit From 5a4abb36f312cf83206b1b7d1308ba47cba0b3cc Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Wed, 5 Dec 2018 10:42:23 +1000 Subject: HID: core: process the Resolution Multiplier The Resolution Multiplier is a feature report that modifies the value of Usages within the same Logical Collection. If the multiplier is set to anything but 1, the hardware reports (value * multiplier) for the same amount of physical movement, i.e. the value we receive in the kernel is pre-multiplied. The hardware may either send a single (value * multiplier), or by sending multiplier as many reports with the same value, or a combination of these two options. For example, when the Microsoft Sculpt Ergonomic mouse Resolution Multiplier is set to 12, the Wheel sends out 12 for every detent but AC Pan sends out a value of 3 at 4 times the frequency. The effective multiplier is based on the physical min/max of the multiplier field, a logical min/max of [0,1] with a physical min/max of [1,8] means the multiplier is either 1 or 8. The Resolution Multiplier was introduced for high-resolution scrolling in Windows Vista and is commonly used on Microsoft mice. The recommendation for the Resolution Multiplier is to default to 1 for backwards compatibility. This patch adds an arbitrary upper limit at 255. The only known use case for the Resolution Multiplier is for scroll wheels where the multiplier has to be a fraction of 120 to work with Windows. Signed-off-by: Peter Hutterer Verified-by: Harry Cutts Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index fdfda898656c..fd8d860365a4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -219,6 +219,7 @@ struct hid_item { #define HID_GD_VBRZ 0x00010045 #define HID_GD_VNO 0x00010046 #define HID_GD_FEATURE 0x00010047 +#define HID_GD_RESOLUTION_MULTIPLIER 0x00010048 #define HID_GD_SYSTEM_CONTROL 0x00010080 #define HID_GD_UP 0x00010090 #define HID_GD_DOWN 0x00010091 @@ -437,6 +438,8 @@ struct hid_usage { unsigned hid; /* hid usage code */ unsigned collection_index; /* index into collection array */ unsigned usage_index; /* index into usage array */ + __s8 resolution_multiplier;/* Effective Resolution Multiplier + (HUT v1.12, 4.3.1), default: 1 */ /* hidinput data */ __u16 code; /* input driver code */ __u8 type; /* input driver type */ @@ -894,6 +897,8 @@ struct hid_report *hid_validate_values(struct hid_device *hid, unsigned int type, unsigned int id, unsigned int field_index, unsigned int report_counts); + +void hid_setup_resolution_multiplier(struct hid_device *hid); int hid_open_report(struct hid_device *device); int hid_check_keys_pressed(struct hid_device *hid); int hid_connect(struct hid_device *hid, unsigned int connect_mask); -- cgit From 2dc702c991e3774af9d7ce410eef410ca9e2357e Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Wed, 5 Dec 2018 10:42:24 +1000 Subject: HID: input: use the Resolution Multiplier for high-resolution scrolling Windows uses a magic number of 120 for a wheel click. High-resolution scroll wheels are supposed to use a fraction of 120 to signal smaller scroll steps. This is implemented by the Resolution Multiplier in the device itself. If the multiplier is present in the report descriptor, set it to the logical max and then use the resolution multiplier to calculate the high-resolution events. This is the recommendation by Microsoft, see http://msdn.microsoft.com/en-us/windows/hardware/gg487477.aspx Note that all mice encountered so far have a logical min/max of 0/1, so it's a binary "yes or no" to high-res scrolling anyway. To make userspace simpler, always enable the REL_WHEEL_HI_RES bit. Where the device doesn't support high-resolution scrolling, the value for the high-res data will simply be a multiple of 120 every time. For userspace, if REL_WHEEL_HI_RES is available that is the one to be used. Potential side-effect: a device with a Resolution Multiplier applying to other Input items will have those items set to the logical max as well. This cannot easily be worked around but it is doubtful such devices exist. Signed-off-by: Peter Hutterer Verified-by: Harry Cutts Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index fd8d860365a4..93db548f8761 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -233,6 +233,7 @@ struct hid_item { #define HID_DC_BATTERYSTRENGTH 0x00060020 #define HID_CP_CONSUMER_CONTROL 0x000c0001 +#define HID_CP_AC_PAN 0x000c0238 #define HID_DG_DIGITIZER 0x000d0001 #define HID_DG_PEN 0x000d0002 @@ -441,11 +442,13 @@ struct hid_usage { __s8 resolution_multiplier;/* Effective Resolution Multiplier (HUT v1.12, 4.3.1), default: 1 */ /* hidinput data */ + __s8 wheel_factor; /* 120/resolution_multiplier */ __u16 code; /* input driver code */ __u8 type; /* input driver type */ __s8 hat_min; /* hat switch fun */ __s8 hat_max; /* ditto */ __s8 hat_dir; /* ditto */ + __s16 wheel_accumulated; /* hi-res wheel */ }; struct hid_input; -- cgit From 356ff8a9a78fb35d6482584d260c3754dcbdf669 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 7 Dec 2018 14:50:16 -0800 Subject: Revert "mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask" This reverts commit 89c83fb539f95491be80cdd5158e6f0ce329e317. This should have been done as part of 2f0799a0ffc0 ("mm, thp: restore node-local hugepage allocations"). The movement of the thp allocation policy from alloc_pages_vma() to alloc_hugepage_direct_gfpmask() was intended to only set __GFP_THISNODE for mempolicies that are not MPOL_BIND whereas the revert could set this regardless of mempolicy. While the check for MPOL_BIND between alloc_hugepage_direct_gfpmask() and alloc_pages_vma() was racy, that has since been removed since the revert. What is left is the possibility to use __GFP_THISNODE in policy_node() when it is unexpected because the special handling for hugepages in alloc_pages_vma() was removed as part of the consolidation. Secondly, prior to 89c83fb539f9, alloc_pages_vma() implemented a somewhat different policy for hugepage allocations, which were allocated through alloc_hugepage_vma(). For hugepage allocations, if the allocating process's node is in the set of allowed nodes, allocate with __GFP_THISNODE for that node (for MPOL_PREFERRED, use that node with __GFP_THISNODE instead). This was changed for shmem_alloc_hugepage() to allow fallback to other nodes in 89c83fb539f9 as it did for new_page() in mm/mempolicy.c which is functionally different behavior and removes the requirement to only allocate hugepages locally. So this commit does a full revert of 89c83fb539f9 instead of the partial revert that was done in 2f0799a0ffc0. The result is the same thp allocation policy for 4.20 that was in 4.19. Fixes: 89c83fb539f9 ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask") Fixes: 2f0799a0ffc0 ("mm, thp: restore node-local hugepage allocations") Signed-off-by: David Rientjes Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Michal Hocko Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 76f8db0b0e71..0705164f928c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -510,18 +510,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) } extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, - int node); + int node, bool hugepage); +#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ + alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) #else #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) -#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\ +#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ + alloc_pages(gfp_mask, order) +#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id()) + alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, node) + alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); -- cgit