From 7c88f2bf7840c0ea67ac2de2e293a653f62ea134 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Thu, 20 Jul 2017 07:05:28 +0200
Subject: tile: defconfig: Cleanup from old Kconfig options

Remove old, dead Kconfig options (in order appearing in this commit):
 - CRYPTO_ZLIB: commit 110492183c4b ("crypto: compress - remove unused
   pcomp interface");
 - IP_NF_TARGET_ULOG: commit d4da843e6fad ("netfilter: kill remnants of
   ulog targets");

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com>
---
 arch/tile/configs/tilegx_defconfig  | 1 -
 arch/tile/configs/tilepro_defconfig | 2 --
 2 files changed, 3 deletions(-)

diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig
index 0d925fa0f0c1..9f94435cc44f 100644
--- a/arch/tile/configs/tilegx_defconfig
+++ b/arch/tile/configs/tilegx_defconfig
@@ -409,5 +409,4 @@ CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig
index 149d8e8eacb8..1c5bd4f8ffca 100644
--- a/arch/tile/configs/tilepro_defconfig
+++ b/arch/tile/configs/tilepro_defconfig
@@ -189,7 +189,6 @@ CONFIG_IP_NF_MATCH_ECN=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
@@ -521,7 +520,6 @@ CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_CRC_CCITT=m
 CONFIG_CRC7=m
-- 
cgit 


From 637f23abca87d26e091e0d6647ec878d97d2c6cd Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 22 Jul 2017 10:33:02 +0300
Subject: tile: array underflow in setup_maxnodemem()

My static checker correctly complains that we should have a lower bound
on "node" to prevent an array underflow.

Fixes: 867e359b97c9 ("arch/tile: core support for Tilera 32-bit chips.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com>
---
 arch/tile/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 443a70bccc1c..b1474e7d9afb 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -140,7 +140,7 @@ static int __init setup_maxnodemem(char *str)
 {
 	char *endp;
 	unsigned long long maxnodemem;
-	long node;
+	unsigned long node;
 
 	node = str ? simple_strtoul(str, &endp, 0) : INT_MAX;
 	if (node >= MAX_NUMNODES || *endp != ':')
-- 
cgit 


From 472b46c352c9ff0b6fa57dbf85d77c51901a3368 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 6 Aug 2017 18:44:27 +0200
Subject: uapi linux/kfd_ioctl.h: only use __u32 and __u64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Include <drm/drm.h> instead of <linux/types.h> which on Linux includes
<linux/types.h> and on non-Linux platforms defines __u32 etc types.

Fixes user space compilation errors like:

linux/kfd_ioctl.h:33:2: error: unknown type name ‘uint32_t’
  uint32_t major_version; /* from KFD */
  ^~~~~~~~

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 include/uapi/linux/kfd_ioctl.h | 172 ++++++++++++++++++++---------------------
 1 file changed, 86 insertions(+), 86 deletions(-)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 7b4567bacfc2..26283fefdf5f 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -23,15 +23,15 @@
 #ifndef KFD_IOCTL_H_INCLUDED
 #define KFD_IOCTL_H_INCLUDED
 
-#include <linux/types.h>
+#include <drm/drm.h>
 #include <linux/ioctl.h>
 
 #define KFD_IOCTL_MAJOR_VERSION 1
 #define KFD_IOCTL_MINOR_VERSION 1
 
 struct kfd_ioctl_get_version_args {
-	uint32_t major_version;	/* from KFD */
-	uint32_t minor_version;	/* from KFD */
+	__u32 major_version;	/* from KFD */
+	__u32 minor_version;	/* from KFD */
 };
 
 /* For kfd_ioctl_create_queue_args.queue_type. */
@@ -43,36 +43,36 @@ struct kfd_ioctl_get_version_args {
 #define KFD_MAX_QUEUE_PRIORITY		15
 
 struct kfd_ioctl_create_queue_args {
-	uint64_t ring_base_address;	/* to KFD */
-	uint64_t write_pointer_address;	/* from KFD */
-	uint64_t read_pointer_address;	/* from KFD */
-	uint64_t doorbell_offset;	/* from KFD */
-
-	uint32_t ring_size;		/* to KFD */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t queue_type;		/* to KFD */
-	uint32_t queue_percentage;	/* to KFD */
-	uint32_t queue_priority;	/* to KFD */
-	uint32_t queue_id;		/* from KFD */
-
-	uint64_t eop_buffer_address;	/* to KFD */
-	uint64_t eop_buffer_size;	/* to KFD */
-	uint64_t ctx_save_restore_address; /* to KFD */
-	uint64_t ctx_save_restore_size;	/* to KFD */
+	__u64 ring_base_address;	/* to KFD */
+	__u64 write_pointer_address;	/* from KFD */
+	__u64 read_pointer_address;	/* from KFD */
+	__u64 doorbell_offset;	/* from KFD */
+
+	__u32 ring_size;		/* to KFD */
+	__u32 gpu_id;		/* to KFD */
+	__u32 queue_type;		/* to KFD */
+	__u32 queue_percentage;	/* to KFD */
+	__u32 queue_priority;	/* to KFD */
+	__u32 queue_id;		/* from KFD */
+
+	__u64 eop_buffer_address;	/* to KFD */
+	__u64 eop_buffer_size;	/* to KFD */
+	__u64 ctx_save_restore_address; /* to KFD */
+	__u64 ctx_save_restore_size;	/* to KFD */
 };
 
 struct kfd_ioctl_destroy_queue_args {
-	uint32_t queue_id;		/* to KFD */
-	uint32_t pad;
+	__u32 queue_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_update_queue_args {
-	uint64_t ring_base_address;	/* to KFD */
+	__u64 ring_base_address;	/* to KFD */
 
-	uint32_t queue_id;		/* to KFD */
-	uint32_t ring_size;		/* to KFD */
-	uint32_t queue_percentage;	/* to KFD */
-	uint32_t queue_priority;	/* to KFD */
+	__u32 queue_id;		/* to KFD */
+	__u32 ring_size;		/* to KFD */
+	__u32 queue_percentage;	/* to KFD */
+	__u32 queue_priority;	/* to KFD */
 };
 
 /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
@@ -80,13 +80,13 @@ struct kfd_ioctl_update_queue_args {
 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
 
 struct kfd_ioctl_set_memory_policy_args {
-	uint64_t alternate_aperture_base;	/* to KFD */
-	uint64_t alternate_aperture_size;	/* to KFD */
+	__u64 alternate_aperture_base;	/* to KFD */
+	__u64 alternate_aperture_size;	/* to KFD */
 
-	uint32_t gpu_id;			/* to KFD */
-	uint32_t default_policy;		/* to KFD */
-	uint32_t alternate_policy;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;			/* to KFD */
+	__u32 default_policy;		/* to KFD */
+	__u32 alternate_policy;		/* to KFD */
+	__u32 pad;
 };
 
 /*
@@ -97,26 +97,26 @@ struct kfd_ioctl_set_memory_policy_args {
  */
 
 struct kfd_ioctl_get_clock_counters_args {
-	uint64_t gpu_clock_counter;	/* from KFD */
-	uint64_t cpu_clock_counter;	/* from KFD */
-	uint64_t system_clock_counter;	/* from KFD */
-	uint64_t system_clock_freq;	/* from KFD */
+	__u64 gpu_clock_counter;	/* from KFD */
+	__u64 cpu_clock_counter;	/* from KFD */
+	__u64 system_clock_counter;	/* from KFD */
+	__u64 system_clock_freq;	/* from KFD */
 
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 #define NUM_OF_SUPPORTED_GPUS 7
 
 struct kfd_process_device_apertures {
-	uint64_t lds_base;		/* from KFD */
-	uint64_t lds_limit;		/* from KFD */
-	uint64_t scratch_base;		/* from KFD */
-	uint64_t scratch_limit;		/* from KFD */
-	uint64_t gpuvm_base;		/* from KFD */
-	uint64_t gpuvm_limit;		/* from KFD */
-	uint32_t gpu_id;		/* from KFD */
-	uint32_t pad;
+	__u64 lds_base;		/* from KFD */
+	__u64 lds_limit;		/* from KFD */
+	__u64 scratch_base;		/* from KFD */
+	__u64 scratch_limit;		/* from KFD */
+	__u64 gpuvm_base;		/* from KFD */
+	__u64 gpuvm_limit;		/* from KFD */
+	__u32 gpu_id;		/* from KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_get_process_apertures_args {
@@ -124,8 +124,8 @@ struct kfd_ioctl_get_process_apertures_args {
 			process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
 
 	/* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
-	uint32_t num_of_nodes;
-	uint32_t pad;
+	__u32 num_of_nodes;
+	__u32 pad;
 };
 
 #define MAX_ALLOWED_NUM_POINTS    100
@@ -133,25 +133,25 @@ struct kfd_ioctl_get_process_apertures_args {
 #define MAX_ALLOWED_WAC_BUFF_SIZE  128
 
 struct kfd_ioctl_dbg_register_args {
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_dbg_unregister_args {
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_dbg_address_watch_args {
-	uint64_t content_ptr;		/* a pointer to the actual content */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t buf_size_in_bytes;	/*including gpu_id and buf_size */
+	__u64 content_ptr;		/* a pointer to the actual content */
+	__u32 gpu_id;		/* to KFD */
+	__u32 buf_size_in_bytes;	/*including gpu_id and buf_size */
 };
 
 struct kfd_ioctl_dbg_wave_control_args {
-	uint64_t content_ptr;		/* a pointer to the actual content */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t buf_size_in_bytes;	/*including gpu_id and buf_size */
+	__u64 content_ptr;		/* a pointer to the actual content */
+	__u32 gpu_id;		/* to KFD */
+	__u32 buf_size_in_bytes;	/*including gpu_id and buf_size */
 };
 
 /* Matching HSA_EVENTTYPE */
@@ -172,44 +172,44 @@ struct kfd_ioctl_dbg_wave_control_args {
 #define KFD_SIGNAL_EVENT_LIMIT			256
 
 struct kfd_ioctl_create_event_args {
-	uint64_t event_page_offset;	/* from KFD */
-	uint32_t event_trigger_data;	/* from KFD - signal events only */
-	uint32_t event_type;		/* to KFD */
-	uint32_t auto_reset;		/* to KFD */
-	uint32_t node_id;		/* to KFD - only valid for certain
+	__u64 event_page_offset;	/* from KFD */
+	__u32 event_trigger_data;	/* from KFD - signal events only */
+	__u32 event_type;		/* to KFD */
+	__u32 auto_reset;		/* to KFD */
+	__u32 node_id;		/* to KFD - only valid for certain
 							event types */
-	uint32_t event_id;		/* from KFD */
-	uint32_t event_slot_index;	/* from KFD */
+	__u32 event_id;		/* from KFD */
+	__u32 event_slot_index;	/* from KFD */
 };
 
 struct kfd_ioctl_destroy_event_args {
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_set_event_args {
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_reset_event_args {
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_memory_exception_failure {
-	uint32_t NotPresent;	/* Page not present or supervisor privilege */
-	uint32_t ReadOnly;	/* Write access to a read-only page */
-	uint32_t NoExecute;	/* Execute access to a page marked NX */
-	uint32_t pad;
+	__u32 NotPresent;	/* Page not present or supervisor privilege */
+	__u32 ReadOnly;	/* Write access to a read-only page */
+	__u32 NoExecute;	/* Execute access to a page marked NX */
+	__u32 pad;
 };
 
 /* memory exception data*/
 struct kfd_hsa_memory_exception_data {
 	struct kfd_memory_exception_failure failure;
-	uint64_t va;
-	uint32_t gpu_id;
-	uint32_t pad;
+	__u64 va;
+	__u32 gpu_id;
+	__u32 pad;
 };
 
 /* Event data*/
@@ -217,19 +217,19 @@ struct kfd_event_data {
 	union {
 		struct kfd_hsa_memory_exception_data memory_exception_data;
 	};				/* From KFD */
-	uint64_t kfd_event_data_ext;	/* pointer to an extension structure
+	__u64 kfd_event_data_ext;	/* pointer to an extension structure
 					   for future exception types */
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_wait_events_args {
-	uint64_t events_ptr;		/* pointed to struct
+	__u64 events_ptr;		/* pointed to struct
 					   kfd_event_data array, to KFD */
-	uint32_t num_events;		/* to KFD */
-	uint32_t wait_for_all;		/* to KFD */
-	uint32_t timeout;		/* to KFD */
-	uint32_t wait_result;		/* from KFD */
+	__u32 num_events;		/* to KFD */
+	__u32 wait_for_all;		/* to KFD */
+	__u32 timeout;		/* to KFD */
+	__u32 wait_result;		/* from KFD */
 };
 
 struct kfd_ioctl_set_scratch_backing_va_args {
-- 
cgit 


From a33b2d0359a0aae25d5ac7a26b85a5682485ebbb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 8 Aug 2016 17:46:23 -0700
Subject: selftests/seccomp: Add tests for basic ptrace actions

This adds tests for using only ptrace to perform syscall changes, just
to validate matching behavior between seccomp events and ptrace events.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 41 ++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 73f5ea6778ce..e61b963f011b 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1262,6 +1262,13 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 # error "Do not know how to find your architecture's registers and syscalls"
 #endif
 
+/* When the syscall return can't be changed, stub out the tests for it. */
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
+#else
+# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(val, action)
+#endif
+
 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
  * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
  */
@@ -1357,7 +1364,7 @@ void change_syscall(struct __test_metadata *_metadata,
 #ifdef SYSCALL_NUM_RET_SHARE_REG
 		TH_LOG("Can't modify syscall return on this architecture");
 #else
-		regs.SYSCALL_RET = 1;
+		regs.SYSCALL_RET = EPERM;
 #endif
 
 #ifdef HAVE_GETREGS
@@ -1426,6 +1433,8 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
 
 	if (nr == __NR_getpid)
 		change_syscall(_metadata, tracee, __NR_getppid);
+	if (nr == __NR_open)
+		change_syscall(_metadata, tracee, -1);
 }
 
 FIXTURE_DATA(TRACE_syscall) {
@@ -1480,6 +1489,28 @@ FIXTURE_TEARDOWN(TRACE_syscall)
 		free(self->prog.filter);
 }
 
+TEST_F(TRACE_syscall, ptrace_syscall_redirected)
+{
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	/* Tracer will redirect getpid to getppid. */
+	EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_dropped)
+{
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	/* Tracer should skip the open syscall, resulting in EPERM. */
+	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open));
+}
+
 TEST_F(TRACE_syscall, syscall_allowed)
 {
 	long ret;
@@ -1520,13 +1551,8 @@ TEST_F(TRACE_syscall, syscall_dropped)
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
 	ASSERT_EQ(0, ret);
 
-#ifdef SYSCALL_NUM_RET_SHARE_REG
-	/* gettid has been skipped */
-	EXPECT_EQ(-1, syscall(__NR_gettid));
-#else
 	/* gettid has been skipped and an altered return value stored. */
-	EXPECT_EQ(1, syscall(__NR_gettid));
-#endif
+	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid));
 	EXPECT_NE(self->mytid, syscall(__NR_gettid));
 }
 
@@ -1557,6 +1583,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE)
 	ASSERT_EQ(0, ret);
 
 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
+	errno = 0;
 	EXPECT_EQ(-1, syscall(__NR_getpid));
 	EXPECT_EQ(EPERM, errno);
 }
-- 
cgit 


From 967d7ba8415139107033770a40c7ec7dd17fbcb1 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 2 Dec 2015 11:39:36 -0800
Subject: selftests/seccomp: Add simple seccomp overhead benchmark

This attempts to produce a comparison between native getpid() and a
RET_ALLOW-filtered getpid(), to measure the overhead cost of using
seccomp().

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/Makefile           | 18 ++--
 .../testing/selftests/seccomp/seccomp_benchmark.c  | 99 ++++++++++++++++++++++
 2 files changed, 112 insertions(+), 5 deletions(-)
 create mode 100644 tools/testing/selftests/seccomp/seccomp_benchmark.c

diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
index aeb0c805f3ca..553d870b4ca9 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,8 +1,16 @@
-TEST_GEN_PROGS := seccomp_bpf
-CFLAGS += -Wl,-no-as-needed -Wall
-LDFLAGS += -lpthread
+all:
 
 include ../lib.mk
 
-$(TEST_GEN_PROGS): seccomp_bpf.c ../kselftest_harness.h
-	$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+.PHONY: all clean
+
+BINARIES := seccomp_bpf seccomp_benchmark
+CFLAGS += -Wl,-no-as-needed -Wall
+
+seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h
+	$(CC) $(CFLAGS) $(LDFLAGS) -lpthread $< -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
new file mode 100644
index 000000000000..5838c8697ec3
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -0,0 +1,99 @@
+/*
+ * Strictly speaking, this is not a test. But it can report during test
+ * runs so relative performace can be measured.
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#define ARRAY_SIZE(a)    (sizeof(a) / sizeof(a[0]))
+
+unsigned long long timing(clockid_t clk_id, unsigned long long samples)
+{
+	pid_t pid, ret;
+	unsigned long long i;
+	struct timespec start, finish;
+
+	pid = getpid();
+	assert(clock_gettime(clk_id, &start) == 0);
+	for (i = 0; i < samples; i++) {
+		ret = syscall(__NR_getpid);
+		assert(pid == ret);
+	}
+	assert(clock_gettime(clk_id, &finish) == 0);
+
+	i = finish.tv_sec - start.tv_sec;
+	i *= 1000000000;
+	i += finish.tv_nsec - start.tv_nsec;
+
+	printf("%lu.%09lu - %lu.%09lu = %llu\n",
+		finish.tv_sec, finish.tv_nsec,
+		start.tv_sec, start.tv_nsec,
+		i);
+
+	return i;
+}
+
+unsigned long long calibrate(void)
+{
+	unsigned long long i;
+
+	printf("Calibrating reasonable sample size...\n");
+
+	for (i = 5; ; i++) {
+		unsigned long long samples = 1 << i;
+
+		/* Find something that takes more than 5 seconds to run. */
+		if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5)
+			return samples;
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	unsigned long long samples;
+	unsigned long long native, filtered;
+
+	if (argc > 1)
+		samples = strtoull(argv[1], NULL, 0);
+	else
+		samples = calibrate();
+
+	printf("Benchmarking %llu samples...\n", samples);
+
+	native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid native: %llu ns\n", native);
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	assert(ret == 0);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	assert(ret == 0);
+
+	filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW: %llu ns\n", filtered);
+
+	printf("Estimated seccomp overhead per syscall: %llu ns\n",
+		filtered - native);
+
+	if (filtered == native)
+		printf("Trying running again with more samples.\n");
+
+	return 0;
+}
-- 
cgit 


From f3f6e30669c048f47d51ea59df9946a91f551c4c Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 2 Aug 2017 14:08:39 -0700
Subject: selftests/seccomp: Refactor RET_ERRNO tests

This refactors the errno tests (since they all use the same pattern for
their filter) and adds a RET_DATA field ordering test.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 95 ++++++++++++++++-----------
 1 file changed, 58 insertions(+), 37 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index e61b963f011b..2fb49d99588d 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -136,7 +136,7 @@ TEST(no_new_privs_support)
 	}
 }
 
-/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
+/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
 TEST(mode_filter_support)
 {
 	long ret;
@@ -541,26 +541,30 @@ TEST(arg_out_of_range)
 	EXPECT_EQ(EINVAL, errno);
 }
 
+#define ERRNO_FILTER(name, errno)					\
+	struct sock_filter _read_filter_##name[] = {			\
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
+			offsetof(struct seccomp_data, nr)),		\
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),	\
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
+	};								\
+	struct sock_fprog prog_##name = {				\
+		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
+		.filter = _read_filter_##name,				\
+	}
+
+/* Make sure basic errno values are correctly passed through a filter. */
 TEST(ERRNO_valid)
 {
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
-			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-	};
-	struct sock_fprog prog = {
-		.len = (unsigned short)ARRAY_SIZE(filter),
-		.filter = filter,
-	};
+	ERRNO_FILTER(valid, E2BIG);
 	long ret;
 	pid_t parent = getppid();
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
@@ -568,26 +572,17 @@ TEST(ERRNO_valid)
 	EXPECT_EQ(E2BIG, errno);
 }
 
+/* Make sure an errno of zero is correctly handled by the arch code. */
 TEST(ERRNO_zero)
 {
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
-			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-	};
-	struct sock_fprog prog = {
-		.len = (unsigned short)ARRAY_SIZE(filter),
-		.filter = filter,
-	};
+	ERRNO_FILTER(zero, 0);
 	long ret;
 	pid_t parent = getppid();
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
@@ -595,26 +590,21 @@ TEST(ERRNO_zero)
 	EXPECT_EQ(0, read(0, NULL, 0));
 }
 
+/*
+ * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
+ * This tests that the errno value gets capped correctly, fixed by
+ * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
+ */
 TEST(ERRNO_capped)
 {
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
-			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-	};
-	struct sock_fprog prog = {
-		.len = (unsigned short)ARRAY_SIZE(filter),
-		.filter = filter,
-	};
+	ERRNO_FILTER(capped, 4096);
 	long ret;
 	pid_t parent = getppid();
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
@@ -622,6 +612,37 @@ TEST(ERRNO_capped)
 	EXPECT_EQ(4095, errno);
 }
 
+/*
+ * Filters are processed in reverse order: last applied is executed first.
+ * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
+ * SECCOMP_RET_DATA mask results will follow the most recently applied
+ * matching filter return (and not the lowest or highest value).
+ */
+TEST(ERRNO_order)
+{
+	ERRNO_FILTER(first,  11);
+	ERRNO_FILTER(second, 13);
+	ERRNO_FILTER(third,  12);
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(-1, read(0, NULL, 0));
+	EXPECT_EQ(12, errno);
+}
+
 FIXTURE_DATA(TRAP) {
 	struct sock_fprog prog;
 };
-- 
cgit 


From deb4de8b31bc5bf21efb6ac31150a01a631cd647 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 2 Aug 2017 15:00:40 -0700
Subject: seccomp: Provide matching filter for introspection

Both the upcoming logging improvements and changes to RET_KILL will need
to know which filter a given seccomp return value originated from. In
order to delay logic processing of result until after the seccomp loop,
this adds a single pointer assignment on matches. This will allow both
log and RET_KILL logic to work off the filter rather than doing more
expensive tests inside the time-critical run_filters loop.

Running tight cycles of getpid() with filters attached shows no measurable
difference in speed.

Suggested-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
---
 kernel/seccomp.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 98b59b5db90b..1f3347fc2605 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -171,10 +171,14 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
 /**
  * seccomp_run_filters - evaluates all seccomp filters against @sd
  * @sd: optional seccomp data to be passed to filters
+ * @match: stores struct seccomp_filter that resulted in the return value,
+ *         unless filter returned SECCOMP_RET_ALLOW, in which case it will
+ *         be unchanged.
  *
  * Returns valid seccomp BPF response codes.
  */
-static u32 seccomp_run_filters(const struct seccomp_data *sd)
+static u32 seccomp_run_filters(const struct seccomp_data *sd,
+			       struct seccomp_filter **match)
 {
 	struct seccomp_data sd_local;
 	u32 ret = SECCOMP_RET_ALLOW;
@@ -198,8 +202,10 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd)
 	for (; f; f = f->prev) {
 		u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
 
-		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
+		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) {
 			ret = cur_ret;
+			*match = f;
+		}
 	}
 	return ret;
 }
@@ -566,6 +572,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 			    const bool recheck_after_trace)
 {
 	u32 filter_ret, action;
+	struct seccomp_filter *match = NULL;
 	int data;
 
 	/*
@@ -574,7 +581,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 	 */
 	rmb();
 
-	filter_ret = seccomp_run_filters(sd);
+	filter_ret = seccomp_run_filters(sd, &match);
 	data = filter_ret & SECCOMP_RET_DATA;
 	action = filter_ret & SECCOMP_RET_ACTION;
 
@@ -638,6 +645,11 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 		return 0;
 
 	case SECCOMP_RET_ALLOW:
+		/*
+		 * Note that the "match" filter will always be NULL for
+		 * this action since SECCOMP_RET_ALLOW is the starting
+		 * state in seccomp_run_filters().
+		 */
 		return 0;
 
 	case SECCOMP_RET_KILL:
-- 
cgit 


From 8e5f1ad116df6b0de65eac458d5e7c318d1c05af Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:52 +0000
Subject: seccomp: Sysctl to display available actions

This patch creates a read-only sysctl containing an ordered list of
seccomp actions that the kernel supports. The ordering, from left to
right, is the lowest action value (kill) to the highest action value
(allow). Currently, a read of the sysctl file would return "kill trap
errno trace allow". The contents of this sysctl file can be useful for
userspace code as well as the system administrator.

The path to the sysctl is:

  /proc/sys/kernel/seccomp/actions_avail

libseccomp and other userspace code can easily determine which actions
the current kernel supports. The set of actions supported by the current
kernel may be different than the set of action macros found in kernel
headers that were installed where the userspace code was built.

In addition, this sysctl will allow system administrators to know which
actions are supported by the kernel and make it easier to configure
exactly what seccomp logs through the audit subsystem. Support for this
level of logging configuration will come in a future patch.

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/sysctl/kernel.txt                |  1 +
 Documentation/userspace-api/seccomp_filter.rst | 16 ++++++++
 kernel/seccomp.c                               | 51 ++++++++++++++++++++++++++
 3 files changed, 68 insertions(+)

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index bac23c198360..995c42cf86ba 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -74,6 +74,7 @@ show up in /proc/sys/kernel:
 - reboot-cmd                  [ SPARC only ]
 - rtsig-max
 - rtsig-nr
+- seccomp/                    ==> Documentation/userspace-api/seccomp_filter.rst
 - sem
 - sem_next_id		      [ sysv ipc ]
 - sg-big-buff                 [ generic SCSI device (sg) ]
diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index f71eb5ef1f2d..35fc7cbf1d95 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -169,7 +169,23 @@ The ``samples/seccomp/`` directory contains both an x86-specific example
 and a more generic example of a higher level macro interface for BPF
 program generation.
 
+Sysctls
+=======
+
+Seccomp's sysctl files can be found in the ``/proc/sys/kernel/seccomp/``
+directory. Here's a description of each file in that directory:
+
+``actions_avail``:
+	A read-only ordered list of seccomp return values (refer to the
+	``SECCOMP_RET_*`` macros above) in string form. The ordering, from
+	left-to-right, is the least permissive return value to the most
+	permissive return value.
 
+	The list represents the set of seccomp return values supported
+	by the kernel. A userspace program may use this list to
+	determine if the actions found in the ``seccomp.h``, when the
+	program was built, differs from the set of actions actually
+	supported in the current running kernel.
 
 Adding architecture support
 ===========================
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 1f3347fc2605..5f19f41e4e50 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -17,11 +17,13 @@
 #include <linux/audit.h>
 #include <linux/compat.h>
 #include <linux/coredump.h>
+#include <linux/kmemleak.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
 #include <linux/seccomp.h>
 #include <linux/slab.h>
 #include <linux/syscalls.h>
+#include <linux/sysctl.h>
 
 #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
 #include <asm/syscall.h>
@@ -934,3 +936,52 @@ out:
 	return ret;
 }
 #endif
+
+#ifdef CONFIG_SYSCTL
+
+/* Human readable action names for friendly sysctl interaction */
+#define SECCOMP_RET_KILL_NAME		"kill"
+#define SECCOMP_RET_TRAP_NAME		"trap"
+#define SECCOMP_RET_ERRNO_NAME		"errno"
+#define SECCOMP_RET_TRACE_NAME		"trace"
+#define SECCOMP_RET_ALLOW_NAME		"allow"
+
+static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME	" "
+					    SECCOMP_RET_TRAP_NAME	" "
+					    SECCOMP_RET_ERRNO_NAME	" "
+					    SECCOMP_RET_TRACE_NAME	" "
+					    SECCOMP_RET_ALLOW_NAME;
+
+static struct ctl_path seccomp_sysctl_path[] = {
+	{ .procname = "kernel", },
+	{ .procname = "seccomp", },
+	{ }
+};
+
+static struct ctl_table seccomp_sysctl_table[] = {
+	{
+		.procname	= "actions_avail",
+		.data		= (void *) &seccomp_actions_avail,
+		.maxlen		= sizeof(seccomp_actions_avail),
+		.mode		= 0444,
+		.proc_handler	= proc_dostring,
+	},
+	{ }
+};
+
+static int __init seccomp_sysctl_init(void)
+{
+	struct ctl_table_header *hdr;
+
+	hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
+	if (!hdr)
+		pr_warn("seccomp: sysctl registration failed\n");
+	else
+		kmemleak_not_leak(hdr);
+
+	return 0;
+}
+
+device_initcall(seccomp_sysctl_init)
+
+#endif /* CONFIG_SYSCTL */
-- 
cgit 


From d612b1fd8010d0d67b5287fe146b8b55bcbb8655 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:53 +0000
Subject: seccomp: Operation for checking if an action is available

Userspace code that needs to check if the kernel supports a given action
may not be able to use the /proc/sys/kernel/seccomp/actions_avail
sysctl. The process may be running in a sandbox and, therefore,
sufficient filesystem access may not be available. This patch adds an
operation to the seccomp(2) syscall that allows userspace code to ask
the kernel if a given action is available.

If the action is supported by the kernel, 0 is returned. If the action
is not supported by the kernel, -1 is returned with errno set to
-EOPNOTSUPP. If this check is attempted on a kernel that doesn't support
this new operation, -1 is returned with errno set to -EINVAL meaning
that userspace code will have the ability to differentiate between the
two error cases.

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h                  |  5 ++--
 kernel/seccomp.c                              | 26 +++++++++++++++++++
 tools/testing/selftests/seccomp/seccomp_bpf.c | 36 +++++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 0f238a43ff1e..aaad61cc46bc 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -11,8 +11,9 @@
 #define SECCOMP_MODE_FILTER	2 /* uses user-supplied filter. */
 
 /* Valid operations for seccomp syscall. */
-#define SECCOMP_SET_MODE_STRICT	0
-#define SECCOMP_SET_MODE_FILTER	1
+#define SECCOMP_SET_MODE_STRICT		0
+#define SECCOMP_SET_MODE_FILTER		1
+#define SECCOMP_GET_ACTION_AVAIL	2
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
 #define SECCOMP_FILTER_FLAG_TSYNC	1
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5f19f41e4e50..7a6089f66fed 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -808,6 +808,27 @@ static inline long seccomp_set_mode_filter(unsigned int flags,
 }
 #endif
 
+static long seccomp_get_action_avail(const char __user *uaction)
+{
+	u32 action;
+
+	if (copy_from_user(&action, uaction, sizeof(action)))
+		return -EFAULT;
+
+	switch (action) {
+	case SECCOMP_RET_KILL:
+	case SECCOMP_RET_TRAP:
+	case SECCOMP_RET_ERRNO:
+	case SECCOMP_RET_TRACE:
+	case SECCOMP_RET_ALLOW:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
 /* Common entry point for both prctl and syscall. */
 static long do_seccomp(unsigned int op, unsigned int flags,
 		       const char __user *uargs)
@@ -819,6 +840,11 @@ static long do_seccomp(unsigned int op, unsigned int flags,
 		return seccomp_set_mode_strict();
 	case SECCOMP_SET_MODE_FILTER:
 		return seccomp_set_mode_filter(flags, uargs);
+	case SECCOMP_GET_ACTION_AVAIL:
+		if (flags != 0)
+			return -EINVAL;
+
+		return seccomp_get_action_avail(uargs);
 	default:
 		return -EINVAL;
 	}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 2fb49d99588d..1f2888f6678b 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1731,6 +1731,10 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
 #define SECCOMP_SET_MODE_FILTER 1
 #endif
 
+#ifndef SECCOMP_GET_ACTION_AVAIL
+#define SECCOMP_GET_ACTION_AVAIL 2
+#endif
+
 #ifndef SECCOMP_FILTER_FLAG_TSYNC
 #define SECCOMP_FILTER_FLAG_TSYNC 1
 #endif
@@ -2469,6 +2473,38 @@ TEST(syscall_restart)
 		_metadata->passed = 0;
 }
 
+TEST(get_action_avail)
+{
+	__u32 actions[] = { SECCOMP_RET_KILL,  SECCOMP_RET_TRAP,
+			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
+			    SECCOMP_RET_ALLOW };
+	__u32 unknown_action = 0x10000000U;
+	int i;
+	long ret;
+
+	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	ASSERT_NE(EINVAL, errno) {
+		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
+	}
+	EXPECT_EQ(ret, 0);
+
+	for (i = 0; i < ARRAY_SIZE(actions); i++) {
+		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
+		EXPECT_EQ(ret, 0) {
+			TH_LOG("Expected action (0x%X) not available!",
+			       actions[i]);
+		}
+	}
+
+	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
+	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
+	EXPECT_EQ(ret, -1);
+	EXPECT_EQ(errno, EOPNOTSUPP);
+}
+
 /*
  * TODO:
  * - add microbenchmarks
-- 
cgit 


From 0ddec0fc8900201c0897b87b762b7c420436662f Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:54 +0000
Subject: seccomp: Sysctl to configure actions that are allowed to be logged

Adminstrators can write to this sysctl to set the seccomp actions that
are allowed to be logged. Any actions not found in this sysctl will not
be logged.

For example, all SECCOMP_RET_KILL, SECCOMP_RET_TRAP, and
SECCOMP_RET_ERRNO actions would be loggable if "kill trap errno" were
written to the sysctl. SECCOMP_RET_TRACE actions would not be logged
since its string representation ("trace") wasn't present in the sysctl
value.

The path to the sysctl is:

 /proc/sys/kernel/seccomp/actions_logged

The actions_avail sysctl can be read to discover the valid action names
that can be written to the actions_logged sysctl with the exception of
"allow". SECCOMP_RET_ALLOW actions cannot be configured for logging.

The default setting for the sysctl is to allow all actions to be logged
except SECCOMP_RET_ALLOW. While only SECCOMP_RET_KILL actions are
currently logged, an upcoming patch will allow applications to request
additional actions to be logged.

There's one important exception to this sysctl. If a task is
specifically being audited, meaning that an audit context has been
allocated for the task, seccomp will log all actions other than
SECCOMP_RET_ALLOW despite the value of actions_logged. This exception
preserves the existing auditing behavior of tasks with an allocated
audit context.

With this patch, the logic for deciding if an action will be logged is:

if action == RET_ALLOW:
  do not log
else if action == RET_KILL && RET_KILL in actions_logged:
  log
else if audit_enabled && task-is-being-audited:
  log
else:
  do not log

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/userspace-api/seccomp_filter.rst |  18 +++
 include/linux/audit.h                          |   6 +-
 kernel/seccomp.c                               | 171 ++++++++++++++++++++++++-
 3 files changed, 187 insertions(+), 8 deletions(-)

diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index 35fc7cbf1d95..2d1d8ab04ac5 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -187,6 +187,24 @@ directory. Here's a description of each file in that directory:
 	program was built, differs from the set of actions actually
 	supported in the current running kernel.
 
+``actions_logged``:
+	A read-write ordered list of seccomp return values (refer to the
+	``SECCOMP_RET_*`` macros above) that are allowed to be logged. Writes
+	to the file do not need to be in ordered form but reads from the file
+	will be ordered in the same way as the actions_avail sysctl.
+
+	It is important to note that the value of ``actions_logged`` does not
+	prevent certain actions from being logged when the audit subsystem is
+	configured to audit a task. If the action is not found in
+	``actions_logged`` list, the final decision on whether to audit the
+	action for that task is ultimately left up to the audit subsystem to
+	decide for all seccomp return values other than ``SECCOMP_RET_ALLOW``.
+
+	The ``allow`` string is not accepted in the ``actions_logged`` sysctl
+	as it is not possible to log ``SECCOMP_RET_ALLOW`` actions. Attempting
+	to write ``allow`` to the sysctl will result in an EINVAL being
+	returned.
+
 Adding architecture support
 ===========================
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 2150bdccfbab..8c30f06d639d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -314,11 +314,7 @@ void audit_core_dumps(long signr);
 
 static inline void audit_seccomp(unsigned long syscall, long signr, int code)
 {
-	if (!audit_enabled)
-		return;
-
-	/* Force a record to be reported if a signal was delivered. */
-	if (signr || unlikely(!audit_dummy_context()))
+	if (audit_enabled && unlikely(!audit_dummy_context()))
 		__audit_seccomp(syscall, signr, code);
 }
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 7a6089f66fed..54357e361aea 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -522,6 +522,45 @@ static void seccomp_send_sigsys(int syscall, int reason)
 }
 #endif	/* CONFIG_SECCOMP_FILTER */
 
+/* For use with seccomp_actions_logged */
+#define SECCOMP_LOG_KILL		(1 << 0)
+#define SECCOMP_LOG_TRAP		(1 << 2)
+#define SECCOMP_LOG_ERRNO		(1 << 3)
+#define SECCOMP_LOG_TRACE		(1 << 4)
+#define SECCOMP_LOG_ALLOW		(1 << 5)
+
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
+				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE;
+
+static inline void seccomp_log(unsigned long syscall, long signr, u32 action)
+{
+	bool log = false;
+
+	switch (action) {
+	case SECCOMP_RET_ALLOW:
+	case SECCOMP_RET_TRAP:
+	case SECCOMP_RET_ERRNO:
+	case SECCOMP_RET_TRACE:
+		break;
+	case SECCOMP_RET_KILL:
+	default:
+		log = seccomp_actions_logged & SECCOMP_LOG_KILL;
+	}
+
+	/*
+	 * Force an audit message to be emitted when the action is RET_KILL and
+	 * the action is allowed to be logged by the admin.
+	 */
+	if (log)
+		return __audit_seccomp(syscall, signr, action);
+
+	/*
+	 * Let the audit subsystem decide if the action should be audited based
+	 * on whether the current task itself is being audited.
+	 */
+	return audit_seccomp(syscall, signr, action);
+}
+
 /*
  * Secure computing mode 1 allows only read/write/exit/sigreturn.
  * To be fully secure this must be combined with rlimit
@@ -547,7 +586,7 @@ static void __secure_computing_strict(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
-	audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
+	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL);
 	do_exit(SIGKILL);
 }
 
@@ -656,7 +695,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
 	case SECCOMP_RET_KILL:
 	default:
-		audit_seccomp(this_syscall, SIGSYS, action);
+		seccomp_log(this_syscall, SIGSYS, action);
 		/* Dump core only if this is the last remaining thread. */
 		if (get_nr_threads(current) == 1) {
 			siginfo_t info;
@@ -673,7 +712,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 	unreachable();
 
 skip:
-	audit_seccomp(this_syscall, 0, action);
+	seccomp_log(this_syscall, 0, action);
 	return -1;
 }
 #else
@@ -978,6 +1017,127 @@ static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME	" "
 					    SECCOMP_RET_TRACE_NAME	" "
 					    SECCOMP_RET_ALLOW_NAME;
 
+struct seccomp_log_name {
+	u32		log;
+	const char	*name;
+};
+
+static const struct seccomp_log_name seccomp_log_names[] = {
+	{ SECCOMP_LOG_KILL, SECCOMP_RET_KILL_NAME },
+	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
+	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
+	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
+	{ SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
+	{ }
+};
+
+static bool seccomp_names_from_actions_logged(char *names, size_t size,
+					      u32 actions_logged)
+{
+	const struct seccomp_log_name *cur;
+	bool append_space = false;
+
+	for (cur = seccomp_log_names; cur->name && size; cur++) {
+		ssize_t ret;
+
+		if (!(actions_logged & cur->log))
+			continue;
+
+		if (append_space) {
+			ret = strscpy(names, " ", size);
+			if (ret < 0)
+				return false;
+
+			names += ret;
+			size -= ret;
+		} else
+			append_space = true;
+
+		ret = strscpy(names, cur->name, size);
+		if (ret < 0)
+			return false;
+
+		names += ret;
+		size -= ret;
+	}
+
+	return true;
+}
+
+static bool seccomp_action_logged_from_name(u32 *action_logged,
+					    const char *name)
+{
+	const struct seccomp_log_name *cur;
+
+	for (cur = seccomp_log_names; cur->name; cur++) {
+		if (!strcmp(cur->name, name)) {
+			*action_logged = cur->log;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
+{
+	char *name;
+
+	*actions_logged = 0;
+	while ((name = strsep(&names, " ")) && *name) {
+		u32 action_logged = 0;
+
+		if (!seccomp_action_logged_from_name(&action_logged, name))
+			return false;
+
+		*actions_logged |= action_logged;
+	}
+
+	return true;
+}
+
+static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos)
+{
+	char names[sizeof(seccomp_actions_avail)];
+	struct ctl_table table;
+	int ret;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	memset(names, 0, sizeof(names));
+
+	if (!write) {
+		if (!seccomp_names_from_actions_logged(names, sizeof(names),
+						       seccomp_actions_logged))
+			return -EINVAL;
+	}
+
+	table = *ro_table;
+	table.data = names;
+	table.maxlen = sizeof(names);
+	ret = proc_dostring(&table, write, buffer, lenp, ppos);
+	if (ret)
+		return ret;
+
+	if (write) {
+		u32 actions_logged;
+
+		if (!seccomp_actions_logged_from_names(&actions_logged,
+						       table.data))
+			return -EINVAL;
+
+		if (actions_logged & SECCOMP_LOG_ALLOW)
+			return -EINVAL;
+
+		seccomp_actions_logged = actions_logged;
+	}
+
+	return 0;
+}
+
 static struct ctl_path seccomp_sysctl_path[] = {
 	{ .procname = "kernel", },
 	{ .procname = "seccomp", },
@@ -992,6 +1152,11 @@ static struct ctl_table seccomp_sysctl_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_dostring,
 	},
+	{
+		.procname	= "actions_logged",
+		.mode		= 0644,
+		.proc_handler	= seccomp_actions_logged_handler,
+	},
 	{ }
 };
 
-- 
cgit 


From 2b7ea5b5b5799f2878ed454bb48032bed6d101d3 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:55 +0000
Subject: seccomp: Selftest for detection of filter flag support

Userspace needs to be able to reliably detect the support of a filter
flag. A good way of doing that is by attempting to enter filter mode,
with the flag bit(s) in question set, and a NULL pointer for the args
parameter of seccomp(2). EFAULT indicates that the flag is valid and
EINVAL indicates that the flag is invalid.

This patch adds a selftest that can be used to test this method of
detection in userspace.

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 60 +++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 1f2888f6678b..abf708e09892 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1835,6 +1835,66 @@ TEST(seccomp_syscall_mode_lock)
 	}
 }
 
+/*
+ * Test detection of known and unknown filter flags. Userspace needs to be able
+ * to check if a filter flag is supported by the current kernel and a good way
+ * of doing that is by attempting to enter filter mode, with the flag bit in
+ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
+ * that the flag is valid and EINVAL indicates that the flag is invalid.
+ */
+TEST(detect_seccomp_filter_flags)
+{
+	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC };
+	unsigned int flag, all_flags;
+	int i;
+	long ret;
+
+	/* Test detection of known-good filter flags */
+	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+		flag = flags[i];
+		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+		ASSERT_NE(ENOSYS, errno) {
+			TH_LOG("Kernel does not support seccomp syscall!");
+		}
+		EXPECT_EQ(-1, ret);
+		EXPECT_EQ(EFAULT, errno) {
+			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
+			       flag);
+		}
+
+		all_flags |= flag;
+	}
+
+	/* Test detection of all known-good filter flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
+		       all_flags);
+	}
+
+	/* Test detection of an unknown filter flag */
+	flag = -1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
+		       flag);
+	}
+
+	/*
+	 * Test detection of an unknown filter flag that may simply need to be
+	 * added to this test
+	 */
+	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
+		       flag);
+	}
+}
+
 TEST(TSYNC_first)
 {
 	struct sock_filter filter[] = {
-- 
cgit 


From e66a39977985b1e69e17c4042cb290768eca9b02 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:56 +0000
Subject: seccomp: Filter flag to log all actions except SECCOMP_RET_ALLOW

Add a new filter flag, SECCOMP_FILTER_FLAG_LOG, that enables logging for
all actions except for SECCOMP_RET_ALLOW for the given filter.

SECCOMP_RET_KILL actions are always logged, when "kill" is in the
actions_logged sysctl, and SECCOMP_RET_ALLOW actions are never logged,
regardless of this flag.

This flag can be used to create noisy filters that result in all
non-allowed actions to be logged. A process may have one noisy filter,
which is loaded with this flag, as well as a quiet filter that's not
loaded with this flag. This allows for the actions in a set of filters
to be selectively conveyed to the admin.

Since a system could have a large number of allocated seccomp_filter
structs, struct packing was taken in consideration. On 64 bit x86, the
new log member takes up one byte of an existing four byte hole in the
struct. On 32 bit x86, the new log member creates a new four byte hole
(unavoidable) and consumes one of those bytes.

Unfortunately, the tests added for SECCOMP_FILTER_FLAG_LOG are not
capable of inspecting the audit log to verify that the actions taken in
the filter were logged.

With this patch, the logic for deciding if an action will be logged is:

if action == RET_ALLOW:
  do not log
else if action == RET_KILL && RET_KILL in actions_logged:
  log
else if filter-requests-logging && action in actions_logged:
  log
else if audit_enabled && process-is-being-audited:
  log
else:
  do not log

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/seccomp.h                       |  3 +-
 include/uapi/linux/seccomp.h                  |  1 +
 kernel/seccomp.c                              | 26 +++++++---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 69 ++++++++++++++++++++++++++-
 4 files changed, 91 insertions(+), 8 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index ecc296c137cd..c8bef436b61d 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,7 +3,8 @@
 
 #include <uapi/linux/seccomp.h>
 
-#define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC)
+#define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC | \
+					 SECCOMP_FILTER_FLAG_LOG)
 
 #ifdef CONFIG_SECCOMP
 
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index aaad61cc46bc..19a611d0712e 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -17,6 +17,7 @@
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
 #define SECCOMP_FILTER_FLAG_TSYNC	1
+#define SECCOMP_FILTER_FLAG_LOG		2
 
 /*
  * All BPF programs must return a 32-bit value.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 54357e361aea..ed9fde418fc4 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -44,6 +44,7 @@
  *         get/put helpers should be used when accessing an instance
  *         outside of a lifetime-guarded section.  In general, this
  *         is only needed for handling filters shared across tasks.
+ * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
  * @prev: points to a previously installed, or inherited, filter
  * @prog: the BPF program to evaluate
  *
@@ -59,6 +60,7 @@
  */
 struct seccomp_filter {
 	refcount_t usage;
+	bool log;
 	struct seccomp_filter *prev;
 	struct bpf_prog *prog;
 };
@@ -452,6 +454,10 @@ static long seccomp_attach_filter(unsigned int flags,
 			return ret;
 	}
 
+	/* Set log flag, if present. */
+	if (flags & SECCOMP_FILTER_FLAG_LOG)
+		filter->log = true;
+
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its
 	 * task reference.
@@ -532,15 +538,22 @@ static void seccomp_send_sigsys(int syscall, int reason)
 static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
 				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE;
 
-static inline void seccomp_log(unsigned long syscall, long signr, u32 action)
+static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
+			       bool requested)
 {
 	bool log = false;
 
 	switch (action) {
 	case SECCOMP_RET_ALLOW:
+		break;
 	case SECCOMP_RET_TRAP:
+		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
+		break;
 	case SECCOMP_RET_ERRNO:
+		log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
+		break;
 	case SECCOMP_RET_TRACE:
+		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
 		break;
 	case SECCOMP_RET_KILL:
 	default:
@@ -548,8 +561,9 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action)
 	}
 
 	/*
-	 * Force an audit message to be emitted when the action is RET_KILL and
-	 * the action is allowed to be logged by the admin.
+	 * Force an audit message to be emitted when the action is RET_KILL or
+	 * the FILTER_FLAG_LOG bit was set and the action is allowed to be
+	 * logged by the admin.
 	 */
 	if (log)
 		return __audit_seccomp(syscall, signr, action);
@@ -586,7 +600,7 @@ static void __secure_computing_strict(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
-	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL);
+	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL, true);
 	do_exit(SIGKILL);
 }
 
@@ -695,7 +709,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
 	case SECCOMP_RET_KILL:
 	default:
-		seccomp_log(this_syscall, SIGSYS, action);
+		seccomp_log(this_syscall, SIGSYS, action, true);
 		/* Dump core only if this is the last remaining thread. */
 		if (get_nr_threads(current) == 1) {
 			siginfo_t info;
@@ -712,7 +726,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 	unreachable();
 
 skip:
-	seccomp_log(this_syscall, 0, action);
+	seccomp_log(this_syscall, 0, action, match ? match->log : false);
 	return -1;
 }
 #else
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index abf708e09892..1c8c22ce7740 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1739,6 +1739,10 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
 #define SECCOMP_FILTER_FLAG_TSYNC 1
 #endif
 
+#ifndef SECCOMP_FILTER_FLAG_LOG
+#define SECCOMP_FILTER_FLAG_LOG 2
+#endif
+
 #ifndef seccomp
 int seccomp(unsigned int op, unsigned int flags, void *args)
 {
@@ -1844,7 +1848,8 @@ TEST(seccomp_syscall_mode_lock)
  */
 TEST(detect_seccomp_filter_flags)
 {
-	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC };
+	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
+				 SECCOMP_FILTER_FLAG_LOG };
 	unsigned int flag, all_flags;
 	int i;
 	long ret;
@@ -2533,6 +2538,67 @@ TEST(syscall_restart)
 		_metadata->passed = 0;
 }
 
+TEST_SIGNAL(filter_flag_log, SIGSYS)
+{
+	struct sock_filter allow_filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter kill_filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog allow_prog = {
+		.len = (unsigned short)ARRAY_SIZE(allow_filter),
+		.filter = allow_filter,
+	};
+	struct sock_fprog kill_prog = {
+		.len = (unsigned short)ARRAY_SIZE(kill_filter),
+		.filter = kill_filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
+	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
+		      &allow_prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	EXPECT_NE(0, ret) {
+		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
+	}
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
+	}
+
+	/* Verify that a simple, permissive filter can be added with no flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
+	EXPECT_EQ(0, ret);
+
+	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+		      &allow_prog);
+	ASSERT_NE(EINVAL, errno) {
+		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
+	}
+	EXPECT_EQ(0, ret);
+
+	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+		      &kill_prog);
+	EXPECT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
 TEST(get_action_avail)
 {
 	__u32 actions[] = { SECCOMP_RET_KILL,  SECCOMP_RET_TRAP,
@@ -2573,6 +2639,7 @@ TEST(get_action_avail)
  * - endianness checking when appropriate
  * - 64-bit arg prodding
  * - arch value testing (x86 modes especially)
+ * - verify that FILTER_FLAG_LOG filters generate log messages
  * - ...
  */
 
-- 
cgit 


From 59f5cf44a38284eb9e76270c786fb6cc62ef8ac4 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:57 +0000
Subject: seccomp: Action to log before allowing

Add a new action, SECCOMP_RET_LOG, that logs a syscall before allowing
the syscall. At the implementation level, this action is identical to
the existing SECCOMP_RET_ALLOW action. However, it can be very useful when
initially developing a seccomp filter for an application. The developer
can set the default action to be SECCOMP_RET_LOG, maybe mark any
obviously needed syscalls with SECCOMP_RET_ALLOW, and then put the
application through its paces. A list of syscalls that triggered the
default action (SECCOMP_RET_LOG) can be easily gleaned from the logs and
that list can be used to build the syscall whitelist. Finally, the
developer can change the default action to the desired value.

This provides a more friendly experience than seeing the application get
killed, then updating the filter and rebuilding the app, seeing the
application get killed due to a different syscall, then updating the
filter and rebuilding the app, etc.

The functionality is similar to what's supported by the various LSMs.
SELinux has permissive mode, AppArmor has complain mode, SMACK has
bring-up mode, etc.

SECCOMP_RET_LOG is given a lower value than SECCOMP_RET_ALLOW as allow
while logging is slightly more restrictive than quietly allowing.

Unfortunately, the tests added for SECCOMP_RET_LOG are not capable of
inspecting the audit log to verify that the syscall was logged.

With this patch, the logic for deciding if an action will be logged is:

if action == RET_ALLOW:
  do not log
else if action == RET_KILL && RET_KILL in actions_logged:
  log
else if action == RET_LOG && RET_LOG in actions_logged:
  log
else if filter-requests-logging && action in actions_logged:
  log
else if audit_enabled && process-is-being-audited:
  log
else:
  do not log

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/userspace-api/seccomp_filter.rst |  9 +++
 include/uapi/linux/seccomp.h                   |  1 +
 kernel/seccomp.c                               | 23 ++++--
 tools/testing/selftests/seccomp/seccomp_bpf.c  | 98 +++++++++++++++++++++++++-
 4 files changed, 125 insertions(+), 6 deletions(-)

diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index 2d1d8ab04ac5..f4977357daf2 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -141,6 +141,15 @@ In precedence order, they are:
 	allow use of ptrace, even of other sandboxed processes, without
 	extreme care; ptracers can use this mechanism to escape.)
 
+``SECCOMP_RET_LOG``:
+	Results in the system call being executed after it is logged. This
+	should be used by application developers to learn which syscalls their
+	application needs without having to iterate through multiple test and
+	development cycles to build the list.
+
+	This action will only be logged if "log" is present in the
+	actions_logged sysctl string.
+
 ``SECCOMP_RET_ALLOW``:
 	Results in the system call being executed.
 
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 19a611d0712e..f94433263e4b 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -31,6 +31,7 @@
 #define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
 #define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
 #define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_LOG		0x7ffc0000U /* allow after logging */
 #define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ed9fde418fc4..59cde2ed3b92 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -533,10 +533,12 @@ static void seccomp_send_sigsys(int syscall, int reason)
 #define SECCOMP_LOG_TRAP		(1 << 2)
 #define SECCOMP_LOG_ERRNO		(1 << 3)
 #define SECCOMP_LOG_TRACE		(1 << 4)
-#define SECCOMP_LOG_ALLOW		(1 << 5)
+#define SECCOMP_LOG_LOG			(1 << 5)
+#define SECCOMP_LOG_ALLOW		(1 << 6)
 
 static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
-				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE;
+				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE |
+				    SECCOMP_LOG_LOG;
 
 static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
 			       bool requested)
@@ -555,15 +557,18 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
 	case SECCOMP_RET_TRACE:
 		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
 		break;
+	case SECCOMP_RET_LOG:
+		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
+		break;
 	case SECCOMP_RET_KILL:
 	default:
 		log = seccomp_actions_logged & SECCOMP_LOG_KILL;
 	}
 
 	/*
-	 * Force an audit message to be emitted when the action is RET_KILL or
-	 * the FILTER_FLAG_LOG bit was set and the action is allowed to be
-	 * logged by the admin.
+	 * Force an audit message to be emitted when the action is RET_KILL,
+	 * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is
+	 * allowed to be logged by the admin.
 	 */
 	if (log)
 		return __audit_seccomp(syscall, signr, action);
@@ -699,6 +704,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
 		return 0;
 
+	case SECCOMP_RET_LOG:
+		seccomp_log(this_syscall, 0, action, true);
+		return 0;
+
 	case SECCOMP_RET_ALLOW:
 		/*
 		 * Note that the "match" filter will always be NULL for
@@ -873,6 +882,7 @@ static long seccomp_get_action_avail(const char __user *uaction)
 	case SECCOMP_RET_TRAP:
 	case SECCOMP_RET_ERRNO:
 	case SECCOMP_RET_TRACE:
+	case SECCOMP_RET_LOG:
 	case SECCOMP_RET_ALLOW:
 		break;
 	default:
@@ -1023,12 +1033,14 @@ out:
 #define SECCOMP_RET_TRAP_NAME		"trap"
 #define SECCOMP_RET_ERRNO_NAME		"errno"
 #define SECCOMP_RET_TRACE_NAME		"trace"
+#define SECCOMP_RET_LOG_NAME		"log"
 #define SECCOMP_RET_ALLOW_NAME		"allow"
 
 static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME	" "
 					    SECCOMP_RET_TRAP_NAME	" "
 					    SECCOMP_RET_ERRNO_NAME	" "
 					    SECCOMP_RET_TRACE_NAME	" "
+					    SECCOMP_RET_LOG_NAME	" "
 					    SECCOMP_RET_ALLOW_NAME;
 
 struct seccomp_log_name {
@@ -1041,6 +1053,7 @@ static const struct seccomp_log_name seccomp_log_names[] = {
 	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
 	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
 	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
+	{ SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
 	{ SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
 	{ }
 };
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 1c8c22ce7740..7372958eccb5 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -74,7 +74,12 @@
 #define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
 #define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
 #define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
+#endif
+#ifndef SECCOMP_RET_LOG
+#define SECCOMP_RET_LOG       0x7ffc0000U /* allow after logging */
+#endif
 
+#ifndef SECCOMP_RET_ACTION
 /* Masks for the return value sections. */
 #define SECCOMP_RET_ACTION      0x7fff0000U
 #define SECCOMP_RET_DATA        0x0000ffffU
@@ -342,6 +347,28 @@ TEST(empty_prog)
 	EXPECT_EQ(EINVAL, errno);
 }
 
+TEST(log_all)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	/* getppid() should succeed and be logged (no check for logging) */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+}
+
 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
 {
 	struct sock_filter filter[] = {
@@ -756,6 +783,7 @@ TEST_F(TRAP, handler)
 
 FIXTURE_DATA(precedence) {
 	struct sock_fprog allow;
+	struct sock_fprog log;
 	struct sock_fprog trace;
 	struct sock_fprog error;
 	struct sock_fprog trap;
@@ -767,6 +795,13 @@ FIXTURE_SETUP(precedence)
 	struct sock_filter allow_insns[] = {
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 	};
+	struct sock_filter log_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+	};
 	struct sock_filter trace_insns[] = {
 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 			offsetof(struct seccomp_data, nr)),
@@ -803,6 +838,7 @@ FIXTURE_SETUP(precedence)
 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
 	FILTER_ALLOC(allow);
+	FILTER_ALLOC(log);
 	FILTER_ALLOC(trace);
 	FILTER_ALLOC(error);
 	FILTER_ALLOC(trap);
@@ -813,6 +849,7 @@ FIXTURE_TEARDOWN(precedence)
 {
 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
 	FILTER_FREE(allow);
+	FILTER_FREE(log);
 	FILTER_FREE(trace);
 	FILTER_FREE(error);
 	FILTER_FREE(trap);
@@ -830,6 +867,8 @@ TEST_F(precedence, allow_ok)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -854,6 +893,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -885,6 +926,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
@@ -906,6 +949,8 @@ TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -931,6 +976,8 @@ TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -952,6 +999,8 @@ TEST_F(precedence, errno_is_third)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -970,6 +1019,8 @@ TEST_F(precedence, errno_is_third_in_any_order)
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
@@ -992,6 +1043,8 @@ TEST_F(precedence, trace_is_fourth)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	/* Should work just fine. */
@@ -1013,12 +1066,54 @@ TEST_F(precedence, trace_is_fourth_in_any_order)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	/* Should work just fine. */
 	EXPECT_EQ(parent, syscall(__NR_getppid));
 	/* No ptracer */
 	EXPECT_EQ(-1, syscall(__NR_getpid));
 }
 
+TEST_F(precedence, log_is_fifth)
+{
+	pid_t mypid, parent;
+	long ret;
+
+	mypid = getpid();
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* Should also work just fine */
+	EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, log_is_fifth_in_any_order)
+{
+	pid_t mypid, parent;
+	long ret;
+
+	mypid = getpid();
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* Should also work just fine */
+	EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
 #ifndef PTRACE_O_TRACESECCOMP
 #define PTRACE_O_TRACESECCOMP	0x00000080
 #endif
@@ -2603,7 +2698,7 @@ TEST(get_action_avail)
 {
 	__u32 actions[] = { SECCOMP_RET_KILL,  SECCOMP_RET_TRAP,
 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
-			    SECCOMP_RET_ALLOW };
+			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
 	__u32 unknown_action = 0x10000000U;
 	int i;
 	long ret;
@@ -2640,6 +2735,7 @@ TEST(get_action_avail)
  * - 64-bit arg prodding
  * - arch value testing (x86 modes especially)
  * - verify that FILTER_FLAG_LOG filters generate log messages
+ * - verify that RET_LOG generates log messages
  * - ...
  */
 
-- 
cgit 


From fd76875ca289a3d4722f266fd2d5532a27083903 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 12:53:18 -0700
Subject: seccomp: Rename SECCOMP_RET_KILL to SECCOMP_RET_KILL_THREAD

In preparation for adding SECCOMP_RET_KILL_PROCESS, rename SECCOMP_RET_KILL
to the more accurate SECCOMP_RET_KILL_THREAD.

The existing selftest values are intentionally left as SECCOMP_RET_KILL
just to be sure we're exercising the alias.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/networking/filter.txt            |  2 +-
 Documentation/userspace-api/seccomp_filter.rst |  4 +--
 include/uapi/linux/seccomp.h                   |  3 +-
 kernel/seccomp.c                               | 39 ++++++++++++++------------
 samples/seccomp/bpf-direct.c                   |  4 +--
 samples/seccomp/bpf-helper.h                   |  2 +-
 tools/testing/selftests/seccomp/seccomp_bpf.c  | 17 ++++++-----
 7 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index b69b205501de..73aa0f12156d 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -337,7 +337,7 @@ Examples for low-level BPF:
   jeq #14, good           /* __NR_rt_sigprocmask */
   jeq #13, good           /* __NR_rt_sigaction */
   jeq #35, good           /* __NR_nanosleep */
-  bad: ret #0             /* SECCOMP_RET_KILL */
+  bad: ret #0             /* SECCOMP_RET_KILL_THREAD */
   good: ret #0x7fff0000   /* SECCOMP_RET_ALLOW */
 
 The above example code can be placed into a file (here called "foo"), and
diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index f4977357daf2..d76396f2d8ed 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -87,11 +87,11 @@ Return values
 A seccomp filter may return any of the following values. If multiple
 filters exist, the return value for the evaluation of a given system
 call will always use the highest precedent value. (For example,
-``SECCOMP_RET_KILL`` will always take precedence.)
+``SECCOMP_RET_KILL_THREAD`` will always take precedence.)
 
 In precedence order, they are:
 
-``SECCOMP_RET_KILL``:
+``SECCOMP_RET_KILL_THREAD``:
 	Results in the task exiting immediately without executing the
 	system call.  The exit status of the task (``status & 0x7f``) will
 	be ``SIGSYS``, not ``SIGKILL``.
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index f94433263e4b..5a03f699eb17 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -27,7 +27,8 @@
  * The ordering ensures that a min_t() over composed return values always
  * selects the least permissive choice.
  */
-#define SECCOMP_RET_KILL	0x00000000U /* kill the task immediately */
+#define SECCOMP_RET_KILL_THREAD	0x00000000U /* kill the thread */
+#define SECCOMP_RET_KILL	SECCOMP_RET_KILL_THREAD
 #define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
 #define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
 #define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 59cde2ed3b92..95ac54cff00f 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -192,7 +192,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
 	if (unlikely(WARN_ON(f == NULL)))
-		return SECCOMP_RET_KILL;
+		return SECCOMP_RET_KILL_THREAD;
 
 	if (!sd) {
 		populate_seccomp_data(&sd_local);
@@ -529,15 +529,17 @@ static void seccomp_send_sigsys(int syscall, int reason)
 #endif	/* CONFIG_SECCOMP_FILTER */
 
 /* For use with seccomp_actions_logged */
-#define SECCOMP_LOG_KILL		(1 << 0)
+#define SECCOMP_LOG_KILL_THREAD		(1 << 0)
 #define SECCOMP_LOG_TRAP		(1 << 2)
 #define SECCOMP_LOG_ERRNO		(1 << 3)
 #define SECCOMP_LOG_TRACE		(1 << 4)
 #define SECCOMP_LOG_LOG			(1 << 5)
 #define SECCOMP_LOG_ALLOW		(1 << 6)
 
-static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
-				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE |
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_THREAD |
+				    SECCOMP_LOG_TRAP  |
+				    SECCOMP_LOG_ERRNO |
+				    SECCOMP_LOG_TRACE |
 				    SECCOMP_LOG_LOG;
 
 static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
@@ -560,13 +562,13 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
 	case SECCOMP_RET_LOG:
 		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
 		break;
-	case SECCOMP_RET_KILL:
+	case SECCOMP_RET_KILL_THREAD:
 	default:
-		log = seccomp_actions_logged & SECCOMP_LOG_KILL;
+		log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
 	}
 
 	/*
-	 * Force an audit message to be emitted when the action is RET_KILL,
+	 * Force an audit message to be emitted when the action is RET_KILL_*,
 	 * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is
 	 * allowed to be logged by the admin.
 	 */
@@ -605,7 +607,7 @@ static void __secure_computing_strict(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
-	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL, true);
+	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
 	do_exit(SIGKILL);
 }
 
@@ -716,7 +718,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 		 */
 		return 0;
 
-	case SECCOMP_RET_KILL:
+	case SECCOMP_RET_KILL_THREAD:
 	default:
 		seccomp_log(this_syscall, SIGSYS, action, true);
 		/* Dump core only if this is the last remaining thread. */
@@ -878,7 +880,7 @@ static long seccomp_get_action_avail(const char __user *uaction)
 		return -EFAULT;
 
 	switch (action) {
-	case SECCOMP_RET_KILL:
+	case SECCOMP_RET_KILL_THREAD:
 	case SECCOMP_RET_TRAP:
 	case SECCOMP_RET_ERRNO:
 	case SECCOMP_RET_TRACE:
@@ -1029,19 +1031,20 @@ out:
 #ifdef CONFIG_SYSCTL
 
 /* Human readable action names for friendly sysctl interaction */
-#define SECCOMP_RET_KILL_NAME		"kill"
+#define SECCOMP_RET_KILL_THREAD_NAME	"kill_thread"
 #define SECCOMP_RET_TRAP_NAME		"trap"
 #define SECCOMP_RET_ERRNO_NAME		"errno"
 #define SECCOMP_RET_TRACE_NAME		"trace"
 #define SECCOMP_RET_LOG_NAME		"log"
 #define SECCOMP_RET_ALLOW_NAME		"allow"
 
-static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME	" "
-					    SECCOMP_RET_TRAP_NAME	" "
-					    SECCOMP_RET_ERRNO_NAME	" "
-					    SECCOMP_RET_TRACE_NAME	" "
-					    SECCOMP_RET_LOG_NAME	" "
-					    SECCOMP_RET_ALLOW_NAME;
+static const char seccomp_actions_avail[] =
+				SECCOMP_RET_KILL_THREAD_NAME	" "
+				SECCOMP_RET_TRAP_NAME		" "
+				SECCOMP_RET_ERRNO_NAME		" "
+				SECCOMP_RET_TRACE_NAME		" "
+				SECCOMP_RET_LOG_NAME		" "
+				SECCOMP_RET_ALLOW_NAME;
 
 struct seccomp_log_name {
 	u32		log;
@@ -1049,7 +1052,7 @@ struct seccomp_log_name {
 };
 
 static const struct seccomp_log_name seccomp_log_names[] = {
-	{ SECCOMP_LOG_KILL, SECCOMP_RET_KILL_NAME },
+	{ SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
 	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
 	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
 	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
diff --git a/samples/seccomp/bpf-direct.c b/samples/seccomp/bpf-direct.c
index 151ec3f52189..235ce3c49ee9 100644
--- a/samples/seccomp/bpf-direct.c
+++ b/samples/seccomp/bpf-direct.c
@@ -129,7 +129,7 @@ static int install_filter(void)
 		/* Check that read is only using stdin. */
 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)),
 		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDIN_FILENO, 4, 0),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD),
 
 		/* Check that write is only using stdout */
 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)),
@@ -139,7 +139,7 @@ static int install_filter(void)
 
 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD),
 	};
 	struct sock_fprog prog = {
 		.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h
index 1d8de9edd858..83dbe79cbe2c 100644
--- a/samples/seccomp/bpf-helper.h
+++ b/samples/seccomp/bpf-helper.h
@@ -44,7 +44,7 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count);
 #define ALLOW \
 	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
 #define DENY \
-	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
+	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD)
 #define JUMP(labels, label) \
 	BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \
 		 JUMP_JT, JUMP_JF)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 7372958eccb5..a3ba39a32449 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -68,15 +68,18 @@
 #define SECCOMP_MODE_FILTER 2
 #endif
 
+#ifndef SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
+#endif
 #ifndef SECCOMP_RET_KILL
-#define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
-#define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
-#define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
-#define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
-#define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
+#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
 #endif
 #ifndef SECCOMP_RET_LOG
-#define SECCOMP_RET_LOG       0x7ffc0000U /* allow after logging */
+#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
 #endif
 
 #ifndef SECCOMP_RET_ACTION
@@ -2696,7 +2699,7 @@ TEST_SIGNAL(filter_flag_log, SIGSYS)
 
 TEST(get_action_avail)
 {
-	__u32 actions[] = { SECCOMP_RET_KILL,  SECCOMP_RET_TRAP,
+	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
 			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
 	__u32 unknown_action = 0x10000000U;
-- 
cgit 


From 4d3b0b05aae9ee9ce0970dc4cc0fb3fad5e85945 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 13:01:39 -0700
Subject: seccomp: Introduce SECCOMP_RET_KILL_PROCESS

This introduces the BPF return value for SECCOMP_RET_KILL_PROCESS to kill
an entire process. This cannot yet be reached by seccomp, but it changes
the default-kill behavior (for unknown return values) from kill-thread to
kill-process.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h | 18 ++++++++++--------
 kernel/seccomp.c             | 22 ++++++++++++++++------
 2 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 5a03f699eb17..7e77c92df78a 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -22,18 +22,20 @@
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
- * The upper 16-bits are ordered from least permissive values to most.
+ * The upper 16-bits are ordered from least permissive values to most,
+ * as a signed value (so 0x8000000 is negative).
  *
  * The ordering ensures that a min_t() over composed return values always
  * selects the least permissive choice.
  */
-#define SECCOMP_RET_KILL_THREAD	0x00000000U /* kill the thread */
-#define SECCOMP_RET_KILL	SECCOMP_RET_KILL_THREAD
-#define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
-#define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
-#define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
-#define SECCOMP_RET_LOG		0x7ffc0000U /* allow after logging */
-#define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
+#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
+#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
 #define SECCOMP_RET_ACTION	0x7fff0000U
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 95ac54cff00f..5c7299b9d953 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -192,7 +192,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
 	if (unlikely(WARN_ON(f == NULL)))
-		return SECCOMP_RET_KILL_THREAD;
+		return SECCOMP_RET_KILL_PROCESS;
 
 	if (!sd) {
 		populate_seccomp_data(&sd_local);
@@ -529,14 +529,16 @@ static void seccomp_send_sigsys(int syscall, int reason)
 #endif	/* CONFIG_SECCOMP_FILTER */
 
 /* For use with seccomp_actions_logged */
-#define SECCOMP_LOG_KILL_THREAD		(1 << 0)
+#define SECCOMP_LOG_KILL_PROCESS	(1 << 0)
+#define SECCOMP_LOG_KILL_THREAD		(1 << 1)
 #define SECCOMP_LOG_TRAP		(1 << 2)
 #define SECCOMP_LOG_ERRNO		(1 << 3)
 #define SECCOMP_LOG_TRACE		(1 << 4)
 #define SECCOMP_LOG_LOG			(1 << 5)
 #define SECCOMP_LOG_ALLOW		(1 << 6)
 
-static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_THREAD |
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
+				    SECCOMP_LOG_KILL_THREAD  |
 				    SECCOMP_LOG_TRAP  |
 				    SECCOMP_LOG_ERRNO |
 				    SECCOMP_LOG_TRACE |
@@ -563,8 +565,11 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
 		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
 		break;
 	case SECCOMP_RET_KILL_THREAD:
-	default:
 		log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
+		break;
+	case SECCOMP_RET_KILL_PROCESS:
+	default:
+		log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
 	}
 
 	/*
@@ -719,10 +724,12 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 		return 0;
 
 	case SECCOMP_RET_KILL_THREAD:
+	case SECCOMP_RET_KILL_PROCESS:
 	default:
 		seccomp_log(this_syscall, SIGSYS, action, true);
 		/* Dump core only if this is the last remaining thread. */
-		if (get_nr_threads(current) == 1) {
+		if (action == SECCOMP_RET_KILL_PROCESS ||
+		    get_nr_threads(current) == 1) {
 			siginfo_t info;
 
 			/* Show the original registers in the dump. */
@@ -731,7 +738,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 			seccomp_init_siginfo(&info, this_syscall, data);
 			do_coredump(&info);
 		}
-		do_exit(SIGSYS);
+		if (action == SECCOMP_RET_KILL_PROCESS)
+			do_group_exit(SIGSYS);
+		else
+			do_exit(SIGSYS);
 	}
 
 	unreachable();
-- 
cgit 


From 0466bdb99e8744bc9befa8d62a317f0fd7fd7421 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 13:12:11 -0700
Subject: seccomp: Implement SECCOMP_RET_KILL_PROCESS action
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Right now, SECCOMP_RET_KILL_THREAD (neé SECCOMP_RET_KILL) kills the
current thread. There have been a few requests for this to kill the entire
process (the thread group). This cannot be just changed (discovered when
adding coredump support since coredumping kills the entire process)
because there are userspace programs depending on the thread-kill
behavior.

Instead, implement SECCOMP_RET_KILL_PROCESS, which is 0x80000000, and can
be processed as "-1" by the kernel, below the existing RET_KILL that is
ABI-set to "0". For userspace, SECCOMP_RET_ACTION_FULL is added to expand
the mask to the signed bit. Old userspace using the SECCOMP_RET_ACTION
mask will see SECCOMP_RET_KILL_PROCESS as 0 still, but this would only
be visible when examining the siginfo in a core dump from a RET_KILL_*,
where it will think it was thread-killed instead of process-killed.

Attempts to introduce this behavior via other ways (filter flags,
seccomp struct flags, masked RET_DATA bits) all come with weird
side-effects and baggage. This change preserves the central behavioral
expectations of the seccomp filter engine without putting too great
a burden on changes needed in userspace to use the new action.

The new action is discoverable by userspace through either the new
actions_avail sysctl or through the SECCOMP_GET_ACTION_AVAIL seccomp
operation. If used without checking for availability, old kernels
will treat RET_KILL_PROCESS as RET_KILL_THREAD (since the old mask
will produce RET_KILL_THREAD).

Cc: Paul Moore <paul@paul-moore.com>
Cc: Fabricio Voznika <fvoznika@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/userspace-api/seccomp_filter.rst | 7 ++++++-
 include/uapi/linux/seccomp.h                   | 1 +
 kernel/seccomp.c                               | 9 +++++++--
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index d76396f2d8ed..099c412951d6 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -87,10 +87,15 @@ Return values
 A seccomp filter may return any of the following values. If multiple
 filters exist, the return value for the evaluation of a given system
 call will always use the highest precedent value. (For example,
-``SECCOMP_RET_KILL_THREAD`` will always take precedence.)
+``SECCOMP_RET_KILL_PROCESS`` will always take precedence.)
 
 In precedence order, they are:
 
+``SECCOMP_RET_KILL_PROCESS``:
+	Results in the entire process exiting immediately without executing
+	the system call.  The exit status of the task (``status & 0x7f``)
+	will be ``SIGSYS``, not ``SIGKILL``.
+
 ``SECCOMP_RET_KILL_THREAD``:
 	Results in the task exiting immediately without executing the
 	system call.  The exit status of the task (``status & 0x7f``) will
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 7e77c92df78a..f6bc1dea3247 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -38,6 +38,7 @@
 #define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
+#define SECCOMP_RET_ACTION_FULL	0xffff0000U
 #define SECCOMP_RET_ACTION	0x7fff0000U
 #define SECCOMP_RET_DATA	0x0000ffffU
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5c7299b9d953..c24579dfa7a1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -181,6 +181,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  *
  * Returns valid seccomp BPF response codes.
  */
+#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
 static u32 seccomp_run_filters(const struct seccomp_data *sd,
 			       struct seccomp_filter **match)
 {
@@ -206,7 +207,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 	for (; f; f = f->prev) {
 		u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
 
-		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) {
+		if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
 			ret = cur_ret;
 			*match = f;
 		}
@@ -650,7 +651,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
 	filter_ret = seccomp_run_filters(sd, &match);
 	data = filter_ret & SECCOMP_RET_DATA;
-	action = filter_ret & SECCOMP_RET_ACTION;
+	action = filter_ret & SECCOMP_RET_ACTION_FULL;
 
 	switch (action) {
 	case SECCOMP_RET_ERRNO:
@@ -890,6 +891,7 @@ static long seccomp_get_action_avail(const char __user *uaction)
 		return -EFAULT;
 
 	switch (action) {
+	case SECCOMP_RET_KILL_PROCESS:
 	case SECCOMP_RET_KILL_THREAD:
 	case SECCOMP_RET_TRAP:
 	case SECCOMP_RET_ERRNO:
@@ -1041,6 +1043,7 @@ out:
 #ifdef CONFIG_SYSCTL
 
 /* Human readable action names for friendly sysctl interaction */
+#define SECCOMP_RET_KILL_PROCESS_NAME	"kill_process"
 #define SECCOMP_RET_KILL_THREAD_NAME	"kill_thread"
 #define SECCOMP_RET_TRAP_NAME		"trap"
 #define SECCOMP_RET_ERRNO_NAME		"errno"
@@ -1049,6 +1052,7 @@ out:
 #define SECCOMP_RET_ALLOW_NAME		"allow"
 
 static const char seccomp_actions_avail[] =
+				SECCOMP_RET_KILL_PROCESS_NAME	" "
 				SECCOMP_RET_KILL_THREAD_NAME	" "
 				SECCOMP_RET_TRAP_NAME		" "
 				SECCOMP_RET_ERRNO_NAME		" "
@@ -1062,6 +1066,7 @@ struct seccomp_log_name {
 };
 
 static const struct seccomp_log_name seccomp_log_names[] = {
+	{ SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
 	{ SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
 	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
 	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
-- 
cgit 


From f3e1821d9e1cc3fb434d7763001791dcd6720c90 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 13:20:33 -0700
Subject: selftests/seccomp: Test thread vs process killing

This verifies that SECCOMP_RET_KILL_PROCESS is higher priority than
SECCOMP_RET_KILL_THREAD. (This also moves a bunch of defines up earlier
in the file to use them earlier.)

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 228 +++++++++++++++++++-------
 1 file changed, 168 insertions(+), 60 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index a3ba39a32449..0683cd543cd5 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -68,7 +68,17 @@
 #define SECCOMP_MODE_FILTER 2
 #endif
 
-#ifndef SECCOMP_RET_KILL_THREAD
+#ifndef SECCOMP_RET_ALLOW
+struct seccomp_data {
+	int nr;
+	__u32 arch;
+	__u64 instruction_pointer;
+	__u64 args[6];
+};
+#endif
+
+#ifndef SECCOMP_RET_KILL_PROCESS
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
 #define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
 #endif
 #ifndef SECCOMP_RET_KILL
@@ -82,17 +92,53 @@
 #define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
 #endif
 
-#ifndef SECCOMP_RET_ACTION
-/* Masks for the return value sections. */
-#define SECCOMP_RET_ACTION      0x7fff0000U
-#define SECCOMP_RET_DATA        0x0000ffffU
+#ifndef __NR_seccomp
+# if defined(__i386__)
+#  define __NR_seccomp 354
+# elif defined(__x86_64__)
+#  define __NR_seccomp 317
+# elif defined(__arm__)
+#  define __NR_seccomp 383
+# elif defined(__aarch64__)
+#  define __NR_seccomp 277
+# elif defined(__hppa__)
+#  define __NR_seccomp 338
+# elif defined(__powerpc__)
+#  define __NR_seccomp 358
+# elif defined(__s390__)
+#  define __NR_seccomp 348
+# else
+#  warning "seccomp syscall number unknown for this architecture"
+#  define __NR_seccomp 0xffff
+# endif
+#endif
 
-struct seccomp_data {
-	int nr;
-	__u32 arch;
-	__u64 instruction_pointer;
-	__u64 args[6];
-};
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+#ifndef SECCOMP_GET_ACTION_AVAIL
+#define SECCOMP_GET_ACTION_AVAIL 2
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_LOG
+#define SECCOMP_FILTER_FLAG_LOG 2
+#endif
+
+#ifndef seccomp
+int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+	errno = 0;
+	return syscall(__NR_seccomp, op, flags, args);
+}
 #endif
 
 #if __BYTE_ORDER == __LITTLE_ENDIAN
@@ -550,6 +596,117 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
 	close(fd);
 }
 
+/* This is a thread task to die via seccomp filter violation. */
+void *kill_thread(void *data)
+{
+	bool die = (bool)data;
+
+	if (die) {
+		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+		return (void *)SIBLING_EXIT_FAILURE;
+	}
+
+	return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+/* Prepare a thread that will kill itself or both of us. */
+void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+{
+	pthread_t thread;
+	void *status;
+	/* Kill only when calling __NR_prctl. */
+	struct sock_filter filter_thread[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog_thread = {
+		.len = (unsigned short)ARRAY_SIZE(filter_thread),
+		.filter = filter_thread,
+	};
+	struct sock_filter filter_process[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog_process = {
+		.len = (unsigned short)ARRAY_SIZE(filter_process),
+		.filter = filter_process,
+	};
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
+			     kill_process ? &prog_process : &prog_thread));
+
+	/*
+	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
+	 * flag cannot be downgraded by a new filter.
+	 */
+	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+
+	/* Start a thread that will exit immediately. */
+	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
+	ASSERT_EQ(0, pthread_join(thread, &status));
+	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
+
+	/* Start a thread that will die immediately. */
+	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
+	ASSERT_EQ(0, pthread_join(thread, &status));
+	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
+
+	/*
+	 * If we get here, only the spawned thread died. Let the parent know
+	 * the whole process didn't die (i.e. this thread, the spawner,
+	 * stayed running).
+	 */
+	exit(42);
+}
+
+TEST(KILL_thread)
+{
+	int status;
+	pid_t child_pid;
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		kill_thread_or_group(_metadata, false);
+		_exit(38);
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+	/* If only the thread was killed, we'll see exit 42. */
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(42, WEXITSTATUS(status));
+}
+
+TEST(KILL_process)
+{
+	int status;
+	pid_t child_pid;
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		kill_thread_or_group(_metadata, true);
+		_exit(38);
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+	/* If the entire process was killed, we'll see SIGSYS. */
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
 TEST(arg_out_of_range)
 {
@@ -1800,55 +1957,6 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
 }
 
-#ifndef __NR_seccomp
-# if defined(__i386__)
-#  define __NR_seccomp 354
-# elif defined(__x86_64__)
-#  define __NR_seccomp 317
-# elif defined(__arm__)
-#  define __NR_seccomp 383
-# elif defined(__aarch64__)
-#  define __NR_seccomp 277
-# elif defined(__hppa__)
-#  define __NR_seccomp 338
-# elif defined(__powerpc__)
-#  define __NR_seccomp 358
-# elif defined(__s390__)
-#  define __NR_seccomp 348
-# else
-#  warning "seccomp syscall number unknown for this architecture"
-#  define __NR_seccomp 0xffff
-# endif
-#endif
-
-#ifndef SECCOMP_SET_MODE_STRICT
-#define SECCOMP_SET_MODE_STRICT 0
-#endif
-
-#ifndef SECCOMP_SET_MODE_FILTER
-#define SECCOMP_SET_MODE_FILTER 1
-#endif
-
-#ifndef SECCOMP_GET_ACTION_AVAIL
-#define SECCOMP_GET_ACTION_AVAIL 2
-#endif
-
-#ifndef SECCOMP_FILTER_FLAG_TSYNC
-#define SECCOMP_FILTER_FLAG_TSYNC 1
-#endif
-
-#ifndef SECCOMP_FILTER_FLAG_LOG
-#define SECCOMP_FILTER_FLAG_LOG 2
-#endif
-
-#ifndef seccomp
-int seccomp(unsigned int op, unsigned int flags, void *args)
-{
-	errno = 0;
-	return syscall(__NR_seccomp, op, flags, args);
-}
-#endif
-
 TEST(seccomp_syscall)
 {
 	struct sock_filter filter[] = {
-- 
cgit 


From 6849243bf4c6155151b294e9f0e0dc9540d6f083 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Aug 2017 20:26:57 -0700
Subject: samples: Unrename SECCOMP_RET_KILL

Since samples can still be built before header installs, avoid the
cosmetic renaming of SECCOMP_RET_KILL to avoid build failures in -next.

Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 samples/seccomp/bpf-direct.c | 4 ++--
 samples/seccomp/bpf-helper.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/samples/seccomp/bpf-direct.c b/samples/seccomp/bpf-direct.c
index 235ce3c49ee9..151ec3f52189 100644
--- a/samples/seccomp/bpf-direct.c
+++ b/samples/seccomp/bpf-direct.c
@@ -129,7 +129,7 @@ static int install_filter(void)
 		/* Check that read is only using stdin. */
 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)),
 		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDIN_FILENO, 4, 0),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
 
 		/* Check that write is only using stdout */
 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)),
@@ -139,7 +139,7 @@ static int install_filter(void)
 
 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
 	};
 	struct sock_fprog prog = {
 		.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h
index 83dbe79cbe2c..1d8de9edd858 100644
--- a/samples/seccomp/bpf-helper.h
+++ b/samples/seccomp/bpf-helper.h
@@ -44,7 +44,7 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count);
 #define ALLOW \
 	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
 #define DENY \
-	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD)
+	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
 #define JUMP(labels, label) \
 	BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \
 		 JUMP_JT, JUMP_JF)
-- 
cgit 


From a4e89ffb59235fd11d27107dea3efa4562ac0a12 Mon Sep 17 00:00:00 2001
From: Matt Weber <matthew.weber@rockwellcollins.com>
Date: Wed, 28 Jun 2017 11:14:29 -0500
Subject: powerpc/e6500: Update machine check for L1D cache err

This patch updates the machine check handler of Linux kernel to
handle the e6500 architecture case. In e6500 core, L1 Data Cache Write
Shadow Mode (DCWS) register is not implemented but L1 data cache always
runs in write shadow mode. So, on L1 data cache parity errors, hardware
will automatically invalidate the data cache but will still log a
machine check interrupt.

Signed-off-by: Ronak Desai <ronak.desai@rockwellcollins.com>
Signed-off-by: Matthew Weber <matthew.weber@rockwellcollins.com>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 arch/powerpc/kernel/traps.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 675d5d2bfcde..410352acfa38 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -393,6 +393,7 @@ static inline int check_io_access(struct pt_regs *regs)
 int machine_check_e500mc(struct pt_regs *regs)
 {
 	unsigned long mcsr = mfspr(SPRN_MCSR);
+	unsigned long pvr = mfspr(SPRN_PVR);
 	unsigned long reason = mcsr;
 	int recoverable = 1;
 
@@ -434,8 +435,15 @@ int machine_check_e500mc(struct pt_regs *regs)
 		 * may still get logged and cause a machine check.  We should
 		 * only treat the non-write shadow case as non-recoverable.
 		 */
-		if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
-			recoverable = 0;
+		/* On e6500 core, L1 DCWS (Data cache write shadow mode) bit
+		 * is not implemented but L1 data cache always runs in write
+		 * shadow mode. Hence on data cache parity errors HW will
+		 * automatically invalidate the L1 Data Cache.
+		 */
+		if (PVR_VER(pvr) != PVR_VER_E6500) {
+			if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
+				recoverable = 0;
+		}
 	}
 
 	if (reason & MCSR_L2MMU_MHIT) {
-- 
cgit 


From 50dad5fb59d12dc9a5a0e96073ee1e5857ac45a2 Mon Sep 17 00:00:00 2001
From: Himanshu Jha <himanshujha199640@gmail.com>
Date: Wed, 30 Aug 2017 00:33:35 +0530
Subject: drm/amdkfd: remove memset before memcpy

calling memcpy immediately after memset with the same region of memory
makes memset redundant.

Signed-off-by: Himanshu Jha <himanshujha199640@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 1cae95e2b13a..03bec765b03d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -143,7 +143,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	int num_queues = 0;
 	struct queue *cur;
 
-	memset(&q_properties, 0, sizeof(struct queue_properties));
 	memcpy(&q_properties, properties, sizeof(struct queue_properties));
 	q = NULL;
 	kq = NULL;
-- 
cgit 


From 1fabbf781167052f4480bdcc627378a27e78a559 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Sat, 2 Sep 2017 15:00:25 +0300
Subject: drm/amdkfd: pass queue's mqd when destroying mqd

In VI, the destroy mqd function needs to inquire fields present in the mqd
structure. That's why we need to pass it to that function instead of NULL.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 681b639f5133..0649dd43e780 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -183,7 +183,7 @@ static void uninitialize(struct kernel_queue *kq)
 {
 	if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
 		kq->mqd->destroy_mqd(kq->mqd,
-					NULL,
+					kq->queue->mqd,
 					false,
 					QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
 					kq->queue->pipe,
-- 
cgit 


From 3664847d95e60a9a943858b7800f8484669740fc Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Fri, 25 Aug 2017 10:40:02 -0700
Subject: md/raid5: fix a race condition in stripe batch

We have a race condition in below scenario, say have 3 continuous stripes, sh1,
sh2 and sh3, sh1 is the stripe_head of sh2 and sh3:

CPU1				CPU2				CPU3
handle_stripe(sh3)
				stripe_add_to_batch_list(sh3)
				-> lock(sh2, sh3)
				-> lock batch_lock(sh1)
				-> add sh3 to batch_list of sh1
				-> unlock batch_lock(sh1)
								clear_batch_ready(sh1)
								-> lock(sh1) and batch_lock(sh1)
								-> clear STRIPE_BATCH_READY for all stripes in batch_list
								-> unlock(sh1) and batch_lock(sh1)
->clear_batch_ready(sh3)
-->test_and_clear_bit(STRIPE_BATCH_READY, sh3)
--->return 0 as sh->batch == NULL
				-> sh3->batch_head = sh1
				-> unlock (sh2, sh3)

In CPU1, handle_stripe will continue handle sh3 even it's in batch stripe list
of sh1. By moving sh3->batch_head assignment in to batch_lock, we make it
impossible to clear STRIPE_BATCH_READY before batch_head is set.

Thanks Stephane for helping debug this tricky issue.

Reported-and-tested-by: Stephane Thiell <sthiell@stanford.edu>
Cc: stable@vger.kernel.org (v4.1+)
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 049a958d3c1e..90414a4e0d49 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -811,6 +811,14 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
 			spin_unlock(&head->batch_head->batch_lock);
 			goto unlock_out;
 		}
+		/*
+		 * We must assign batch_head of this stripe within the
+		 * batch_lock, otherwise clear_batch_ready of batch head
+		 * stripe could clear BATCH_READY bit of this stripe and
+		 * this stripe->batch_head doesn't get assigned, which
+		 * could confuse clear_batch_ready for this stripe
+		 */
+		sh->batch_head = head->batch_head;
 
 		/*
 		 * at this point, head's BATCH_READY could be cleared, but we
@@ -818,8 +826,6 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
 		 */
 		list_add(&sh->batch_list, &head->batch_list);
 		spin_unlock(&head->batch_head->batch_lock);
-
-		sh->batch_head = head->batch_head;
 	} else {
 		head->batch_head = head;
 		sh->batch_head = head->batch_head;
-- 
cgit 


From 184a09eb9a2fe425e49c9538f1604b05ed33cfef Mon Sep 17 00:00:00 2001
From: Dennis Yang <dennisyang@qnap.com>
Date: Wed, 6 Sep 2017 11:02:35 +0800
Subject: md/raid5: preserve STRIPE_ON_UNPLUG_LIST in break_stripe_batch_list

In release_stripe_plug(), if a stripe_head has its STRIPE_ON_UNPLUG_LIST
set, it indicates that this stripe_head is already in the raid5_plug_cb
list and release_stripe() would be called instead to drop a reference
count. Otherwise, the STRIPE_ON_UNPLUG_LIST bit would be set for this
stripe_head and it will get queued into the raid5_plug_cb list.

Since break_stripe_batch_list() did not preserve STRIPE_ON_UNPLUG_LIST,
A stripe could be re-added to plug list while it is still on that list
in the following situation. If stripe_head A is added to another
stripe_head B's batch list, in this case A will have its
batch_head != NULL and be added into the plug list. After that,
stripe_head B gets handled and called break_stripe_batch_list() to
reset all the batched stripe_head(including A which is still on
the plug list)'s state and reset their batch_head to NULL.
Before the plug list gets processed, if there is another write request
comes in and get stripe_head A, A will have its batch_head == NULL
(cleared by calling break_stripe_batch_list() on B) and be added to
plug list once again.

Signed-off-by: Dennis Yang <dennisyang@qnap.com>
Cc: stable@vger.kernel.org (v4.1+)
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 90414a4e0d49..34c01e83395a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4605,7 +4605,8 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
 
 		set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
 					    (1 << STRIPE_PREREAD_ACTIVE) |
-					    (1 << STRIPE_DEGRADED)),
+					    (1 << STRIPE_DEGRADED) |
+					    (1 << STRIPE_ON_UNPLUG_LIST)),
 			      head_sh->state & (1 << STRIPE_INSYNC));
 
 		sh->check_state = head_sh->check_state;
-- 
cgit 


From 01f5bbd17a8066b58dba9b5049fad504bce67322 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 7 Sep 2017 10:40:35 +0300
Subject: mmc: block: Fix incorrectly initialized requests

mmc_init_request() depends on card->bouncesz so it must be calculated
before blk_init_allocated_queue() starts allocating requests.

Reported-by: Seraphime Kirkovski <kirkseraph@gmail.com>
Fixes: 304419d8a7e9 ("mmc: core: Allocate per-request data using the..")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Seraphime Kirkovski <kirkseraph@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Pavel Machek <pavel@ucw.cz>
---
 drivers/mmc/core/queue.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index affa7370ba82..74c663b1c0a7 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -242,6 +242,12 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 	if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
 		limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT;
 
+	/*
+	 * mmc_init_request() depends on card->bouncesz so it must be calculated
+	 * before blk_init_allocated_queue() starts allocating requests.
+	 */
+	card->bouncesz = mmc_queue_calc_bouncesz(host);
+
 	mq->card = card;
 	mq->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!mq->queue)
@@ -265,7 +271,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 	if (mmc_can_erase(card))
 		mmc_queue_setup_discard(mq->queue, card);
 
-	card->bouncesz = mmc_queue_calc_bouncesz(host);
 	if (card->bouncesz) {
 		blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512);
 		blk_queue_max_segments(mq->queue, card->bouncesz / 512);
-- 
cgit 


From b4f146f5faf85d21f5cd414531c3ced9c1b61e3c Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 5 Sep 2017 20:27:50 +0200
Subject: mmc: host: fix typo after MMC_DEBUG move

MMC_DEBUG was moved and one letter got strangely capitalized.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 02179ed2a40d..8c15637178ff 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -5,7 +5,7 @@
 comment "MMC/SD/SDIO Host Controller Drivers"
 
 config MMC_DEBUG
-	bool "MMC host drivers debugginG"
+	bool "MMC host drivers debugging"
 	depends on MMC != n
 	help
 	  This is an option for use by developers; most people should
-- 
cgit 


From b917c6d18c031cfce11ec35139033845f205b1d0 Mon Sep 17 00:00:00 2001
From: Jan Glauber <jglauber@cavium.com>
Date: Thu, 7 Sep 2017 13:24:17 +0200
Subject: mmc: cavium: Fix use-after-free in of_platform_device_destroy

KASAN reported the following:

[   19.338655] ==================================================================
[   19.345946] BUG: KASAN: use-after-free in of_platform_device_destroy+0x88/0x100
[   19.345966] Read of size 8 at addr fffffe01aa6f1468 by task systemd-udevd/264

[   19.345983] CPU: 1 PID: 264 Comm: systemd-udevd Not tainted 4.13.0-jang+ #737
[   19.345989] Hardware name: Cavium ThunderX CN81XX board (DT)
[   19.345995] Call trace:
[   19.346013] [<fffffc800808b1b0>] dump_backtrace+0x0/0x368
[   19.346026] [<fffffc800808b6bc>] show_stack+0x24/0x30
[   19.346040] [<fffffc8008cbb944>] dump_stack+0xa4/0xc8
[   19.346057] [<fffffc80082c2870>] print_address_description+0x68/0x258
[   19.346070] [<fffffc80082c2d70>] kasan_report+0x238/0x2f8
[   19.346082] [<fffffc80082c14a8>] __asan_load8+0x88/0xb8
[   19.346098] [<fffffc8008aacee0>] of_platform_device_destroy+0x88/0x100
[   19.346131] [<fffffc8000e02fa4>] thunder_mmc_probe+0x314/0x550 [thunderx_mmc]
[   19.346147] [<fffffc800879d560>] pci_device_probe+0x158/0x1f8
[   19.346162] [<fffffc800886e53c>] driver_probe_device+0x394/0x5f8
[   19.346174] [<fffffc800886e8f4>] __driver_attach+0x154/0x158
[   19.346185] [<fffffc800886b12c>] bus_for_each_dev+0xdc/0x140
[   19.346196] [<fffffc800886d9f8>] driver_attach+0x38/0x48
[   19.346207] [<fffffc800886d148>] bus_add_driver+0x290/0x3c8
[   19.346219] [<fffffc800886fc5c>] driver_register+0xbc/0x1a0
[   19.346232] [<fffffc800879b78c>] __pci_register_driver+0xc4/0xd8
[   19.346260] [<fffffc8000e80024>] thunder_mmc_driver_init+0x24/0x10000 [thunderx_mmc]
[   19.346273] [<fffffc8008083a80>] do_one_initcall+0x98/0x1c0
[   19.346289] [<fffffc8008177b54>] do_init_module+0xe0/0x2cc
[   19.346303] [<fffffc8008175cf0>] load_module+0x3238/0x35c0
[   19.346318] [<fffffc8008176438>] SyS_finit_module+0x190/0x1a0
[   19.346329] [<fffffc80080834a0>] __sys_trace_return+0x0/0x4

This is caused by:

  platform_device_register()
   -> platform_device_unregister(to_platform_device(dev))
	freeing struct device
   -> of_node_clear_flag(dev->of_node, ...)
	writing to the freed device

The issue is solved by increasing the reference count before calling
of_platform_device_destroy() so freeing the device is postponed after
the call.

Fixes: 8fb83b142823 ("mmc: cavium: Fix probing race with regulator")
Signed-off-by: Jan Glauber <jglauber@cavium.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/cavium-thunderx.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c
index b9cc95998799..eee08d81b242 100644
--- a/drivers/mmc/host/cavium-thunderx.c
+++ b/drivers/mmc/host/cavium-thunderx.c
@@ -7,6 +7,7 @@
  *
  * Copyright (C) 2016 Cavium Inc.
  */
+#include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/mmc/mmc.h>
@@ -149,8 +150,11 @@ error:
 	for (i = 0; i < CAVIUM_MAX_MMC; i++) {
 		if (host->slot[i])
 			cvm_mmc_of_slot_remove(host->slot[i]);
-		if (host->slot_pdev[i])
+		if (host->slot_pdev[i]) {
+			get_device(&host->slot_pdev[i]->dev);
 			of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL);
+			put_device(&host->slot_pdev[i]->dev);
+		}
 	}
 	clk_disable_unprepare(host->clk);
 	return ret;
-- 
cgit 


From bfaa1ce809bbcd12b1399409ab1dbf0cdaba6e27 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 8 Sep 2017 15:13:33 +0100
Subject: drm/amdkfd: check for null dev to avoid a null pointer dereference

The call to kfd_device_by_id can potentially return null, so check that
dev is null and return with -EINVAL to avoid a null pointer dereference.

Detected by CoverityScan CID#1454629 ("Dereference null return value")

Fixes: 5d71dbc3a588 ("drm/amdkfd: Implement image tiling mode support v2")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e4a8c2e52cb2..660b3fbade41 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -892,6 +892,8 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
 	int err = 0;
 
 	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
 
 	dev->kfd2kgd->get_tile_config(dev->kgd, &config);
 
-- 
cgit 


From b0e07da3f5c8d069d186a7983ff64eaebf2ea230 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Mon, 11 Sep 2017 11:39:50 +0200
Subject: qxl: fix primary surface handling

The atomic conversion of the qxl driver didn't got the primary surface
handling completely right.  It works in the common simple cases, but
fails for example when changing the display resolution using xrandr or
in multihead setups.

The rules are simple:  There is one primary surface.  Before defining a
new one you have to destroy the old one.

This patch makes qxl_primary_atomic_update() destroy the primary surface
before defining a new one.  It fixes is_primary flag updates.  It adds
is_primary checks so we don't try to update the primary surface in case
it already has the state we want it being in.

Fixes: 3538e80a869b ("drm: qxl: Atomic phase 1: Implement mode_set_nofb")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102338
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196777
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170911093950.22401-1-kraxel@redhat.com
---
 drivers/gpu/drm/qxl/qxl_display.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 03fe182203ce..7babdd8f20fd 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -512,23 +512,25 @@ static void qxl_primary_atomic_update(struct drm_plane *plane,
 	    .y2 = qfb->base.height
 	};
 
-	if (!old_state->fb) {
-		qxl_io_log(qdev,
-			   "create primary fb: %dx%d,%d,%d\n",
-			   bo->surf.width, bo->surf.height,
-			   bo->surf.stride, bo->surf.format);
+	if (old_state->fb) {
+		qfb_old = to_qxl_framebuffer(old_state->fb);
+		bo_old = gem_to_qxl_bo(qfb_old->obj);
+	} else {
+		bo_old = NULL;
+	}
 
-		qxl_io_create_primary(qdev, 0, bo);
-		bo->is_primary = true;
+	if (bo == bo_old)
 		return;
 
-	} else {
-		qfb_old = to_qxl_framebuffer(old_state->fb);
-		bo_old = gem_to_qxl_bo(qfb_old->obj);
+	if (bo_old && bo_old->is_primary) {
+		qxl_io_destroy_primary(qdev);
 		bo_old->is_primary = false;
 	}
 
-	bo->is_primary = true;
+	if (!bo->is_primary) {
+		qxl_io_create_primary(qdev, 0, bo);
+		bo->is_primary = true;
+	}
 	qxl_draw_dirty_fb(qdev, qfb, bo, 0, 0, &norect, 1, 1);
 }
 
@@ -537,13 +539,15 @@ static void qxl_primary_atomic_disable(struct drm_plane *plane,
 {
 	struct qxl_device *qdev = plane->dev->dev_private;
 
-	if (old_state->fb)
-	{	struct qxl_framebuffer *qfb =
+	if (old_state->fb) {
+		struct qxl_framebuffer *qfb =
 			to_qxl_framebuffer(old_state->fb);
 		struct qxl_bo *bo = gem_to_qxl_bo(qfb->obj);
 
-		qxl_io_destroy_primary(qdev);
-		bo->is_primary = false;
+		if (bo->is_primary) {
+			qxl_io_destroy_primary(qdev);
+			bo->is_primary = false;
+		}
 	}
 }
 
-- 
cgit 


From bf2afee14e07de16d3cafc67edbfc2a3cc65e4bc Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Fri, 8 Sep 2017 10:37:35 +1000
Subject: cifs: check rsp for NULL before dereferencing in SMB2_open

In SMB2_open there are several paths where the SendReceive2
call will return an error before it sets rsp_iov.iov_base
thus leaving iov_base uninitialized.

Thus we need to check rsp before we dereference it in
the call to get_rfc1002_length().

A report of this issue was previously reported in
http://www.spinics.net/lists/linux-cifs/msg12846.html

RH-bugzilla : 1476151

Version 2 :
* Lets properly initialize rsp_iov before we use it.

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>.
Signed-off-by: Steve French <smfrench@gmail.com>
Reported-by: Xiaoli Feng <xifeng@redhat.com>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/smb2pdu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5531e7ee1210..69a751b038ab 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1634,7 +1634,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 	struct cifs_tcon *tcon = oparms->tcon;
 	struct cifs_ses *ses = tcon->ses;
 	struct kvec iov[4];
-	struct kvec rsp_iov;
+	struct kvec rsp_iov = {NULL, 0};
 	int resp_buftype;
 	int uni_path_len;
 	__le16 *copy_path = NULL;
@@ -1763,7 +1763,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 
 	if (rc != 0) {
 		cifs_stats_fail_inc(tcon, SMB2_CREATE_HE);
-		if (err_buf)
+		if (err_buf && rsp)
 			*err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4,
 					   GFP_KERNEL);
 		goto creat_exit;
-- 
cgit 


From 5a642e6bc49f59922e19ebd639e74f72753fc77b Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 8 Sep 2017 16:24:32 +0200
Subject: etnaviv: fix submit error path

If the gpu submit fails, bail out to avoid accessing a potentially
unititalized fence.

CC: stable@vger.kernel.org #4.12+
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 6463fc2c736f..b95362186f9c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -445,8 +445,10 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	cmdbuf->user_size = ALIGN(args->stream_size, 8);
 
 	ret = etnaviv_gpu_submit(gpu, submit, cmdbuf);
-	if (ret == 0)
-		cmdbuf = NULL;
+	if (ret)
+		goto out;
+
+	cmdbuf = NULL;
 
 	if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) {
 		/*
-- 
cgit 


From 518417525f3652c12fb5fad6da4ade66c0072fa3 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 11 Sep 2017 15:29:31 +0200
Subject: etnaviv: fix gem object list corruption

All manipulations of the gem_object list need to be protected by
the list mutex, as GEM objects can be created and freed in parallel.
This fixes a kernel memory corruption.

CC: stable@vger.kernel.org
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 9a3bea738330..87b95eeedd9e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -551,12 +551,15 @@ static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
 void etnaviv_gem_free_object(struct drm_gem_object *obj)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+	struct etnaviv_drm_private *priv = obj->dev->dev_private;
 	struct etnaviv_vram_mapping *mapping, *tmp;
 
 	/* object should not be active */
 	WARN_ON(is_active(etnaviv_obj));
 
+	mutex_lock(&priv->gem_lock);
 	list_del(&etnaviv_obj->gem_node);
+	mutex_unlock(&priv->gem_lock);
 
 	list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list,
 				 obj_node) {
-- 
cgit 


From fc3100d64f0ae383ae8d845989103da06d62763b Mon Sep 17 00:00:00 2001
From: Pu Hou <bjhoupu@linux.vnet.ibm.com>
Date: Tue, 5 Sep 2017 05:17:24 +0200
Subject: s390/perf: fix bug when creating per-thread event

A per-thread event could not be created correctly like below:

    perf record --per-thread -e rB0000 -- sleep 1
    Error:
    The sys_perf_event_open() syscall returned with 19 (No such device) for event (rB0000).
    /bin/dmesg may provide additional information.
    No CONFIG_PERF_EVENTS=y kernel support configured?

This bug was introduced by:

    commit c311c797998c1e70eade463dd60b843da4f1a203
    Author: Alexey Dobriyan <adobriyan@gmail.com>
    Date:   Mon May 8 15:56:15 2017 -0700

    cpumask: make "nr_cpumask_bits" unsigned

If a per-thread event is not attached to any CPU, the cpu field
in struct perf_event is -1. The above commit converts the CPU number
to unsigned int, which result in an illegal CPU number.

Fixes: c311c797998c ("cpumask: make "nr_cpumask_bits" unsigned")
Cc: <stable@vger.kernel.org> # v4.12+
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Pu Hou <bjhoupu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index c1bf75ffb875..7e1e40323b78 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -823,9 +823,12 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 	}
 
 	/* Check online status of the CPU to which the event is pinned */
-	if ((unsigned int)event->cpu >= nr_cpumask_bits ||
-	    (event->cpu >= 0 && !cpu_online(event->cpu)))
-		return -ENODEV;
+	if (event->cpu >= 0) {
+		if ((unsigned int)event->cpu >= nr_cpumask_bits)
+			return -ENODEV;
+		if (!cpu_online(event->cpu))
+			return -ENODEV;
+	}
 
 	/* Force reset of idle/hv excludes regardless of what the
 	 * user requested.
-- 
cgit 


From 673514aff5d797b0cdb3f09097ae0e253b5667c3 Mon Sep 17 00:00:00 2001
From: Stefan Haberland <sth@linux.vnet.ibm.com>
Date: Tue, 12 Sep 2017 17:22:42 +0200
Subject: s390/dasd: fix race during dasd initialization

Fix a panic in blk_mq_hctx_has_pending() that is caused by a racy call to
blk_mq_run_hw_queues in a dasd function that might get called with the
request queue not yet initialized during initialization.

Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index ea19b4ff87a2..29f35e29d480 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1644,7 +1644,9 @@ void dasd_generic_handle_state_change(struct dasd_device *device)
 	dasd_schedule_device_bh(device);
 	if (device->block) {
 		dasd_schedule_block_bh(device->block);
-		blk_mq_run_hw_queues(device->block->request_queue, true);
+		if (device->block->request_queue)
+			blk_mq_run_hw_queues(device->block->request_queue,
+					     true);
 	}
 }
 EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change);
@@ -3759,7 +3761,9 @@ int dasd_generic_path_operational(struct dasd_device *device)
 	dasd_schedule_device_bh(device);
 	if (device->block) {
 		dasd_schedule_block_bh(device->block);
-		blk_mq_run_hw_queues(device->block->request_queue, true);
+		if (device->block->request_queue)
+			blk_mq_run_hw_queues(device->block->request_queue,
+					     true);
 		}
 
 	if (!device->stopped)
@@ -4025,7 +4029,9 @@ int dasd_generic_restore_device(struct ccw_device *cdev)
 
 	if (device->block) {
 		dasd_schedule_block_bh(device->block);
-		blk_mq_run_hw_queues(device->block->request_queue, true);
+		if (device->block->request_queue)
+			blk_mq_run_hw_queues(device->block->request_queue,
+					     true);
 	}
 
 	clear_bit(DASD_FLAG_SUSPENDED, &device->flags);
-- 
cgit 


From 4cf97582b46f123a4b7cd88d999f1806c2eb4093 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Mon, 11 Sep 2017 17:43:56 +0200
Subject: drm/amdgpu: revert tile table update for oland
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Several users have complained that the tile table update broke Oland
support. Despite several attempts to fix it, the root cause is still
unknown at this point and no solution is available. As it is not
acceptable to leave a known regression breaking a major functionality
in the kernel for several releases, let's just reverse this
optimization for now. It can be implemented again later if and only
if the breakage is understood and fixed.

As there were no complaints for Hainan so far, only the Oland part of
the offending commit is reverted. Optimization is preserved on
Hainan, so this commit isn't an actual revert of the original.

This fixes bug #194761:
https://bugzilla.kernel.org/show_bug.cgi?id=194761

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Jean Delvare <jdelvare@suse.de>
Fixes: f8d9422ef80c ("drm/amdgpu: update tile table for oland/hainan")
Cc: Flora Cui <Flora.Cui@amd.com>
Cc: Junwei Zhang <Jerry.Zhang@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Marek Olšák <maraeo@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 189 +++++++++++++++++++++++++++++++++-
 1 file changed, 188 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index d228f5a99044..dbbe986f90f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -636,7 +636,194 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
 				NUM_BANKS(ADDR_SURF_2_BANK);
 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
 			WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
-	} else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) {
+	} else if (adev->asic_type == CHIP_OLAND) {
+		tilemode[0] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[1] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[2] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[3] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[4] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[5] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(split_equal_to_row_size) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[6] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(split_equal_to_row_size) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[7] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(split_equal_to_row_size) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[8] =   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[9] =   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[10] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[11] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[12] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[13] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[14] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[15] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[16] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[17] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(split_equal_to_row_size) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[21] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[22] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[23] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[24] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[25] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				NUM_BANKS(ADDR_SURF_8_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1);
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
+	} else if (adev->asic_type == CHIP_HAINAN) {
 		tilemode[0] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
 				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 				PIPE_CONFIG(ADDR_SURF_P2) |
-- 
cgit 


From b468b6a4969f9bdddb31d484f151bfa03fbee767 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 7 Sep 2017 13:54:36 +0200
Subject: scsi: scsi_transport_fc: fix NULL pointer dereference in
 fc_bsg_job_timeout

bsg-lib now embeddeds the job structure into the request, and
req->special can't be used anymore.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_transport_fc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 3c6bc0081fcb..ba9d70f8a6a1 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3571,7 +3571,7 @@ fc_vport_sched_delete(struct work_struct *work)
 static enum blk_eh_timer_return
 fc_bsg_job_timeout(struct request *req)
 {
-	struct bsg_job *job = (void *) req->special;
+	struct bsg_job *job = blk_mq_rq_to_pdu(req);
 	struct Scsi_Host *shost = fc_bsg_to_shost(job);
 	struct fc_rport *rport = fc_bsg_to_rport(job);
 	struct fc_internal *i = to_fc_internal(shost->transportt);
-- 
cgit 


From fd536998314a9dc54f384fa16287321edb81602a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 11 Sep 2017 22:00:57 +0200
Subject: scsi: acornscsi: fix build error

A cleanup patch introduced a fatal typo from inbalanced curly braces:

drivers/scsi/arm/acornscsi.c: In function 'acornscsi_host_reset':
drivers/scsi/arm/acornscsi.c:2773:1: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement]
drivers/scsi/arm/acornscsi.c:2795:12: error: invalid storage class for function 'acornscsi_show_info'
 static int acornscsi_show_info(struct seq_file *m, struct Scsi_Host *instance)

The same patch incorrectly changed the argument type of the reset
handler, as shown by this warning:

drivers/scsi/arm/acornscsi.c:2888:27: error: initialization of 'int (*)(struct scsi_cmnd *)' from incompatible pointer type 'int (*)(struct Scsi_Host *)' [-Werror=incompatible-pointer-types]
  .eh_host_reset_handler = acornscsi_host_reset,

This removes one the extraneous opening brace and reverts the
argument type change.

[mkp: fixed checkpatch complaint]

Fixes: 74fa80ee3fae ("scsi: acornscsi: move bus reset to host reset")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/arm/acornscsi.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c
index 690816f3c6af..421fe869a11e 100644
--- a/drivers/scsi/arm/acornscsi.c
+++ b/drivers/scsi/arm/acornscsi.c
@@ -2725,9 +2725,9 @@ int acornscsi_abort(struct scsi_cmnd *SCpnt)
  * Params   : SCpnt  - command causing reset
  * Returns  : one of SCSI_RESET_ macros
  */
-int acornscsi_host_reset(struct Scsi_Host *shpnt)
+int acornscsi_host_reset(struct scsi_cmnd *SCpnt)
 {
-	AS_Host *host = (AS_Host *)shpnt->hostdata;
+	AS_Host *host = (AS_Host *)SCpnt->device->host->hostdata;
 	struct scsi_cmnd *SCptr;
     
     host->stats.resets += 1;
@@ -2741,7 +2741,7 @@ int acornscsi_host_reset(struct Scsi_Host *shpnt)
 
 	printk(KERN_WARNING "acornscsi_reset: ");
 	print_sbic_status(asr, ssr, host->scsi.phase);
-	for (devidx = 0; devidx < 9; devidx ++) {
+	for (devidx = 0; devidx < 9; devidx++)
 	    acornscsi_dumplog(host, devidx);
     }
 #endif
-- 
cgit 


From e785fa0a164aa11001cba931367c7f94ffaff888 Mon Sep 17 00:00:00 2001
From: Vladis Dronov <vdronov@redhat.com>
Date: Wed, 13 Sep 2017 00:21:21 +0200
Subject: nl80211: check for the required netlink attributes presence

nl80211_set_rekey_data() does not check if the required attributes
NL80211_REKEY_DATA_{REPLAY_CTR,KEK,KCK} are present when processing
NL80211_CMD_SET_REKEY_OFFLOAD request. This request can be issued by
users with CAP_NET_ADMIN privilege and may result in NULL dereference
and a system crash. Add a check for the required attributes presence.
This patch is based on the patch by bo Zhang.

This fixes CVE-2017-12153.

References: https://bugzilla.redhat.com/show_bug.cgi?id=1491046
Fixes: e5497d766ad ("cfg80211/nl80211: support GTK rekey offload")
Cc: <stable@vger.kernel.org> # v3.1-rc1
Reported-by: bo Zhang <zhangbo5891001@gmail.com>
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0df8023f480b..fbd5593e88cb 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -10903,6 +10903,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		return err;
 
+	if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] ||
+	    !tb[NL80211_REKEY_DATA_KCK])
+		return -EINVAL;
 	if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN)
 		return -ERANGE;
 	if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN)
-- 
cgit 


From f7f3dc00f61261cdc9ccd8b886f21bc4dffd6fd9 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 7 Sep 2017 19:08:21 +0200
Subject: x86/cpu/AMD: Fix erratum 1076 (CPB bit)

CPUID Fn8000_0007_EDX[CPB] is wrongly 0 on models up to B1. But they do
support CPB (AMD's Core Performance Boosting cpufreq CPU feature), so fix that.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sherry Hurwitz <sherry.hurwitz@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170907170821.16021-1-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/amd.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9862e2cd6d93..d58184b7cd44 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -763,6 +763,16 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
 	}
 }
 
+static void init_amd_zn(struct cpuinfo_x86 *c)
+{
+	/*
+	 * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
+	 * all up to and including B1.
+	 */
+	if (c->x86_model <= 1 && c->x86_mask <= 1)
+		set_cpu_cap(c, X86_FEATURE_CPB);
+}
+
 static void init_amd(struct cpuinfo_x86 *c)
 {
 	early_init_amd(c);
@@ -791,6 +801,7 @@ static void init_amd(struct cpuinfo_x86 *c)
 	case 0x10: init_amd_gh(c); break;
 	case 0x12: init_amd_ln(c); break;
 	case 0x15: init_amd_bd(c); break;
+	case 0x17: init_amd_zn(c); break;
 	}
 
 	/* Enable workaround for FXSAVE leak */
-- 
cgit 


From 0998b7a0befdf6e734032895ee639a5e6f88cc3f Mon Sep 17 00:00:00 2001
From: Martin Kepplinger <martink@posteo.de>
Date: Thu, 14 Sep 2017 08:01:38 +0200
Subject: objtool: Fix memory leak in elf_create_rela_section()

Let's free the allocated char array 'relaname' before returning,
in order to avoid leaking memory.

Signed-off-by: Martin Kepplinger <martink@posteo.de>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: mingo.kernel.org@gmail.com
Link: http://lkml.kernel.org/r/20170914060138.26472-1-martink@posteo.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/elf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 6e9f980a7d26..1e89a5f8bfc9 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -508,6 +508,7 @@ struct section *elf_create_rela_section(struct elf *elf, struct section *base)
 	strcat(relaname, base->name);
 
 	sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0);
+	free(relaname);
 	if (!sec)
 		return NULL;
 
-- 
cgit 


From df968c9329f6e5cf3596a0a54adb6f749747a746 Mon Sep 17 00:00:00 2001
From: Petr Vandrovec <petr@vandrovec.name>
Date: Fri, 15 Sep 2017 02:15:05 -0500
Subject: objtool: Do not retrieve data from empty sections

Binutils 2.29-9 in Debian return an error when elf_getdata is invoked
on empty section (.note.GNU-stack in all kernel files), causing
immediate failure of kernel build with:

  elf_getdata: can't manipulate null section

As nothing is done with sections that have zero size, just do not
retrieve their data at all.

Signed-off-by: Petr Vandrovec <petr@vandrovec.name>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2ce30a44349065b70d0f00e71e286dc0cbe745e6.1505459652.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/elf.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 1e89a5f8bfc9..b4cd8bc62521 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -175,19 +175,20 @@ static int read_sections(struct elf *elf)
 			return -1;
 		}
 
-		sec->data = elf_getdata(s, NULL);
-		if (!sec->data) {
-			WARN_ELF("elf_getdata");
-			return -1;
-		}
-
-		if (sec->data->d_off != 0 ||
-		    sec->data->d_size != sec->sh.sh_size) {
-			WARN("unexpected data attributes for %s", sec->name);
-			return -1;
+		if (sec->sh.sh_size != 0) {
+			sec->data = elf_getdata(s, NULL);
+			if (!sec->data) {
+				WARN_ELF("elf_getdata");
+				return -1;
+			}
+			if (sec->data->d_off != 0 ||
+			    sec->data->d_size != sec->sh.sh_size) {
+				WARN("unexpected data attributes for %s",
+				     sec->name);
+				return -1;
+			}
 		}
-
-		sec->len = sec->data->d_size;
+		sec->len = sec->sh.sh_size;
 	}
 
 	/* sanity check, one more call to elf_nextscn() should return NULL */
-- 
cgit 


From 97dab2ae7e8473a821f72a039ead0f36b12ba22d Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Fri, 15 Sep 2017 02:17:11 -0500
Subject: objtool: Fix object file corruption

Arnd Bergmann reported that a randconfig build was failing with the
following link error:

  built-in.o: member arch/x86/kernel/time.o in archive is not an object

It turns out the link failed because the time.o file had been corrupted
by objtool:

  nm: arch/x86/kernel/time.o: File format not recognized

In certain rare cases, when a .o file's ORC table is very small, the
.data section size doesn't change because it's page aligned.  Because
all the existing sections haven't changed size, libelf doesn't detect
any section header changes, and so it doesn't update the section header
table properly.  Instead it writes junk in the section header entries
for the new ORC sections.

Make sure libelf properly updates the section header table by setting
the ELF_F_DIRTY flag in the top level elf struct.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 627fce14809b ("objtool: Add ORC unwind table generation")
Link: http://lkml.kernel.org/r/e650fd0f2d8a209d1409a9785deb101fdaed55fb.1505459813.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/elf.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index b4cd8bc62521..24460155c82c 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -563,6 +563,7 @@ int elf_write(struct elf *elf)
 	struct section *sec;
 	Elf_Scn *s;
 
+	/* Update section headers for changed sections: */
 	list_for_each_entry(sec, &elf->sections, list) {
 		if (sec->changed) {
 			s = elf_getscn(elf->elf, sec->idx);
@@ -570,13 +571,17 @@ int elf_write(struct elf *elf)
 				WARN_ELF("elf_getscn");
 				return -1;
 			}
-			if (!gelf_update_shdr (s, &sec->sh)) {
+			if (!gelf_update_shdr(s, &sec->sh)) {
 				WARN_ELF("gelf_update_shdr");
 				return -1;
 			}
 		}
 	}
 
+	/* Make sure the new section header entries get updated properly. */
+	elf_flagelf(elf->elf, ELF_C_SET, ELF_F_DIRTY);
+
+	/* Write all changes to the file. */
 	if (elf_update(elf->elf, ELF_C_WRITE) < 0) {
 		WARN_ELF("elf_update");
 		return -1;
-- 
cgit 


From 820608548737e315c6f93e3099b4e65bde062334 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 15 Sep 2017 11:55:27 -0400
Subject: drm/radeon: disable hard reset in hibernate for APUs

Fixes a hibernation regression on APUs.

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=191571
Fixes: 274ad65c9d02bdc (drm/radeon: hard reset r600 and newer GPU when hibernating.)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 997131d58c7f..ffc10cadcf34 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1663,7 +1663,7 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend,
 	radeon_agp_suspend(rdev);
 
 	pci_save_state(dev->pdev);
-	if (freeze && rdev->family >= CHIP_CEDAR) {
+	if (freeze && rdev->family >= CHIP_CEDAR && !(rdev->flags & RADEON_IS_IGP)) {
 		rdev->asic->asic_reset(rdev, true);
 		pci_restore_state(dev->pdev);
 	} else if (suspend) {
-- 
cgit 


From 9c95be0163a0a699ed7eda4727f485f8453580ef Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Wed, 13 Sep 2017 16:29:46 +0200
Subject: scsi: sd: Remove unnecessary condition in sd_read_block_limits()

After series of changes around WRITE_SAME and UNMAP setup we ended up
with leftover unnecessary condition. Remove it.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sd.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 11c1738c2100..fb9f8b5f4673 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2915,8 +2915,6 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
 				sd_config_discard(sdkp, SD_LBP_WS16);
 			else if (sdkp->lbpws10)
 				sd_config_discard(sdkp, SD_LBP_WS10);
-			else if (sdkp->lbpu && sdkp->max_unmap_blocks)
-				sd_config_discard(sdkp, SD_LBP_UNMAP);
 			else
 				sd_config_discard(sdkp, SD_LBP_DISABLE);
 		}
-- 
cgit 


From 4759df905a474d245752c9dc94288e779b8734dd Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Fri, 15 Sep 2017 14:05:15 +0200
Subject: scsi: sg: factor out sg_fill_request_table()

Factor out sg_fill_request_table() for better readability.

[mkp: typos, applied by hand]

Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sg.c | 61 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 35 insertions(+), 26 deletions(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index cf0e71db9e51..1acdb6e03999 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -828,6 +828,40 @@ static int max_sectors_bytes(struct request_queue *q)
 	return max_sectors << 9;
 }
 
+static void
+sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo)
+{
+	Sg_request *srp;
+	int val;
+	unsigned int ms;
+
+	val = 0;
+	list_for_each_entry(srp, &sfp->rq_list, entry) {
+		if (val > SG_MAX_QUEUE)
+			break;
+		memset(&rinfo[val], 0, SZ_SG_REQ_INFO);
+		rinfo[val].req_state = srp->done + 1;
+		rinfo[val].problem =
+			srp->header.masked_status &
+			srp->header.host_status &
+			srp->header.driver_status;
+		if (srp->done)
+			rinfo[val].duration =
+				srp->header.duration;
+		else {
+			ms = jiffies_to_msecs(jiffies);
+			rinfo[val].duration =
+				(ms > srp->header.duration) ?
+				(ms - srp->header.duration) : 0;
+		}
+		rinfo[val].orphan = srp->orphan;
+		rinfo[val].sg_io_owned = srp->sg_io_owned;
+		rinfo[val].pack_id = srp->header.pack_id;
+		rinfo[val].usr_ptr = srp->header.usr_ptr;
+		val++;
+	}
+}
+
 static long
 sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
 {
@@ -1012,38 +1046,13 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
 			return -EFAULT;
 		else {
 			sg_req_info_t *rinfo;
-			unsigned int ms;
 
 			rinfo = kmalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE,
 								GFP_KERNEL);
 			if (!rinfo)
 				return -ENOMEM;
 			read_lock_irqsave(&sfp->rq_list_lock, iflags);
-			val = 0;
-			list_for_each_entry(srp, &sfp->rq_list, entry) {
-				if (val >= SG_MAX_QUEUE)
-					break;
-				memset(&rinfo[val], 0, SZ_SG_REQ_INFO);
-				rinfo[val].req_state = srp->done + 1;
-				rinfo[val].problem =
-					srp->header.masked_status &
-					srp->header.host_status &
-					srp->header.driver_status;
-				if (srp->done)
-					rinfo[val].duration =
-						srp->header.duration;
-				else {
-					ms = jiffies_to_msecs(jiffies);
-					rinfo[val].duration =
-						(ms > srp->header.duration) ?
-						(ms - srp->header.duration) : 0;
-				}
-				rinfo[val].orphan = srp->orphan;
-				rinfo[val].sg_io_owned = srp->sg_io_owned;
-				rinfo[val].pack_id = srp->header.pack_id;
-				rinfo[val].usr_ptr = srp->header.usr_ptr;
-				val++;
-			}
+			sg_fill_request_table(sfp, rinfo);
 			read_unlock_irqrestore(&sfp->rq_list_lock, iflags);
 			result = __copy_to_user(p, rinfo,
 						SZ_SG_REQ_INFO * SG_MAX_QUEUE);
-- 
cgit 


From 3e0097499839e0fe3af380410eababe5a47c4cf9 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Fri, 15 Sep 2017 14:05:16 +0200
Subject: scsi: sg: fixup infoleak when using SG_GET_REQUEST_TABLE

When calling SG_GET_REQUEST_TABLE ioctl only a half-filled table is
returned; the remaining part will then contain stale kernel memory
information.  This patch zeroes out the entire table to avoid this
issue.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sg.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 1acdb6e03999..0419c2298eab 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -839,7 +839,6 @@ sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo)
 	list_for_each_entry(srp, &sfp->rq_list, entry) {
 		if (val > SG_MAX_QUEUE)
 			break;
-		memset(&rinfo[val], 0, SZ_SG_REQ_INFO);
 		rinfo[val].req_state = srp->done + 1;
 		rinfo[val].problem =
 			srp->header.masked_status &
@@ -1047,8 +1046,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
 		else {
 			sg_req_info_t *rinfo;
 
-			rinfo = kmalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE,
-								GFP_KERNEL);
+			rinfo = kzalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE,
+					GFP_KERNEL);
 			if (!rinfo)
 				return -ENOMEM;
 			read_lock_irqsave(&sfp->rq_list_lock, iflags);
-- 
cgit 


From 6c92f7dbf25c36f35320e4ae0b508676410bac04 Mon Sep 17 00:00:00 2001
From: Dave Carroll <david.carroll@microsemi.com>
Date: Fri, 15 Sep 2017 11:04:28 -0600
Subject: scsi: aacraid: Fix 2T+ drives on SmartIOC-2000

The logic for supporting large drives was previously tied to 4Kn support
for SmartIOC-2000. As SmartIOC-2000 does not support volumes using 4Kn
drives, use the intended option flag AAC_OPT_NEW_COMM_64 to determine
support for volumes greater than 2T.

Cc: <stable@vger.kernel.org>
Signed-off-by: Dave Carroll <david.carroll@microsemi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Raghava Aditya Renukunta <RaghavaAditya.Renukunta@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aacraid/aachba.c  | 12 ++++++------
 drivers/scsi/aacraid/aacraid.h |  5 +++++
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index a64285ab0728..af3e4d3f9735 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -699,13 +699,13 @@ static void _aac_probe_container1(void * context, struct fib * fibptr)
 	int status;
 
 	dresp = (struct aac_mount *) fib_data(fibptr);
-	if (!(fibptr->dev->supplement_adapter_info.supported_options2 &
-	    AAC_OPTION_VARIABLE_BLOCK_SIZE))
+	if (!aac_supports_2T(fibptr->dev)) {
 		dresp->mnt[0].capacityhigh = 0;
-	if ((le32_to_cpu(dresp->status) != ST_OK) ||
-	    (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) {
-		_aac_probe_container2(context, fibptr);
-		return;
+		if ((le32_to_cpu(dresp->status) == ST_OK) &&
+			(le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) {
+			_aac_probe_container2(context, fibptr);
+			return;
+		}
 	}
 	scsicmd = (struct scsi_cmnd *) context;
 
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 92fabf2b0c24..403a639574e5 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -2701,6 +2701,11 @@ static inline int aac_is_src(struct aac_dev *dev)
 	return 0;
 }
 
+static inline int aac_supports_2T(struct aac_dev *dev)
+{
+	return (dev->adapter_info.options & AAC_OPT_NEW_COMM_64);
+}
+
 char * get_container_type(unsigned type);
 extern int numacb;
 extern char aac_driver_version[];
-- 
cgit 


From 51ae253870f55e14ba2854ce9577ac2920efef0c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 15 Sep 2017 21:29:13 +0200
Subject: xen: x86: mark xen_find_pt_base as __init

gcc-4.6 causes a harmless link-time warning:

WARNING: vmlinux.o(.text.unlikely+0x48e): Section mismatch in reference from the function xen_find_pt_base() to the function .init.text:m2p()
The function xen_find_pt_base() references
the function __init m2p().
This is often because xen_find_pt_base lacks a __init
annotation or the annotation of m2p is wrong.

Newer compilers inline this function, so it never shows up, but marking
it __init is the right way to avoid the warning.

Fixes: 70e61199559a ("xen: move p2m list if conflicting with e820 map")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 arch/x86/xen/mmu_pv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 0422ee7e70b3..ddfeebc2b125 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2221,7 +2221,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
  * not the first page table in the page table pool.
  * Iterate through the initial page tables to find the real page table base.
  */
-static phys_addr_t xen_find_pt_base(pmd_t *pmd)
+static phys_addr_t __init xen_find_pt_base(pmd_t *pmd)
 {
 	phys_addr_t pt_base, paddr;
 	unsigned pmdidx;
-- 
cgit 


From fd7d56270b526ca3ed0c224362e3c64a0f86687a Mon Sep 17 00:00:00 2001
From: John Ogness <john.ogness@linutronix.de>
Date: Thu, 14 Sep 2017 11:42:17 +0200
Subject: fs/proc: Report eip/esp in /prod/PID/stat for coredumping

Commit 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in
/proc/PID/stat") stopped reporting eip/esp because it is
racy and dangerous for executing tasks. The comment adds:

    As far as I know, there are no use programs that make any
    material use of these fields, so just get rid of them.

However, existing userspace core-dump-handler applications (for
example, minicoredumper) are using these fields since they
provide an excellent cross-platform interface to these valuable
pointers. So that commit introduced a user space visible
regression.

Partially revert the change and make the readout possible for
tasks with the proper permissions and only if the target task
has the PF_DUMPCORE flag set.

Fixes: 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in> /proc/PID/stat")
Reported-by: Marco Felsch <marco.felsch@preh.de>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Tycho Andersen <tycho.andersen@canonical.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: stable@vger.kernel.org
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Linux API <linux-api@vger.kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/87poatfwg6.fsf@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/proc/array.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 88c355574aa0..525157ca25cb 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -62,6 +62,7 @@
 #include <linux/mman.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/numa_balancing.h>
+#include <linux/sched/task_stack.h>
 #include <linux/sched/task.h>
 #include <linux/sched/cputime.h>
 #include <linux/proc_fs.h>
@@ -421,7 +422,15 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		 * esp and eip are intentionally zeroed out.  There is no
 		 * non-racy way to read them without freezing the task.
 		 * Programs that need reliable values can use ptrace(2).
+		 *
+		 * The only exception is if the task is core dumping because
+		 * a program is not able to use ptrace(2) in that case. It is
+		 * safe because the task has stopped executing permanently.
 		 */
+		if (permitted && (task->flags & PF_DUMPCORE)) {
+			eip = KSTK_EIP(task);
+			esp = KSTK_ESP(task);
+		}
 	}
 
 	get_task_comm(tcomm, task);
-- 
cgit 


From 5c756065e47dc3e84b00577bd109f0a8e69903d7 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Wed, 6 Sep 2017 11:02:56 +0200
Subject: scsi: lpfc: Don't return internal MBXERR_ERROR code from probe
 function

Internal error codes happen to be positive, thus the PCI driver core
won't treat them as failure, but we do. This would cause a crash later
on as lpfc_pci_remove_one() is called (e.g. as shutdown function).

Fixes: 6d368e532168 ("[SCSI] lpfc 8.3.24: Add resource extent support")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Acked-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 7e7ae786121b..100bc4c8798d 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -6131,6 +6131,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 				"Extents and RPI headers enabled.\n");
 		}
 		mempool_free(mboxq, phba->mbox_mem_pool);
+		rc = -EIO;
 		goto out_free_bsmbx;
 	}
 
-- 
cgit 


From 4cb433e856bce5974ea035181cc8eb406496dccc Mon Sep 17 00:00:00 2001
From: Nikola Pajkovsky <npajkovsky@suse.cz>
Date: Wed, 13 Sep 2017 10:46:17 +0200
Subject: scsi: aacraid: error: testing array offset 'bus' after use

Fix possible indexing array of bound for &aac->hba_map[bus][cid], where
bus and cid boundary check happens later.

Fixes: 0d643ff3c353 ("scsi: aacraid: use aac_tmf_callback for reset fib")
Signed-off-by: Nikola Pajkovsky <npajkovsky@suse.cz>
Reviewed-by: Dave Carroll <david.carroll@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aacraid/linit.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 87cc4a93e637..62beb2596466 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -906,12 +906,14 @@ static int aac_eh_dev_reset(struct scsi_cmnd *cmd)
 
 	bus = aac_logical_to_phys(scmd_channel(cmd));
 	cid = scmd_id(cmd);
-	info = &aac->hba_map[bus][cid];
-	if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
-	    info->devtype != AAC_DEVTYPE_NATIVE_RAW)
+
+	if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS)
 		return FAILED;
 
-	if (info->reset_state > 0)
+	info = &aac->hba_map[bus][cid];
+
+	if (info->devtype != AAC_DEVTYPE_NATIVE_RAW &&
+	    info->reset_state > 0)
 		return FAILED;
 
 	pr_err("%s: Host adapter reset request. SCSI hang ?\n",
@@ -962,12 +964,14 @@ static int aac_eh_target_reset(struct scsi_cmnd *cmd)
 
 	bus = aac_logical_to_phys(scmd_channel(cmd));
 	cid = scmd_id(cmd);
-	info = &aac->hba_map[bus][cid];
-	if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
-	    info->devtype != AAC_DEVTYPE_NATIVE_RAW)
+
+	if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS)
 		return FAILED;
 
-	if (info->reset_state > 0)
+	info = &aac->hba_map[bus][cid];
+
+	if (info->devtype != AAC_DEVTYPE_NATIVE_RAW &&
+	    info->reset_state > 0)
 		return FAILED;
 
 	pr_err("%s: Host adapter reset request. SCSI hang ?\n",
-- 
cgit 


From 9cb067ef8a10bb13112e4d1c0ea996ec96527422 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 13 Sep 2017 23:29:03 +0200
Subject: genirq: Fix cpumask check in __irq_startup_managed()

The result of cpumask_any_and() is invalid when result greater or equal
nr_cpu_ids. The current check is checking for greater only. Fix it.

Fixes: 761ea388e8c4 ("genirq: Handle managed irqs gracefully in irq_startup()")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Chen Yu <yu.c.chen@intel.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Alok Kataria <akataria@vmware.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: stable@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Rui Zhang <rui.zhang@intel.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Len Brown <lenb@kernel.org>
Link: http://lkml.kernel.org/r/20170913213152.272283444@linutronix.de
---
 kernel/irq/chip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f51b7b6d2451..6fc89fd93824 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -202,7 +202,7 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
 
 	irqd_clr_managed_shutdown(d);
 
-	if (cpumask_any_and(aff, cpu_online_mask) > nr_cpu_ids) {
+	if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) {
 		/*
 		 * Catch code which fiddles with enable_irq() on a managed
 		 * and potentially shutdown IRQ. Chained interrupt
-- 
cgit 


From 6d57339890c9a9dfcd4ba4f8931b911b962968e3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 15 Sep 2017 17:08:16 +0200
Subject: dma-coherent: fix rmem_dma_device_init regression

My recent bug fix introduced another bug, which caused rmem_dma_device_init
to always fail, as rmem->priv is never set to anything.

This restores the previous behavior, calling dma_init_coherent_memory()
whenever ->priv is NULL.

Fixes: d35b0996fef3 ("dma-coherent: fix dma_declare_coherent_memory() logic error")
Reported-by: Roy Pledge <roy.pledge@nxp.com>
Tested-by: Roy Pledge <roy.pledge@nxp.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/base/dma-coherent.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index a39b2166b145..744f64f43454 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c
@@ -348,16 +348,15 @@ static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev)
 	struct dma_coherent_mem *mem = rmem->priv;
 	int ret;
 
-	if (!mem)
-		return -ENODEV;
-
-	ret = dma_init_coherent_memory(rmem->base, rmem->base, rmem->size,
-				       DMA_MEMORY_EXCLUSIVE, &mem);
-
-	if (ret) {
-		pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
-			&rmem->base, (unsigned long)rmem->size / SZ_1M);
-		return ret;
+	if (!mem) {
+		ret = dma_init_coherent_memory(rmem->base, rmem->base,
+					       rmem->size,
+					       DMA_MEMORY_EXCLUSIVE, &mem);
+		if (ret) {
+			pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
+				&rmem->base, (unsigned long)rmem->size / SZ_1M);
+			return ret;
+		}
 	}
 	mem->use_dev_dma_pfn_offset = true;
 	rmem->priv = mem;
-- 
cgit 


From ec11653b531099ddc08a8c7eb495ab83cae84e19 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Thu, 14 Sep 2017 14:51:20 -0500
Subject: CIFS/SMB3: Update documentation to reflect SMB3 and various changes

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 Documentation/filesystems/cifs/AUTHORS  |  5 ++
 Documentation/filesystems/cifs/README   | 81 ++++++++++++++++-----------------
 Documentation/filesystems/cifs/TODO     | 72 ++++++++++++++---------------
 Documentation/filesystems/cifs/cifs.txt | 24 ++++++----
 4 files changed, 91 insertions(+), 91 deletions(-)

diff --git a/Documentation/filesystems/cifs/AUTHORS b/Documentation/filesystems/cifs/AUTHORS
index c98800df677f..9f4f87e16240 100644
--- a/Documentation/filesystems/cifs/AUTHORS
+++ b/Documentation/filesystems/cifs/AUTHORS
@@ -41,6 +41,11 @@ Igor Mammedov (DFS support)
 Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code)
 Scott Lovenberg
 Pavel Shilovsky (for great work adding SMB2 support, and various SMB3 features)
+Aurelien Aptel (for DFS SMB3 work and some key bug fixes)
+Ronnie Sahlberg (for SMB3 xattr work and bug fixes)
+Shirish Pargaonkar (for many ACL patches over the years)
+Sachin Prabhu (many bug fixes, including for reconnect, copy offload and security)
+
 
 Test case and Bug Report contributors
 -------------------------------------
diff --git a/Documentation/filesystems/cifs/README b/Documentation/filesystems/cifs/README
index a54788405429..a9da51553ba3 100644
--- a/Documentation/filesystems/cifs/README
+++ b/Documentation/filesystems/cifs/README
@@ -1,10 +1,14 @@
-The CIFS VFS support for Linux supports many advanced network filesystem 
-features such as hierarchical dfs like namespace, hardlinks, locking and more.  
+This module supports the SMB3 family of advanced network protocols (as well
+as older dialects, originally called "CIFS" or SMB1).
+
+The CIFS VFS module for Linux supports many advanced network filesystem
+features such as hierarchical DFS like namespace, hardlinks, locking and more.
 It was designed to comply with the SNIA CIFS Technical Reference (which 
 supersedes the 1992 X/Open SMB Standard) as well as to perform best practice 
 practical interoperability with Windows 2000, Windows XP, Samba and equivalent 
 servers.  This code was developed in participation with the Protocol Freedom
-Information Foundation.
+Information Foundation.  CIFS and now SMB3 has now become a defacto
+standard for interoperating between Macs and Windows and major NAS appliances.
 
 Please see
   http://protocolfreedom.org/ and
@@ -15,30 +19,11 @@ for more details.
 For questions or bug reports please contact:
     sfrench@samba.org (sfrench@us.ibm.com) 
 
+See the project page at: https://wiki.samba.org/index.php/LinuxCIFS_utils
+
 Build instructions:
 ==================
-For Linux 2.4:
-1) Get the kernel source (e.g.from http://www.kernel.org)
-and download the cifs vfs source (see the project page
-at http://us1.samba.org/samba/Linux_CIFS_client.html)
-and change directory into the top of the kernel directory
-then patch the kernel (e.g. "patch -p1 < cifs_24.patch") 
-to add the cifs vfs to your kernel configure options if
-it has not already been added (e.g. current SuSE and UL
-users do not need to apply the cifs_24.patch since the cifs vfs is
-already in the kernel configure menu) and then
-mkdir linux/fs/cifs and then copy the current cifs vfs files from
-the cifs download to your kernel build directory e.g.
-
-	cp <cifs_download_dir>/fs/cifs/* to <kernel_download_dir>/fs/cifs
-	
-2) make menuconfig (or make xconfig)
-3) select cifs from within the network filesystem choices
-4) save and exit
-5) make dep
-6) make modules (or "make" if CIFS VFS not to be built as a module)
-
-For Linux 2.6:
+For Linux:
 1) Download the kernel (e.g. from http://www.kernel.org)
 and change directory into the top of the kernel directory tree
 (e.g. /usr/src/linux-2.5.73)
@@ -61,16 +46,13 @@ would simply type "make install").
 If you do not have the utility mount.cifs (in the Samba 3.0 source tree and on 
 the CIFS VFS web site) copy it to the same directory in which mount.smbfs and 
 similar files reside (usually /sbin).  Although the helper software is not  
-required, mount.cifs is recommended.  Eventually the Samba 3.0 utility program 
-"net" may also be helpful since it may someday provide easier mount syntax for
-users who are used to Windows e.g.
-	net use <mount point> <UNC name or cifs URL>
+required, mount.cifs is recommended.  Most distros include a "cifs-utils"
+package that includes this utility so it is recommended to install this.
+
 Note that running the Winbind pam/nss module (logon service) on all of your
 Linux clients is useful in mapping Uids and Gids consistently across the
 domain to the proper network user.  The mount.cifs mount helper can be
-trivially built from Samba 3.0 or later source e.g. by executing:
-
-	gcc samba/source/client/mount.cifs.c -o mount.cifs
+found at cifs-utils.git on git.samba.org
 
 If cifs is built as a module, then the size and number of network buffers
 and maximum number of simultaneous requests to one server can be configured.
@@ -79,6 +61,18 @@ Changing these from their defaults is not recommended. By executing modinfo
 on kernel/fs/cifs/cifs.ko the list of configuration changes that can be made
 at module initialization time (by running insmod cifs.ko) can be seen.
 
+Recommendations
+===============
+To improve security the SMB2.1 dialect or later (usually will get SMB3) is now
+the new default. To use old dialects (e.g. to mount Windows XP) use "vers=1.0"
+on mount (or vers=2.0 for Windows Vista).  Note that the CIFS (vers=1.0) is
+much older and less secure than the default dialect SMB3 which includes
+many advanced security features such as downgrade attack detection
+and encrypted shares and stronger signing and authentication algorithms.
+There are additional mount options that may be helpful for SMB3 to get
+improved POSIX behavior (NB: can use vers=3.0 to force only SMB3, never 2.1):
+     "mfsymlinks" and "cifsacl" and "idsfromsid"
+
 Allowing User Mounts
 ====================
 To permit users to mount and unmount over directories they own is possible
@@ -98,9 +92,7 @@ and execution of suid programs on the remote target would be enabled
 by default. This can be changed, as with nfs and other filesystems, 
 by simply specifying "nosuid" among the mount options. For user mounts 
 though to be able to pass the suid flag to mount requires rebuilding 
-mount.cifs with the following flag: 
- 
-        gcc samba/source/client/mount.cifs.c -DCIFS_ALLOW_USR_SUID -o mount.cifs
+mount.cifs with the following flag: CIFS_ALLOW_USR_SUID
 
 There is a corresponding manual page for cifs mounting in the Samba 3.0 and
 later source tree in docs/manpages/mount.cifs.8 
@@ -189,18 +181,18 @@ applications running on the same server as Samba.
 Use instructions:
 ================
 Once the CIFS VFS support is built into the kernel or installed as a module 
-(cifs.o), you can use mount syntax like the following to access Samba or Windows 
-servers: 
+(cifs.ko), you can use mount syntax like the following to access Samba or
+Mac or Windows servers:
 
-  mount -t cifs //9.53.216.11/e$ /mnt -o user=myname,pass=mypassword
+  mount -t cifs //9.53.216.11/e$ /mnt -o username=myname,password=mypassword
 
 Before -o the option -v may be specified to make the mount.cifs
 mount helper display the mount steps more verbosely.  
 After -o the following commonly used cifs vfs specific options
 are supported:
 
-  user=<username>
-  pass=<password>
+  username=<username>
+  password=<password>
   domain=<domain name>
   
 Other cifs mount options are described below.  Use of TCP names (in addition to
@@ -246,13 +238,16 @@ the Server's registry.  Samba starting with version 3.10 will allow such
 filenames (ie those which contain valid Linux characters, which normally
 would be forbidden for Windows/CIFS semantics) as long as the server is
 configured for Unix Extensions (and the client has not disabled
-/proc/fs/cifs/LinuxExtensionsEnabled).
-  
+/proc/fs/cifs/LinuxExtensionsEnabled). In addition the mount option
+"mapposix" can be used on CIFS (vers=1.0) to force the mapping of
+illegal Windows/NTFS/SMB characters to a remap range (this mount parm
+is the default for SMB3). This remap ("mapposix") range is also
+compatible with Mac (and "Services for Mac" on some older Windows).
 
 CIFS VFS Mount Options
 ======================
 A partial list of the supported mount options follows:
-  user		The user name to use when trying to establish
+  username	The user name to use when trying to establish
 		the CIFS session.
   password	The user password.  If the mount helper is
 		installed, the user will be prompted for password
diff --git a/Documentation/filesystems/cifs/TODO b/Documentation/filesystems/cifs/TODO
index 066ffddc3964..396ecfd6ff4a 100644
--- a/Documentation/filesystems/cifs/TODO
+++ b/Documentation/filesystems/cifs/TODO
@@ -1,4 +1,4 @@
-Version 2.03 August 1, 2014
+Version 2.04 September 13, 2017
 
 A Partial List of Missing Features
 ==================================
@@ -8,73 +8,69 @@ for visible, important contributions to this module.  Here
 is a partial list of the known problems and missing features:
 
 a) SMB3 (and SMB3.02) missing optional features:
-   - RDMA
+   - RDMA (started)
    - multichannel (started)
    - directory leases (improved metadata caching)
    - T10 copy offload (copy chunk is only mechanism supported)
-   - encrypted shares
 
 b) improved sparse file support
 
 c) Directory entry caching relies on a 1 second timer, rather than
-using FindNotify or equivalent.  - (started)
+using Directory Leases
 
 d) quota support (needs minor kernel change since quota calls
 to make it to network filesystems or deviceless filesystems)
 
-e) improve support for very old servers (OS/2 and Win9x for example)
-Including support for changing the time remotely (utimes command).
+e) Better optimize open to reduce redundant opens (using reference
+counts more) and to improve use of compounding in SMB3 to reduce
+number of roundtrips.
 
-f) hook lower into the sockets api (as NFS/SunRPC does) to avoid the
-extra copy in/out of the socket buffers in some cases.
-
-g) Better optimize open (and pathbased setfilesize) to reduce the
-oplock breaks coming from windows srv.  Piggyback identical file
-opens on top of each other by incrementing reference count rather
-than resending (helps reduce server resource utilization and avoid
-spurious oplock breaks).
-
-h) Add support for storing symlink info to Windows servers
-in the Extended Attribute format their SFU clients would recognize.
-
-i) Finish inotify support so kde and gnome file list windows
+f) Finish inotify support so kde and gnome file list windows
 will autorefresh (partially complete by Asser). Needs minor kernel
 vfs change to support removing D_NOTIFY on a file.   
 
-j) Add GUI tool to configure /proc/fs/cifs settings and for display of
+g) Add GUI tool to configure /proc/fs/cifs settings and for display of
 the CIFS statistics (started)
 
-k) implement support for security and trusted categories of xattrs
+h) implement support for security and trusted categories of xattrs
 (requires minor protocol extension) to enable better support for SELINUX
 
-l) Implement O_DIRECT flag on open (already supported on mount)
+i) Implement O_DIRECT flag on open (already supported on mount)
 
-m) Create UID mapping facility so server UIDs can be mapped on a per
+j) Create UID mapping facility so server UIDs can be mapped on a per
 mount or a per server basis to client UIDs or nobody if no mapping
-exists.  This is helpful when Unix extensions are negotiated to
-allow better permission checking when UIDs differ on the server
-and client.  Add new protocol request to the CIFS protocol 
-standard for asking the server for the corresponding name of a
-particular uid.
+exists. Also better integration with winbind for resolving SID owners
+
+k) Add tools to take advantage of more smb3 specific ioctls and features
+
+l) encrypted file support
+
+m) improved stats gathering, tools (perhaps integration with nfsometer?)
 
-n) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too)
+n) allow setting more NTFS/SMB3 file attributes remotely (currently limited to compressed
+file attribute via chflags) and improve user space tools for managing and
+viewing them.
 
-o) mount check for unmatched uids
+o) mount helper GUI (to simplify the various configuration options on mount)
 
-p) Add support for new vfs entry point for fallocate
+p) autonegotiation of dialects (offering more than one dialect ie SMB3.02,
+SMB3, SMB2.1 not just SMB3).
 
-q) Add tools to take advantage of cifs/smb3 specific ioctls and features
-such as "CopyChunk" (fast server side file copy)
+q) Allow mount.cifs to be more verbose in reporting errors with dialect
+or unsupported feature errors.
 
-r) encrypted file support
+r) updating cifs documentation, and user guid.
 
-s) improved stats gathering, tools (perhaps integration with nfsometer?)
+s) Addressing bugs found by running a broader set of xfstests in standard
+file system xfstest suite.
 
-t) allow setting more NTFS/SMB3 file attributes remotely (currently limited to compressed
-file attribute via chflags)
+t) split cifs and smb3 support into separate modules so legacy (and less
+secure) CIFS dialect can be disabled in environments that don't need it
+and simplify the code.
 
-u) mount helper GUI (to simplify the various configuration options on mount)
+u) Finish up SMB3.1.1 dialect support
 
+v) POSIX Extensions for SMB3.1.1
 
 KNOWN BUGS
 ====================================
diff --git a/Documentation/filesystems/cifs/cifs.txt b/Documentation/filesystems/cifs/cifs.txt
index 2fac91ac96cf..67756607246e 100644
--- a/Documentation/filesystems/cifs/cifs.txt
+++ b/Documentation/filesystems/cifs/cifs.txt
@@ -1,24 +1,28 @@
-  This is the client VFS module for the Common Internet File System
-  (CIFS) protocol which is the successor to the Server Message Block 
+  This is the client VFS module for the SMB3 NAS protocol as well
+  older dialects such as the Common Internet File System (CIFS)
+  protocol which was the successor to the Server Message Block
   (SMB) protocol, the native file sharing mechanism for most early
   PC operating systems. New and improved versions of CIFS are now
   called SMB2 and SMB3. These dialects are also supported by the
   CIFS VFS module. CIFS is fully supported by network
-  file servers such as Windows 2000, 2003, 2008 and 2012
+  file servers such as Windows 2000, 2003, 2008, 2012 and 2016
   as well by Samba (which provides excellent CIFS
-  server support for Linux and many other operating systems), so
+  server support for Linux and many other operating systems), Apple
+  systems, as well as most Network Attached Storage vendors, so
   this network filesystem client can mount to a wide variety of
   servers.
 
   The intent of this module is to provide the most advanced network
-  file system function for CIFS compliant servers, including better
-  POSIX compliance, secure per-user session establishment, high
-  performance safe distributed caching (oplock), optional packet
+  file system function for SMB3 compliant servers, including advanced
+  security features, excellent parallelized high performance i/o, better
+  POSIX compliance, secure per-user session establishment, encryption,
+  high performance safe distributed caching (leases/oplocks), optional packet
   signing, large files, Unicode support and other internationalization
   improvements. Since both Samba server and this filesystem client support
-  the CIFS Unix extensions, the combination can provide a reasonable 
-  alternative to NFSv4 for fileserving in some Linux to Linux environments,
-  not just in Linux to Windows environments.
+  the CIFS Unix extensions (and in the future SMB3 POSIX extensions),
+  the combination can provide a reasonable alternative to other network and
+  cluster file systems for fileserving in some Linux to Linux environments,
+  not just in Linux to Windows (or Linux to Mac) environments.
 
   This filesystem has an mount utility (mount.cifs) that can be obtained from
 
-- 
cgit 


From 47061a24e2ee5bd8a40d473d47a5bd823fa0081f Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:48 -0700
Subject: x86/mm: Factor out CR3-building code

Current, the code that assembles a value to load into CR3 is
open-coded everywhere.  Factor it out into helpers build_cr3() and
build_cr3_noflush().

This makes one semantic change: __get_current_cr3_fast() was wrong
on SME systems.  No one noticed because the only caller is in the
VMX code, and there are no CPUs with both SME and VMX.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <Thomas.Lendacky@amd.com>
Link: http://lkml.kernel.org/r/ce350cf11e93e2842d14d0b95b0199c7d881f527.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mmu_context.h | 15 +++++++++++----
 arch/x86/mm/tlb.c                  | 11 +++++------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7ae318c340d9..a999ba6b721f 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,6 +286,15 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+{
+	return __sme_pa(mm->pgd) | asid;
+}
+
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+{
+	return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
+}
 
 /*
  * This can be used from process context to figure out what the value of
@@ -296,10 +305,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
  */
 static inline unsigned long __get_current_cr3_fast(void)
 {
-	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-
-	if (static_cpu_has(X86_FEATURE_PCID))
-		cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+		this_cpu_read(cpu_tlbstate.loaded_mm_asid));
 
 	/* For now, be very restrictive about when this can be called. */
 	VM_WARN_ON(in_nmi() || preemptible());
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 1ab3821f9e26..93fe97cce581 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -126,8 +126,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * isn't free.
 	 */
 #ifdef CONFIG_DEBUG_VM
-	if (WARN_ON_ONCE(__read_cr3() !=
-			 (__sme_pa(real_prev->pgd) | prev_asid))) {
+	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
 		/*
 		 * If we were to BUG here, we'd be very likely to kill
 		 * the system so hard that we don't see the call trace.
@@ -172,7 +171,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			 */
 			this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
 				       next_tlb_gen);
-			write_cr3(__sme_pa(next->pgd) | prev_asid);
+			write_cr3(build_cr3(next, prev_asid));
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
 					TLB_FLUSH_ALL);
 		}
@@ -216,12 +215,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		if (need_flush) {
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-			write_cr3(__sme_pa(next->pgd) | new_asid);
+			write_cr3(build_cr3(next, new_asid));
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
 					TLB_FLUSH_ALL);
 		} else {
 			/* The new ASID is already up to date. */
-			write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+			write_cr3(build_cr3_noflush(next, new_asid));
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
 		}
 
@@ -265,7 +264,7 @@ void initialize_tlbstate_and_flush(void)
 		!(cr4_read_shadow() & X86_CR4_PCIDE));
 
 	/* Force ASID 0 and force a TLB flush. */
-	write_cr3(cr3 & ~CR3_PCID_MASK);
+	write_cr3(build_cr3(mm, 0));
 
 	/* Reinitialize tlbstate. */
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
-- 
cgit 


From 52a2af400c1075219b3f0ce5c96fc961da44018a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:49 -0700
Subject: x86/mm/64: Stop using CR3.PCID == 0 in ASID-aware code

Putting the logical ASID into CR3's PCID bits directly means that we
have two cases to consider separately: ASID == 0 and ASID != 0.
This means that bugs that only hit in one of these cases trigger
nondeterministically.

There were some bugs like this in the past, and I think there's
still one in current kernels.  In particular, we have a number of
ASID-unware code paths that save CR3, write some special value, and
then restore CR3.  This includes suspend/resume, hibernate, kexec,
EFI, and maybe other things I've missed.  This is currently
dangerous: if ASID != 0, then this code sequence will leave garbage
in the TLB tagged for ASID 0.  We could potentially see corruption
when switching back to ASID 0.  In principle, an
initialize_tlbstate_and_flush() call after these sequences would
solve the problem, but EFI, at least, does not call this.  (And it
probably shouldn't -- initialize_tlbstate_and_flush() is rather
expensive.)

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/cdc14bbe5d3c3ef2a562be09a6368ffe9bd947a6.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mmu_context.h | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index a999ba6b721f..c120b5db178a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,14 +286,31 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
+/*
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+ * bits.  This serves two purposes.  It prevents a nasty situation in
+ * which PCID-unaware code saves CR3, loads some other value (with PCID
+ * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+ * the saved ASID was nonzero.  It also means that any bugs involving
+ * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+ * deterministically.
+ */
+
 static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
 {
-	return __sme_pa(mm->pgd) | asid;
+	if (static_cpu_has(X86_FEATURE_PCID)) {
+		VM_WARN_ON_ONCE(asid > 4094);
+		return __sme_pa(mm->pgd) | (asid + 1);
+	} else {
+		VM_WARN_ON_ONCE(asid != 0);
+		return __sme_pa(mm->pgd);
+	}
 }
 
 static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
 {
-	return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
+	VM_WARN_ON_ONCE(asid > 4094);
+	return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
 }
 
 /*
-- 
cgit 


From b8b7abaed7a49b350f8ba659ddc264b04931d581 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:50 -0700
Subject: x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) earlier

Otherwise we might have the PCID feature bit set during cpu_init().

This is just for robustness.  I haven't seen any actual bugs here.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: cba4671af755 ("x86/mm: Disable PCID on 32-bit kernels")
Link: http://lkml.kernel.org/r/b16dae9d6b0db5d9801ddbebbfd83384097c61f3.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/bugs.c   | 8 --------
 arch/x86/kernel/cpu/common.c | 8 ++++++++
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index db684880d74a..0af86d9242da 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -21,14 +21,6 @@
 
 void __init check_bugs(void)
 {
-#ifdef CONFIG_X86_32
-	/*
-	 * Regardless of whether PCID is enumerated, the SDM says
-	 * that it can't be enabled in 32-bit mode.
-	 */
-	setup_clear_cpu_cap(X86_FEATURE_PCID);
-#endif
-
 	identify_boot_cpu();
 
 	if (!IS_ENABLED(CONFIG_SMP)) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 775f10100d7f..c9176bae7fd8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -904,6 +904,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 	fpu__init_system(c);
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Regardless of whether PCID is enumerated, the SDM says
+	 * that it can't be enabled in 32-bit mode.
+	 */
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
 }
 
 void __init early_cpu_init(void)
-- 
cgit 


From 4ba55e65f471d011d3ba2ac2022180ea0877d68e Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:51 -0700
Subject: x86/mm/32: Load a sane CR3 before cpu_init() on secondary CPUs

For unknown historical reasons (i.e. Borislav doesn't recall),
32-bit kernels invoke cpu_init() on secondary CPUs with
initial_page_table loaded into CR3.  Then they set
current->active_mm to &init_mm and call enter_lazy_tlb() before
fixing CR3.  This means that the x86 TLB code gets invoked while CR3
is inconsistent, and, with the improved PCID sanity checks I added,
we warn.

Fix it by loading swapper_pg_dir (i.e. init_mm.pgd) earlier.

Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reported-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 72c0098d92ce ("x86/mm: Reinitialize TLB state on hotplug and resume")
Link: http://lkml.kernel.org/r/30cdfea504682ba3b9012e77717800a91c22097f.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/smpboot.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0854ff169274..ad59edd84de7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -232,12 +232,6 @@ static void notrace start_secondary(void *unused)
 	 */
 	if (boot_cpu_has(X86_FEATURE_PCID))
 		__write_cr4(__read_cr4() | X86_CR4_PCIDE);
-	cpu_init();
-	x86_cpuinit.early_percpu_clock_init();
-	preempt_disable();
-	smp_callin();
-
-	enable_start_cpu0 = 0;
 
 #ifdef CONFIG_X86_32
 	/* switch away from the initial page table */
@@ -245,6 +239,13 @@ static void notrace start_secondary(void *unused)
 	__flush_tlb_all();
 #endif
 
+	cpu_init();
+	x86_cpuinit.early_percpu_clock_init();
+	preempt_disable();
+	smp_callin();
+
+	enable_start_cpu0 = 0;
+
 	/* otherwise gcc will move up smp_processor_id before the cpu_init */
 	barrier();
 	/*
-- 
cgit 


From bf29ed1567b67854dc13504f685c45a2ea9b2081 Mon Sep 17 00:00:00 2001
From: Thomas Garnier <thgarnie@google.com>
Date: Thu, 7 Sep 2017 08:30:44 -0700
Subject: syscalls: Use CHECK_DATA_CORRUPTION for addr_limit_user_check

Use CHECK_DATA_CORRUPTION instead of BUG_ON to provide more flexibility
on address limit failures. By default, send a SIGKILL signal to kill the
current process preventing exploitation of a bad address limit.

Make the TIF_FSCHECK flag optional so ARM can use this function.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Will Drewry <wad@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: David Howells <dhowells@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-api@vger.kernel.org
Cc: Yonghong Song <yhs@fb.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1504798247-48833-2-git-send-email-keescook@chromium.org
---
 include/linux/syscalls.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 95606a2d556f..a78186d826d7 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -221,21 +221,25 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
 	}								\
 	static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 
-#ifdef TIF_FSCHECK
 /*
  * Called before coming back to user-mode. Returning to user-mode with an
  * address limit different than USER_DS can allow to overwrite kernel memory.
  */
 static inline void addr_limit_user_check(void)
 {
-
+#ifdef TIF_FSCHECK
 	if (!test_thread_flag(TIF_FSCHECK))
 		return;
+#endif
 
-	BUG_ON(!segment_eq(get_fs(), USER_DS));
+	if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS),
+				  "Invalid address limit on user-mode return"))
+		force_sig(SIGKILL, current);
+
+#ifdef TIF_FSCHECK
 	clear_thread_flag(TIF_FSCHECK);
-}
 #endif
+}
 
 asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
 			       qid_t id, void __user *addr);
-- 
cgit 


From 2404269bc4e77a67875c8db6667be34c9913c96e Mon Sep 17 00:00:00 2001
From: Thomas Garnier <thgarnie@google.com>
Date: Thu, 7 Sep 2017 08:30:45 -0700
Subject: Revert "arm/syscalls: Check address limit on user-mode return"

This reverts commit 73ac5d6a2b6ac3ae8d1e1818f3e9946f97489bc9.

The work pending loop can call set_fs after addr_limit_user_check
removed the _TIF_FSCHECK flag. This may happen at anytime based on how
ARM handles alignment exceptions. It leads to an infinite loop condition.

After discussion, it has been agreed that the generic approach is not
tailored to the ARM architecture and any fix might not be complete. This
patch will be replaced by an architecture specific implementation. The
work flag approach will be kept for other architectures.

Reported-by: Leonard Crestez <leonard.crestez@nxp.com>
Signed-off-by: Thomas Garnier <thgarnie@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Will Drewry <wad@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: David Howells <dhowells@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-api@vger.kernel.org
Cc: Yonghong Song <yhs@fb.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1504798247-48833-3-git-send-email-keescook@chromium.org
---
 arch/arm/include/asm/thread_info.h | 15 ++++++---------
 arch/arm/include/asm/uaccess.h     |  2 --
 arch/arm/kernel/entry-common.S     |  9 ++-------
 arch/arm/kernel/signal.c           |  5 -----
 4 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 1d468b527b7b..776757d1604a 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -139,11 +139,10 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define TIF_NEED_RESCHED	1	/* rescheduling necessary */
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
 #define TIF_UPROBE		3	/* breakpointed or singlestepping */
-#define TIF_FSCHECK		4	/* Check FS is USER_DS on return */
-#define TIF_SYSCALL_TRACE	5	/* syscall trace active */
-#define TIF_SYSCALL_AUDIT	6	/* syscall auditing active */
-#define TIF_SYSCALL_TRACEPOINT	7	/* syscall tracepoint instrumentation */
-#define TIF_SECCOMP		8	/* seccomp syscall filtering active */
+#define TIF_SYSCALL_TRACE	4	/* syscall trace active */
+#define TIF_SYSCALL_AUDIT	5	/* syscall auditing active */
+#define TIF_SYSCALL_TRACEPOINT	6	/* syscall tracepoint instrumentation */
+#define TIF_SECCOMP		7	/* seccomp syscall filtering active */
 
 #define TIF_NOHZ		12	/* in adaptive nohz mode */
 #define TIF_USING_IWMMXT	17
@@ -154,7 +153,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
-#define _TIF_FSCHECK		(1 << TIF_FSCHECK)
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
@@ -168,9 +166,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 /*
  * Change these and you break ASM code in entry-common.S
  */
-#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING |	\
-				 _TIF_NOTIFY_RESUME | _TIF_UPROBE |	\
-				 _TIF_FSCHECK)
+#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+				 _TIF_NOTIFY_RESUME | _TIF_UPROBE)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_ARM_THREAD_INFO_H */
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 87936dd5d151..0bf2347495f1 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -70,8 +70,6 @@ static inline void set_fs(mm_segment_t fs)
 {
 	current_thread_info()->addr_limit = fs;
 	modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
-	/* On user-mode return, check fs is correct */
-	set_thread_flag(TIF_FSCHECK);
 }
 
 #define segment_eq(a, b)	((a) == (b))
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index ca3614dc6938..0b60adf4a5d9 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -49,9 +49,7 @@ ret_fast_syscall:
  UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
-	tst	r1, #_TIF_SYSCALL_WORK
-	bne	fast_work_pending
-	tst	r1, #_TIF_WORK_MASK
+	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	bne	fast_work_pending
 
 	/* perform architecture specific actions before user return */
@@ -77,15 +75,12 @@ ret_fast_syscall:
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
 	disable_irq_notrace			@ disable interrupts
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
-	tst	r1, #_TIF_SYSCALL_WORK
-	bne	fast_work_pending
-	tst	r1, #_TIF_WORK_MASK
+	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	beq	no_work_pending
  UNWIND(.fnend		)
 ENDPROC(ret_fast_syscall)
 
 	/* Slower path - fall through to work_pending */
-fast_work_pending:
 #endif
 
 	tst	r1, #_TIF_SYSCALL_WORK
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index e2de50bf8742..5814298ef0b7 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -14,7 +14,6 @@
 #include <linux/uaccess.h>
 #include <linux/tracehook.h>
 #include <linux/uprobes.h>
-#include <linux/syscalls.h>
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
@@ -614,10 +613,6 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 	 * Update the trace code with the current status.
 	 */
 	trace_hardirqs_off();
-
-	/* Check valid user FS if needed */
-	addr_limit_user_check();
-
 	do {
 		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
 			schedule();
-- 
cgit 


From e33f8d32677fa4f4f8996ef46748f86aac81ccff Mon Sep 17 00:00:00 2001
From: Thomas Garnier <thgarnie@google.com>
Date: Thu, 7 Sep 2017 08:30:46 -0700
Subject: arm/syscalls: Optimize address limit check

Disable the generic address limit check in favor of an architecture
specific optimized implementation. The generic implementation using
pending work flags did not work well with ARM and alignment faults.

The address limit is checked on each syscall return path to user-mode
path as well as the irq user-mode return function. If the address limit
was changed, a function is called to report data corruption (stopping
the kernel or process based on configuration).

The address limit check has to be done before any pending work because
they can reset the address limit and the process is killed using a
SIGKILL signal. For example the lkdtm address limit check does not work
because the signal to kill the process will reset the user-mode address
limit.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Kees Cook <keescook@chromium.org>
Tested-by: Leonard Crestez <leonard.crestez@nxp.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Will Drewry <wad@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: David Howells <dhowells@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-api@vger.kernel.org
Cc: Yonghong Song <yhs@fb.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1504798247-48833-4-git-send-email-keescook@chromium.org
---
 arch/arm/kernel/entry-common.S | 11 +++++++++++
 arch/arm/kernel/signal.c       |  7 +++++++
 2 files changed, 18 insertions(+)

diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 0b60adf4a5d9..99c908226065 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -12,6 +12,7 @@
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
 #include <asm/unwind.h>
+#include <asm/memory.h>
 #ifdef CONFIG_AEABI
 #include <asm/unistd-oabi.h>
 #endif
@@ -48,10 +49,14 @@ ret_fast_syscall:
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	bne	fast_work_pending
 
+
 	/* perform architecture specific actions before user return */
 	arch_ret_to_user r1, lr
 
@@ -74,6 +79,9 @@ ret_fast_syscall:
  UNWIND(.cantunwind	)
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	beq	no_work_pending
@@ -106,6 +114,9 @@ ENTRY(ret_to_user)
 ret_slow_syscall:
 	disable_irq_notrace			@ disable interrupts
 ENTRY(ret_to_user_from_irq)
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]
 	tst	r1, #_TIF_WORK_MASK
 	bne	slow_work_pending
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 5814298ef0b7..b67ae12503f3 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -14,6 +14,7 @@
 #include <linux/uaccess.h>
 #include <linux/tracehook.h>
 #include <linux/uprobes.h>
+#include <linux/syscalls.h>
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
@@ -673,3 +674,9 @@ struct page *get_signal_page(void)
 
 	return page;
 }
+
+/* Defer to generic check */
+asmlinkage void addr_limit_check_failed(void)
+{
+	addr_limit_user_check();
+}
-- 
cgit 


From a2048e34d4655c06d31400646ae495bbfeb16b27 Mon Sep 17 00:00:00 2001
From: Thomas Garnier <thgarnie@google.com>
Date: Thu, 7 Sep 2017 08:30:47 -0700
Subject: arm64/syscalls: Move address limit check in loop

A bug was reported on ARM where set_fs might be called after it was
checked on the work pending function. ARM64 is not affected by this bug
but has a similar construct. In order to avoid any similar problems in
the future, the addr_limit_user_check function is moved at the beginning
of the loop.

Fixes: cf7de27ab351 ("arm64/syscalls: Check address limit on user-mode return")
Reported-by: Leonard Crestez <leonard.crestez@nxp.com>
Signed-off-by: Thomas Garnier <thgarnie@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Will Drewry <wad@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: David Howells <dhowells@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-api@vger.kernel.org
Cc: Yonghong Song <yhs@fb.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1504798247-48833-5-git-send-email-keescook@chromium.org
---
 arch/arm64/kernel/signal.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index c45214f8fb54..0bdc96c61bc0 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -751,10 +751,10 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 	 */
 	trace_hardirqs_off();
 
-	/* Check valid user FS if needed */
-	addr_limit_user_check();
-
 	do {
+		/* Check valid user FS if needed */
+		addr_limit_user_check();
+
 		if (thread_flags & _TIF_NEED_RESCHED) {
 			schedule();
 		} else {
-- 
cgit 


From 9764c02fcbad40001fd3f63558d918e4d519bb75 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Sun, 17 Sep 2017 10:41:35 -0500
Subject: SMB3: Add support for multidialect negotiate (SMB2.1 and later)

With the need to discourage use of less secure dialect, SMB1 (CIFS),
we temporarily upgraded the dialect to SMB3 in 4.13, but since there
are various servers which only support SMB2.1 (2.1 is more secure
than CIFS/SMB1) but not optimal for a default dialect - add support
for multidialect negotiation.  cifs.ko will now request SMB2.1
or later (ie SMB2.1 or SMB3.0, SMB3.02) and the server will
pick the latest most secure one it can support.

In addition since we are sending multidialect negotiate, add
support for secure negotiate to validate that a man in the
middle didn't downgrade us.

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
CC: Stable <stable@vger.kernel.org> # 4.13+
---
 fs/cifs/cifsglob.h |  6 ++++
 fs/cifs/connect.c  | 24 ++++++++++-----
 fs/cifs/smb2ops.c  | 40 +++++++++++++++++++++++++
 fs/cifs/smb2pdu.c  | 85 +++++++++++++++++++++++++++++++++++++++++++++++-------
 fs/cifs/smb2pdu.h  |  2 +-
 5 files changed, 139 insertions(+), 18 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 808486c29f0d..de5b2e1fcce5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -188,6 +188,8 @@ enum smb_version {
 #ifdef CONFIG_CIFS_SMB311
 	Smb_311,
 #endif /* SMB311 */
+	Smb_3any,
+	Smb_default,
 	Smb_version_err
 };
 
@@ -1701,6 +1703,10 @@ extern struct smb_version_values smb20_values;
 #define SMB21_VERSION_STRING	"2.1"
 extern struct smb_version_operations smb21_operations;
 extern struct smb_version_values smb21_values;
+#define SMBDEFAULT_VERSION_STRING "default"
+extern struct smb_version_values smbdefault_values;
+#define SMB3ANY_VERSION_STRING "3"
+extern struct smb_version_values smb3any_values;
 #define SMB30_VERSION_STRING	"3.0"
 extern struct smb_version_operations smb30_operations;
 extern struct smb_version_values smb30_values;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5aa2d278ca84..8d38b22afb2b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -301,6 +301,8 @@ static const match_table_t cifs_smb_version_tokens = {
 	{ Smb_311, SMB311_VERSION_STRING },
 	{ Smb_311, ALT_SMB311_VERSION_STRING },
 #endif /* SMB311 */
+	{ Smb_3any, SMB3ANY_VERSION_STRING },
+	{ Smb_default, SMBDEFAULT_VERSION_STRING },
 	{ Smb_version_err, NULL }
 };
 
@@ -1148,6 +1150,14 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
 		vol->vals = &smb311_values;
 		break;
 #endif /* SMB311 */
+	case Smb_3any:
+		vol->ops = &smb30_operations; /* currently identical with 3.0 */
+		vol->vals = &smb3any_values;
+		break;
+	case Smb_default:
+		vol->ops = &smb30_operations; /* currently identical with 3.0 */
+		vol->vals = &smbdefault_values;
+		break;
 	default:
 		cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
 		return 1;
@@ -1274,9 +1284,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 
 	vol->actimeo = CIFS_DEF_ACTIMEO;
 
-	/* FIXME: add autonegotiation for SMB3 or later rather than just SMB3 */
-	vol->ops = &smb30_operations; /* both secure and accepted widely */
-	vol->vals = &smb30_values;
+	/* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */
+	vol->ops = &smb30_operations;
+	vol->vals = &smbdefault_values;
 
 	vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT;
 
@@ -1988,11 +1998,10 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 
 	if (got_version == false)
 		pr_warn("No dialect specified on mount. Default has changed to "
-			"a more secure dialect, SMB3 (vers=3.0), from CIFS "
+			"a more secure dialect, SMB2.1 or later (e.g. SMB3), from CIFS "
 			"(SMB1). To use the less secure SMB1 dialect to access "
-			"old servers which do not support SMB3 specify vers=1.0"
-			" on mount. For somewhat newer servers such as Windows "
-			"7 try vers=2.1.\n");
+			"old servers which do not support SMB3 (or SMB2.1) specify vers=1.0"
+			" on mount.\n");
 
 	kfree(mountdata_copy);
 	return 0;
@@ -2133,6 +2142,7 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
 	if (vol->nosharesock)
 		return 0;
 
+	/* BB update this for smb3any and default case */
 	if ((server->vals != vol->vals) || (server->ops != vol->ops))
 		return 0;
 
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index fb2934b9b97c..405a9bf13aac 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3110,6 +3110,46 @@ struct smb_version_values smb21_values = {
 	.create_lease_size = sizeof(struct create_lease),
 };
 
+struct smb_version_values smb3any_values = {
+	.version_string = SMB3ANY_VERSION_STRING,
+	.protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */
+	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+	.large_lock_type = 0,
+	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+	.header_size = sizeof(struct smb2_hdr),
+	.max_header_size = MAX_SMB2_HDR_SIZE,
+	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+	.lock_cmd = SMB2_LOCK,
+	.cap_unix = 0,
+	.cap_nt_find = SMB2_NT_FIND,
+	.cap_large_files = SMB2_LARGE_FILES,
+	.signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+	.signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+	.create_lease_size = sizeof(struct create_lease_v2),
+};
+
+struct smb_version_values smbdefault_values = {
+	.version_string = SMBDEFAULT_VERSION_STRING,
+	.protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */
+	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+	.large_lock_type = 0,
+	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+	.header_size = sizeof(struct smb2_hdr),
+	.max_header_size = MAX_SMB2_HDR_SIZE,
+	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+	.lock_cmd = SMB2_LOCK,
+	.cap_unix = 0,
+	.cap_nt_find = SMB2_NT_FIND,
+	.cap_large_files = SMB2_LARGE_FILES,
+	.signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+	.signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+	.create_lease_size = sizeof(struct create_lease_v2),
+};
+
 struct smb_version_values smb30_values = {
 	.version_string = SMB30_VERSION_STRING,
 	.protocol_id = SMB30_PROT_ID,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 69a751b038ab..5c16591a128e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -491,10 +491,25 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 
 	req->hdr.sync_hdr.SessionId = 0;
 
-	req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
-
-	req->DialectCount = cpu_to_le16(1); /* One vers= at a time for now */
-	inc_rfc1001_len(req, 2);
+	if (strcmp(ses->server->vals->version_string,
+		   SMB3ANY_VERSION_STRING) == 0) {
+		req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
+		req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
+		req->DialectCount = cpu_to_le16(2);
+		inc_rfc1001_len(req, 4);
+	} else if (strcmp(ses->server->vals->version_string,
+		   SMBDEFAULT_VERSION_STRING) == 0) {
+		req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
+		req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
+		req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
+		req->DialectCount = cpu_to_le16(3);
+		inc_rfc1001_len(req, 6);
+	} else {
+		/* otherwise send specific dialect */
+		req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
+		req->DialectCount = cpu_to_le16(1);
+		inc_rfc1001_len(req, 2);
+	}
 
 	/* only one of SMB2 signing flags may be set in SMB2 request */
 	if (ses->sign)
@@ -528,16 +543,42 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 	 */
 	if (rc == -EOPNOTSUPP) {
 		cifs_dbg(VFS, "Dialect not supported by server. Consider "
-			"specifying vers=1.0 or vers=2.1 on mount for accessing"
+			"specifying vers=1.0 or vers=2.0 on mount for accessing"
 			" older servers\n");
 		goto neg_exit;
 	} else if (rc != 0)
 		goto neg_exit;
 
+	if (strcmp(ses->server->vals->version_string,
+		   SMB3ANY_VERSION_STRING) == 0) {
+		if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
+			cifs_dbg(VFS,
+				"SMB2 dialect returned but not requested\n");
+			return -EIO;
+		} else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
+			cifs_dbg(VFS,
+				"SMB2.1 dialect returned but not requested\n");
+			return -EIO;
+		}
+	} else if (strcmp(ses->server->vals->version_string,
+		   SMBDEFAULT_VERSION_STRING) == 0) {
+		if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
+			cifs_dbg(VFS,
+				"SMB2 dialect returned but not requested\n");
+			return -EIO;
+		} else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
+			/* ops set to 3.0 by default for default so update */
+			ses->server->ops = &smb21_operations;
+		}
+	} else if (rsp->DialectRevision != ses->server->vals->protocol_id) {
+		/* if requested single dialect ensure returned dialect matched */
+		cifs_dbg(VFS, "Illegal 0x%x dialect returned: not requested\n",
+			cpu_to_le16(rsp->DialectRevision));
+		return -EIO;
+	}
+
 	cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode);
 
-	/* BB we may eventually want to match the negotiated vs. requested
-	   dialect, even though we are only requesting one at a time */
 	if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID))
 		cifs_dbg(FYI, "negotiated smb2.0 dialect\n");
 	else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID))
@@ -558,6 +599,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 	}
 	server->dialect = le16_to_cpu(rsp->DialectRevision);
 
+	/* BB: add check that dialect was valid given dialect(s) we asked for */
+
 	/* SMB2 only has an extended negflavor */
 	server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
 	/* set it to the maximum buffer size value we can send with 1 credit */
@@ -606,6 +649,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
 	struct validate_negotiate_info_req vneg_inbuf;
 	struct validate_negotiate_info_rsp *pneg_rsp;
 	u32 rsplen;
+	u32 inbuflen; /* max of 4 dialects */
 
 	cifs_dbg(FYI, "validate negotiate\n");
 
@@ -634,9 +678,30 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
 	else
 		vneg_inbuf.SecurityMode = 0;
 
-	vneg_inbuf.DialectCount = cpu_to_le16(1);
-	vneg_inbuf.Dialects[0] =
-		cpu_to_le16(tcon->ses->server->vals->protocol_id);
+
+	if (strcmp(tcon->ses->server->vals->version_string,
+		SMB3ANY_VERSION_STRING) == 0) {
+		vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
+		vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
+		vneg_inbuf.DialectCount = cpu_to_le16(2);
+		/* structure is big enough for 3 dialects, sending only 2 */
+		inbuflen = sizeof(struct validate_negotiate_info_req) - 2;
+	} else if (strcmp(tcon->ses->server->vals->version_string,
+		SMBDEFAULT_VERSION_STRING) == 0) {
+		vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
+		vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
+		vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
+		vneg_inbuf.DialectCount = cpu_to_le16(3);
+		/* structure is big enough for 3 dialects */
+		inbuflen = sizeof(struct validate_negotiate_info_req);
+	} else {
+		/* otherwise specific dialect was requested */
+		vneg_inbuf.Dialects[0] =
+			cpu_to_le16(tcon->ses->server->vals->protocol_id);
+		vneg_inbuf.DialectCount = cpu_to_le16(1);
+		/* structure is big enough for 3 dialects, sending only 1 */
+		inbuflen = sizeof(struct validate_negotiate_info_req) - 4;
+	}
 
 	rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
 		FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 393ed5f4e1b6..6c9653a130c8 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -716,7 +716,7 @@ struct validate_negotiate_info_req {
 	__u8   Guid[SMB2_CLIENT_GUID_SIZE];
 	__le16 SecurityMode;
 	__le16 DialectCount;
-	__le16 Dialects[1]; /* dialect (someday maybe list) client asked for */
+	__le16 Dialects[3]; /* BB expand this if autonegotiate > 3 dialects */
 } __packed;
 
 struct validate_negotiate_info_rsp {
-- 
cgit 


From 1368f155a972eab037f81b5dea82afd544227552 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 5 Sep 2017 11:24:15 +0200
Subject: cifs: hide unused functions

The newly added SMB2+ attribute support causes unused function
warnings when CONFIG_CIFS_XATTR is disabled:

fs/cifs/smb2ops.c:563:1: error: 'smb2_set_ea' defined but not used [-Werror=unused-function]
 smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
fs/cifs/smb2ops.c:513:1: error: 'smb2_query_eas' defined but not used [-Werror=unused-function]
 smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,

This adds another #ifdef around the affected functions.

Fixes: 5517554e4313 ("cifs: Add support for writing attributes on SMB2+")
Fixes: 95907fea4fd8 ("cifs: Add support for reading attributes on SMB2+")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2ops.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 405a9bf13aac..0dafdbae1f8c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -426,6 +426,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
 	return rc;
 }
 
+#ifdef CONFIG_CIFS_XATTR
 static ssize_t
 move_smb2_ea_to_cifs(char *dst, size_t dst_size,
 		     struct smb2_file_full_ea_info *src, size_t src_size,
@@ -613,6 +614,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
 
 	return rc;
 }
+#endif
 
 static bool
 smb2_can_echo(struct TCP_Server_Info *server)
-- 
cgit 


From 94a9daeaece415ce37ccb413b49a580e68e3477b Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Tue, 12 Sep 2017 18:13:11 -0500
Subject: Update version of cifs module

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/cifsfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 30bf89b1fd9a..5a10e566f0e6 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -149,5 +149,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.09"
+#define CIFS_VERSION   "2.10"
 #endif				/* _CIFSFS_H */
-- 
cgit 


From 8fce3dc5c5d6f6301f67311fa79f333902b58cea Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 15 Sep 2017 21:42:59 +0200
Subject: clocksource/integrator: Fix section mismatch warning

gcc-4.6 and older fail to inline integrator_clocksource_init, so they
end up showing a harmless warning:

WARNING: vmlinux.o(.text+0x4aa94c): Section mismatch in reference from the function integrator_clocksource_init() to the function .init.text:clocksource_mmio_init()
The function integrator_clocksource_init() references
the function __init clocksource_mmio_init().
This is often because integrator_clocksource_init lacks a __init
annotation or the annotation of clocksource_mmio_init is wrong.

Add the missing __init annotation that makes it build cleanly with all
compilers.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Thierry Reding <treding@nvidia.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: http://lkml.kernel.org/r/20170915194310.1170514-1-arnd@arndb.de
---
 drivers/clocksource/timer-integrator-ap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c
index 2ff64d9d4fb3..62d24690ba02 100644
--- a/drivers/clocksource/timer-integrator-ap.c
+++ b/drivers/clocksource/timer-integrator-ap.c
@@ -36,8 +36,8 @@ static u64 notrace integrator_read_sched_clock(void)
 	return -readl(sched_clk_base + TIMER_VALUE);
 }
 
-static int integrator_clocksource_init(unsigned long inrate,
-				       void __iomem *base)
+static int __init integrator_clocksource_init(unsigned long inrate,
+					      void __iomem *base)
 {
 	u32 ctrl = TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC;
 	unsigned long rate = inrate;
-- 
cgit 


From b8f3911610529ba531b99d1e109c7a40fb071f0a Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 12 Sep 2017 15:10:35 +0200
Subject: mtd: spi-nor: Check consistency of the memory size extracted from the
 SFDP

One field of the flash parameter table contains information about the
flash device size.
Most of the time the data extracted from this field is valid, but
sometimes the BFPT section of the SFDP table is corrupted or invalid and
this field is set to 0xffffffff, thus resulting in an integer overflow
when setting params->size.

Since NOR devices are anayway always smaller than 2^64 bytes, we can
easily stop the BFPT parsing if the size reported in this table is
invalid.

Fixes: f384b352cbf0 ("mtd: spi-nor: parse Serial Flash Discoverable Parameters (SFDP) tables")
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index cf1d4a15e10a..4425b0283725 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2127,6 +2127,15 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
 	params->size = bfpt.dwords[BFPT_DWORD(2)];
 	if (params->size & BIT(31)) {
 		params->size &= ~BIT(31);
+
+		/*
+		 * Prevent overflows on params->size. Anyway, a NOR of 2^64
+		 * bits is unlikely to exist so this error probably means
+		 * the BFPT we are reading is corrupted/wrong.
+		 */
+		if (params->size > 63)
+			return -EINVAL;
+
 		params->size = 1ULL << params->size;
 	} else {
 		params->size++;
-- 
cgit 


From bfa4133795e5a0badd402dd3f58b13b3cec64a4b Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Date: Wed, 6 Sep 2017 23:45:02 +0200
Subject: mtd: spi-nor: fix DMA unsafe buffer issue in spi_nor_read_sfdp()

spi_nor_read_sfdp() calls nor->read() to read the SFDP data.
When the m25p80 driver is used (pretty common case), nor->read() is then
implemented by the m25p80_read() function, which is likely to initialize a
'struct spi_transfer' from its buf argument before appending this
structure inside the 'struct spi_message' argument of spi_sync().

Besides the SPI sub-system states that both .tx_buf and .rx_buf members of
'struct spi_transfer' must point into dma-safe memory. However, two of the
three calls of spi_nor_read_sfdp() were given pointers to stack allocated
memory as buf argument, hence not in a dma-safe area.
Hopefully, the third and last call of spi_nor_read_sfdp() was already
given a kmalloc'ed buffer argument, hence dma-safe.

So this patch fixes this issue by introducing a
spi_nor_read_sfdp_dma_unsafe() function which simply wraps the existing
spi_nor_read_sfdp() function and uses some kmalloc'ed memory as a bounce
buffer.

Fixes: f384b352cbf0 ("mtd: spi-nor: parse Serial Flash Discoverable Parameters (SFDP) tables")
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 4425b0283725..19c000722cbc 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1784,7 +1784,7 @@ spi_nor_set_pp_settings(struct spi_nor_pp_command *pp,
  * @nor:	pointer to a 'struct spi_nor'
  * @addr:	offset in the SFDP area to start reading data from
  * @len:	number of bytes to read
- * @buf:	buffer where the SFDP data are copied into
+ * @buf:	buffer where the SFDP data are copied into (dma-safe memory)
  *
  * Whatever the actual numbers of bytes for address and dummy cycles are
  * for (Fast) Read commands, the Read SFDP (5Ah) instruction is always
@@ -1829,6 +1829,36 @@ read_err:
 	return ret;
 }
 
+/**
+ * spi_nor_read_sfdp_dma_unsafe() - read Serial Flash Discoverable Parameters.
+ * @nor:	pointer to a 'struct spi_nor'
+ * @addr:	offset in the SFDP area to start reading data from
+ * @len:	number of bytes to read
+ * @buf:	buffer where the SFDP data are copied into
+ *
+ * Wrap spi_nor_read_sfdp() using a kmalloc'ed bounce buffer as @buf is now not
+ * guaranteed to be dma-safe.
+ *
+ * Return: -ENOMEM if kmalloc() fails, the return code of spi_nor_read_sfdp()
+ *          otherwise.
+ */
+static int spi_nor_read_sfdp_dma_unsafe(struct spi_nor *nor, u32 addr,
+					size_t len, void *buf)
+{
+	void *dma_safe_buf;
+	int ret;
+
+	dma_safe_buf = kmalloc(len, GFP_KERNEL);
+	if (!dma_safe_buf)
+		return -ENOMEM;
+
+	ret = spi_nor_read_sfdp(nor, addr, len, dma_safe_buf);
+	memcpy(buf, dma_safe_buf, len);
+	kfree(dma_safe_buf);
+
+	return ret;
+}
+
 struct sfdp_parameter_header {
 	u8		id_lsb;
 	u8		minor;
@@ -2101,7 +2131,7 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
 		    bfpt_header->length * sizeof(u32));
 	addr = SFDP_PARAM_HEADER_PTP(bfpt_header);
 	memset(&bfpt, 0, sizeof(bfpt));
-	err = spi_nor_read_sfdp(nor,  addr, len, &bfpt);
+	err = spi_nor_read_sfdp_dma_unsafe(nor,  addr, len, &bfpt);
 	if (err < 0)
 		return err;
 
@@ -2252,7 +2282,7 @@ static int spi_nor_parse_sfdp(struct spi_nor *nor,
 	int i, err;
 
 	/* Get the SFDP header. */
-	err = spi_nor_read_sfdp(nor, 0, sizeof(header), &header);
+	err = spi_nor_read_sfdp_dma_unsafe(nor, 0, sizeof(header), &header);
 	if (err < 0)
 		return err;
 
-- 
cgit 


From 17b694a5476de09e119c517dcc3b71eff6835bdf Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 3 Sep 2017 13:58:31 +0200
Subject: mtd: nand: lpc32xx_mlc: Fix an error handling path in
 lpc32xx_nand_probe()

If 'clk_prepare_enable()' fails, we must 'put' the corresponding clock.

Fixes: 4d26f012ab59 ("mtd: nand: lpc32xx_mlc: Handle return value of clk_prepare_enable.")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/lpc32xx_mlc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c
index c3bb358ef01e..5796468db653 100644
--- a/drivers/mtd/nand/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/lpc32xx_mlc.c
@@ -707,7 +707,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 	}
 	res = clk_prepare_enable(host->clk);
 	if (res)
-		goto err_exit1;
+		goto err_put_clk;
 
 	nand_chip->cmd_ctrl = lpc32xx_nand_cmd_ctrl;
 	nand_chip->dev_ready = lpc32xx_nand_device_ready;
@@ -814,6 +814,7 @@ err_exit3:
 		dma_release_channel(host->dma_chan);
 err_exit2:
 	clk_disable_unprepare(host->clk);
+err_put_clk:
 	clk_put(host->clk);
 err_exit1:
 	lpc32xx_wp_enable(host);
-- 
cgit 


From e580b8bc4316cbb8bbffb5ed7bf1e477064755ed Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Mon, 18 Sep 2017 09:40:12 +0100
Subject: arm64: efi: Don't include EFI fpsimd save/restore code in non-EFI
 kernels

__efi_fpsimd_begin()/__efi_fpsimd_end() are for use when making EFI
calls only, so using them in non-EFI kernels is not allowed.

This patch compiles them out if CONFIG_EFI is not set.

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/fpsimd.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 3a68cf38a6b3..f444f374bd7b 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -321,6 +321,8 @@ void kernel_neon_end(void)
 }
 EXPORT_SYMBOL(kernel_neon_end);
 
+#ifdef CONFIG_EFI
+
 static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state);
 static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
 
@@ -370,6 +372,8 @@ void __efi_fpsimd_end(void)
 		kernel_neon_end();
 }
 
+#endif /* CONFIG_EFI */
+
 #endif /* CONFIG_KERNEL_MODE_NEON */
 
 #ifdef CONFIG_CPU_PM
-- 
cgit 


From c73cc120a33e12e4e254b4b42bc613204ccb923b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 18 Sep 2017 11:20:19 +0100
Subject: arm64: relax assembly code alignment from 16 byte to 4 byte

Aarch64 instructions must be word aligned.  The current 16 byte
alignment is more than enough.  Relax it into 4 byte alignment.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/linkage.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index 636c1bced7d4..1b266292f0be 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_LINKAGE_H
 #define __ASM_LINKAGE_H
 
-#define __ALIGN		.align 4
-#define __ALIGN_STR	".align 4"
+#define __ALIGN		.align 2
+#define __ALIGN_STR	".align 2"
 
 #endif
-- 
cgit 


From 3d6a7b99e3fa29b92d6288487e057e0a596bd2b0 Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@cavium.com>
Date: Mon, 18 Sep 2017 11:20:20 +0100
Subject: arm64: ensure the kernel is compiled for LP64

The kernel needs to be compiled as a LP64 binary for ARM64, even when
using a compiler that defaults to code-generation for the ILP32 ABI.
Consequently, we need to explicitly pass '-mabi=lp64' (supported on
gcc-4.9 and newer).

Signed-off-by: Andrew Pinski <Andrew.Pinski@caviumnetworks.com>
Signed-off-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
Reviewed-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 9b41f1e3b1a0..939b310913cf 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -50,17 +50,22 @@ KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
 KBUILD_CFLAGS	+= $(call cc-option, -mpc-relative-literal-loads)
 KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst)
 
+KBUILD_CFLAGS	+= $(call cc-option,-mabi=lp64)
+KBUILD_AFLAGS	+= $(call cc-option,-mabi=lp64)
+
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
 KBUILD_CPPFLAGS	+= -mbig-endian
 CHECKFLAGS	+= -D__AARCH64EB__
 AS		+= -EB
 LD		+= -EB
+LDFLAGS		+= -maarch64linuxb
 UTS_MACHINE	:= aarch64_be
 else
 KBUILD_CPPFLAGS	+= -mlittle-endian
 CHECKFLAGS	+= -D__AARCH64EL__
 AS		+= -EL
 LD		+= -EL
+LDFLAGS		+= -maarch64linux
 UTS_MACHINE	:= aarch64
 endif
 
-- 
cgit 


From 0a51fb7174f2b7866b4d7a4a5c23b685b674beb6 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Sat, 9 Sep 2017 12:19:22 +0300
Subject: quota: add missing lock into __dquot_transfer()

Lock dq_dqb_lock around dquot_decr_inodes()

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Fixes: 7b9ca4c61bc2 ("quota: Reduce contention on dq_data_lock")
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 8381db9db6d9..50b0556a124f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1980,7 +1980,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 		ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0,
 				      &warn_to[cnt]);
 		if (ret) {
+			spin_lock(&transfer_to[cnt]->dq_dqb_lock);
 			dquot_decr_inodes(transfer_to[cnt], inode_usage);
+			spin_unlock(&transfer_to[cnt]->dq_dqb_lock);
 			goto over_quota;
 		}
 	}
-- 
cgit 


From 0ab0b271bf75073cb254b5ea0593aceae5a42bd3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 16 Sep 2017 22:32:12 +0200
Subject: isofs: fix build regression

The new isofs_show_options() function fails to build when CONFIG_NLS
is disabled:

fs/isofs/inode.c: In function 'isofs_show_options':
fs/isofs/inode.c:518:44: error: 'CONFIG_NLS_DEFAULT' undeclared (first use in this function)
fs/isofs/inode.c:518:44: note: each undeclared identifier is reported only once for each function it appears in

This adds a check for CONFIG_JOLIET (which selects NLS), matching
the other uses of the iocharset handling in this file.

Fixes: 6fecb86a44f5 ("isofs: Implement show_options")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/isofs/inode.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index db692f554158..447a24d77b89 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -514,9 +514,11 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
 	if (sbi->s_fmode != ISOFS_INVALID_MODE)
 		seq_printf(m, ",fmode=%o", sbi->s_fmode);
 
+#ifdef CONFIG_JOLIET
 	if (sbi->s_nls_iocharset &&
 	    strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
 		seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
+#endif
 	return 0;
 }
 
-- 
cgit 


From 74378c5c8cdaf0ce9f65e67cbd0613286f2c3bad Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 5 Sep 2017 20:16:27 +0200
Subject: driver core: Fix link to device power management documentation

Correct location as of commit 2728b2d2e5be4b82 (PM / core / docs:
Convert sleep states API document to reST).

Fixes: 2728b2d2e5be4b82 (PM / core / docs: Convert sleep states API document to reST)
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/device.h b/include/linux/device.h
index c6f27207dbe8..1d2607923a24 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -838,7 +838,7 @@ struct dev_links_info {
  * @driver_data: Private pointer for driver specific info.
  * @links:	Links to suppliers and consumers of this device.
  * @power:	For device power management.
- * 		See Documentation/power/admin-guide/devices.rst for details.
+ *		See Documentation/driver-api/pm/devices.rst for details.
  * @pm_domain:	Provide callbacks that are executed during system suspend,
  * 		hibernation, system resume and during runtime PM transitions
  * 		along with subsystem-level and driver-level callbacks.
-- 
cgit 


From 096a2c610df279d001a8f61faa3eb7f98ca6196b Mon Sep 17 00:00:00 2001
From: Rafael Wysocki <rjw@rjwysocki.net>
Date: Fri, 8 Sep 2017 01:20:54 +0200
Subject: ACPI / PMIC: Add code reviewers to MAINTAINERS

Andy and Mika review code changes under drivers/acpi/pmic/ on
a regular basis and I rely on their help with that, so add them
as code reviwewers for that part of the kernel.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 MAINTAINERS | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2281af4b41b6..fa4e916b72e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -352,6 +352,18 @@ L:	linux-acpi@vger.kernel.org
 S:	Maintained
 F:	drivers/acpi/arm64
 
+ACPI PMIC DRIVERS
+M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:	Len Brown <lenb@kernel.org>
+R:	Andy Shevchenko <andy@infradead.org>
+R:	Mika Westerberg <mika.westerberg@linux.intel.com>
+L:	linux-acpi@vger.kernel.org
+Q:	https://patchwork.kernel.org/project/linux-acpi/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+B:	https://bugzilla.kernel.org
+S:	Supported
+F:	drivers/acpi/pmic/
+
 ACPI THERMAL DRIVER
 M:	Zhang Rui <rui.zhang@intel.com>
 L:	linux-acpi@vger.kernel.org
-- 
cgit 


From 41ba8bd0829f5c210715ece8b0f699bed0da8eb5 Mon Sep 17 00:00:00 2001
From: "Jan H. Schönherr" <jschoenh@amazon.de>
Date: Tue, 5 Sep 2017 23:14:29 +0200
Subject: PM / QoS: Use the correct variable to check the QoS request type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the actual function argument for the validation of the request type,
instead of the type field in a fresh (supposedly zero-initialized)
request structure.

Signed-off-by: Jan H. Schönherr <jschoenh@amazon.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/qos.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
index f850daeffba4..277d43a83f53 100644
--- a/drivers/base/power/qos.c
+++ b/drivers/base/power/qos.c
@@ -277,11 +277,11 @@ void dev_pm_qos_constraints_destroy(struct device *dev)
 	mutex_unlock(&dev_pm_qos_sysfs_mtx);
 }
 
-static bool dev_pm_qos_invalid_request(struct device *dev,
-				       struct dev_pm_qos_request *req)
+static bool dev_pm_qos_invalid_req_type(struct device *dev,
+					enum dev_pm_qos_req_type type)
 {
-	return !req || (req->type == DEV_PM_QOS_LATENCY_TOLERANCE
-			&& !dev->power.set_latency_tolerance);
+	return type == DEV_PM_QOS_LATENCY_TOLERANCE &&
+	       !dev->power.set_latency_tolerance;
 }
 
 static int __dev_pm_qos_add_request(struct device *dev,
@@ -290,7 +290,7 @@ static int __dev_pm_qos_add_request(struct device *dev,
 {
 	int ret = 0;
 
-	if (!dev || dev_pm_qos_invalid_request(dev, req))
+	if (!dev || !req || dev_pm_qos_invalid_req_type(dev, type))
 		return -EINVAL;
 
 	if (WARN(dev_pm_qos_request_active(req),
-- 
cgit 


From 73600b619bf8b9803a0610624a0e11f5ed8f761e Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe.montjoie@gmail.com>
Date: Sat, 2 Sep 2017 10:49:38 +0200
Subject: mtd: nand: remove unused blockmask variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fix the following build warning:
drivers/mtd/nand/nand_base.c:2671:30: attention : variable ‘blockmask’ set but not used [-Wunused-but-set-variable]

Fixes: 0b4773fd1649 ("mtd: nand: Drop unused cached programming support")
Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index bcc8cef1c615..12edaae17d81 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2668,7 +2668,7 @@ static uint8_t *nand_fill_oob(struct mtd_info *mtd, uint8_t *oob, size_t len,
 static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 			     struct mtd_oob_ops *ops)
 {
-	int chipnr, realpage, page, blockmask, column;
+	int chipnr, realpage, page, column;
 	struct nand_chip *chip = mtd_to_nand(mtd);
 	uint32_t writelen = ops->len;
 
@@ -2704,7 +2704,6 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 
 	realpage = (int)(to >> chip->page_shift);
 	page = realpage & chip->pagemask;
-	blockmask = (1 << (chip->phys_erase_shift - chip->page_shift)) - 1;
 
 	/* Invalidate the page cache, when we write to the cached page */
 	if (to <= ((loff_t)chip->pagebuf << chip->page_shift) &&
-- 
cgit 


From fe9c1c9555a529bf678148f719c1b9f1730f68a3 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 14 Sep 2017 14:38:58 +0200
Subject: xen: don't compile pv-specific parts if XEN_PV isn't configured

xenbus_client.c contains some functions specific for pv guests.
Enclose them with #ifdef CONFIG_XEN_PV to avoid compiling them when
they are not needed (e.g. on ARM).

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/xenbus/xenbus_client.c | 130 +++++++++++++++++++------------------
 1 file changed, 67 insertions(+), 63 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 82a8866758ee..a1c17000129b 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -519,64 +519,6 @@ static int __xenbus_map_ring(struct xenbus_device *dev,
 	return err;
 }
 
-static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
-				     grant_ref_t *gnt_refs,
-				     unsigned int nr_grefs,
-				     void **vaddr)
-{
-	struct xenbus_map_node *node;
-	struct vm_struct *area;
-	pte_t *ptes[XENBUS_MAX_RING_GRANTS];
-	phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
-	int err = GNTST_okay;
-	int i;
-	bool leaked;
-
-	*vaddr = NULL;
-
-	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
-		return -EINVAL;
-
-	node = kzalloc(sizeof(*node), GFP_KERNEL);
-	if (!node)
-		return -ENOMEM;
-
-	area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes);
-	if (!area) {
-		kfree(node);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < nr_grefs; i++)
-		phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;
-
-	err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
-				phys_addrs,
-				GNTMAP_host_map | GNTMAP_contains_pte,
-				&leaked);
-	if (err)
-		goto failed;
-
-	node->nr_handles = nr_grefs;
-	node->pv.area = area;
-
-	spin_lock(&xenbus_valloc_lock);
-	list_add(&node->next, &xenbus_valloc_pages);
-	spin_unlock(&xenbus_valloc_lock);
-
-	*vaddr = area->addr;
-	return 0;
-
-failed:
-	if (!leaked)
-		free_vm_area(area);
-	else
-		pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
-
-	kfree(node);
-	return err;
-}
-
 struct map_ring_valloc_hvm
 {
 	unsigned int idx;
@@ -725,6 +667,65 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
 
+#ifdef CONFIG_XEN_PV
+static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
+				     grant_ref_t *gnt_refs,
+				     unsigned int nr_grefs,
+				     void **vaddr)
+{
+	struct xenbus_map_node *node;
+	struct vm_struct *area;
+	pte_t *ptes[XENBUS_MAX_RING_GRANTS];
+	phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
+	int err = GNTST_okay;
+	int i;
+	bool leaked;
+
+	*vaddr = NULL;
+
+	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
+		return -EINVAL;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes);
+	if (!area) {
+		kfree(node);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_grefs; i++)
+		phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;
+
+	err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
+				phys_addrs,
+				GNTMAP_host_map | GNTMAP_contains_pte,
+				&leaked);
+	if (err)
+		goto failed;
+
+	node->nr_handles = nr_grefs;
+	node->pv.area = area;
+
+	spin_lock(&xenbus_valloc_lock);
+	list_add(&node->next, &xenbus_valloc_pages);
+	spin_unlock(&xenbus_valloc_lock);
+
+	*vaddr = area->addr;
+	return 0;
+
+failed:
+	if (!leaked)
+		free_vm_area(area);
+	else
+		pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
+
+	kfree(node);
+	return err;
+}
+
 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 {
 	struct xenbus_map_node *node;
@@ -788,6 +789,12 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 	return err;
 }
 
+static const struct xenbus_ring_ops ring_ops_pv = {
+	.map = xenbus_map_ring_valloc_pv,
+	.unmap = xenbus_unmap_ring_vfree_pv,
+};
+#endif
+
 struct unmap_ring_vfree_hvm
 {
 	unsigned int idx;
@@ -916,11 +923,6 @@ enum xenbus_state xenbus_read_driver_state(const char *path)
 }
 EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
 
-static const struct xenbus_ring_ops ring_ops_pv = {
-	.map = xenbus_map_ring_valloc_pv,
-	.unmap = xenbus_unmap_ring_vfree_pv,
-};
-
 static const struct xenbus_ring_ops ring_ops_hvm = {
 	.map = xenbus_map_ring_valloc_hvm,
 	.unmap = xenbus_unmap_ring_vfree_hvm,
@@ -928,8 +930,10 @@ static const struct xenbus_ring_ops ring_ops_hvm = {
 
 void __init xenbus_ring_ops_init(void)
 {
+#ifdef CONFIG_XEN_PV
 	if (!xen_feature(XENFEAT_auto_translated_physmap))
 		ring_ops = &ring_ops_pv;
 	else
+#endif
 		ring_ops = &ring_ops_hvm;
 }
-- 
cgit 


From b0ade85165b3caeb0cd908cffe5921a39f25c243 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 10 Sep 2017 13:41:41 +0200
Subject: netfilter: nat: Do not use ARRAY_SIZE() on spinlocks to fix zero div
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If no spinlock debugging options (CONFIG_GENERIC_LOCKBREAK,
CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_LOCK_ALLOC) are enabled on a UP
platform (e.g. m68k defconfig), arch_spinlock_t is an empty struct,
hence using ARRAY_SIZE(nf_nat_locks) causes a division by zero:

    net/netfilter/nf_nat_core.c: In function ‘nf_nat_setup_info’:
    net/netfilter/nf_nat_core.c:432: warning: division by zero
    net/netfilter/nf_nat_core.c: In function ‘__nf_nat_cleanup_conntrack’:
    net/netfilter/nf_nat_core.c:535: warning: division by zero
    net/netfilter/nf_nat_core.c:537: warning: division by zero
    net/netfilter/nf_nat_core.c: In function ‘nf_nat_init’:
    net/netfilter/nf_nat_core.c:810: warning: division by zero
    net/netfilter/nf_nat_core.c:811: warning: division by zero
    net/netfilter/nf_nat_core.c:824: warning: division by zero

Fix this by using the CONNTRACK_LOCKS definition instead.

Suggested-by: Florian Westphal <fw@strlen.de>
Fixes: 8073e960a03bf7b5 ("netfilter: nat: use keyed locks")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_nat_core.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index f393a7086025..af8345fc4fbd 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -429,7 +429,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 
 		srchash = hash_by_src(net,
 				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-		lock = &nf_nat_locks[srchash % ARRAY_SIZE(nf_nat_locks)];
+		lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
 		spin_lock_bh(lock);
 		hlist_add_head_rcu(&ct->nat_bysource,
 				   &nf_nat_bysource[srchash]);
@@ -532,9 +532,9 @@ static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
 	unsigned int h;
 
 	h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	spin_lock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]);
+	spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
 	hlist_del_rcu(&ct->nat_bysource);
-	spin_unlock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]);
+	spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
 }
 
 static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
@@ -807,8 +807,8 @@ static int __init nf_nat_init(void)
 
 	/* Leave them the same for the moment. */
 	nf_nat_htable_size = nf_conntrack_htable_size;
-	if (nf_nat_htable_size < ARRAY_SIZE(nf_nat_locks))
-		nf_nat_htable_size = ARRAY_SIZE(nf_nat_locks);
+	if (nf_nat_htable_size < CONNTRACK_LOCKS)
+		nf_nat_htable_size = CONNTRACK_LOCKS;
 
 	nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
 	if (!nf_nat_bysource)
@@ -821,7 +821,7 @@ static int __init nf_nat_init(void)
 		return ret;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(nf_nat_locks); i++)
+	for (i = 0; i < CONNTRACK_LOCKS; i++)
 		spin_lock_init(&nf_nat_locks[i]);
 
 	nf_ct_helper_expectfn_register(&follow_master_nat);
-- 
cgit 


From 7f4f7dd4417d9efd038b14d39c70170db2e0baa0 Mon Sep 17 00:00:00 2001
From: Vishwanath Pai <vpai@akamai.com>
Date: Mon, 11 Sep 2017 21:52:40 +0200
Subject: netfilter: ipset: ipset list may return wrong member count for set
 with timeout

Simple testcase:

$ ipset create test hash:ip timeout 5
$ ipset add test 1.2.3.4
$ ipset add test 1.2.2.2
$ sleep 5

$ ipset l
Name: test
Type: hash:ip
Revision: 5
Header: family inet hashsize 1024 maxelem 65536 timeout 5
Size in memory: 296
References: 0
Number of entries: 2
Members:

We return "Number of entries: 2" but no members are listed. That is
because mtype_list runs "ip_set_timeout_expired" and does not list the
expired entries, but set->elements is never upated (until mtype_gc
cleans it up later).

Reviewed-by: Joshua Hunt <johunt@akamai.com>
Signed-off-by: Vishwanath Pai <vpai@akamai.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_hash_gen.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index f236c0bc7b3f..51063d9ed0f7 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -1041,12 +1041,24 @@ out:
 static int
 mtype_head(struct ip_set *set, struct sk_buff *skb)
 {
-	const struct htype *h = set->data;
+	struct htype *h = set->data;
 	const struct htable *t;
 	struct nlattr *nested;
 	size_t memsize;
 	u8 htable_bits;
 
+	/* If any members have expired, set->elements will be wrong
+	 * mytype_expire function will update it with the right count.
+	 * we do not hold set->lock here, so grab it first.
+	 * set->elements can still be incorrect in the case of a huge set,
+	 * because elements might time out during the listing.
+	 */
+	if (SET_WITH_TIMEOUT(set)) {
+		spin_lock_bh(&set->lock);
+		mtype_expire(set, h);
+		spin_unlock_bh(&set->lock);
+	}
+
 	rcu_read_lock_bh();
 	t = rcu_dereference_bh_nfnl(h->table);
 	memsize = mtype_ahash_memsize(h, t) + set->ext_size;
-- 
cgit 


From 64cfcaed7b25f69d8b7a091a23961f50c6788a66 Mon Sep 17 00:00:00 2001
From: Daniel Díaz <daniel.diaz@linaro.org>
Date: Fri, 7 Jul 2017 10:27:06 -0500
Subject: selftests: net: More graceful finding of `ip'.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ip tool might be provided by another package (such as
Busybox), not necessarily implementing the -Version switch.
Trying an actual usage (`ip link show') might be a better
test that would work with all implementations of `ip'.

Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/net/netdevice.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
index 4e00568d70c2..90cb903c3381 100755
--- a/tools/testing/selftests/net/netdevice.sh
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -178,7 +178,7 @@ if [ "$(id -u)" -ne 0 ];then
 	exit 0
 fi
 
-ip -Version 2>/dev/null >/dev/null
+ip link show 2>/dev/null >/dev/null
 if [ $? -ne 0 ];then
 	echo "SKIP: Could not run test without the ip tool"
 	exit 0
-- 
cgit 


From 96e5335859e30fa2bb2da8befc86292c3c1a73dd Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.vnet.ibm.com>
Date: Mon, 11 Sep 2017 12:49:01 +0200
Subject: tools: fix testing/selftests/sigaltstack for s390x
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On s390x the compilation of the file sas.c in directory
tools/testing/selftests/sigaltstack fails with this error message:

   root@s35lp76 testing]# make selftests/sigaltstack/sas
   cc     selftests/sigaltstack/sas.c   -o selftests/sigaltstack/sas
   selftests/sigaltstack/sas.c: In function ‘my_usr1’:
   selftests/sigaltstack/sas.c:42:25: error: invalid register name for ‘sp’
     register unsigned long sp asm("sp");
                         ^~
   <builtin>: recipe for target 'selftests/sigaltstack/sas' failed
   make: *** [selftests/sigaltstack/sas] Error 1
   [root@s35lp76 testing]#

On s390x the stack pointer is register r15, the register name "sp"
is unknown.
Make this line platform dependend and use register r15.

With this patch the compilation and test succeeds:

   [root@s35lp76 testing]# ./selftests/sigaltstack/sas
   TAP version 13
   ok 1 Initial sigaltstack state was SS_DISABLE
   # [RUN]	signal USR1
   ok 2 sigaltstack is disabled in sighandler
   # [RUN]	switched to user ctx
   # [RUN]	signal USR2
   # [OK]	Stack preserved
   ok 3 sigaltstack is still SS_AUTODISARM after signal
   Pass 3 Fail 0 Xfail 0 Xpass 0 Skip 0 Error 0
   1..3
   [root@s35lp76 testing]#

Signed-off-by: Thomas Richter <tmricht@linux.vnet.ibm.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/sigaltstack/sas.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index 7d406c3973ba..97bb150837df 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -39,7 +39,11 @@ void my_usr1(int sig, siginfo_t *si, void *u)
 	stack_t stk;
 	struct stk_data *p;
 
+#if __s390x__
+	register unsigned long sp asm("%15");
+#else
 	register unsigned long sp asm("sp");
+#endif
 
 	if (sp < (unsigned long)sstack ||
 			sp >= (unsigned long)sstack + SIGSTKSZ) {
-- 
cgit 


From 172a8ca880f1c8b01bc4d1e0239bcb293ef65e0b Mon Sep 17 00:00:00 2001
From: Fathi Boudra <fathi.boudra@linaro.org>
Date: Thu, 29 Jun 2017 12:39:53 +0300
Subject: selftests: breakpoints: re-order TEST_GEN_PROGS targets

breakpoint_test can fail on arm64 with older/unpatched glibc:

 breakpoint_test_arm64.c: In function 'run_test':
 breakpoint_test_arm64.c:170:25: error: 'TRAP_HWBKPT' undeclared (first use
 in this function)

due to glibc missing several of the TRAP_* constants in the userspace
definitions. Specifically TRAP_BRANCH and TRAP_HWBKPT.
See https://sourceware.org/bugzilla/show_bug.cgi?id=21286

It prevents to build step_after_suspend_test afterward, since make won't
continue.

We still want to be able to build and run the test, independently of
breakpoint_test_arm64 build failure. Re-order TEST_GEN_PROGS to be able to
build step_after_suspend_test first.

Signed-off-by: Fathi Boudra <fathi.boudra@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/breakpoints/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
index 6b214b7b10fb..247b0a1899d7 100644
--- a/tools/testing/selftests/breakpoints/Makefile
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -2,14 +2,14 @@
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
+TEST_GEN_PROGS := step_after_suspend_test
+
 ifeq ($(ARCH),x86)
-TEST_GEN_PROGS := breakpoint_test
+TEST_GEN_PROGS += breakpoint_test
 endif
 ifneq (,$(filter $(ARCH),aarch64 arm64))
-TEST_GEN_PROGS := breakpoint_test_arm64
+TEST_GEN_PROGS += breakpoint_test_arm64
 endif
 
-TEST_GEN_PROGS += step_after_suspend_test
-
 include ../lib.mk
 
-- 
cgit 


From 67b2e30eb7b346db60afe91b0927738fb604d0a4 Mon Sep 17 00:00:00 2001
From: Daniel Díaz <daniel.diaz@linaro.org>
Date: Thu, 17 Aug 2017 10:55:30 -0500
Subject: selftests: intel_pstate: build only on x86
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These tests are only for x86, so don't try to build or run
them on other architectures.

Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/intel_pstate/Makefile | 2 ++
 tools/testing/selftests/intel_pstate/run.sh   | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
index 849a90ffe8dd..a97e24edde39 100644
--- a/tools/testing/selftests/intel_pstate/Makefile
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -1,7 +1,9 @@
 CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
 LDLIBS := $(LDLIBS) -lm
 
+ifeq (,$(filter $(ARCH),x86))
 TEST_GEN_FILES := msr aperf
+endif
 
 TEST_PROGS := run.sh
 
diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
index 7868c106b8b1..1b4b8302dfc2 100755
--- a/tools/testing/selftests/intel_pstate/run.sh
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -29,6 +29,11 @@
 
 EVALUATE_ONLY=0
 
+if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
+	echo "$0 # Skipped: Test can only run on x86 architectures."
+	exit 0
+fi
+
 max_cpus=$(($(nproc)-1))
 
 # compile programs
-- 
cgit 


From 6f0003363a13be699ba945cfa4074193e04fbea5 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Fri, 8 Sep 2017 14:01:18 +0200
Subject: selftests/intel_pstate: No need to compile test progs in the run
 script

Both test programs are being compiled by make, so no need to compile both
programs in the runner script.
This resolves an error when installing all selftests via make install
and run them in a different environemnt.

Running tests in intel_pstate
========================================
./run.sh: line 35: gcc: command not found
Problem compiling aperf.c.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/intel_pstate/run.sh | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
index 1b4b8302dfc2..d3ab48f91cd6 100755
--- a/tools/testing/selftests/intel_pstate/run.sh
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -36,12 +36,6 @@ fi
 
 max_cpus=$(($(nproc)-1))
 
-# compile programs
-gcc aperf.c -Wall -D_GNU_SOURCE -o aperf  -lm
-[ $? -ne 0 ] && echo "Problem compiling aperf.c." && exit 1
-gcc -o msr msr.c -lm
-[ $? -ne 0 ] && echo "Problem compiling msr.c." && exit 1
-
 function run_test () {
 
 	file_ext=$1
-- 
cgit 


From 56a268cd4a410081d477913d51e044039d3a8834 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Fri, 8 Sep 2017 13:19:23 +0200
Subject: selftests/bpf: Make bpf_util work on uniprocessor systems

The current implementation fails to work on uniprocessor systems.
Fix the parser to also handle the uniprocessor case.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/bpf/bpf_util.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 20ecbaa0d85d..6c53a8906eff 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -12,6 +12,7 @@ static inline unsigned int bpf_num_possible_cpus(void)
 	unsigned int start, end, possible_cpus = 0;
 	char buff[128];
 	FILE *fp;
+	int n;
 
 	fp = fopen(fcpu, "r");
 	if (!fp) {
@@ -20,17 +21,17 @@ static inline unsigned int bpf_num_possible_cpus(void)
 	}
 
 	while (fgets(buff, sizeof(buff), fp)) {
-		if (sscanf(buff, "%u-%u", &start, &end) == 2) {
-			possible_cpus = start == 0 ? end + 1 : 0;
-			break;
+		n = sscanf(buff, "%u-%u", &start, &end);
+		if (n == 0) {
+			printf("Failed to retrieve # possible CPUs!\n");
+			exit(1);
+		} else if (n == 1) {
+			end = start;
 		}
+		possible_cpus = start == 0 ? end + 1 : 0;
+		break;
 	}
-
 	fclose(fp);
-	if (!possible_cpus) {
-		printf("Failed to retrieve # possible CPUs!\n");
-		exit(1);
-	}
 
 	return possible_cpus;
 }
-- 
cgit 


From 84c06566cfb84e5f6aed9582d4570df8406700c9 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Fri, 8 Sep 2017 14:01:17 +0200
Subject: selftests/ftrace: multiple_kprobes: Also check for support

The multiple_kprobes test case fails to check for KPROBE_EVENT support.
Add the check to prevent a false test result.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index 2a1cb9908746..a4fd4c851a5b 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -1,6 +1,8 @@
 #!/bin/sh
 # description: Register/unregister many kprobe events
 
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
 # ftrace fentry skip size depends on the machine architecture.
 # Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le
 case `uname -m` in
-- 
cgit 


From 63ecc3d9436f8012e49dc846d6cb0a85a3433517 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Wed, 13 Sep 2017 19:30:51 -0600
Subject: udpv6: Fix the checksum computation when HW checksum does not apply

While trying an ESP transport mode encryption for UDPv6 packets of
datagram size 1436 with MTU 1500, checksum error was observed in
the secondary fragment.

This error occurs due to the UDP payload checksum being missed out
when computing the full checksum for these packets in
udp6_hwcsum_outgoing().

Fixes: d39d938c8228 ("ipv6: Introduce udpv6_send_skb()")
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e2ecfb137297..40d7234c27b9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1015,6 +1015,7 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
 		 */
 		offset = skb_transport_offset(skb);
 		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+		csum = skb->csum;
 
 		skb->ip_summed = CHECKSUM_NONE;
 
-- 
cgit 


From 265698d7e6132a2d41471135534f4f36ad15b09c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 18 Sep 2017 22:46:36 +0200
Subject: nl80211: fix null-ptr dereference on invalid mesh configuration

If TX rates are specified during mesh join, the channel must
also be specified. Check the channel pointer to avoid a null
pointer dereference if it isn't.

Reported-by: Jouni Malinen <j@w1.fi>
Fixes: 8564e38206de ("cfg80211: add checks for beacon rate, extend to mesh")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fbd5593e88cb..690874293cfc 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -9987,6 +9987,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 		if (err)
 			return err;
 
+		if (!setup.chandef.chan)
+			return -EINVAL;
+
 		err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band,
 					      &setup.beacon_rate);
 		if (err)
-- 
cgit 


From 76cc0d3282d4b933fa144fa41fbc5318e0fdca24 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 15 Sep 2017 12:00:07 +0800
Subject: ip6_gre: skb_push ipv6hdr before packing the header in ip6gre_header

Now in ip6gre_header before packing the ipv6 header, it skb_push t->hlen
which only includes encap_hlen + tun_hlen. It means greh and inner header
would be over written by ipv6 stuff and ipv6h might have no chance to set
up.

Jianlin found this issue when using remote any on ip6_gre, the packets he
captured on gre dev are truncated:

22:50:26.210866 Out ethertype IPv6 (0x86dd), length 120: truncated-ip6 -\
8128 bytes missing!(flowlabel 0x92f40, hlim 0, next-header Options (0)  \
payload length: 8192) ::1:2000:0 > ::1:0:86dd: HBH [trunc] ip-proto-128 \
8184

It should also skb_push ipv6hdr so that ipv6h points to the right position
to set ipv6 stuff up.

This patch is to skb_push hlen + sizeof(*ipv6h) and also fix some indents
in ip6gre_header.

Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b7a72d409334..20f66f4c9460 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -940,24 +940,25 @@ done:
 }
 
 static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
-			unsigned short type,
-			const void *daddr, const void *saddr, unsigned int len)
+			 unsigned short type, const void *daddr,
+			 const void *saddr, unsigned int len)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	struct ipv6hdr *ipv6h = skb_push(skb, t->hlen);
-	__be16 *p = (__be16 *)(ipv6h+1);
+	struct ipv6hdr *ipv6h;
+	__be16 *p;
 
-	ip6_flow_hdr(ipv6h, 0,
-		     ip6_make_flowlabel(dev_net(dev), skb,
-					t->fl.u.ip6.flowlabel, true,
-					&t->fl.u.ip6));
+	ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h));
+	ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb,
+						  t->fl.u.ip6.flowlabel,
+						  true, &t->fl.u.ip6));
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = NEXTHDR_GRE;
 	ipv6h->saddr = t->parms.laddr;
 	ipv6h->daddr = t->parms.raddr;
 
-	p[0]		= t->parms.o_flags;
-	p[1]		= htons(type);
+	p = (__be16 *)(ipv6h + 1);
+	p[0] = t->parms.o_flags;
+	p[1] = htons(type);
 
 	/*
 	 *	Set the source hardware address.
-- 
cgit 


From 7b4dc3c0da0d66e7b20a826c537d41bb73e4df54 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Fri, 18 Aug 2017 17:49:58 +0800
Subject: drm/i915/gvt: Fix incorrect PCI BARs reporting

Looking at our virtual PCI device, we can see surprising Region 4 and Region 5.
00:10.0 VGA compatible controller: Intel Corporation Sky Lake Integrated Graphics (rev 06) (prog-if 00 [VGA controller])
        ....
        Region 0: Memory at 140000000 (64-bit, non-prefetchable) [size=16M]
        Region 2: Memory at 180000000 (64-bit, prefetchable) [size=1G]
        Region 4: Memory at <ignored> (32-bit, non-prefetchable)
        Region 5: Memory at <ignored> (32-bit, non-prefetchable)
        Expansion ROM at febd6000 [disabled] [size=2K]

The fact is that we only implemented BAR0 and BAR2. Surprising Region 4 and
Region 5 are shown because we report their size as 0xffffffff. They should
report size 0 instead.

BTW, the physical GPU has a PIO BAR. GVTg hasn't implemented PIO access, so
we ignored this BAR for vGPU device.

v2: fix BAR size value calculation.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1458032
Signed-off-by: Changbin Du <changbin.du@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
(cherry picked from commit f1751362d6357a90bc6e53176cec715ff2dbed74)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gvt/cfg_space.c | 113 +++++++++++++++--------------------
 1 file changed, 48 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c
index 40af17ec6312..ff3154fe6588 100644
--- a/drivers/gpu/drm/i915/gvt/cfg_space.c
+++ b/drivers/gpu/drm/i915/gvt/cfg_space.c
@@ -197,78 +197,65 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu,
 static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset,
 	void *p_data, unsigned int bytes)
 {
-	unsigned int bar_index =
-		(rounddown(offset, 8) % PCI_BASE_ADDRESS_0) / 8;
 	u32 new = *(u32 *)(p_data);
 	bool lo = IS_ALIGNED(offset, 8);
 	u64 size;
 	int ret = 0;
 	bool mmio_enabled =
 		vgpu_cfg_space(vgpu)[PCI_COMMAND] & PCI_COMMAND_MEMORY;
+	struct intel_vgpu_pci_bar *bars = vgpu->cfg_space.bar;
 
-	if (WARN_ON(bar_index >= INTEL_GVT_PCI_BAR_MAX))
-		return -EINVAL;
-
+	/*
+	 * Power-up software can determine how much address
+	 * space the device requires by writing a value of
+	 * all 1's to the register and then reading the value
+	 * back. The device will return 0's in all don't-care
+	 * address bits.
+	 */
 	if (new == 0xffffffff) {
-		/*
-		 * Power-up software can determine how much address
-		 * space the device requires by writing a value of
-		 * all 1's to the register and then reading the value
-		 * back. The device will return 0's in all don't-care
-		 * address bits.
-		 */
-		size = vgpu->cfg_space.bar[bar_index].size;
-		if (lo) {
-			new = rounddown(new, size);
-		} else {
-			u32 val = vgpu_cfg_space(vgpu)[rounddown(offset, 8)];
-			/* for 32bit mode bar it returns all-0 in upper 32
-			 * bit, for 64bit mode bar it will calculate the
-			 * size with lower 32bit and return the corresponding
-			 * value
+		switch (offset) {
+		case PCI_BASE_ADDRESS_0:
+		case PCI_BASE_ADDRESS_1:
+			size = ~(bars[INTEL_GVT_PCI_BAR_GTTMMIO].size -1);
+			intel_vgpu_write_pci_bar(vgpu, offset,
+						size >> (lo ? 0 : 32), lo);
+			/*
+			 * Untrap the BAR, since guest hasn't configured a
+			 * valid GPA
 			 */
-			if (val & PCI_BASE_ADDRESS_MEM_TYPE_64)
-				new &= (~(size-1)) >> 32;
-			else
-				new = 0;
-		}
-		/*
-		 * Unmapp & untrap the BAR, since guest hasn't configured a
-		 * valid GPA
-		 */
-		switch (bar_index) {
-		case INTEL_GVT_PCI_BAR_GTTMMIO:
 			ret = trap_gttmmio(vgpu, false);
 			break;
-		case INTEL_GVT_PCI_BAR_APERTURE:
+		case PCI_BASE_ADDRESS_2:
+		case PCI_BASE_ADDRESS_3:
+			size = ~(bars[INTEL_GVT_PCI_BAR_APERTURE].size -1);
+			intel_vgpu_write_pci_bar(vgpu, offset,
+						size >> (lo ? 0 : 32), lo);
 			ret = map_aperture(vgpu, false);
 			break;
+		default:
+			/* Unimplemented BARs */
+			intel_vgpu_write_pci_bar(vgpu, offset, 0x0, false);
 		}
-		intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
 	} else {
-		/*
-		 * Unmapp & untrap the old BAR first, since guest has
-		 * re-configured the BAR
-		 */
-		switch (bar_index) {
-		case INTEL_GVT_PCI_BAR_GTTMMIO:
-			ret = trap_gttmmio(vgpu, false);
+		switch (offset) {
+		case PCI_BASE_ADDRESS_0:
+		case PCI_BASE_ADDRESS_1:
+			/*
+			 * Untrap the old BAR first, since guest has
+			 * re-configured the BAR
+			 */
+			trap_gttmmio(vgpu, false);
+			intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+			ret = trap_gttmmio(vgpu, mmio_enabled);
 			break;
-		case INTEL_GVT_PCI_BAR_APERTURE:
-			ret = map_aperture(vgpu, false);
+		case PCI_BASE_ADDRESS_2:
+		case PCI_BASE_ADDRESS_3:
+			map_aperture(vgpu, false);
+			intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+			ret = map_aperture(vgpu, mmio_enabled);
 			break;
-		}
-		intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
-		/* Track the new BAR */
-		if (mmio_enabled) {
-			switch (bar_index) {
-			case INTEL_GVT_PCI_BAR_GTTMMIO:
-				ret = trap_gttmmio(vgpu, true);
-				break;
-			case INTEL_GVT_PCI_BAR_APERTURE:
-				ret = map_aperture(vgpu, true);
-				break;
-			}
+		default:
+			intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
 		}
 	}
 	return ret;
@@ -299,10 +286,7 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 	}
 
 	switch (rounddown(offset, 4)) {
-	case PCI_BASE_ADDRESS_0:
-	case PCI_BASE_ADDRESS_1:
-	case PCI_BASE_ADDRESS_2:
-	case PCI_BASE_ADDRESS_3:
+	case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5:
 		if (WARN_ON(!IS_ALIGNED(offset, 4)))
 			return -EINVAL;
 		return emulate_pci_bar_write(vgpu, offset, p_data, bytes);
@@ -344,7 +328,6 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu,
 	struct intel_gvt *gvt = vgpu->gvt;
 	const struct intel_gvt_device_info *info = &gvt->device_info;
 	u16 *gmch_ctl;
-	int i;
 
 	memcpy(vgpu_cfg_space(vgpu), gvt->firmware.cfg_space,
 	       info->cfg_space_size);
@@ -371,13 +354,13 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu,
 	 */
 	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_1, 0, 4);
 	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_3, 0, 4);
+	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_4, 0, 8);
 	memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4);
 
-	for (i = 0; i < INTEL_GVT_MAX_BAR_NUM; i++) {
-		vgpu->cfg_space.bar[i].size = pci_resource_len(
-					      gvt->dev_priv->drm.pdev, i * 2);
-		vgpu->cfg_space.bar[i].tracked = false;
-	}
+	vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].size =
+				pci_resource_len(gvt->dev_priv->drm.pdev, 0);
+	vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size =
+				pci_resource_len(gvt->dev_priv->drm.pdev, 2);
 }
 
 /**
-- 
cgit 


From 814feed316e9d15b0ae869ca729370e7630b8d13 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 10 Sep 2017 10:56:42 +0200
Subject: drm/i915: Fix an error handling in 'intel_framebuffer_init()'

We should go through the error handling path to decrease the
'framebuffer_references' as done everywhere else in this function.

Fixes: 2e2adb05736c ("drm/i915: Add render decompression support")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170910085642.13673-1-christophe.jaillet@wanadoo.fr
(cherry picked from commit 37875d6b3af702425ce292de81b77faf94766616)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f17275519484..00cd17c76fdc 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14030,7 +14030,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 
 		if (mode_cmd->handles[i] != mode_cmd->handles[0]) {
 			DRM_DEBUG_KMS("bad plane %d handle\n", i);
-			return -EINVAL;
+			goto err;
 		}
 
 		stride_alignment = intel_fb_stride_alignment(fb, i);
-- 
cgit 


From ac73661c6222faef1a97ec44f424234f165e4710 Mon Sep 17 00:00:00 2001
From: "Lee, Shawn C" <shawn.c.lee@intel.com>
Date: Tue, 12 Sep 2017 11:36:30 +0800
Subject: drm/i915/bxt: set min brightness from VBT

Min brightness value from vbt was missing for BXT platform.
This setting have to refer backlight ic spec to restrict
min backlight output. Without this restriction, driver would
allow to configure lower brightness value and violate
backlight ic requirement.

Fixes: 0fb890c01349 ("drm/i915/bxt: BLC implementation")
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Cooper Chiou <cooper.chiou@intel.com>
Cc: Gary C Wang <gary.c.wang@intel.com>
Signed-off-by: Shawn Lee <shawn.c.lee@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1505187390-7039-1-git-send-email-shawn.c.lee@intel.com
(cherry picked from commit c3881128cb672cf484a52fbb36b5daa9044d9168)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_panel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index a17b1de7d7e0..d4dd248ac9a8 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -1699,6 +1699,8 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused)
 	if (!panel->backlight.max)
 		return -ENODEV;
 
+	panel->backlight.min = get_backlight_min_vbt(connector);
+
 	val = bxt_get_backlight(connector);
 	val = intel_panel_compute_brightness(connector, val);
 	panel->backlight.level = clamp(val, panel->backlight.min,
-- 
cgit 


From abeae421b03d800d33894df7fbca6d00c70c358e Mon Sep 17 00:00:00 2001
From: Uma Shankar <uma.shankar@intel.com>
Date: Tue, 5 Sep 2017 15:14:31 +0530
Subject: Revert "drm/i915/bxt: Disable device ready before shutdown command"

This reverts commit bbdf0b2ff32a ("drm/i915/bxt: Disable device ready
before shutdown command").

Disable device ready before shutdown command was added previously to
avoid a split screen issue seen on dual link DSI panels. As of now, dual
link is not supported and will need some rework in the upstream
code. For single link DSI panels, the change is not required. This will
cause failure in sending SHUTDOWN packet during disable. Hence reverting
the change. Will handle the change as part of dual link enabling in
upstream.

Fixes: bbdf0b2ff32a ("drm/i915/bxt: Disable device ready before shutdown command")
Cc: <stable@vger.kernel.org> # v4.12+
Signed-off-by: Uma Shankar <uma.shankar@intel.com>
Signed-off-by: Vidya Srinivas <vidya.srinivas@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1504604671-17237-1-git-send-email-vidya.srinivas@intel.com
(cherry picked from commit 33c8d8870c67faf3161898a56af98ac3c1c71450)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_dsi.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index f0c11aec5ea5..7442891762be 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -892,8 +892,6 @@ static void intel_dsi_disable(struct intel_encoder *encoder,
 			      struct intel_crtc_state *old_crtc_state,
 			      struct drm_connector_state *old_conn_state)
 {
-	struct drm_device *dev = encoder->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	enum port port;
 
@@ -902,15 +900,6 @@ static void intel_dsi_disable(struct intel_encoder *encoder,
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF);
 	intel_panel_disable_backlight(old_conn_state);
 
-	/*
-	 * Disable Device ready before the port shutdown in order
-	 * to avoid split screen
-	 */
-	if (IS_BROXTON(dev_priv)) {
-		for_each_dsi_port(port, intel_dsi->ports)
-			I915_WRITE(MIPI_DEVICE_READY(port), 0);
-	}
-
 	/*
 	 * According to the spec we should send SHUTDOWN before
 	 * MIPI_SEQ_DISPLAY_OFF only for v3+ VBTs, but field testing
-- 
cgit 


From 8c7a758873577f0d1bb3f879584338f73e6c6bc3 Mon Sep 17 00:00:00 2001
From: "Lee, Shawn C" <shawn.c.lee@intel.com>
Date: Wed, 13 Sep 2017 13:19:20 +0800
Subject: drm/i915/cnp: set min brightness from VBT

Min brightness value from vbt was missing for CNP platform.
This setting have to refer backlight ic spec to restrict
min backlight output. Without this restriction, driver would
allow to configure lower brightness value and violate
backlight ic requirement.

Fixes: 4c9f7086ac6d ("drm/i915/cnp: Backlight support for CNP.")
Cc: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Shawn Lee <shawn.c.lee@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1505279961-16140-1-git-send-email-shawn.c.lee@intel.com
(cherry picked from commit f44e354f857f207cd361269c5e38e1f96e0b616c)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_panel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index d4dd248ac9a8..3b1c5d783ee7 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -1737,6 +1737,8 @@ cnp_setup_backlight(struct intel_connector *connector, enum pipe unused)
 	if (!panel->backlight.max)
 		return -ENODEV;
 
+	panel->backlight.min = get_backlight_min_vbt(connector);
+
 	val = bxt_get_backlight(connector);
 	val = intel_panel_compute_brightness(connector, val);
 	panel->backlight.level = clamp(val, panel->backlight.min,
-- 
cgit 


From 99df13b6ea811a63eeacb278d05a5b914ce28073 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 14 Sep 2017 17:42:13 +0100
Subject: drm/i915: Remove unused 'in_vbl' from i915_get_crtc_scanoutpos()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 1bf6ad622b9b ("drm/vblank: drop the mode argument from
drm_calc_vbltimestamp_from_scanoutpos") removed the use of in_vbl, but
did not remove the local variable. Do so now.

Fixes: 1bf6ad622b9b ("drm/vblank: drop the mode argument from drm_calc_vbltimestamp_from_scanoutpos")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170914164213.18461-1-chris@chris-wilson.co.uk
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
(cherry picked from commit e01e71fc49d4c95090a04f898a3fe788c652a04b)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index e21ce9c18b6e..b63893eeca73 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -839,7 +839,6 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
 								pipe);
 	int position;
 	int vbl_start, vbl_end, hsync_start, htotal, vtotal;
-	bool in_vbl = true;
 	unsigned long irqflags;
 
 	if (WARN_ON(!mode->crtc_clock)) {
@@ -922,8 +921,6 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
 
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 
-	in_vbl = position >= vbl_start && position < vbl_end;
-
 	/*
 	 * While in vblank, position will be negative
 	 * counting up towards 0 at vbl_end. And outside
-- 
cgit 


From f2654a4781318dc7ab8d6cde66f1fa39eab980a9 Mon Sep 17 00:00:00 2001
From: Fahad Kunnathadi <fahad.kunnathadi@dexceldesigns.com>
Date: Fri, 15 Sep 2017 12:01:58 +0530
Subject: net: phy: Fix mask value write on gmii2rgmii converter speed register

To clear Speed Selection in MDIO control register(0x10),
ie, clear bits 6 and 13 to zero while keeping other bits same.
Before AND operation,The Mask value has to be perform with bitwise NOT
operation (ie, ~ operator)

This patch clears current speed selection before writing the
new speed settings to gmii2rgmii converter

Fixes: f411a6160bd4 ("net: phy: Add gmiitorgmii converter support")

Signed-off-by: Fahad Kunnathadi <fahad.kunnathadi@dexceldesigns.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/xilinx_gmii2rgmii.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
index d15dd3938ba8..2e5150b0b8d5 100644
--- a/drivers/net/phy/xilinx_gmii2rgmii.c
+++ b/drivers/net/phy/xilinx_gmii2rgmii.c
@@ -44,7 +44,7 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev)
 	priv->phy_drv->read_status(phydev);
 
 	val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG);
-	val &= XILINX_GMII2RGMII_SPEED_MASK;
+	val &= ~XILINX_GMII2RGMII_SPEED_MASK;
 
 	if (phydev->speed == SPEED_1000)
 		val |= BMCR_SPEED1000;
-- 
cgit 


From 8c22dab03ad072e45060c299c70d02a4f6fc4aab Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 15 Sep 2017 15:58:33 +0800
Subject: ip6_tunnel: do not allow loading ip6_tunnel if ipv6 is disabled in
 cmdline

If ipv6 has been disabled from cmdline since kernel started, it makes
no sense to allow users to create any ip6 tunnel. Otherwise, it could
some potential problem.

Jianlin found a kernel crash caused by this in ip6_gre when he set
ipv6.disable=1 in grub:

[  209.588865] Unable to handle kernel paging request for data at address 0x00000080
[  209.588872] Faulting instruction address: 0xc000000000a3aa6c
[  209.588879] Oops: Kernel access of bad area, sig: 11 [#1]
[  209.589062] NIP [c000000000a3aa6c] fib_rules_lookup+0x4c/0x260
[  209.589071] LR [c000000000b9ad90] fib6_rule_lookup+0x50/0xb0
[  209.589076] Call Trace:
[  209.589097] fib6_rule_lookup+0x50/0xb0
[  209.589106] rt6_lookup+0xc4/0x110
[  209.589116] ip6gre_tnl_link_config+0x214/0x2f0 [ip6_gre]
[  209.589125] ip6gre_newlink+0x138/0x3a0 [ip6_gre]
[  209.589134] rtnl_newlink+0x798/0xb80
[  209.589142] rtnetlink_rcv_msg+0xec/0x390
[  209.589151] netlink_rcv_skb+0x138/0x150
[  209.589159] rtnetlink_rcv+0x48/0x70
[  209.589169] netlink_unicast+0x538/0x640
[  209.589175] netlink_sendmsg+0x40c/0x480
[  209.589184] ___sys_sendmsg+0x384/0x4e0
[  209.589194] SyS_sendmsg+0xd4/0x140
[  209.589201] SyS_socketcall+0x3e0/0x4f0
[  209.589209] system_call+0x38/0xe0

This patch is to return -EOPNOTSUPP in ip6_tunnel_init if ipv6 has been
disabled from cmdline.

Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index ae73164559d5..f2f21c24915f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2259,6 +2259,9 @@ static int __init ip6_tunnel_init(void)
 {
 	int  err;
 
+	if (!ipv6_mod_enabled())
+		return -EOPNOTSUPP;
+
 	err = register_pernet_device(&ip6_tnl_net_ops);
 	if (err < 0)
 		goto out_pernet;
-- 
cgit 


From 3ff4cbec87da48b0ec1f7b6196607b034de0c680 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Sat, 16 Sep 2017 14:02:21 +0200
Subject: net/sched: cls_matchall: fix crash when used with classful qdisc

this script, edited from Linux Advanced Routing and Traffic Control guide

tc q a dev en0 root handle 1: htb default a
tc c a dev en0 parent 1:  classid 1:1 htb rate 6mbit burst 15k
tc c a dev en0 parent 1:1 classid 1:a htb rate 5mbit ceil 6mbit burst 15k
tc c a dev en0 parent 1:1 classid 1:b htb rate 1mbit ceil 6mbit burst 15k
tc f a dev en0 parent 1:0 prio 1 $clsname $clsargs classid 1:b
ping $address -c1
tc -s c s dev en0

classifies traffic to 1:b or 1:a, depending on whether the packet matches
or not the pattern $clsargs of filter $clsname. However, when $clsname is
'matchall', a systematic crash can be observed in htb_classify(). HTB and
classful qdiscs don't assign initial value to struct tcf_result, but then
they expect it to contain valid values after filters have been run. Thus,
current 'matchall' ignores the TCA_MATCHALL_CLASSID attribute, configured
by user, and makes HTB (and classful qdiscs) dereference random pointers.

By assigning head->res to *res in mall_classify(), before the actions are
invoked, we fix this crash and enable TCA_MATCHALL_CLASSID functionality,
that had no effect on 'matchall' classifier since its first introduction.

BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1460213
Reported-by: Jiri Benc <jbenc@redhat.com>
Fixes: b87f7936a932 ("net/sched: introduce Match-all classifier")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Yotam Gigi <yotamg@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_matchall.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 21cc45caf842..eeac606c95ab 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -32,6 +32,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	if (tc_skip_sw(head->flags))
 		return -1;
 
+	*res = head->res;
 	return tcf_exts_exec(skb, &head->exts, res);
 }
 
-- 
cgit 


From 51513748ddfe7a5d2812158a1e0570499b0c511c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 16 Sep 2017 13:10:06 -0700
Subject: Documentation: networking: fix ASCII art in switchdev.txt

Fix ASCII art in Documentation/networking/switchdev.txt:

Change non-ASCII "spaces" to ASCII spaces.

Change 2 erroneous '+' characters in ASCII art to '-' (at the '*'
characters below):

line 32:
                     +--+----+----+----+-*--+----+---+  +-----+-----+
line 41:
                     +--------------+---*------------+

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/switchdev.txt | 68 +++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index 5e40e1f68873..82236a17b5e6 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -13,42 +13,42 @@ an example setup using a data-center-class switch ASIC chip.  Other setups
 with SR-IOV or soft switches, such as OVS, are possible.
 
 
-                             User-space tools
-
-       user space                   |
-      +-------------------------------------------------------------------+
-       kernel                       | Netlink
-                                    |
-                     +--------------+-------------------------------+
-                     |         Network stack                        |
-                     |           (Linux)                            |
-                     |                                              |
-                     +----------------------------------------------+
+                             User-space tools
+
+       user space                   |
+      +-------------------------------------------------------------------+
+       kernel                       | Netlink
+                                    |
+                     +--------------+-------------------------------+
+                     |         Network stack                        |
+                     |           (Linux)                            |
+                     |                                              |
+                     +----------------------------------------------+
 
                            sw1p2     sw1p4     sw1p6
-                      sw1p1  +  sw1p3  +  sw1p5  +          eth1
-                        +    |    +    |    +    |            +
-                        |    |    |    |    |    |            |
-                     +--+----+----+----+-+--+----+---+  +-----+-----+
-                     |         Switch driver         |  |    mgmt   |
-                     |        (this document)        |  |   driver  |
-                     |                               |  |           |
-                     +--------------+----------------+  +-----------+
-                                    |
-       kernel                       | HW bus (eg PCI)
-      +-------------------------------------------------------------------+
-       hardware                     |
-                     +--------------+---+------------+
-                     |         Switch device (sw1)   |
-                     |  +----+                       +--------+
-                     |  |    v offloaded data path   | mgmt port
-                     |  |    |                       |
-                     +--|----|----+----+----+----+---+
-                        |    |    |    |    |    |
-                        +    +    +    +    +    +
-                       p1   p2   p3   p4   p5   p6
-
-                             front-panel ports
+                      sw1p1  +  sw1p3  +  sw1p5  +          eth1
+                        +    |    +    |    +    |            +
+                        |    |    |    |    |    |            |
+                     +--+----+----+----+----+----+---+  +-----+-----+
+                     |         Switch driver         |  |    mgmt   |
+                     |        (this document)        |  |   driver  |
+                     |                               |  |           |
+                     +--------------+----------------+  +-----------+
+                                    |
+       kernel                       | HW bus (eg PCI)
+      +-------------------------------------------------------------------+
+       hardware                     |
+                     +--------------+----------------+
+                     |         Switch device (sw1)   |
+                     |  +----+                       +--------+
+                     |  |    v offloaded data path   | mgmt port
+                     |  |    |                       |
+                     +--|----|----+----+----+----+---+
+                        |    |    |    |    |    |
+                        +    +    +    +    +    +
+                       p1   p2   p3   p4   p5   p6
+
+                             front-panel ports
 
 
                                     Fig 1.
-- 
cgit 


From 6ce14f6416c84bd9c81777edf899b57ac5000c87 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 19 Sep 2017 01:49:02 +0200
Subject: ACPI / watchdog: properly initialize resources

We copy a local resource structure into a list, but only
initialize some of its members, as pointed out by gcc-4.4:

drivers/acpi/acpi_watchdog.c: In function 'acpi_watchdog_init':
drivers/acpi/acpi_watchdog.c:105: error: 'res.child' may be used uninitialized in this function
drivers/acpi/acpi_watchdog.c:105: error: 'res.sibling' may be used uninitialized in this function
drivers/acpi/acpi_watchdog.c:105: error: 'res.parent' may be used uninitialized in this function
drivers/acpi/acpi_watchdog.c:105: error: 'res.desc' may be used uninitialized in this function
drivers/acpi/acpi_watchdog.c:105: error: 'res.name' may be used uninitialized in this function

Newer compilers can presumably optimize the uninitialized access
away entirely and don't warn at all, but rely on the kzalloc()
to zero the structure first. This adds an explicit initialization
to force consistent behavior.

Fixes: 058dfc767008 (ACPI / watchdog: Add support for WDAT hardware watchdog)
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_watchdog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c
index bf22c29d2517..11b113f8e367 100644
--- a/drivers/acpi/acpi_watchdog.c
+++ b/drivers/acpi/acpi_watchdog.c
@@ -66,7 +66,7 @@ void __init acpi_watchdog_init(void)
 	for (i = 0; i < wdat->entries; i++) {
 		const struct acpi_generic_address *gas;
 		struct resource_entry *rentry;
-		struct resource res;
+		struct resource res = {};
 		bool found;
 
 		gas = &entries[i].register_region;
-- 
cgit 


From 1e3c5ec66119783440ed211ae527674651affa9b Mon Sep 17 00:00:00 2001
From: Sathya Perla <sathya.perla@broadcom.com>
Date: Mon, 18 Sep 2017 17:05:37 +0530
Subject: bnxt_en: check for ingress qdisc in flower offload

Check for ingress-only qdisc for flower offload, as other qdiscs
are not supported for flower offload.

Suggested-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index ccd699fb2d70..7dd3d131043a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -750,6 +750,10 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
 {
 	int rc = 0;
 
+	if (!is_classid_clsact_ingress(cls_flower->common.classid) ||
+	    cls_flower->common.chain_index)
+		return -EOPNOTSUPP;
+
 	switch (cls_flower->command) {
 	case TC_CLSFLOWER_REPLACE:
 		rc = bnxt_tc_add_flow(bp, src_fid, cls_flower);
-- 
cgit 


From 582db7e0c4c2fc5bb4f932f268035883385e3692 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Mon, 18 Sep 2017 15:03:46 +0200
Subject: bpf: devmap: pass on return value of bpf_map_precharge_memlock

If bpf_map_precharge_memlock in dev_map_alloc, -ENOMEM is returned
regardless of the actual error produced by bpf_map_precharge_memlock.
Fix it by passing on the error returned by bpf_map_precharge_memlock.

Also return -EINVAL instead of -ENOMEM if the page count overflow check
fails.

This makes dev_map_alloc match the behavior of other bpf maps' alloc
functions wrt. return values.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/devmap.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 959c9a07f318..e093d9a2c4dd 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -75,8 +75,8 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)
 static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_dtab *dtab;
+	int err = -EINVAL;
 	u64 cost;
-	int err;
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -108,6 +108,8 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	if (err)
 		goto free_dtab;
 
+	err = -ENOMEM;
+
 	/* A per cpu bitfield with a bit per possible net device */
 	dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
 					    __alignof__(unsigned long));
@@ -128,7 +130,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 free_dtab:
 	free_percpu(dtab->flush_needed);
 	kfree(dtab);
-	return ERR_PTR(-ENOMEM);
+	return ERR_PTR(err);
 }
 
 static void dev_map_free(struct bpf_map *map)
-- 
cgit 


From 5e75fe3927559505682b0053b5745e3f42d66e8c Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 18 Sep 2017 17:19:10 -0700
Subject: tools/testing/nvdimm: disable labels for nfit_test.1

Improve coverage of NVDIMM-N test scenarios by providing a test bus
incapable of label operations.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index d20791c3f499..bef419d4266d 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1527,9 +1527,6 @@ static void nfit_test1_setup(struct nfit_test *t)
 	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
-	set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
-	set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
-	set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
 }
 
 static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
-- 
cgit 


From 4c7124413aa759b8ea0b90cd39177e525396e662 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Mon, 18 Sep 2017 11:05:16 -0700
Subject: tcp: remove two unused functions

remove tcp_may_send_now and tcp_snd_test that are no longer used

Fixes: 840a3cbe8969 ("tcp: remove forward retransmit feature")
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  1 -
 net/ipv4/tcp_output.c | 34 ----------------------------------
 2 files changed, 35 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index b510f284427a..3bc910a9bfc6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -544,7 +544,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 		     int min_tso_segs);
 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 			       int nonagle);
-bool tcp_may_send_now(struct sock *sk);
 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
 int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
 void tcp_retransmit_timer(struct sock *sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1c839c99114c..517d737059d1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1806,40 +1806,6 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
 	return !after(end_seq, tcp_wnd_end(tp));
 }
 
-/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
- * should be put on the wire right now.  If so, it returns the number of
- * packets allowed by the congestion window.
- */
-static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
-				 unsigned int cur_mss, int nonagle)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int cwnd_quota;
-
-	tcp_init_tso_segs(skb, cur_mss);
-
-	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
-		return 0;
-
-	cwnd_quota = tcp_cwnd_test(tp, skb);
-	if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
-		cwnd_quota = 0;
-
-	return cwnd_quota;
-}
-
-/* Test if sending is allowed right now. */
-bool tcp_may_send_now(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = tcp_send_head(sk);
-
-	return skb &&
-		tcp_snd_test(sk, skb, tcp_current_mss(sk),
-			     (tcp_skb_is_last(sk, skb) ?
-			      tp->nonagle : TCP_NAGLE_PUSH));
-}
-
 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
  * which is put after SKB on the list.  It is very much like
  * tcp_fragment() except that it may make several kinds of assumptions
-- 
cgit 


From 33a56086712561b8b9cdc881e0317f4c36861f72 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 18 Sep 2017 14:48:58 -0700
Subject: libnvdimm, namespace: fix btt claim class crash

Maurice reports:

    BUG: unable to handle kernel NULL pointer dereference at 0000000000000028
    IP: holder_class_store+0x253/0x2b0 [libnvdimm]

...while trying to reconfigure an NVDIMM-N namespace into 'sector' /
'btt' mode. The crash points to this line:

    (gdb) li *(holder_class_store+0x253)
    0x7773 is in holder_class_store (drivers/nvdimm/namespace_devs.c:1420).
    1415            for (i = 0; i < nd_region->ndr_mappings; i++) {
    1416                    struct nd_mapping *nd_mapping = &nd_region->mapping[i];
    1417                    struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
    1418                    struct nd_namespace_index *nsindex;
    1419
    1420                    nsindex = to_namespace_index(ndd, ndd->ns_current);

...where we are failing because ndd is NULL due to NVDIMM-N dimms not
supporting labels.

Long story short, default to the BTTv1 format in the label-less /
NVDIMM-N case.

Fixes: 14e494542636 ("libnvdimm, btt: BTT updates for UEFI 2.7 format")
Cc: <stable@vger.kernel.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Reported-by: Maurice A. Saldivar <maurice.a.saldivar@hpe.com>
Tested-by: Maurice A. Saldivar <maurice.a.saldivar@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/namespace_devs.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 1427a386a033..3e4d1e7998da 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1417,6 +1417,15 @@ static int btt_claim_class(struct device *dev)
 		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 		struct nd_namespace_index *nsindex;
 
+		/*
+		 * If any of the DIMMs do not support labels the only
+		 * possible BTT format is v1.
+		 */
+		if (!ndd) {
+			loop_bitmask = 0;
+			break;
+		}
+
 		nsindex = to_namespace_index(ndd, ndd->ns_current);
 		if (nsindex == NULL)
 			loop_bitmask |= 1;
-- 
cgit 


From 54640d238760a1a54dfebe039b49682522100186 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 18 Sep 2017 22:51:14 -0500
Subject: fcntl: Don't set si_code to SI_SIGIO when sig == SIGPOLL

When fixing things to avoid ambiguous cases I had a thinko
and included SIGPOLL/SIGIO in with all of the other signals
that have signal specific si_codes.  Which is completely wrong.

Fix that.

Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/fcntl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 0491da3b28c3..448a1119f0be 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -749,7 +749,7 @@ static void send_sigio_to_task(struct task_struct *p,
 			 * specific si_codes.  In that case use SI_SIGIO instead
 			 * to remove the ambiguity.
 			 */
-			if (sig_specific_sicodes(signum))
+			if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
 				si.si_code = SI_SIGIO;
 
 			/* Make sure we are called with one of the POLL_*
-- 
cgit 


From 129c6cda2de2a8ac44fab096152469999b727faf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 18 Sep 2017 13:03:43 -0700
Subject: 8139too: revisit napi_complete_done() usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It seems we have to be more careful in napi_complete_done()
use. This patch is not a revert, as it seems we can
avoid bug that Ville reported by moving the napi_complete_done()
test in the spinlock section.

Many thanks to Ville for detective work and all tests.

Fixes: 617f01211baf ("8139too: use napi_complete_done()")
Reported-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/8139too.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index ca22f2898664..d24b47b8e0b2 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -2135,11 +2135,12 @@ static int rtl8139_poll(struct napi_struct *napi, int budget)
 	if (likely(RTL_R16(IntrStatus) & RxAckBits))
 		work_done += rtl8139_rx(dev, tp, budget);
 
-	if (work_done < budget && napi_complete_done(napi, work_done)) {
+	if (work_done < budget) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&tp->lock, flags);
-		RTL_W16_F(IntrMask, rtl8139_intr_mask);
+		if (napi_complete_done(napi, work_done))
+			RTL_W16_F(IntrMask, rtl8139_intr_mask);
 		spin_unlock_irqrestore(&tp->lock, flags);
 	}
 	spin_unlock(&tp->rx_lock);
-- 
cgit 


From 8ecb1a29e11e24c458ee4ee59447d0ddf8274589 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 18 Sep 2017 16:31:30 -0700
Subject: net: systemport: Fix 64-bit statistics dependency

There are several problems with commit 10377ba7673d ("net: systemport:
Support 64bit statistics", first one got fixed in 7095c973453e ("net:
systemport: Fix 64-bit stats deadlock").

The second problem is that this specific code updates the
stats64.tx_{packets,bytes} from ndo_get_stats64() and that is what we
are returning to ethtool -S. If we are not running a tool that involves
calling ndo_get_stats64(), then we won't get updated ethtool stats.

The solution to this is to update the stats from both call sites,
factoring that into a specific function, While at it, don't just check
the sizeof() but also the type of the statistics in order to use the
64-bit stats seqlock.

Fixes: 10377ba7673d ("net: systemport: Support 64bit statistics")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 52 ++++++++++++++++++------------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index c3c53f6cd9e6..83eec9a8c275 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -432,6 +432,27 @@ static void bcm_sysport_update_mib_counters(struct bcm_sysport_priv *priv)
 	netif_dbg(priv, hw, priv->netdev, "updated MIB counters\n");
 }
 
+static void bcm_sysport_update_tx_stats(struct bcm_sysport_priv *priv,
+					u64 *tx_bytes, u64 *tx_packets)
+{
+	struct bcm_sysport_tx_ring *ring;
+	u64 bytes = 0, packets = 0;
+	unsigned int start;
+	unsigned int q;
+
+	for (q = 0; q < priv->netdev->num_tx_queues; q++) {
+		ring = &priv->tx_rings[q];
+		do {
+			start = u64_stats_fetch_begin_irq(&priv->syncp);
+			bytes = ring->bytes;
+			packets = ring->packets;
+		} while (u64_stats_fetch_retry_irq(&priv->syncp, start));
+
+		*tx_bytes += bytes;
+		*tx_packets += packets;
+	}
+}
+
 static void bcm_sysport_get_stats(struct net_device *dev,
 				  struct ethtool_stats *stats, u64 *data)
 {
@@ -439,11 +460,16 @@ static void bcm_sysport_get_stats(struct net_device *dev,
 	struct bcm_sysport_stats64 *stats64 = &priv->stats64;
 	struct u64_stats_sync *syncp = &priv->syncp;
 	struct bcm_sysport_tx_ring *ring;
+	u64 tx_bytes = 0, tx_packets = 0;
 	unsigned int start;
 	int i, j;
 
-	if (netif_running(dev))
+	if (netif_running(dev)) {
 		bcm_sysport_update_mib_counters(priv);
+		bcm_sysport_update_tx_stats(priv, &tx_bytes, &tx_packets);
+		stats64->tx_bytes = tx_bytes;
+		stats64->tx_packets = tx_packets;
+	}
 
 	for (i =  0, j = 0; i < BCM_SYSPORT_STATS_LEN; i++) {
 		const struct bcm_sysport_stats *s;
@@ -461,12 +487,13 @@ static void bcm_sysport_get_stats(struct net_device *dev,
 			continue;
 		p += s->stat_offset;
 
-		if (s->stat_sizeof == sizeof(u64))
+		if (s->stat_sizeof == sizeof(u64) &&
+		    s->type == BCM_SYSPORT_STAT_NETDEV64) {
 			do {
 				start = u64_stats_fetch_begin_irq(syncp);
 				data[i] = *(u64 *)p;
 			} while (u64_stats_fetch_retry_irq(syncp, start));
-		else
+		} else
 			data[i] = *(u32 *)p;
 		j++;
 	}
@@ -1716,27 +1743,12 @@ static void bcm_sysport_get_stats64(struct net_device *dev,
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
 	struct bcm_sysport_stats64 *stats64 = &priv->stats64;
-	struct bcm_sysport_tx_ring *ring;
-	u64 tx_packets = 0, tx_bytes = 0;
 	unsigned int start;
-	unsigned int q;
 
 	netdev_stats_to_stats64(stats, &dev->stats);
 
-	for (q = 0; q < dev->num_tx_queues; q++) {
-		ring = &priv->tx_rings[q];
-		do {
-			start = u64_stats_fetch_begin_irq(&priv->syncp);
-			tx_bytes = ring->bytes;
-			tx_packets = ring->packets;
-		} while (u64_stats_fetch_retry_irq(&priv->syncp, start));
-
-		stats->tx_bytes += tx_bytes;
-		stats->tx_packets += tx_packets;
-	}
-
-	stats64->tx_bytes = stats->tx_bytes;
-	stats64->tx_packets = stats->tx_packets;
+	bcm_sysport_update_tx_stats(priv, &stats->tx_bytes,
+				    &stats->tx_packets);
 
 	do {
 		start = u64_stats_fetch_begin_irq(&priv->syncp);
-- 
cgit 


From c8b850241559d6bad7e640075e282a83a4ad7ead Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Mon, 18 Sep 2017 12:25:22 +0200
Subject: s390/scm_blk: consistently use blk_status_t as error type

Fix these warnings found by sparse:
drivers/s390/block/scm_blk.c:257:24: warning: incorrect type in assignment (different base types)
drivers/s390/block/scm_blk.c:257:24:    expected int [signed] <noident>
drivers/s390/block/scm_blk.c:257:24:    got restricted blk_status_t [usertype] error
drivers/s390/block/scm_blk.c:420:33: warning: incorrect type in argument 2 (different base types)
drivers/s390/block/scm_blk.c:420:33:    expected restricted blk_status_t [usertype] error
drivers/s390/block/scm_blk.c:420:33:    got int [signed] <noident>

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/scm_blk.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 2e7fd966c515..eb51893c74a4 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -249,7 +249,7 @@ static void scm_request_requeue(struct scm_request *scmrq)
 static void scm_request_finish(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
-	int *error;
+	blk_status_t *error;
 	int i;
 
 	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
@@ -415,7 +415,7 @@ void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
 
 static void scm_blk_request_done(struct request *req)
 {
-	int *error = blk_mq_rq_to_pdu(req);
+	blk_status_t *error = blk_mq_rq_to_pdu(req);
 
 	blk_mq_end_request(req, *error);
 }
@@ -450,7 +450,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
 	atomic_set(&bdev->queued_reqs, 0);
 
 	bdev->tag_set.ops = &scm_mq_ops;
-	bdev->tag_set.cmd_size = sizeof(int);
+	bdev->tag_set.cmd_size = sizeof(blk_status_t);
 	bdev->tag_set.nr_hw_queues = nr_requests;
 	bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests;
 	bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
-- 
cgit 


From 55fb7347579017bdb09550b4e8019447340b7004 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 14 Sep 2017 13:55:22 +0200
Subject: s390/cio: recover from bad paths

In some situations we don't receive notification from firmware that
a previously unusable channelpath is usable again.

Schedule recovery for devices that return from path verification
without using all potentially usable paths. The recovery thread will
periodically trigger a path verification on the affected devices.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Suggested-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Reviewed-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/device.c     | 12 +++++++++---
 drivers/s390/cio/device.h     |  1 +
 drivers/s390/cio/device_fsm.c | 12 ++++++++++++
 drivers/s390/cio/io_sch.h     |  2 ++
 4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 489b583f263d..e5c32f4b5287 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -1225,10 +1225,16 @@ static int device_is_disconnected(struct ccw_device *cdev)
 static int recovery_check(struct device *dev, void *data)
 {
 	struct ccw_device *cdev = to_ccwdev(dev);
+	struct subchannel *sch;
 	int *redo = data;
 
 	spin_lock_irq(cdev->ccwlock);
 	switch (cdev->private->state) {
+	case DEV_STATE_ONLINE:
+		sch = to_subchannel(cdev->dev.parent);
+		if ((sch->schib.pmcw.pam & sch->opm) == sch->vpm)
+			break;
+		/* fall through */
 	case DEV_STATE_DISCONNECTED:
 		CIO_MSG_EVENT(3, "recovery: trigger 0.%x.%04x\n",
 			      cdev->private->dev_id.ssid,
@@ -1260,7 +1266,7 @@ static void recovery_work_func(struct work_struct *unused)
 		}
 		spin_unlock_irq(&recovery_lock);
 	} else
-		CIO_MSG_EVENT(4, "recovery: end\n");
+		CIO_MSG_EVENT(3, "recovery: end\n");
 }
 
 static DECLARE_WORK(recovery_work, recovery_work_func);
@@ -1274,11 +1280,11 @@ static void recovery_func(unsigned long data)
 	schedule_work(&recovery_work);
 }
 
-static void ccw_device_schedule_recovery(void)
+void ccw_device_schedule_recovery(void)
 {
 	unsigned long flags;
 
-	CIO_MSG_EVENT(4, "recovery: schedule\n");
+	CIO_MSG_EVENT(3, "recovery: schedule\n");
 	spin_lock_irqsave(&recovery_lock, flags);
 	if (!timer_pending(&recovery_timer) || (recovery_phase != 0)) {
 		recovery_phase = 0;
diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h
index ec497af99dd8..69cb70f080a5 100644
--- a/drivers/s390/cio/device.h
+++ b/drivers/s390/cio/device.h
@@ -134,6 +134,7 @@ void ccw_device_set_disconnected(struct ccw_device *cdev);
 void ccw_device_set_notoper(struct ccw_device *cdev);
 
 void ccw_device_set_timeout(struct ccw_device *, int);
+void ccw_device_schedule_recovery(void);
 
 /* Channel measurement facility related */
 void retry_set_schib(struct ccw_device *cdev);
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 12016e32e519..f98ea674c3d8 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -476,6 +476,17 @@ static void create_fake_irb(struct irb *irb, int type)
 	}
 }
 
+static void ccw_device_handle_broken_paths(struct ccw_device *cdev)
+{
+	struct subchannel *sch = to_subchannel(cdev->dev.parent);
+	u8 broken_paths = (sch->schib.pmcw.pam & sch->opm) ^ sch->vpm;
+
+	if (broken_paths && (cdev->private->path_broken_mask != broken_paths))
+		ccw_device_schedule_recovery();
+
+	cdev->private->path_broken_mask = broken_paths;
+}
+
 void ccw_device_verify_done(struct ccw_device *cdev, int err)
 {
 	struct subchannel *sch;
@@ -508,6 +519,7 @@ callback:
 			memset(&cdev->private->irb, 0, sizeof(struct irb));
 		}
 		ccw_device_report_path_events(cdev);
+		ccw_device_handle_broken_paths(cdev);
 		break;
 	case -ETIME:
 	case -EUSERS:
diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h
index 220f49145b2f..9a1b56b2df3e 100644
--- a/drivers/s390/cio/io_sch.h
+++ b/drivers/s390/cio/io_sch.h
@@ -131,6 +131,8 @@ struct ccw_device_private {
 				   not operable */
 	u8 path_gone_mask;	/* mask of paths, that became unavailable */
 	u8 path_new_mask;	/* mask of paths, that became available */
+	u8 path_broken_mask;	/* mask of paths, which were found to be
+				   unusable */
 	struct {
 		unsigned int fast:1;	/* post with "channel end" */
 		unsigned int repall:1;	/* report every interrupt status */
-- 
cgit 


From 91c575b335766effa6103eba42a82aea560c365f Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Mon, 18 Sep 2017 16:10:35 +0200
Subject: s390/mm: make pmdp_invalidate() do invalidation only

Commit 227be799c39a ("s390/mm: uninline pmdp_xxx functions from pgtable.h")
inadvertently changed the behavior of pmdp_invalidate(), so that it now
clears the pmd instead of just marking it as invalid. Fix this by restoring
the original behavior.

A possible impact of the misbehaving pmdp_invalidate() would be the
MADV_DONTNEED races (see commits ced10803 and 58ceeb6b), although we
should not have any negative impact on the related dirty/young flags,
since those flags are not set by the hardware on s390.

Fixes: 227be799c39a ("s390/mm: uninline pmdp_xxx functions from pgtable.h")
Cc: <stable@vger.kernel.org> # v4.6+
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index dce708e061ea..20e75a2ca93a 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1507,7 +1507,9 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
 static inline void pmdp_invalidate(struct vm_area_struct *vma,
 				   unsigned long addr, pmd_t *pmdp)
 {
-	pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+
+	pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
 }
 
 #define __HAVE_ARCH_PMDP_SET_WRPROTECT
-- 
cgit 


From ba385c0594e723d41790ecfb12c610e6f90c7785 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Mon, 18 Sep 2017 16:51:51 +0200
Subject: s390/mm: fix write access check in gup_huge_pmd()

The check for the _SEGMENT_ENTRY_PROTECT bit in gup_huge_pmd() is the
wrong way around. It must not be set for write==1, and not be checked for
write==0. Fix this similar to how it was fixed for ptes long time ago in
commit 25591b070336 ("[S390] fix get_user_pages_fast").

One impact of this bug would be unnecessarily using the gup slow path for
write==0 on r/w mappings. A potentially more severe impact would be that
gup_huge_pmd() will succeed for write==1 on r/o mappings.

Cc: <stable@vger.kernel.org>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/gup.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 8ecc25e760fa..98ffe3ee9411 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -56,13 +56,12 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
-	unsigned long mask, result;
 	struct page *head, *page;
+	unsigned long mask;
 	int refs;
 
-	result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
-	mask = result | _SEGMENT_ENTRY_INVALID;
-	if ((pmd_val(pmd) & mask) != result)
+	mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID;
+	if ((pmd_val(pmd) & mask) != 0)
 		return 0;
 	VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
 
-- 
cgit 


From 95e2a3b3ef177730019e3799917193595133b275 Mon Sep 17 00:00:00 2001
From: "Jan H. Schönherr" <jschoenh@amazon.de>
Date: Sat, 16 Sep 2017 22:12:24 +0200
Subject: Revert "KVM: Don't accept obviously wrong gsi values via KVM_IRQFD"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 36ae3c0a36b7456432fedce38ae2f7bd3e01a563.

The commit broke compilation on !CONFIG_HAVE_KVM_IRQ_ROUTING. Also,
there may be cases with CONFIG_HAVE_KVM_IRQ_ROUTING, where larger
gsi values make sense.

As the commit was meant as an early indicator to user space that
something is wrong, reverting just restores the previous behavior
where overly large values are ignored when encountered (without
any direct feedback).

Reported-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: Jan H. Schönherr <jschoenh@amazon.de>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 virt/kvm/eventfd.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c608ab495282..f2ac53ab8243 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -565,8 +565,6 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 {
 	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
 		return -EINVAL;
-	if (args->gsi >= KVM_MAX_IRQ_ROUTES)
-		return -EINVAL;
 
 	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
 		return kvm_irqfd_deassign(kvm, args);
-- 
cgit 


From a4aaeccc7a91e24cc17f72a7eb2f586f5c3d811d Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 10 Sep 2017 13:43:37 -0700
Subject: iommu: Add missing dependencies

parisc:allmodconfig, xtensa:allmodconfig, and possibly others generate
the following Kconfig warning.

warning: (IPMMU_VMSA && ARM_SMMU && ARM_SMMU_V3 && QCOM_IOMMU) selects
IOMMU_IO_PGTABLE_LPAE which has unmet direct dependencies (IOMMU_SUPPORT &&
HAS_DMA && (ARM || ARM64 || COMPILE_TEST && !GENERIC_ATOMIC64))

IOMMU_IO_PGTABLE_LPAE depends on (COMPILE_TEST && !GENERIC_ATOMIC64),
so any configuration option selecting it needs to have the same dependencies.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 49bd2ab8c507..8530ef78925d 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -278,7 +278,7 @@ config EXYNOS_IOMMU_DEBUG
 config IPMMU_VMSA
 	bool "Renesas VMSA-compatible IPMMU"
 	depends on ARM || IOMMU_DMA
-	depends on ARCH_RENESAS || COMPILE_TEST
+	depends on ARCH_RENESAS || (COMPILE_TEST && !GENERIC_ATOMIC64)
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
 	select ARM_DMA_USE_IOMMU
@@ -373,7 +373,7 @@ config MTK_IOMMU_V1
 config QCOM_IOMMU
 	# Note: iommu drivers cannot (yet?) be built as modules
 	bool "Qualcomm IOMMU Support"
-	depends on ARCH_QCOM || COMPILE_TEST
+	depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64)
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
 	select ARM_DMA_USE_IOMMU
-- 
cgit 


From 3bd71e18c5803c23014df962bdd74af1b34697fe Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 12 Sep 2017 22:10:21 +0200
Subject: iommu/vt-d: Fix harmless section mismatch warning

Building with gcc-4.6 results in this warning due to
dmar_table_print_dmar_entry being inlined as in newer compiler versions:

WARNING: vmlinux.o(.text+0x5c8bee): Section mismatch in reference from the function dmar_walk_remapping_entries() to the function .init.text:dmar_table_print_dmar_entry()
The function dmar_walk_remapping_entries() references
the function __init dmar_table_print_dmar_entry().
This is often because dmar_walk_remapping_entries lacks a __init
annotation or the annotation of dmar_table_print_dmar_entry is wrong.

This removes the __init annotation to avoid the warning. On compilers
that don't show the warning today, this should have no impact since the
function gets inlined anyway.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index ca5ebaeafd6a..57c920c1372d 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -497,7 +497,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
 #define	dmar_parse_one_rhsa		dmar_res_noop
 #endif
 
-static void __init
+static void
 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 {
 	struct acpi_dmar_hardware_unit *drhd;
-- 
cgit 


From 5baf6bb0fd2388742a0846cc7bcacee6dec78235 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 14 Sep 2017 14:01:00 +0200
Subject: drm/exynos: Fix locking in the suspend/resume paths

Commit 48a92916729b ("drm/exynos: use drm_for_each_connector_iter()")
replaced unsafe drm_for_each_connector() with drm_for_each_connector_iter()
and removed surrounding drm_modeset_lock calls. However, that lock was
there not only to protect unsafe drm_for_each_connector(), but it was also
required to be held by the dpms code which was called from the loop body.
This patch restores those drm_modeset_lock calls to fix broken suspend
and resume of Exynos DRM subsystem in v4.13 kernel.

Fixes: 48a92916729b ("drm/exynos: use drm_for_each_connector_iter()")
CC: stable@vger.kernel.org # v4.13
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_drv.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index b1f7299600f0..7f3cfc5dd320 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -174,6 +174,7 @@ static int exynos_drm_suspend(struct device *dev)
 	if (pm_runtime_suspended(dev) || !drm_dev)
 		return 0;
 
+	drm_modeset_lock_all(drm_dev);
 	drm_connector_list_iter_begin(drm_dev, &conn_iter);
 	drm_for_each_connector_iter(connector, &conn_iter) {
 		int old_dpms = connector->dpms;
@@ -185,6 +186,7 @@ static int exynos_drm_suspend(struct device *dev)
 		connector->dpms = old_dpms;
 	}
 	drm_connector_list_iter_end(&conn_iter);
+	drm_modeset_unlock_all(drm_dev);
 
 	return 0;
 }
@@ -198,6 +200,7 @@ static int exynos_drm_resume(struct device *dev)
 	if (pm_runtime_suspended(dev) || !drm_dev)
 		return 0;
 
+	drm_modeset_lock_all(drm_dev);
 	drm_connector_list_iter_begin(drm_dev, &conn_iter);
 	drm_for_each_connector_iter(connector, &conn_iter) {
 		if (connector->funcs->dpms) {
@@ -208,6 +211,7 @@ static int exynos_drm_resume(struct device *dev)
 		}
 	}
 	drm_connector_list_iter_end(&conn_iter);
+	drm_modeset_unlock_all(drm_dev);
 
 	return 0;
 }
-- 
cgit 


From 6e8edf8a7d8d98e1734ff41e5c69439419319402 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 14 Sep 2017 14:01:01 +0200
Subject: drm/exynos: Fix suspend/resume support

Commit 7d902c05b480 ("drm: Nuke drm_atomic_helper_connector_dpms")
removed drm_atomic_helper_connector_dpms() helper saying that it was a dead
code. It was however indirectly used by Exynos DRM driver for implementing
suspend/resume support. To fix this regression (after that patch Exynos DRM
suspend/resume functions became no-ops and hardware fails to suspend),
this patch rewrites them with drm_atomic_helper_suspend/resume() helpers.

Fixes: 7d902c05b480 ("drm: Nuke drm_atomic_helper_connector_dpms")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_drv.c   | 40 ++++++++++---------------------
 drivers/gpu/drm/exynos/exynos_drm_drv.h   |  1 +
 drivers/gpu/drm/exynos/exynos_drm_fbdev.c | 20 ++++++++++++++++
 drivers/gpu/drm/exynos/exynos_drm_fbdev.h | 10 ++++++++
 4 files changed, 43 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 7f3cfc5dd320..e651a58c18cf 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -168,25 +168,19 @@ static struct drm_driver exynos_drm_driver = {
 static int exynos_drm_suspend(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
-	struct drm_connector *connector;
-	struct drm_connector_list_iter conn_iter;
+	struct exynos_drm_private *private = drm_dev->dev_private;
 
 	if (pm_runtime_suspended(dev) || !drm_dev)
 		return 0;
 
-	drm_modeset_lock_all(drm_dev);
-	drm_connector_list_iter_begin(drm_dev, &conn_iter);
-	drm_for_each_connector_iter(connector, &conn_iter) {
-		int old_dpms = connector->dpms;
-
-		if (connector->funcs->dpms)
-			connector->funcs->dpms(connector, DRM_MODE_DPMS_OFF);
-
-		/* Set the old mode back to the connector for resume */
-		connector->dpms = old_dpms;
+	drm_kms_helper_poll_disable(drm_dev);
+	exynos_drm_fbdev_suspend(drm_dev);
+	private->suspend_state = drm_atomic_helper_suspend(drm_dev);
+	if (IS_ERR(private->suspend_state)) {
+		exynos_drm_fbdev_resume(drm_dev);
+		drm_kms_helper_poll_enable(drm_dev);
+		return PTR_ERR(private->suspend_state);
 	}
-	drm_connector_list_iter_end(&conn_iter);
-	drm_modeset_unlock_all(drm_dev);
 
 	return 0;
 }
@@ -194,24 +188,14 @@ static int exynos_drm_suspend(struct device *dev)
 static int exynos_drm_resume(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
-	struct drm_connector *connector;
-	struct drm_connector_list_iter conn_iter;
+	struct exynos_drm_private *private = drm_dev->dev_private;
 
 	if (pm_runtime_suspended(dev) || !drm_dev)
 		return 0;
 
-	drm_modeset_lock_all(drm_dev);
-	drm_connector_list_iter_begin(drm_dev, &conn_iter);
-	drm_for_each_connector_iter(connector, &conn_iter) {
-		if (connector->funcs->dpms) {
-			int dpms = connector->dpms;
-
-			connector->dpms = DRM_MODE_DPMS_OFF;
-			connector->funcs->dpms(connector, dpms);
-		}
-	}
-	drm_connector_list_iter_end(&conn_iter);
-	drm_modeset_unlock_all(drm_dev);
+	drm_atomic_helper_resume(drm_dev, private->suspend_state);
+	exynos_drm_fbdev_resume(drm_dev);
+	drm_kms_helper_poll_enable(drm_dev);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index cf131c2aa23e..f8bae4cb4823 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -202,6 +202,7 @@ struct drm_exynos_file_private {
  */
 struct exynos_drm_private {
 	struct drm_fb_helper *fb_helper;
+	struct drm_atomic_state *suspend_state;
 
 	struct device *dma_dev;
 	void *mapping;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index c3a068409b48..dfb66ecf417b 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -18,6 +18,8 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/exynos_drm.h>
 
+#include <linux/console.h>
+
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
 #include "exynos_drm_fbdev.h"
@@ -285,3 +287,21 @@ void exynos_drm_output_poll_changed(struct drm_device *dev)
 
 	drm_fb_helper_hotplug_event(fb_helper);
 }
+
+void exynos_drm_fbdev_suspend(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	console_lock();
+	drm_fb_helper_set_suspend(private->fb_helper, 1);
+	console_unlock();
+}
+
+void exynos_drm_fbdev_resume(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	console_lock();
+	drm_fb_helper_set_suspend(private->fb_helper, 0);
+	console_unlock();
+}
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
index 330eef87f718..645d1bb7f665 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
@@ -21,6 +21,8 @@ int exynos_drm_fbdev_init(struct drm_device *dev);
 void exynos_drm_fbdev_fini(struct drm_device *dev);
 void exynos_drm_fbdev_restore_mode(struct drm_device *dev);
 void exynos_drm_output_poll_changed(struct drm_device *dev);
+void exynos_drm_fbdev_suspend(struct drm_device *drm);
+void exynos_drm_fbdev_resume(struct drm_device *drm);
 
 #else
 
@@ -39,6 +41,14 @@ static inline void exynos_drm_fbdev_restore_mode(struct drm_device *dev)
 
 #define exynos_drm_output_poll_changed (NULL)
 
+static inline void exynos_drm_fbdev_suspend(struct drm_device *drm)
+{
+}
+
+static inline void exynos_drm_fbdev_resume(struct drm_device *drm)
+{
+}
+
 #endif
 
 #endif
-- 
cgit 


From 9ac30ef6d8ec3533d0feec53e268c4fa32ea700c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 5 Sep 2017 10:19:55 +0200
Subject: drm: exynos: include linux/irq.h

I ran into a build error on x86:

drivers/gpu/drm/exynos/exynos5433_drm_decon.c: In function 'decon_conf_irq':
drivers/gpu/drm/exynos/exynos5433_drm_decon.c:706:2: error: implicit declaration of function 'irq_set_status_flags'; did you mean 'dquot_state_flag'? [-Werror=implicit-function-declaration]
  irq_set_status_flags(irq, IRQ_NOAUTOEN);

Adding the missing include fixes the error.

Fixes: b37d53a0382c ("drm/exynos/decon5433: move TE handling to DECON")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 730b8d9db187..6be5b53c3b27 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -14,6 +14,7 @@
 #include <linux/clk.h>
 #include <linux/component.h>
 #include <linux/iopoll.h>
+#include <linux/irq.h>
 #include <linux/mfd/syscon.h>
 #include <linux/of_device.h>
 #include <linux/of_gpio.h>
-- 
cgit 


From d6500149bc4fddc5a91cd1a0c31b38fa36bff3ee Mon Sep 17 00:00:00 2001
From: Yu Zhang <yu.c.zhang@linux.intel.com>
Date: Mon, 18 Sep 2017 18:45:01 +0800
Subject: KVM: x86: Fix the NULL pointer parameter in check_cr_write()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Routine check_cr_write() will trigger emulator_get_cpuid()->
kvm_cpuid() to get maxphyaddr, and NULL is passed as values
for ebx/ecx/edx. This is problematic because kvm_cpuid() will
dereference these pointers.

Fixes: d1cd3ce90044 ("KVM: MMU: check guest CR3 reserved bits based on its physical address width.")
Reported-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/emulate.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 16bf6655aa85..15f527b44aa7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4102,10 +4102,12 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
 		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
 		if (efer & EFER_LMA) {
 			u64 maxphyaddr;
-			u32 eax = 0x80000008;
+			u32 eax, ebx, ecx, edx;
 
-			if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL,
-						 NULL, false))
+			eax = 0x80000008;
+			ecx = 0;
+			if (ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx,
+						 &edx, false))
 				maxphyaddr = eax & 0xff;
 			else
 				maxphyaddr = 36;
-- 
cgit 


From dc91f2eb1a4021eb6705c15e474942f84ab9b211 Mon Sep 17 00:00:00 2001
From: Haozhong Zhang <haozhong.zhang@intel.com>
Date: Mon, 18 Sep 2017 09:56:49 +0800
Subject: KVM: VMX: do not change SN bit in vmx_update_pi_irte()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In kvm_vcpu_trigger_posted_interrupt() and pi_pre_block(), KVM
assumes that PI notification events should not be suppressed when the
target vCPU is not blocked.

vmx_update_pi_irte() sets the SN field before changing an interrupt
from posting to remapping, but it does not check the vCPU mode.
Therefore, the change of SN field may break above the assumption.
Besides, I don't see reasons to suppress notification events here, so
remove the changes of SN field to avoid race condition.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reported-by: "Ramamurthy, Venkatesh" <venkatesh.ramamurthy@intel.com>
Reported-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Fixes: 28b835d60fcc ("KVM: Update Posted-Interrupts Descriptor when vCPU is preempted")
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06c0c6d0541e..7328c8c0ea3b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11911,12 +11911,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 
 		if (set)
 			ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
-		else {
-			/* suppress notification event before unposting */
-			pi_set_sn(vcpu_to_pi_desc(vcpu));
+		else
 			ret = irq_set_vcpu_affinity(host_irq, NULL);
-			pi_clear_sn(vcpu_to_pi_desc(vcpu));
-		}
 
 		if (ret < 0) {
 			printk(KERN_INFO "%s: failed to update PI IRTE\n",
-- 
cgit 


From 5753743fa5108b8f98bd61e40dc63f641b26c768 Mon Sep 17 00:00:00 2001
From: Haozhong Zhang <haozhong.zhang@intel.com>
Date: Mon, 18 Sep 2017 09:56:50 +0800
Subject: KVM: VMX: remove WARN_ON_ONCE in kvm_vcpu_trigger_posted_interrupt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc)) in kvm_vcpu_trigger_posted_interrupt()
intends to detect the violation of invariant that VT-d PI notification
event is not suppressed when vcpu is in the guest mode. Because the
two checks for the target vcpu mode and the target suppress field
cannot be performed atomically, the target vcpu mode may change in
between. If that does happen, WARN_ON_ONCE() here may raise false
alarms.

As the previous patch fixed the real invariant breaker, remove this
WARN_ON_ONCE() to avoid false alarms, and document the allowed cases
instead.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reported-by: "Ramamurthy, Venkatesh" <venkatesh.ramamurthy@intel.com>
Reported-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Fixes: 28b835d60fcc ("KVM: Update Posted-Interrupts Descriptor when vCPU is preempted")
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7328c8c0ea3b..0726ca7a1b02 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5077,21 +5077,30 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
 	int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
 
 	if (vcpu->mode == IN_GUEST_MODE) {
-		struct vcpu_vmx *vmx = to_vmx(vcpu);
-
 		/*
-		 * Currently, we don't support urgent interrupt,
-		 * all interrupts are recognized as non-urgent
-		 * interrupt, so we cannot post interrupts when
-		 * 'SN' is set.
+		 * The vector of interrupt to be delivered to vcpu had
+		 * been set in PIR before this function.
+		 *
+		 * Following cases will be reached in this block, and
+		 * we always send a notification event in all cases as
+		 * explained below.
+		 *
+		 * Case 1: vcpu keeps in non-root mode. Sending a
+		 * notification event posts the interrupt to vcpu.
+		 *
+		 * Case 2: vcpu exits to root mode and is still
+		 * runnable. PIR will be synced to vIRR before the
+		 * next vcpu entry. Sending a notification event in
+		 * this case has no effect, as vcpu is not in root
+		 * mode.
 		 *
-		 * If the vcpu is in guest mode, it means it is
-		 * running instead of being scheduled out and
-		 * waiting in the run queue, and that's the only
-		 * case when 'SN' is set currently, warning if
-		 * 'SN' is set.
+		 * Case 3: vcpu exits to root mode and is blocked.
+		 * vcpu_block() has already synced PIR to vIRR and
+		 * never blocks vcpu if vIRR is not cleared. Therefore,
+		 * a blocked vcpu here does not wait for any requested
+		 * interrupts in PIR, and sending a notification event
+		 * which has no effect is safe here.
 		 */
-		WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
 
 		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
 		return true;
-- 
cgit 


From 0555ac4333d7da7cff83d5b06bd3679a3e1ef584 Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho@docker.com>
Date: Mon, 18 Sep 2017 16:35:32 -0600
Subject: xen, arm64: drop dummy lookup_address()

This is unused, and conflicts with the definition that we'll add for XPFO.

Signed-off-by: Tycho Andersen <tycho@docker.com>
Reviewed-by: Julien Grall <julien.grall@arm.com>
CC: Boris Ostrovsky <boris.ostrovsky@oracle.com>
CC: Juergen Gross <jgross@suse.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 include/xen/arm/page.h | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/include/xen/arm/page.h b/include/xen/arm/page.h
index 415dbc6e43fd..6adc2a955340 100644
--- a/include/xen/arm/page.h
+++ b/include/xen/arm/page.h
@@ -84,16 +84,6 @@ static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 	BUG();
 }
 
-/* TODO: this shouldn't be here but it is because the frontend drivers
- * are using it (its rolled in headers) even though we won't hit the code path.
- * So for right now just punt with this.
- */
-static inline pte_t *lookup_address(unsigned long address, unsigned int *level)
-{
-	BUG();
-	return NULL;
-}
-
 extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 				   struct gnttab_map_grant_ref *kmap_ops,
 				   struct page **pages, unsigned int count);
-- 
cgit 


From 986a5f70173626eea9863fee6d6e029f0f2bc361 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 11 Sep 2017 14:34:34 +0200
Subject: iommu/qcom: Depend on HAS_DMA to fix compile error

If NO_DMA=y:

    warning: (IPMMU_VMSA && ARM_SMMU && ARM_SMMU_V3 && QCOM_IOMMU) selects IOMMU_IO_PGTABLE_LPAE which has unmet direct dependencies (IOMMU_SUPPORT && HAS_DMA && (ARM || ARM64 || COMPILE_TEST && !GENERIC_ATOMIC64))

and

    drivers/iommu/io-pgtable-arm.o: In function `__arm_lpae_sync_pte':
    io-pgtable-arm.c:(.text+0x206): undefined reference to `bad_dma_ops'
    drivers/iommu/io-pgtable-arm.o: In function `__arm_lpae_free_pages':
    io-pgtable-arm.c:(.text+0x6a6): undefined reference to `bad_dma_ops'
    drivers/iommu/io-pgtable-arm.o: In function `__arm_lpae_alloc_pages':
    io-pgtable-arm.c:(.text+0x812): undefined reference to `bad_dma_ops'
    io-pgtable-arm.c:(.text+0x81c): undefined reference to `bad_dma_ops'
    io-pgtable-arm.c:(.text+0x862): undefined reference to `bad_dma_ops'
    drivers/iommu/io-pgtable-arm.o: In function `arm_lpae_run_tests':
    io-pgtable-arm.c:(.init.text+0x86): undefined reference to `alloc_io_pgtable_ops'
    io-pgtable-arm.c:(.init.text+0x47c): undefined reference to `free_io_pgtable_ops'
    drivers/iommu/qcom_iommu.o: In function `qcom_iommu_init_domain':
    qcom_iommu.c:(.text+0x1ce): undefined reference to `alloc_io_pgtable_ops'
    drivers/iommu/qcom_iommu.o: In function `qcom_iommu_domain_free':
    qcom_iommu.c:(.text+0x754): undefined reference to `free_io_pgtable_ops'

QCOM_IOMMU selects IOMMU_IO_PGTABLE_LPAE, which bypasses its dependency
on HAS_DMA.  Make QCOM_IOMMU depend on HAS_DMA to fix this.

Fixes: 0ae349a0f33fb040 ("iommu/qcom: Add qcom_iommu")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 8530ef78925d..f3a21343e636 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -374,6 +374,7 @@ config QCOM_IOMMU
 	# Note: iommu drivers cannot (yet?) be built as modules
 	bool "Qualcomm IOMMU Support"
 	depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64)
+	depends on HAS_DMA
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
 	select ARM_DMA_USE_IOMMU
-- 
cgit 


From 8dd33bcb7050dd6f8c1432732f930932c9d3a33e Mon Sep 17 00:00:00 2001
From: Bo Yan <byan@nvidia.com>
Date: Mon, 18 Sep 2017 10:03:35 -0700
Subject: tracing: Erase irqsoff trace with empty write

One convenient way to erase trace is "echo > trace". However, this
is currently broken if the current tracer is irqsoff tracer. This
is because irqsoff tracer use max_buffer as the default trace
buffer.

Set the max_buffer as the one to be cleared when it's the trace
buffer currently in use.

Link: http://lkml.kernel.org/r/1505754215-29411-1-git-send-email-byan@nvidia.com

Cc: <mingo@redhat.com>
Cc: stable@vger.kernel.org
Fixes: 4acd4d00f ("tracing: give easy way to clear trace buffer")
Signed-off-by: Bo Yan <byan@nvidia.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5360b7aec57a..a7fb136da891 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4020,11 +4020,17 @@ static int tracing_open(struct inode *inode, struct file *file)
 	/* If this file was open for write, then erase contents */
 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
 		int cpu = tracing_get_cpu(inode);
+		struct trace_buffer *trace_buf = &tr->trace_buffer;
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+		if (tr->current_trace->print_max)
+			trace_buf = &tr->max_buffer;
+#endif
 
 		if (cpu == RING_BUFFER_ALL_CPUS)
-			tracing_reset_online_cpus(&tr->trace_buffer);
+			tracing_reset_online_cpus(trace_buf);
 		else
-			tracing_reset(&tr->trace_buffer, cpu);
+			tracing_reset(trace_buf, cpu);
 	}
 
 	if (file->f_mode & FMODE_READ) {
-- 
cgit 


From c7b3ae0bd2ca658c7a71c49901d08c590294fac9 Mon Sep 17 00:00:00 2001
From: "Ziqian SUN (Zamir)" <zsun@redhat.com>
Date: Mon, 11 Sep 2017 14:26:35 +0800
Subject: tracing: Ignore mmiotrace from kernel commandline

The mmiotrace tracer cannot be enabled with ftrace=mmiotrace in kernel
commandline. With this patch, noboot is added to the tracer struct,
and when system boot with a tracer that has noboot=true, it will print
out a warning message and continue booting.

Link: http://lkml.kernel.org/r/1505111195-31942-1-git-send-email-zsun@redhat.com

Signed-off-by: Ziqian SUN (Zamir) <zsun@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c           | 7 +++++++
 kernel/trace/trace.h           | 2 ++
 kernel/trace/trace_mmiotrace.c | 1 +
 3 files changed, 10 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a7fb136da891..d3ca35f38803 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5364,6 +5364,13 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf)
 	if (t == tr->current_trace)
 		goto out;
 
+	/* Some tracers won't work on kernel command line */
+	if (system_state < SYSTEM_RUNNING && t->noboot) {
+		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
+			t->name);
+		goto out;
+	}
+
 	/* Some tracers are only allowed for the top level buffer */
 	if (!trace_ok_for_array(t, tr)) {
 		ret = -EINVAL;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fb5d54d0d1b3..652c682707cd 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -444,6 +444,8 @@ struct tracer {
 #ifdef CONFIG_TRACER_MAX_TRACE
 	bool			use_max_tr;
 #endif
+	/* True if tracer cannot be enabled in kernel param */
+	bool			noboot;
 };
 
 
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index cd7480d0a201..dca78fc48439 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -282,6 +282,7 @@ static struct tracer mmio_tracer __read_mostly =
 	.close		= mmio_close,
 	.read		= mmio_read,
 	.print_line	= mmio_print_line,
+	.noboot		= true,
 };
 
 __init static int init_mmio_trace(void)
-- 
cgit 


From aa767cfb75341a6b93742f4d7d1589ac2532c29e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 11 Sep 2017 22:07:50 +0200
Subject: of: provide inline helper for of_find_device_by_node

The ipmmu-vmsa driver fails in compile-testing on non-OF platforms:

drivers/iommu/ipmmu-vmsa.o: In function `ipmmu_of_xlate':
ipmmu-vmsa.c:(.text+0x740): undefined reference to `of_find_device_by_node'

It would be reasonable to assume that this interface works but
returns failure on non-OF builds, like it does on machines that
have been booted in another way, so this adds another inline
function helper.

Fixes: 7b2d59611fef ("iommu/ipmmu-vmsa: Replace local utlb code with fwspec ids")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of_platform.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index e0d1946270f3..fb908e598348 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -57,7 +57,14 @@ extern const struct of_device_id of_default_bus_match_table[];
 extern struct platform_device *of_device_alloc(struct device_node *np,
 					 const char *bus_id,
 					 struct device *parent);
+#ifdef CONFIG_OF
 extern struct platform_device *of_find_device_by_node(struct device_node *np);
+#else
+static inline struct platform_device *of_find_device_by_node(struct device_node *np)
+{
+	return NULL;
+}
+#endif
 
 /* Platform devices and busses creation */
 extern struct platform_device *of_platform_device_create(struct device_node *np,
-- 
cgit 


From a6899e900509bb2442239d6198dc8bc64f8437ec Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 14 Sep 2017 23:39:38 +0200
Subject: dt-bindings: fix vendor prefix for Abracon

Commit 446810f2dd41 ("of: add vendor prefix for Abracon Corporation")
claimed that "abcn" was used as the vendor prefix while in fact "abracon"
was used in the subsequent commits. It is also the only prefix used in the
tree.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
[robh: fix alphabetical order]
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 1ea1fd4232ab..1afd298eddd7 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -3,8 +3,8 @@ Device tree binding vendor prefix registry.  Keep list in alphabetical order.
 This isn't an exhaustive list, but you should add new prefixes to it before
 using them to avoid name-space collisions.
 
-abcn	Abracon Corporation
 abilis	Abilis Systems
+abracon	Abracon Corporation
 actions	Actions Semiconductor Co., Ltd.
 active-semi	Active-Semi International Inc
 ad	Avionic Design GmbH
-- 
cgit 


From 3993491bf27117782bee05debc6a6afa51d61760 Mon Sep 17 00:00:00 2001
From: Ariel Elior <aelior@cavium.com>
Date: Tue, 19 Sep 2017 12:54:34 +0300
Subject: MAINTAINERS: Remove Yuval Mintz from maintainers list

Remove Yuval from maintaining the bnx2x & qed* modules as he is no longer
working for the company. Thanks Yuval for your huge contributions and
tireless efforts over the many years and various companies.

Ariel
Signed-off-by: Ariel Elior <aelior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 --
 1 file changed, 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2281af4b41b6..955f034fd523 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2853,7 +2853,6 @@ S:	Supported
 F:	drivers/scsi/bnx2i/
 
 BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
-M:	Yuval Mintz <Yuval.Mintz@cavium.com>
 M:	Ariel Elior <ariel.elior@cavium.com>
 M:	everest-linux-l2@cavium.com
 L:	netdev@vger.kernel.org
@@ -11047,7 +11046,6 @@ S:	Supported
 F:	drivers/scsi/qedi/
 
 QLOGIC QL4xxx ETHERNET DRIVER
-M:	Yuval Mintz <Yuval.Mintz@cavium.com>
 M:	Ariel Elior <Ariel.Elior@cavium.com>
 M:	everest-linux-l2@cavium.com
 L:	netdev@vger.kernel.org
-- 
cgit 


From 29a0cfbf91ba997591535a4f7246835ce8328141 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 18 Sep 2017 12:21:37 +0200
Subject: libceph: don't allow bidirectional swap of pg-upmap-items

This reverts most of commit f53b7665c8ce ("libceph: upmap semantic
changes").

We need to prevent duplicates in the final result.  For example, we
can currently take

  [1,2,3] and apply [(1,2)] and get [2,2,3]

or

  [1,2,3] and apply [(3,2)] and get [1,2,2]

The rest of the system is not prepared to handle duplicates in the
result set like this.

The reverted piece was intended to allow

  [1,2,3] and [(1,2),(2,1)] to get [2,1,3]

to reorder primaries.  First, this bidirectional swap is hard to
implement in a way that also prevents dups.  For example, [1,2,3] and
[(1,4),(2,3),(3,4)] would give [4,3,4] but would we just drop the last
step we'd have [4,3,3] which is also invalid, etc.  Simpler to just not
handle bidirectional swaps.  In practice, they are not needed: if you
just want to choose a different primary then use primary_affinity, or
pg_upmap (not pg_upmap_items).

Cc: stable@vger.kernel.org # 4.13
Link: http://tracker.ceph.com/issues/21410
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 net/ceph/osdmap.c | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f358d0bfa76b..79d14d70b7ea 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -2445,19 +2445,34 @@ static void apply_upmap(struct ceph_osdmap *osdmap,
 
 	pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
 	if (pg) {
-		for (i = 0; i < raw->size; i++) {
-			for (j = 0; j < pg->pg_upmap_items.len; j++) {
-				int from = pg->pg_upmap_items.from_to[j][0];
-				int to = pg->pg_upmap_items.from_to[j][1];
-
-				if (from == raw->osds[i]) {
-					if (!(to != CRUSH_ITEM_NONE &&
-					      to < osdmap->max_osd &&
-					      osdmap->osd_weight[to] == 0))
-						raw->osds[i] = to;
+		/*
+		 * Note: this approach does not allow a bidirectional swap,
+		 * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
+		 */
+		for (i = 0; i < pg->pg_upmap_items.len; i++) {
+			int from = pg->pg_upmap_items.from_to[i][0];
+			int to = pg->pg_upmap_items.from_to[i][1];
+			int pos = -1;
+			bool exists = false;
+
+			/* make sure replacement doesn't already appear */
+			for (j = 0; j < raw->size; j++) {
+				int osd = raw->osds[j];
+
+				if (osd == to) {
+					exists = true;
 					break;
 				}
+				/* ignore mapping if target is marked out */
+				if (osd == from && pos < 0 &&
+				    !(to != CRUSH_ITEM_NONE &&
+				      to < osdmap->max_osd &&
+				      osdmap->osd_weight[to] == 0)) {
+					pos = j;
+				}
 			}
+			if (!exists && pos >= 0)
+				raw->osds[pos] = to;
 		}
 	}
 }
-- 
cgit 


From 3fad4cdac235c5b13227d0c09854c689ae62c70b Mon Sep 17 00:00:00 2001
From: zijun_hu <zijun_hu@htc.com>
Date: Sat, 16 Sep 2017 01:59:41 +0800
Subject: irqchip/gic-v3: Iterate over possible CPUs by for_each_possible_cpu()

get_cpu_number() doesn't use existing helper to iterate over possible
CPUs, It will cause an error in case of discontinuous @cpu_possible_mask
such as 0b11110001, which can result from a core having failed to come
up on a SMP machine.

Fixed by using existing helper for_each_possible_cpu().

Signed-off-by: zijun_hu <zijun_hu@htc.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-gic-v3.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 519149ec9053..b5df99c6f680 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1042,7 +1042,7 @@ static int get_cpu_number(struct device_node *dn)
 {
 	const __be32 *cell;
 	u64 hwid;
-	int i;
+	int cpu;
 
 	cell = of_get_property(dn, "reg", NULL);
 	if (!cell)
@@ -1056,9 +1056,9 @@ static int get_cpu_number(struct device_node *dn)
 	if (hwid & ~MPIDR_HWID_BITMASK)
 		return -1;
 
-	for (i = 0; i < num_possible_cpus(); i++)
-		if (cpu_logical_map(i) == hwid)
-			return i;
+	for_each_possible_cpu(cpu)
+		if (cpu_logical_map(cpu) == hwid)
+			return cpu;
 
 	return -1;
 }
-- 
cgit 


From 6c09ffd027255d97c536aa4ac51a90973adfedc6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 12 Sep 2017 22:08:23 +0200
Subject: irqchip/gic-v4: Fix building with ancient gcc

gcc-4.5 and earlier don't like named initializers for anonymous
union members:

drivers/irqchip/irq-gic-v4.c: In function 'its_map_vlpi':
drivers/irqchip/irq-gic-v4.c:176:3: error: unknown field 'map' specified in initializer
drivers/irqchip/irq-gic-v4.c:176:3: error: missing braces around initializer
drivers/irqchip/irq-gic-v4.c:176:3: error: (near initialization for 'info.<anonymous>')
drivers/irqchip/irq-gic-v4.c: In function 'its_get_vlpi':
drivers/irqchip/irq-gic-v4.c:192:3: error: unknown field 'map' specified in initializer
drivers/irqchip/irq-gic-v4.c:192:3: error: missing braces around initializer
drivers/irqchip/irq-gic-v4.c:192:3: error: (near initialization for 'info.<anonymous>')
drivers/irqchip/irq-gic-v4.c: In function 'its_prop_update_vlpi':
drivers/irqchip/irq-gic-v4.c:208:3: error: unknown field 'config' specified in initializer
drivers/irqchip/irq-gic-v4.c:208:3: error: missing braces around initializer
drivers/irqchip/irq-gic-v4.c:208:3: error: (near initialization for 'info.<anonymous>')
drivers/irqchip/irq-gic-v4.c:208:3: error: initialization makes pointer from integer without a cast

This is fairly easy to work around, by using extra curly braces.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-gic-v4.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index 2370e6d9e603..cd0bcc3b7e33 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -173,7 +173,9 @@ int its_map_vlpi(int irq, struct its_vlpi_map *map)
 {
 	struct its_cmd_info info = {
 		.cmd_type = MAP_VLPI,
-		.map      = map,
+		{
+			.map      = map,
+		},
 	};
 
 	/*
@@ -189,7 +191,9 @@ int its_get_vlpi(int irq, struct its_vlpi_map *map)
 {
 	struct its_cmd_info info = {
 		.cmd_type = GET_VLPI,
-		.map      = map,
+		{
+			.map      = map,
+		},
 	};
 
 	return irq_set_vcpu_affinity(irq, &info);
@@ -205,7 +209,9 @@ int its_prop_update_vlpi(int irq, u8 config, bool inv)
 {
 	struct its_cmd_info info = {
 		.cmd_type = inv ? PROP_UPDATE_AND_INV_VLPI : PROP_UPDATE_VLPI,
-		.config   = config,
+		{
+			.config   = config,
+		},
 	};
 
 	return irq_set_vcpu_affinity(irq, &info);
-- 
cgit 


From 90019f8fcd71bf71653690329e32f41489e96122 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Tue, 5 Sep 2017 11:28:46 -0700
Subject: irqchip.mips-gic: Fix shared interrupt mask writes

The write_gic_smask() & write_gic_rmask() functions take a shared
interrupt number as a parameter, but we're incorrectly providing them a
bitmask with the shared interrupt's bit set. This effectively means that
we mask or unmask the shared interrupt 1<<n rather than shared interrupt
n, and as a result likely drop interrupts.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Fixes: 68898c8765f4 ("irqchip: mips-gic: Drop gic_(re)set_mask() functions")
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mips@linux-mips.org
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-mips-gic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 6e52a88bbd9e..40159ac12ac8 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -169,7 +169,7 @@ static void gic_mask_irq(struct irq_data *d)
 {
 	unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq);
 
-	write_gic_rmask(BIT(intr));
+	write_gic_rmask(intr);
 	gic_clear_pcpu_masks(intr);
 }
 
@@ -179,7 +179,7 @@ static void gic_unmask_irq(struct irq_data *d)
 	unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq);
 	unsigned int cpu;
 
-	write_gic_smask(BIT(intr));
+	write_gic_smask(intr);
 
 	gic_clear_pcpu_masks(intr);
 	cpu = cpumask_first_and(affinity, cpu_online_mask);
@@ -767,7 +767,7 @@ static int __init gic_of_init(struct device_node *node,
 	for (i = 0; i < gic_shared_intrs; i++) {
 		change_gic_pol(i, GIC_POL_ACTIVE_HIGH);
 		change_gic_trig(i, GIC_TRIG_LEVEL);
-		write_gic_rmask(BIT(i));
+		write_gic_rmask(i);
 	}
 
 	for (i = 0; i < gic_vpes; i++) {
-- 
cgit 


From 717e6f2893eb35ce6728c3cacdc297b78d371b31 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Mon, 11 Sep 2017 12:10:08 +0800
Subject: ceph: avoid panic in create_session_open_msg() if utsname() returns
 NULL

utsname() can return NULL while process is exiting. Kernel releases
file locks during process exits. We send request to mds when releasing
file lock. So it's possible that we open mds session while process is
exiting. utsname() is called in create_session_open_msg().

Link: http://tracker.ceph.com/issues/21275
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
[idryomov@gmail.com: drop utsname.h include from mds_client.c]
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c | 7 ++++---
 fs/ceph/mds_client.h | 3 +++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9dd6b836ac9e..84edfc60d87a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -7,7 +7,6 @@
 #include <linux/sched.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-#include <linux/utsname.h>
 #include <linux/ratelimit.h>
 
 #include "super.h"
@@ -884,8 +883,8 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 	void *p;
 
 	const char* metadata[][2] = {
-		{"hostname", utsname()->nodename},
-		{"kernel_version", utsname()->release},
+		{"hostname", mdsc->nodename},
+		{"kernel_version", init_utsname()->release},
 		{"entity_id", opt->name ? : ""},
 		{"root", fsopt->server_path ? : "/"},
 		{NULL, NULL}
@@ -3539,6 +3538,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	init_rwsem(&mdsc->pool_perm_rwsem);
 	mdsc->pool_perm_tree = RB_ROOT;
 
+	strncpy(mdsc->nodename, utsname()->nodename,
+		sizeof(mdsc->nodename) - 1);
 	return 0;
 }
 
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index db57ae98ed34..636d6b2ec49c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -8,6 +8,7 @@
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/refcount.h>
+#include <linux/utsname.h>
 
 #include <linux/ceph/types.h>
 #include <linux/ceph/messenger.h>
@@ -368,6 +369,8 @@ struct ceph_mds_client {
 
 	struct rw_semaphore     pool_perm_rwsem;
 	struct rb_root		pool_perm_tree;
+
+	char nodename[__NEW_UTS_LEN + 1];
 };
 
 extern const char *ceph_mds_op_name(int op);
-- 
cgit 


From 19a8d6b7604df85402deecae01d7861cb1d40c89 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Tue, 19 Sep 2017 15:50:42 +0100
Subject: MIPS: PCI: Move map_irq() hooks out of initdata

04c81c7293df ("MIPS: PCI: Replace pci_fixup_irqs() call with host bridge
IRQ mapping hooks") moved the PCI IRQ fixup to the new host bridge
map/swizzle_irq() hooks mechanism. Those hooks can also be called after
boot, when all the __init/__initdata/__initconst sections have been freed.
Therefore, functions called by them (and the data they refer to) must not
be marked as __init/__initdata/__initconst lest compilation trigger section
mismatch warnings.

Fix all the board files map_irq() hooks by simply removing the respective
__init/__initdata/__initconst section markers and by adding another
persistent hook IRQ map for the txx9 board files.

Fixes: 04c81c7293df ("MIPS: PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks")
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Steve French <smfrench@gmail.com>
---
 arch/mips/ath79/pci.c           | 12 ++++++------
 arch/mips/pci/fixup-capcella.c  |  4 ++--
 arch/mips/pci/fixup-cobalt.c    |  8 ++++----
 arch/mips/pci/fixup-emma2rh.c   |  4 ++--
 arch/mips/pci/fixup-fuloong2e.c |  2 +-
 arch/mips/pci/fixup-ip32.c      |  4 ++--
 arch/mips/pci/fixup-jmr3927.c   |  2 +-
 arch/mips/pci/fixup-lantiq.c    |  2 +-
 arch/mips/pci/fixup-lemote2f.c  |  4 ++--
 arch/mips/pci/fixup-loongson3.c |  2 +-
 arch/mips/pci/fixup-malta.c     |  4 ++--
 arch/mips/pci/fixup-mpc30x.c    |  6 +++---
 arch/mips/pci/fixup-pmcmsp.c    |  8 ++++----
 arch/mips/pci/fixup-rbtx4927.c  |  2 +-
 arch/mips/pci/fixup-rbtx4938.c  |  2 +-
 arch/mips/pci/fixup-sni.c       | 12 ++++++------
 arch/mips/pci/fixup-tb0219.c    |  2 +-
 arch/mips/pci/fixup-tb0226.c    |  2 +-
 arch/mips/pci/fixup-tb0287.c    |  2 +-
 arch/mips/pci/pci-alchemy.c     |  2 +-
 arch/mips/pci/pci-bcm47xx.c     |  2 +-
 arch/mips/pci/pci-lasat.c       |  2 +-
 arch/mips/pci/pci-mt7620.c      |  2 +-
 arch/mips/pci/pci-octeon.c      |  5 ++---
 arch/mips/pci/pci-rt2880.c      |  2 +-
 arch/mips/pci/pci-rt3883.c      |  2 +-
 arch/mips/pci/pci-tx4938.c      |  2 +-
 arch/mips/pci/pci-tx4939.c      |  4 ++--
 arch/mips/pci/pci-xlp.c         |  2 +-
 arch/mips/pci/pci-xlr.c         |  2 +-
 arch/mips/pci/pcie-octeon.c     |  3 +--
 arch/mips/txx9/generic/pci.c    |  8 ++++++--
 32 files changed, 62 insertions(+), 60 deletions(-)

diff --git a/arch/mips/ath79/pci.c b/arch/mips/ath79/pci.c
index 730c0b03060d..b816cb4a25ff 100644
--- a/arch/mips/ath79/pci.c
+++ b/arch/mips/ath79/pci.c
@@ -22,10 +22,10 @@
 #include "pci.h"
 
 static int (*ath79_pci_plat_dev_init)(struct pci_dev *dev);
-static const struct ath79_pci_irq *ath79_pci_irq_map __initdata;
-static unsigned ath79_pci_nr_irqs __initdata;
+static const struct ath79_pci_irq *ath79_pci_irq_map;
+static unsigned ath79_pci_nr_irqs;
 
-static const struct ath79_pci_irq ar71xx_pci_irq_map[] __initconst = {
+static const struct ath79_pci_irq ar71xx_pci_irq_map[] = {
 	{
 		.slot	= 17,
 		.pin	= 1,
@@ -41,7 +41,7 @@ static const struct ath79_pci_irq ar71xx_pci_irq_map[] __initconst = {
 	}
 };
 
-static const struct ath79_pci_irq ar724x_pci_irq_map[] __initconst = {
+static const struct ath79_pci_irq ar724x_pci_irq_map[] = {
 	{
 		.slot	= 0,
 		.pin	= 1,
@@ -49,7 +49,7 @@ static const struct ath79_pci_irq ar724x_pci_irq_map[] __initconst = {
 	}
 };
 
-static const struct ath79_pci_irq qca955x_pci_irq_map[] __initconst = {
+static const struct ath79_pci_irq qca955x_pci_irq_map[] = {
 	{
 		.bus	= 0,
 		.slot	= 0,
@@ -64,7 +64,7 @@ static const struct ath79_pci_irq qca955x_pci_irq_map[] __initconst = {
 	},
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, uint8_t slot, uint8_t pin)
+int pcibios_map_irq(const struct pci_dev *dev, uint8_t slot, uint8_t pin)
 {
 	int irq = -1;
 	int i;
diff --git a/arch/mips/pci/fixup-capcella.c b/arch/mips/pci/fixup-capcella.c
index 1c02f5737367..b4c263f16b15 100644
--- a/arch/mips/pci/fixup-capcella.c
+++ b/arch/mips/pci/fixup-capcella.c
@@ -32,13 +32,13 @@
 #define INTC	PC104PLUS_INTC_IRQ
 #define INTD	PC104PLUS_INTD_IRQ
 
-static char irq_tab_capcella[][5] __initdata = {
+static char irq_tab_capcella[][5] = {
  [11] = { -1, INT1, INT1, INT1, INT1 },
  [12] = { -1, INT2, INT2, INT2, INT2 },
  [14] = { -1, INTA, INTB, INTC, INTD }
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return irq_tab_capcella[slot][pin];
 }
diff --git a/arch/mips/pci/fixup-cobalt.c b/arch/mips/pci/fixup-cobalt.c
index b3ab59318d91..44be65c3e6bb 100644
--- a/arch/mips/pci/fixup-cobalt.c
+++ b/arch/mips/pci/fixup-cobalt.c
@@ -147,7 +147,7 @@ static void qube_raq_via_board_id_fixup(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0,
 	 qube_raq_via_board_id_fixup);
 
-static char irq_tab_qube1[] __initdata = {
+static char irq_tab_qube1[] = {
   [COBALT_PCICONF_CPU]	   = 0,
   [COBALT_PCICONF_ETH0]	   = QUBE1_ETH0_IRQ,
   [COBALT_PCICONF_RAQSCSI] = SCSI_IRQ,
@@ -156,7 +156,7 @@ static char irq_tab_qube1[] __initdata = {
   [COBALT_PCICONF_ETH1]	   = 0
 };
 
-static char irq_tab_cobalt[] __initdata = {
+static char irq_tab_cobalt[] = {
   [COBALT_PCICONF_CPU]	   = 0,
   [COBALT_PCICONF_ETH0]	   = ETH0_IRQ,
   [COBALT_PCICONF_RAQSCSI] = SCSI_IRQ,
@@ -165,7 +165,7 @@ static char irq_tab_cobalt[] __initdata = {
   [COBALT_PCICONF_ETH1]	   = ETH1_IRQ
 };
 
-static char irq_tab_raq2[] __initdata = {
+static char irq_tab_raq2[] = {
   [COBALT_PCICONF_CPU]	   = 0,
   [COBALT_PCICONF_ETH0]	   = ETH0_IRQ,
   [COBALT_PCICONF_RAQSCSI] = RAQ2_SCSI_IRQ,
@@ -174,7 +174,7 @@ static char irq_tab_raq2[] __initdata = {
   [COBALT_PCICONF_ETH1]	   = ETH1_IRQ
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (cobalt_board_id <= COBALT_BRD_ID_QUBE1)
 		return irq_tab_qube1[slot];
diff --git a/arch/mips/pci/fixup-emma2rh.c b/arch/mips/pci/fixup-emma2rh.c
index 19caf775c206..c31cb6af1cd0 100644
--- a/arch/mips/pci/fixup-emma2rh.c
+++ b/arch/mips/pci/fixup-emma2rh.c
@@ -43,7 +43,7 @@
  */
 
 #define MAX_SLOT_NUM 10
-static unsigned char irq_map[][5] __initdata = {
+static unsigned char irq_map[][5] = {
 	[3] = {0, MARKEINS_PCI_IRQ_INTB, MARKEINS_PCI_IRQ_INTC,
 	       MARKEINS_PCI_IRQ_INTD, 0,},
 	[4] = {0, MARKEINS_PCI_IRQ_INTA, 0, 0, 0,},
@@ -85,7 +85,7 @@ static void emma2rh_pci_host_fixup(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_EMMA2RH,
 			 emma2rh_pci_host_fixup);
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return irq_map[slot][pin];
 }
diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c
index 50da773faede..b47c2771dc99 100644
--- a/arch/mips/pci/fixup-fuloong2e.c
+++ b/arch/mips/pci/fixup-fuloong2e.c
@@ -19,7 +19,7 @@
 /* South bridge slot number is set by the pci probe process */
 static u8 sb_slot = 5;
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = 0;
 
diff --git a/arch/mips/pci/fixup-ip32.c b/arch/mips/pci/fixup-ip32.c
index 133685e215ee..c6ec18a07e63 100644
--- a/arch/mips/pci/fixup-ip32.c
+++ b/arch/mips/pci/fixup-ip32.c
@@ -21,7 +21,7 @@
 #define INTB   MACEPCI_SHARED0_IRQ
 #define INTC   MACEPCI_SHARED1_IRQ
 #define INTD   MACEPCI_SHARED2_IRQ
-static char irq_tab_mace[][5] __initdata = {
+static char irq_tab_mace[][5] = {
       /* Dummy	INT#A  INT#B  INT#C  INT#D */
 	{0,	    0,	   0,	  0,	 0}, /* This is placeholder row - never used */
 	{0,	SCSI0, SCSI0, SCSI0, SCSI0},
@@ -39,7 +39,7 @@ static char irq_tab_mace[][5] __initdata = {
  * irqs.  I suppose a device without a pin A will thank us for doing it
  * right if there exists such a broken piece of crap.
  */
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return irq_tab_mace[slot][pin];
 }
diff --git a/arch/mips/pci/fixup-jmr3927.c b/arch/mips/pci/fixup-jmr3927.c
index 0f1069527cba..d3102eeea898 100644
--- a/arch/mips/pci/fixup-jmr3927.c
+++ b/arch/mips/pci/fixup-jmr3927.c
@@ -31,7 +31,7 @@
 #include <asm/txx9/pci.h>
 #include <asm/txx9/jmr3927.h>
 
-int __init jmr3927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int jmr3927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	unsigned char irq = pin;
 
diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c
index 2b5427d3f35c..81530a13b349 100644
--- a/arch/mips/pci/fixup-lantiq.c
+++ b/arch/mips/pci/fixup-lantiq.c
@@ -23,7 +23,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	return 0;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return of_irq_parse_and_map_pci(dev, slot, pin);
 }
diff --git a/arch/mips/pci/fixup-lemote2f.c b/arch/mips/pci/fixup-lemote2f.c
index 95ab9a1bd010..20cdfdc08938 100644
--- a/arch/mips/pci/fixup-lemote2f.c
+++ b/arch/mips/pci/fixup-lemote2f.c
@@ -30,7 +30,7 @@
 #define PCID		7
 
 /* all the pci device has the PCIA pin, check the datasheet. */
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
 	/*	INTA	INTB	INTC	INTD */
 	{0, 0, 0, 0, 0},	/*  11: Unused */
 	{0, 0, 0, 0, 0},	/*  12: Unused */
@@ -51,7 +51,7 @@ static char irq_tab[][5] __initdata = {
 	{0, 0, 0, 0, 0},	/*  27: Unused */
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int virq;
 
diff --git a/arch/mips/pci/fixup-loongson3.c b/arch/mips/pci/fixup-loongson3.c
index 2b6d5e196f99..8a741c2c6685 100644
--- a/arch/mips/pci/fixup-loongson3.c
+++ b/arch/mips/pci/fixup-loongson3.c
@@ -32,7 +32,7 @@ static void print_fixup_info(const struct pci_dev *pdev)
 			pdev->vendor, pdev->device, pdev->irq);
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	print_fixup_info(dev);
 	return dev->irq;
diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c
index 40e920c653cc..3ec85331795e 100644
--- a/arch/mips/pci/fixup-malta.c
+++ b/arch/mips/pci/fixup-malta.c
@@ -12,7 +12,7 @@
 static char pci_irq[5] = {
 };
 
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
 	/*	INTA	INTB	INTC	INTD */
 	{0,	0,	0,	0,	0 },	/*  0: GT64120 PCI bridge */
 	{0,	0,	0,	0,	0 },	/*  1: Unused */
@@ -38,7 +38,7 @@ static char irq_tab[][5] __initdata = {
 	{0,	PCID,	PCIA,	PCIB,	PCIC }	/* 21: PCI Slot 4 */
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int virq;
 	virq = irq_tab[slot][pin];
diff --git a/arch/mips/pci/fixup-mpc30x.c b/arch/mips/pci/fixup-mpc30x.c
index 8e4f8288eca2..66eaf456bc89 100644
--- a/arch/mips/pci/fixup-mpc30x.c
+++ b/arch/mips/pci/fixup-mpc30x.c
@@ -22,19 +22,19 @@
 
 #include <asm/vr41xx/mpc30x.h>
 
-static const int internal_func_irqs[] __initconst = {
+static const int internal_func_irqs[] = {
 	VRC4173_CASCADE_IRQ,
 	VRC4173_AC97_IRQ,
 	VRC4173_USB_IRQ,
 };
 
-static const int irq_tab_mpc30x[] __initconst = {
+static const int irq_tab_mpc30x[] = {
  [12] = VRC4173_PCMCIA1_IRQ,
  [13] = VRC4173_PCMCIA2_IRQ,
  [29] = MQ200_IRQ,
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (slot == 30)
 		return internal_func_irqs[PCI_FUNC(dev->devfn)];
diff --git a/arch/mips/pci/fixup-pmcmsp.c b/arch/mips/pci/fixup-pmcmsp.c
index fab405c21c2f..4ad2ef02087b 100644
--- a/arch/mips/pci/fixup-pmcmsp.c
+++ b/arch/mips/pci/fixup-pmcmsp.c
@@ -47,7 +47,7 @@
 
 #if defined(CONFIG_PMC_MSP7120_GW)
 /* Garibaldi Board IRQ wiring to PCI slots */
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
 	/* INTA	   INTB	   INTC	   INTD */
 	{0,	0,	0,	0,	0 },	/*    (AD[0]): Unused */
 	{0,	0,	0,	0,	0 },	/*    (AD[1]): Unused */
@@ -86,7 +86,7 @@ static char irq_tab[][5] __initdata = {
 #elif defined(CONFIG_PMC_MSP7120_EVAL)
 
 /* MSP7120 Eval Board IRQ wiring to PCI slots */
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
 	/* INTA	   INTB	   INTC	   INTD */
 	{0,	0,	0,	0,	0 },	/*    (AD[0]): Unused */
 	{0,	0,	0,	0,	0 },	/*    (AD[1]): Unused */
@@ -125,7 +125,7 @@ static char irq_tab[][5] __initdata = {
 #else
 
 /* Unknown board -- don't assign any IRQs */
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
 	/* INTA	   INTB	   INTC	   INTD */
 	{0,	0,	0,	0,	0 },	/*    (AD[0]): Unused */
 	{0,	0,	0,	0,	0 },	/*    (AD[1]): Unused */
@@ -202,7 +202,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
  *  RETURNS:	 IRQ number
  *
  ****************************************************************************/
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 #if !defined(CONFIG_PMC_MSP7120_GW) && !defined(CONFIG_PMC_MSP7120_EVAL)
 	printk(KERN_WARNING "PCI: unknown board, no PCI IRQs assigned.\n");
diff --git a/arch/mips/pci/fixup-rbtx4927.c b/arch/mips/pci/fixup-rbtx4927.c
index 321db265829c..d6aaed1d6be9 100644
--- a/arch/mips/pci/fixup-rbtx4927.c
+++ b/arch/mips/pci/fixup-rbtx4927.c
@@ -36,7 +36,7 @@
 #include <asm/txx9/pci.h>
 #include <asm/txx9/rbtx4927.h>
 
-int __init rbtx4927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int rbtx4927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	unsigned char irq = pin;
 
diff --git a/arch/mips/pci/fixup-rbtx4938.c b/arch/mips/pci/fixup-rbtx4938.c
index a80579af609b..ff22a22db73e 100644
--- a/arch/mips/pci/fixup-rbtx4938.c
+++ b/arch/mips/pci/fixup-rbtx4938.c
@@ -13,7 +13,7 @@
 #include <asm/txx9/pci.h>
 #include <asm/txx9/rbtx4938.h>
 
-int __init rbtx4938_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int rbtx4938_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = tx4938_pcic1_map_irq(dev, slot);
 
diff --git a/arch/mips/pci/fixup-sni.c b/arch/mips/pci/fixup-sni.c
index f67ebeeb4200..adb9a58641e8 100644
--- a/arch/mips/pci/fixup-sni.c
+++ b/arch/mips/pci/fixup-sni.c
@@ -40,7 +40,7 @@
  * seem to be a documentation error.  At least on my RM200C the Cirrus
  * Logic CL-GD5434 VGA is device 3.
  */
-static char irq_tab_rm200[8][5] __initdata = {
+static char irq_tab_rm200[8][5] = {
 	/*	 INTA  INTB  INTC  INTD */
 	{     0,    0,	  0,	0,    0 },	/* EISA bridge */
 	{  SCSI, SCSI, SCSI, SCSI, SCSI },	/* SCSI */
@@ -57,7 +57,7 @@ static char irq_tab_rm200[8][5] __initdata = {
  *
  * The VGA card is optional for RM300 systems.
  */
-static char irq_tab_rm300d[8][5] __initdata = {
+static char irq_tab_rm300d[8][5] = {
 	/*	 INTA  INTB  INTC  INTD */
 	{     0,    0,	  0,	0,    0 },	/* EISA bridge */
 	{  SCSI, SCSI, SCSI, SCSI, SCSI },	/* SCSI */
@@ -69,7 +69,7 @@ static char irq_tab_rm300d[8][5] __initdata = {
 	{     0, INTD, INTA, INTB, INTC },	/* Slot 4 */
 };
 
-static char irq_tab_rm300e[5][5] __initdata = {
+static char irq_tab_rm300e[5][5] = {
 	/*	 INTA  INTB  INTC  INTD */
 	{     0,    0,	  0,	0,    0 },	/* HOST bridge */
 	{  SCSI, SCSI, SCSI, SCSI, SCSI },	/* SCSI */
@@ -96,7 +96,7 @@ static char irq_tab_rm300e[5][5] __initdata = {
 #define INTC	PCIT_IRQ_INTC
 #define INTD	PCIT_IRQ_INTD
 
-static char irq_tab_pcit[13][5] __initdata = {
+static char irq_tab_pcit[13][5] = {
 	/*	 INTA  INTB  INTC  INTD */
 	{     0,     0,	    0,	   0,	  0 },	/* HOST bridge */
 	{ SCSI0, SCSI0, SCSI0, SCSI0, SCSI0 },	/* SCSI */
@@ -113,7 +113,7 @@ static char irq_tab_pcit[13][5] __initdata = {
 	{     0,  INTA,	 INTB,	INTC,  INTD },	/* Slot 5 */
 };
 
-static char irq_tab_pcit_cplus[13][5] __initdata = {
+static char irq_tab_pcit_cplus[13][5] = {
 	/*	 INTA  INTB  INTC  INTD */
 	{     0,     0,	    0,	   0,	  0 },	/* HOST bridge */
 	{     0,  INTB,	 INTC,	INTD,  INTA },	/* PCI Slot 9 */
@@ -130,7 +130,7 @@ static inline int is_rm300_revd(void)
 	return (csmsr & 0xa0) == 0x20;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	switch (sni_brd_type) {
 	case SNI_BRD_PCI_TOWER_CPLUS:
diff --git a/arch/mips/pci/fixup-tb0219.c b/arch/mips/pci/fixup-tb0219.c
index d0b0083fbd27..cc581535f257 100644
--- a/arch/mips/pci/fixup-tb0219.c
+++ b/arch/mips/pci/fixup-tb0219.c
@@ -23,7 +23,7 @@
 
 #include <asm/vr41xx/tb0219.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = -1;
 
diff --git a/arch/mips/pci/fixup-tb0226.c b/arch/mips/pci/fixup-tb0226.c
index 4196ccf3ea3d..b827b5cad5fd 100644
--- a/arch/mips/pci/fixup-tb0226.c
+++ b/arch/mips/pci/fixup-tb0226.c
@@ -23,7 +23,7 @@
 #include <asm/vr41xx/giu.h>
 #include <asm/vr41xx/tb0226.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = -1;
 
diff --git a/arch/mips/pci/fixup-tb0287.c b/arch/mips/pci/fixup-tb0287.c
index 8c5039ed75d7..98f26285f2e3 100644
--- a/arch/mips/pci/fixup-tb0287.c
+++ b/arch/mips/pci/fixup-tb0287.c
@@ -22,7 +22,7 @@
 
 #include <asm/vr41xx/tb0287.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	unsigned char bus;
 	int irq = -1;
diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c
index e99ca7702d8a..f15ec98de2de 100644
--- a/arch/mips/pci/pci-alchemy.c
+++ b/arch/mips/pci/pci-alchemy.c
@@ -522,7 +522,7 @@ static int __init alchemy_pci_init(void)
 arch_initcall(alchemy_pci_init);
 
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	struct alchemy_pci_context *ctx = dev->sysdata;
 	if (ctx && ctx->board_map_irq)
diff --git a/arch/mips/pci/pci-bcm47xx.c b/arch/mips/pci/pci-bcm47xx.c
index 76f16eaed0ad..230d7dd273e2 100644
--- a/arch/mips/pci/pci-bcm47xx.c
+++ b/arch/mips/pci/pci-bcm47xx.c
@@ -28,7 +28,7 @@
 #include <linux/bcma/bcma.h>
 #include <bcm47xx.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return 0;
 }
diff --git a/arch/mips/pci/pci-lasat.c b/arch/mips/pci/pci-lasat.c
index 40d2797d2bc4..47f4ee6bbb3b 100644
--- a/arch/mips/pci/pci-lasat.c
+++ b/arch/mips/pci/pci-lasat.c
@@ -61,7 +61,7 @@ arch_initcall(lasat_pci_setup);
 #define LASAT_IRQ_PCIC	 (LASAT_IRQ_BASE + 7)
 #define LASAT_IRQ_PCID	 (LASAT_IRQ_BASE + 8)
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	switch (slot) {
 	case 1:
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
index 4e633c1e7ff3..90fba9bf98da 100644
--- a/arch/mips/pci/pci-mt7620.c
+++ b/arch/mips/pci/pci-mt7620.c
@@ -361,7 +361,7 @@ static int mt7620_pci_probe(struct platform_device *pdev)
 	return 0;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	u16 cmd;
 	u32 val;
diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c
index 9ee01936862e..3e92a06fa772 100644
--- a/arch/mips/pci/pci-octeon.c
+++ b/arch/mips/pci/pci-octeon.c
@@ -59,8 +59,7 @@ union octeon_pci_address {
 	} s;
 };
 
-int __initconst (*octeon_pcibios_map_irq)(const struct pci_dev *dev,
-					 u8 slot, u8 pin);
+int (*octeon_pcibios_map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
 enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID;
 
 /**
@@ -74,7 +73,7 @@ enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID;
  *		 as it goes through each bridge.
  * Returns Interrupt number for the device
  */
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (octeon_pcibios_map_irq)
 		return octeon_pcibios_map_irq(dev, slot, pin);
diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c
index d6360fe73d05..711cdccdf65b 100644
--- a/arch/mips/pci/pci-rt2880.c
+++ b/arch/mips/pci/pci-rt2880.c
@@ -181,7 +181,7 @@ static inline void rt2880_pci_write_u32(unsigned long reg, u32 val)
 	spin_unlock_irqrestore(&rt2880_pci_lock, flags);
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	u16 cmd;
 	int irq = -1;
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index 04f8ea953297..958899ffe99c 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -564,7 +564,7 @@ err_put_intc_node:
 	return err;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return of_irq_parse_and_map_pci(dev, slot, pin);
 }
diff --git a/arch/mips/pci/pci-tx4938.c b/arch/mips/pci/pci-tx4938.c
index 000c0e1f9ef8..a6418460e3c4 100644
--- a/arch/mips/pci/pci-tx4938.c
+++ b/arch/mips/pci/pci-tx4938.c
@@ -112,7 +112,7 @@ int __init tx4938_pciclk66_setup(void)
 	return pciclk;
 }
 
-int __init tx4938_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
+int tx4938_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
 {
 	if (get_tx4927_pcicptr(dev->bus->sysdata) == tx4938_pcic1ptr) {
 		switch (slot) {
diff --git a/arch/mips/pci/pci-tx4939.c b/arch/mips/pci/pci-tx4939.c
index 9d6acc00f348..09a65f7dbe7c 100644
--- a/arch/mips/pci/pci-tx4939.c
+++ b/arch/mips/pci/pci-tx4939.c
@@ -48,7 +48,7 @@ void __init tx4939_report_pci1clk(void)
 		((pciclk + 50000) / 100000) % 10);
 }
 
-int __init tx4939_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
+int tx4939_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
 {
 	if (get_tx4927_pcicptr(dev->bus->sysdata) == tx4939_pcic1ptr) {
 		switch (slot) {
@@ -68,7 +68,7 @@ int __init tx4939_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
 	return -1;
 }
 
-int __init tx4939_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int tx4939_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = tx4939_pcic1_map_irq(dev, slot);
 
diff --git a/arch/mips/pci/pci-xlp.c b/arch/mips/pci/pci-xlp.c
index 7babf01600cb..9eff9137f78e 100644
--- a/arch/mips/pci/pci-xlp.c
+++ b/arch/mips/pci/pci-xlp.c
@@ -205,7 +205,7 @@ int xlp_socdev_to_node(const struct pci_dev *lnkdev)
 		return PCI_SLOT(lnkdev->devfn) / 8;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	struct pci_dev *lnkdev;
 	int lnkfunc, node;
diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c
index 26d2dabef281..2a1c81a129ba 100644
--- a/arch/mips/pci/pci-xlr.c
+++ b/arch/mips/pci/pci-xlr.c
@@ -315,7 +315,7 @@ static void xls_pcie_ack_b(struct irq_data *d)
 	}
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return get_irq_vector(dev);
 }
diff --git a/arch/mips/pci/pcie-octeon.c b/arch/mips/pci/pcie-octeon.c
index ad3584dbc9d7..fd2887415bc8 100644
--- a/arch/mips/pci/pcie-octeon.c
+++ b/arch/mips/pci/pcie-octeon.c
@@ -1464,8 +1464,7 @@ static int cvmx_pcie_rc_initialize(int pcie_port)
  *		 as it goes through each bridge.
  * Returns Interrupt number for the device
  */
-int __init octeon_pcie_pcibios_map_irq(const struct pci_dev *dev,
-				       u8 slot, u8 pin)
+int octeon_pcie_pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	/*
 	 * The EBH5600 board with the PCI to PCIe bridge mistakenly
diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c
index 0bd2a1e1ff9a..fb998726bd5d 100644
--- a/arch/mips/txx9/generic/pci.c
+++ b/arch/mips/txx9/generic/pci.c
@@ -386,9 +386,10 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	return 0;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+static int (*txx9_pci_map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
-	return txx9_board_vec->pci_map_irq(dev, slot, pin);
+	return txx9_pci_map_irq(dev, slot, pin);
 }
 
 char * (*txx9_board_pcibios_setup)(char *str) __initdata;
@@ -424,5 +425,8 @@ char *__init txx9_pcibios_setup(char *str)
 			txx9_pci_err_action = TXX9_PCI_ERR_IGNORE;
 		return NULL;
 	}
+
+	txx9_pci_map_irq = txx9_board_vec->pci_map_irq;
+
 	return str;
 }
-- 
cgit 


From fbcab13d2e2511a858590846ac2e2d7cbd830591 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 19 Sep 2017 09:51:28 -0400
Subject: selftests: silence test output by default

Some of the networking tests are very noisy and make it impossible to
see if we actually passed the tests as they run.  Default to suppressing
the output from any tests run in order to make it easier to track what
failed.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 693616651da5..4665463779f5 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -24,7 +24,7 @@ define RUN_TESTS
 			echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
 			echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
 		else					\
-			cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests:  $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
+			cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests:  $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
 		fi;					\
 	done;
 endef
@@ -55,7 +55,7 @@ endif
 define EMIT_TESTS
 	@for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
 		BASENAME_TEST=`basename $$TEST`;	\
-		echo "(./$$BASENAME_TEST && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
+		echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
 	done;
 endef
 
-- 
cgit 


From 422d8dc6fd3afecd66bd6acfcd73a2d53e338ff3 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 19 Sep 2017 09:51:26 -0400
Subject: selftest: add a reuseaddr test

This is to test for a regression introduced by

b9470c27607b ("inet: kill smallest_size and smallest_port")

which introduced a problem with reuseaddr and bind conflicts.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/net/.gitignore           |   1 +
 tools/testing/selftests/net/Makefile             |   2 +-
 tools/testing/selftests/net/reuseaddr_conflict.c | 114 +++++++++++++++++++++++
 3 files changed, 116 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/net/reuseaddr_conflict.c

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 9801253e4802..c612d6e38c62 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -6,3 +6,4 @@ reuseport_bpf
 reuseport_bpf_cpu
 reuseport_bpf_numa
 reuseport_dualstack
+reuseaddr_conflict
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index de1f5772b878..3df542c84610 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -7,7 +7,7 @@ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetl
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket
 TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_FILES += reuseport_dualstack msg_zerocopy
+TEST_GEN_FILES += reuseport_dualstack msg_zerocopy reuseaddr_conflict
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
new file mode 100644
index 000000000000..7c5b12664b03
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_conflict.c
@@ -0,0 +1,114 @@
+/*
+ * Test for the regression introduced by
+ *
+ * b9470c27607b ("inet: kill smallest_size and smallest_port")
+ *
+ * If we open an ipv4 socket on a port with reuseaddr we shouldn't reset the tb
+ * when we open the ipv6 conterpart, which is what was happening previously.
+ */
+#include <errno.h>
+#include <error.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define PORT 9999
+
+int open_port(int ipv6, int any)
+{
+	int fd = -1;
+	int reuseaddr = 1;
+	int v6only = 1;
+	int addrlen;
+	int ret = -1;
+	struct sockaddr *addr;
+	int family = ipv6 ? AF_INET6 : AF_INET;
+
+	struct sockaddr_in6 addr6 = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(PORT),
+		.sin6_addr = in6addr_any
+	};
+	struct sockaddr_in addr4 = {
+		.sin_family = AF_INET,
+		.sin_port = htons(PORT),
+		.sin_addr.s_addr = any ? htonl(INADDR_ANY) : inet_addr("127.0.0.1"),
+	};
+
+
+	if (ipv6) {
+		addr = (struct sockaddr*)&addr6;
+		addrlen = sizeof(addr6);
+	} else {
+		addr = (struct sockaddr*)&addr4;
+		addrlen = sizeof(addr4);
+	}
+
+	if ((fd = socket(family, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+		perror("socket");
+		goto out;
+	}
+
+	if (ipv6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&v6only,
+			       sizeof(v6only)) < 0) {
+		perror("setsockopt IPV6_V6ONLY");
+		goto out;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr,
+		       sizeof(reuseaddr)) < 0) {
+		perror("setsockopt SO_REUSEADDR");
+		goto out;
+	}
+
+	if (bind(fd, addr, addrlen) < 0) {
+		perror("bind");
+		goto out;
+	}
+
+	if (any)
+		return fd;
+
+	if (listen(fd, 1) < 0) {
+		perror("listen");
+		goto out;
+	}
+	return fd;
+out:
+	close(fd);
+	return ret;
+}
+
+int main(void)
+{
+	int listenfd;
+	int fd1, fd2;
+
+	fprintf(stderr, "Opening 127.0.0.1:%d\n", PORT);
+	listenfd = open_port(0, 0);
+	if (listenfd < 0)
+		error(1, errno, "Couldn't open listen socket");
+	fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+	fd1 = open_port(0, 1);
+	if (fd1 >= 0)
+		error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+	fprintf(stderr, "Opening in6addr_any:%d\n", PORT);
+	fd1 = open_port(1, 1);
+	if (fd1 < 0)
+		error(1, errno, "Couldn't open ipv6 reuseport");
+	fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+	fd2 = open_port(0, 1);
+	if (fd2 >= 0)
+		error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+	close(fd1);
+	fprintf(stderr, "Opening INADDR_ANY:%d after closing ipv6 socket\n", PORT);
+	fd1 = open_port(0, 1);
+	if (fd1 >= 0)
+		error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
+	fprintf(stderr, "Success");
+	return 0;
+}
-- 
cgit 


From e06d79fbc338935ef27befc84f8b8b2e5f878a10 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 19 Sep 2017 09:51:27 -0400
Subject: selftests: actually run the various net selftests

These self tests are just self contained binaries, they are not run by
any of the scripts in the directory.  This means they need to be marked
with TEST_GEN_PROGS to actually be run, not TEST_GEN_FILES.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/net/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3df542c84610..d86bca991f45 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,9 +5,9 @@ CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
 TEST_GEN_FILES =  socket
-TEST_GEN_FILES += psock_fanout psock_tpacket
-TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_FILES += reuseport_dualstack msg_zerocopy reuseaddr_conflict
+TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
 
 include ../lib.mk
 
-- 
cgit 


From 06e8852ceecccdeff6841a6c5cd78a947a75d5bc Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Fri, 8 Sep 2017 14:01:19 +0200
Subject: selftests/net: msg_zerocopy enable build with older kernel headers

Explicitly define SO_EE_ORIGIN_ZEROCOPY.
This makes the test program build with older kernel headers,
e.g. from Debian 9.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/net/msg_zerocopy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 40232af5b023..3ab6ec403905 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -55,7 +55,7 @@
 #include <unistd.h>
 
 #ifndef SO_EE_ORIGIN_ZEROCOPY
-#define SO_EE_ORIGIN_ZEROCOPY		SO_EE_ORIGIN_UPAGE
+#define SO_EE_ORIGIN_ZEROCOPY		5
 #endif
 
 #ifndef SO_ZEROCOPY
-- 
cgit 


From 9e987b70ada27554c5d176421de1d167218c49b5 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Fri, 15 Sep 2017 17:35:27 -0700
Subject: ACPI / bus: Make ACPI_HANDLE() work for non-GPL code again

Due to commit db3e50f3234b (device property: Get rid of struct
fwnode_handle type field), ACPI_HANDLE() inadvertently became
a GPL-only call. The call path that led to that was:

ACPI_HANDLE()
    ACPI_COMPANION()
        to_acpi_device_node()
            is_acpi_device_node()
                acpi_device_fwnode_ops
                    DECLARE_ACPI_FWNODE_OPS(acpi_device_fwnode_ops);

...and the new DECLARE_ACPI_FWNODE_OPS() includes
EXPORT_SYMBOL_GPL, whereas previously it was a static struct.

In order to avoid changing any of that, let's instead provide ever
so slightly better encapsulation of those struct fwnode_operations
instances. Those do not really need to be directly used in
inline function calls in header files. Simply moving two small
functions (is_acpi_device_node and is_acpi_data_node) out of
acpi_bus.h, and into a .c file, does that.

That leaves the internals of struct fwnode_operations as GPL-only
(which I think was the intent all along), but un-breaks any driver
code out there that relies on the ACPI subsystem's being (historically)
an EXPORT_SYMBOL-usable system. By that, I mean, ACPI_HANDLE() and
other basic ACPI calls were non-GPL-protected.

Also, while I'm there, remove a tiny bit of redundancy that was missed
in the earlier commit, by having is_acpi_node() use the other two
routines, instead of checking fwnode directly.

Fixes: db3e50f3234b (device property: Get rid of struct fwnode_handle type field)
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c | 13 +++++++++++++
 include/acpi/acpi_bus.h | 18 ++++--------------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index c1c216163de3..1e3c2517a1ac 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1293,3 +1293,16 @@ static int acpi_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
 DECLARE_ACPI_FWNODE_OPS(acpi_device_fwnode_ops);
 DECLARE_ACPI_FWNODE_OPS(acpi_data_fwnode_ops);
 const struct fwnode_operations acpi_static_fwnode_ops;
+
+bool is_acpi_device_node(const struct fwnode_handle *fwnode)
+{
+	return !IS_ERR_OR_NULL(fwnode) &&
+		fwnode->ops == &acpi_device_fwnode_ops;
+}
+EXPORT_SYMBOL(is_acpi_device_node);
+
+bool is_acpi_data_node(const struct fwnode_handle *fwnode)
+{
+	return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops;
+}
+EXPORT_SYMBOL(is_acpi_data_node);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index dedf9d789166..fa1505292f6c 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -399,17 +399,12 @@ extern const struct fwnode_operations acpi_device_fwnode_ops;
 extern const struct fwnode_operations acpi_data_fwnode_ops;
 extern const struct fwnode_operations acpi_static_fwnode_ops;
 
+bool is_acpi_device_node(const struct fwnode_handle *fwnode);
+bool is_acpi_data_node(const struct fwnode_handle *fwnode);
+
 static inline bool is_acpi_node(const struct fwnode_handle *fwnode)
 {
-	return !IS_ERR_OR_NULL(fwnode) &&
-		(fwnode->ops == &acpi_device_fwnode_ops
-		 || fwnode->ops == &acpi_data_fwnode_ops);
-}
-
-static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode)
-{
-	return !IS_ERR_OR_NULL(fwnode) &&
-		fwnode->ops == &acpi_device_fwnode_ops;
+	return (is_acpi_device_node(fwnode) || is_acpi_data_node(fwnode));
 }
 
 #define to_acpi_device_node(__fwnode)					\
@@ -422,11 +417,6 @@ static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode)
 			NULL;						\
 	})
 
-static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode)
-{
-	return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops;
-}
-
 #define to_acpi_data_node(__fwnode)					\
 	({								\
 		typeof(__fwnode) __to_acpi_data_node_fwnode = __fwnode;	\
-- 
cgit 


From 6073512cc8e2c48bed5c6625c02c5e4ae50cec34 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Mon, 18 Sep 2017 14:59:20 +0200
Subject: net: phy: Kconfig: Fix PHY infrastructure menu in menuconfig

Since the integration of PHYLINK, the configuration option which
used to be under the PHY infrastructure menu in menuconfig ended
up one level up (the network device driver section)

By placing PHYLINK option right after PHYLIB entry, it broke the
way Kconfig used to build the menu. See kconfig-language.txt, section
"Menu structure", 2nd method.

This is fixed by placing the PHYLINK option just before PHYLIB.

Fixes: 9525ae83959b ("phylink: add phylink infrastructure")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index a9d16a3af514..cd931cf9dcc2 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -160,15 +160,6 @@ config MDIO_XGENE
 
 endif
 
-menuconfig PHYLIB
-	tristate "PHY Device support and infrastructure"
-	depends on NETDEVICES
-	select MDIO_DEVICE
-	help
-	  Ethernet controllers are usually attached to PHY
-	  devices.  This option provides infrastructure for
-	  managing PHY devices.
-
 config PHYLINK
 	tristate
 	depends on NETDEVICES
@@ -179,6 +170,15 @@ config PHYLINK
 	  configuration links, PHYs, and Serdes links with MAC level
 	  autonegotiation modes.
 
+menuconfig PHYLIB
+	tristate "PHY Device support and infrastructure"
+	depends on NETDEVICES
+	select MDIO_DEVICE
+	help
+	  Ethernet controllers are usually attached to PHY
+	  devices.  This option provides infrastructure for
+	  managing PHY devices.
+
 if PHYLIB
 
 config SWPHY
-- 
cgit 


From b247c211ee367cfe975dd54e381094083adfd8d3 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 19 Sep 2017 02:43:13 +0200
Subject: PM: docs: Drop an excess character from devices.rst

Drop an excess "`" from Documentation/driver-api/pm/devices.rst.

Fixes: 2728b2d2e5be (PM / core / docs: Convert sleep states API document to reST)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/driver-api/pm/devices.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst
index bedd32388dac..a0dc2879a152 100644
--- a/Documentation/driver-api/pm/devices.rst
+++ b/Documentation/driver-api/pm/devices.rst
@@ -675,7 +675,7 @@ sub-domain of the parent domain.
 
 Support for power domains is provided through the :c:member:`pm_domain` field of
 |struct device|.  This field is a pointer to an object of type
-|struct dev_pm_domain|, defined in :file:`include/linux/pm.h``, providing a set
+|struct dev_pm_domain|, defined in :file:`include/linux/pm.h`, providing a set
 of power management callbacks analogous to the subsystem-level and device driver
 callbacks that are executed for the given device during all power transitions,
 instead of the respective subsystem-level callbacks.  Specifically, if a
-- 
cgit 


From 157c460e10cb6eca29ccbd0f023db159d0c55ec7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 19 Sep 2017 02:22:39 +0200
Subject: PM: core: Fix device_pm_check_callbacks()

The device_pm_check_callbacks() function doesn't check legacy
->suspend and ->resume callback pointers under the device's
bus type, class and driver, so in some cases it may set the
no_pm_callbacks flag for the device incorrectly and then the
callbacks may be skipped during system suspend/resume, which
shouldn't happen.

Fixes: aa8e54b55947 (PM / sleep: Go direct_complete if driver has no callbacks)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: 4.5+ <stable@vger.kernel.org> # 4.5+
---
 drivers/base/power/main.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index ea1732ed7a9d..770b1539a083 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1860,10 +1860,13 @@ void device_pm_check_callbacks(struct device *dev)
 {
 	spin_lock_irq(&dev->power.lock);
 	dev->power.no_pm_callbacks =
-		(!dev->bus || pm_ops_is_empty(dev->bus->pm)) &&
-		(!dev->class || pm_ops_is_empty(dev->class->pm)) &&
+		(!dev->bus || (pm_ops_is_empty(dev->bus->pm) &&
+		 !dev->bus->suspend && !dev->bus->resume)) &&
+		(!dev->class || (pm_ops_is_empty(dev->class->pm) &&
+		 !dev->class->suspend && !dev->class->resume)) &&
 		(!dev->type || pm_ops_is_empty(dev->type->pm)) &&
 		(!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) &&
-		(!dev->driver || pm_ops_is_empty(dev->driver->pm));
+		(!dev->driver || (pm_ops_is_empty(dev->driver->pm) &&
+		 !dev->driver->suspend && !dev->driver->resume));
 	spin_unlock_irq(&dev->power.lock);
 }
-- 
cgit 


From ff76898c0a14a43f71bfe63dd1903087e96ce238 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 19 Sep 2017 08:23:22 -0700
Subject: cpufreq: dt-platdev: Add some missing platforms to the blacklist

Commit edeec420de24 (cpufreq: dt-platdev: Automatically create cpufreq
device with OPP v2) missed adding few platforms to the blacklist which
create the cpufreq-dt device from their own drivers, after some
dependencies are sorted out.

And for those platforms, both the platform specific driver and the
cpufreq-dt-platdev driver try to create the cpufreq-dt device now.

Fix that by including those platforms in the blacklist. This doesn't include
the TI platforms, for which there is a separate patch.

Fixes: edeec420de24 (cpufreq: dt-cpufreq: platdev Automatically create device with OPP v2)
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq-dt-platdev.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index a020da7940d6..430edadca527 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -106,6 +106,18 @@ static const struct of_device_id whitelist[] __initconst = {
  * platforms using "operating-points-v2" property.
  */
 static const struct of_device_id blacklist[] __initconst = {
+	{ .compatible = "calxeda,highbank", },
+	{ .compatible = "calxeda,ecx-2000", },
+
+	{ .compatible = "marvell,armadaxp", },
+
+	{ .compatible = "nvidia,tegra124", },
+
+	{ .compatible = "st,stih407", },
+	{ .compatible = "st,stih410", },
+
+	{ .compatible = "sigma,tango4", },
+
 	{ }
 };
 
-- 
cgit 


From ed40fad9a568efe83f5e80897ded71117b6911b3 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Thu, 31 Aug 2017 22:24:36 +0200
Subject: ARM: cpuidle: Avoid memleak if init fail

In case there are no DT idle states defined or
cpuidle_register_driver() fails, the copy of the idle driver is leaked:

    unreferenced object 0xede0dc00 (size 1024):
    comm "swapper/0", pid 1, jiffies 4294937431 (age 744.510s)
    hex dump (first 32 bytes):
    94 9e 0b c1 00 00 00 00 00 00 00 00 00 00 00 00 ................
    57 46 49 00 00 00 00 00 00 00 00 00 00 00 00 00 WFI.............
    backtrace:
    [<c1295f04>] arm_idle_init+0x44/0x1ac
    [<c0301e6c>] do_one_initcall+0x3c/0x16c
    [<c1200d70>] kernel_init_freeable+0x110/0x1d0
    [<c0cb3624>] kernel_init+0x8/0x114
    [<c0307a98>] ret_from_fork+0x14/0x3c

So fix this by freeing the unregistered copy in error case.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Fixes: d50a7d8acd78 (ARM: cpuidle: Support asymmetric idle definition)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle-arm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index 7080c384ad5d..52a75053ee03 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -104,13 +104,13 @@ static int __init arm_idle_init(void)
 		ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);
 		if (ret <= 0) {
 			ret = ret ? : -ENODEV;
-			goto out_fail;
+			goto init_fail;
 		}
 
 		ret = cpuidle_register_driver(drv);
 		if (ret) {
 			pr_err("Failed to register cpuidle driver\n");
-			goto out_fail;
+			goto init_fail;
 		}
 
 		/*
@@ -149,6 +149,8 @@ static int __init arm_idle_init(void)
 	}
 
 	return 0;
+init_fail:
+	kfree(drv);
 out_fail:
 	while (--cpu >= 0) {
 		dev = per_cpu(cpuidle_devices, cpu);
-- 
cgit 


From 0c0bceb796878a5baea1f47033215fac0774e498 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Fri, 8 Sep 2017 12:24:41 +0300
Subject: ACPI: properties: Return _DSD hierarchical extension (data) sub-nodes
 correctly

The recently merged patch "ACPI: Prepare for constifying
acpi_get_next_subnode() fwnode argument" was part of a patchset
constifying the fwnode arguments across the fwnode property API. The
purpose of the patch was to allow returning non-const fwnodes from a data
structure the root of which is const.

Unfortunately the patch introduced the functionality, in particular when
starting parsed from an ACPI device node, the hierarchical data extension
nodes would not be enumerated.

Restore the old behaviour while still retaining constness properties of
the patch.

Fixes: 01c1da289791 "ACPI: Prepare for constifying acpi_get_next_subnode() fwnode argument"
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index c1c216163de3..265b74f440b6 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -908,11 +908,12 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 					    struct fwnode_handle *child)
 {
 	const struct acpi_device *adev = to_acpi_device_node(fwnode);
-	struct acpi_device *child_adev = NULL;
 	const struct list_head *head;
 	struct list_head *next;
 
 	if (!child || is_acpi_device_node(child)) {
+		struct acpi_device *child_adev;
+
 		if (adev)
 			head = &adev->children;
 		else
@@ -922,8 +923,8 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 			goto nondev;
 
 		if (child) {
-			child_adev = to_acpi_device_node(child);
-			next = child_adev->node.next;
+			adev = to_acpi_device_node(child);
+			next = adev->node.next;
 			if (next == head) {
 				child = NULL;
 				goto nondev;
@@ -941,8 +942,8 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 		const struct acpi_data_node *data = to_acpi_data_node(fwnode);
 		struct acpi_data_node *dn;
 
-		if (child_adev)
-			head = &child_adev->data.subnodes;
+		if (adev)
+			head = &adev->data.subnodes;
 		else if (data)
 			head = &data->data.subnodes;
 		else
-- 
cgit 


From 0647169cf9aa441700eb8f23ea49be060626534b Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Sep 2017 12:41:37 +0200
Subject: rhashtable: Documentation tweak

Clarify that rhashtable_walk_{stop,start} will not reset the iterator to
the beginning of the hash table.  Confusion between rhashtable_walk_enter
and rhashtable_walk_start has already lead to a bug.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/rhashtable.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 707ca5d677c6..ddd7dde87c3c 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -735,9 +735,9 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
  * rhashtable_walk_start - Start a hash table walk
  * @iter:	Hash table iterator
  *
- * Start a hash table walk.  Note that we take the RCU lock in all
- * cases including when we return an error.  So you must always call
- * rhashtable_walk_stop to clean up.
+ * Start a hash table walk at the current iterator position.  Note that we take
+ * the RCU lock in all cases including when we return an error.  So you must
+ * always call rhashtable_walk_stop to clean up.
  *
  * Returns zero if successful.
  *
@@ -846,7 +846,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
  * rhashtable_walk_stop - Finish a hash table walk
  * @iter:	Hash table iterator
  *
- * Finish a hash table walk.
+ * Finish a hash table walk.  Does not reset the iterator to the start of the
+ * hash table.
  */
 void rhashtable_walk_stop(struct rhashtable_iter *iter)
 	__releases(RCU)
-- 
cgit 


From 75df6e688ccd517e339a7c422ef7ad73045b18a2 Mon Sep 17 00:00:00 2001
From: Tahsin Erdogan <tahsin@google.com>
Date: Sun, 17 Sep 2017 03:23:48 -0700
Subject: tracing: Fix trace_pipe behavior for instance traces

When reading data from trace_pipe, tracing_wait_pipe() performs a
check to see if tracing has been turned off after some data was read.
Currently, this check always looks at global trace state, but it
should be checking the trace instance where trace_pipe is located at.

Because of this bug, cat instances/i1/trace_pipe in the following
script will immediately exit instead of waiting for data:

cd /sys/kernel/debug/tracing
echo 0 > tracing_on
mkdir -p instances/i1
echo 1 > instances/i1/tracing_on
echo 1 > instances/i1/events/sched/sched_process_exec/enable
cat instances/i1/trace_pipe

Link: http://lkml.kernel.org/r/20170917102348.1615-1-tahsin@google.com

Cc: stable@vger.kernel.org
Fixes: 10246fa35d4f ("tracing: give easy way to clear trace buffer")
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d3ca35f38803..752e5daf0896 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5680,7 +5680,7 @@ static int tracing_wait_pipe(struct file *filp)
 		 *
 		 * iter->pos will be 0 if we haven't read anything.
 		 */
-		if (!tracing_is_on() && iter->pos)
+		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
 			break;
 
 		mutex_unlock(&iter->mutex);
-- 
cgit 


From 930651a75bf1ba6893a8b8475270664ebdb6cf4a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 19 Sep 2017 09:15:59 -0700
Subject: bpf: do not disable/enable BH in bpf_map_free_id()

syzkaller reported following splat [1]

Since hard irq are disabled by the caller, bpf_map_free_id()
should not try to enable/disable BH.

Another solution would be to change htab_map_delete_elem() to
defer the free_htab_elem() call after
raw_spin_unlock_irqrestore(&b->lock, flags), but this might be not
enough to cover other code paths.

[1]
WARNING: CPU: 1 PID: 8052 at kernel/softirq.c:161 __local_bh_enable_ip
+0x1e/0x160 kernel/softirq.c:161
Kernel panic - not syncing: panic_on_warn set ...

CPU: 1 PID: 8052 Comm: syz-executor1 Not tainted 4.13.0-next-20170915+
#23
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x194/0x257 lib/dump_stack.c:52
 panic+0x1e4/0x417 kernel/panic.c:181
 __warn+0x1c4/0x1d9 kernel/panic.c:542
 report_bug+0x211/0x2d0 lib/bug.c:183
 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:178
 do_trap_no_signal arch/x86/kernel/traps.c:212 [inline]
 do_trap+0x260/0x390 arch/x86/kernel/traps.c:261
 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:298
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:311
 invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905
RIP: 0010:__local_bh_enable_ip+0x1e/0x160 kernel/softirq.c:161
RSP: 0018:ffff8801cdcd7748 EFLAGS: 00010046
RAX: 0000000000000082 RBX: 0000000000000201 RCX: 0000000000000000
RDX: 1ffffffff0b5933c RSI: 0000000000000201 RDI: ffffffff85ac99e0
RBP: ffff8801cdcd7758 R08: ffffffff85b87158 R09: 1ffff10039b9aec6
R10: ffff8801c99f24c0 R11: 0000000000000002 R12: ffffffff817b0b47
R13: dffffc0000000000 R14: ffff8801cdcd77e8 R15: 0000000000000001
 __raw_spin_unlock_bh include/linux/spinlock_api_smp.h:176 [inline]
 _raw_spin_unlock_bh+0x30/0x40 kernel/locking/spinlock.c:207
 spin_unlock_bh include/linux/spinlock.h:361 [inline]
 bpf_map_free_id kernel/bpf/syscall.c:197 [inline]
 __bpf_map_put+0x267/0x320 kernel/bpf/syscall.c:227
 bpf_map_put+0x1a/0x20 kernel/bpf/syscall.c:235
 bpf_map_fd_put_ptr+0x15/0x20 kernel/bpf/map_in_map.c:96
 free_htab_elem+0xc3/0x1b0 kernel/bpf/hashtab.c:658
 htab_map_delete_elem+0x74d/0x970 kernel/bpf/hashtab.c:1063
 map_delete_elem kernel/bpf/syscall.c:633 [inline]
 SYSC_bpf kernel/bpf/syscall.c:1479 [inline]
 SyS_bpf+0x2188/0x46a0 kernel/bpf/syscall.c:1451
 entry_SYSCALL_64_fastpath+0x1f/0xbe

Fixes: f3f1c054c288 ("bpf: Introduce bpf_map ID")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/syscall.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cb17e1cd1d43..25d074920a00 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -186,15 +186,17 @@ static int bpf_map_alloc_id(struct bpf_map *map)
 
 static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
 {
+	unsigned long flags;
+
 	if (do_idr_lock)
-		spin_lock_bh(&map_idr_lock);
+		spin_lock_irqsave(&map_idr_lock, flags);
 	else
 		__acquire(&map_idr_lock);
 
 	idr_remove(&map_idr, map->id);
 
 	if (do_idr_lock)
-		spin_unlock_bh(&map_idr_lock);
+		spin_unlock_irqrestore(&map_idr_lock, flags);
 	else
 		__release(&map_idr_lock);
 }
-- 
cgit 


From 039cc1c1eb971ffe1973fddb8769a80fd4950a6f Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Tue, 19 Sep 2017 15:06:13 -0500
Subject: cpufreq: ti-cpufreq: Support additional am43xx platforms

Rather than letting the ti-cpufreq driver match against 'ti,am4372'
machine compatible during probe let's match against 'ti,am43' so that we
can support both 'ti,am4372' and 'ti,am438x' platforms which both match
to this compatible.

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/ti-cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
index b29cd3398463..4bf47de6101f 100644
--- a/drivers/cpufreq/ti-cpufreq.c
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -190,7 +190,7 @@ static int ti_cpufreq_setup_syscon_register(struct ti_cpufreq_data *opp_data)
 
 static const struct of_device_id ti_cpufreq_of_match[] = {
 	{ .compatible = "ti,am33xx", .data = &am3x_soc_data, },
-	{ .compatible = "ti,am4372", .data = &am4x_soc_data, },
+	{ .compatible = "ti,am43", .data = &am4x_soc_data, },
 	{ .compatible = "ti,dra7", .data = &dra7_soc_data },
 	{},
 };
-- 
cgit 


From 2a4776e14f7da3d48fffb4edbb82355742f23478 Mon Sep 17 00:00:00 2001
From: Lipeng <lipeng321@huawei.com>
Date: Tue, 19 Sep 2017 17:17:10 +0100
Subject: net: hns3: Fixes initialization of phy address from firmware

Default phy address of every port is 0. Therefore, phy address for
each port need to be fetched from firmware and device initialized
with fetched non-default phy address.

Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine &
Compatibility Layer Support")
Signed-off-by: Lipeng <lipeng321@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index bb45365fb817..db4e07dac29a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1066,6 +1066,7 @@ static int hclge_configure(struct hclge_dev *hdev)
 	for (i = 0; i < ETH_ALEN; i++)
 		hdev->hw.mac.mac_addr[i] = cfg.mac_addr[i];
 	hdev->hw.mac.media_type = cfg.media_type;
+	hdev->hw.mac.phy_addr = cfg.phy_addr;
 	hdev->num_desc = cfg.tqp_desc_num;
 	hdev->tm_info.num_pg = 1;
 	hdev->tm_info.num_tc = cfg.tc_num;
-- 
cgit 


From c5b1b97522ef32d2170c9aa1a0c1eec179acbb3a Mon Sep 17 00:00:00 2001
From: Lipeng <lipeng321@huawei.com>
Date: Tue, 19 Sep 2017 17:17:11 +0100
Subject: net: hns3: Fixes the command used to unmap ring from vector

This patch fixes the IMP command being used to unmap the vector
from the corresponding ring.

Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine &
Compatibility Layer Support")
Signed-off-by: Lipeng <lipeng321@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index db4e07dac29a..e324bc6e9f4f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2779,7 +2779,7 @@ static int hclge_unmap_ring_from_vector(
 			}
 			i = 0;
 			hclge_cmd_setup_basic_desc(&desc,
-						   HCLGE_OPC_ADD_RING_TO_VECTOR,
+						   HCLGE_OPC_DEL_RING_TO_VECTOR,
 						   false);
 			req->int_vector_id = vector_id;
 		}
-- 
cgit 


From 0305b443a3ba5b84aa474786026df04a70460135 Mon Sep 17 00:00:00 2001
From: Lipeng <lipeng321@huawei.com>
Date: Tue, 19 Sep 2017 17:17:12 +0100
Subject: net: hns3: Fixes ring-to-vector map-and-unmap command

This patch fixes the vector-to-ring map and unmap command and adds
INT_GL(for, Gap Limiting Interrupts) and VF id to it as required
by the hardware interface.

Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine &
Compatibility Layer Support")
Signed-off-by: Lipeng <lipeng321@huawei.com>
Signed-off-by: Mingguang Qu <qumingguang@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  | 8 ++++++--
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 8 ++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 91ae0135ee50..c2b613b40509 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -238,7 +238,7 @@ struct hclge_tqp_map {
 	u8 rsv[18];
 };
 
-#define HCLGE_VECTOR_ELEMENTS_PER_CMD	11
+#define HCLGE_VECTOR_ELEMENTS_PER_CMD	10
 
 enum hclge_int_type {
 	HCLGE_INT_TX,
@@ -252,8 +252,12 @@ struct hclge_ctrl_vector_chain {
 #define HCLGE_INT_TYPE_S	0
 #define HCLGE_INT_TYPE_M	0x3
 #define HCLGE_TQP_ID_S		2
-#define HCLGE_TQP_ID_M		(0x3fff << HCLGE_TQP_ID_S)
+#define HCLGE_TQP_ID_M		(0x7ff << HCLGE_TQP_ID_S)
+#define HCLGE_INT_GL_IDX_S	13
+#define HCLGE_INT_GL_IDX_M	(0x3 << HCLGE_INT_GL_IDX_S)
 	__le16 tqp_type_and_id[HCLGE_VECTOR_ELEMENTS_PER_CMD];
+	u8 vfid;
+	u8 rsv;
 };
 
 #define HCLGE_TC_NUM		8
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index e324bc6e9f4f..eafd9c678162 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2680,7 +2680,11 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id,
 			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
 		hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
 			       HCLGE_TQP_ID_S,	node->tqp_index);
+		hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M,
+			       HCLGE_INT_GL_IDX_S,
+			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
 		req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+		req->vfid = vport->vport_id;
 
 		if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
 			req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;
@@ -2764,8 +2768,12 @@ static int hclge_unmap_ring_from_vector(
 			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
 		hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
 			       HCLGE_TQP_ID_S,	node->tqp_index);
+		hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M,
+			       HCLGE_INT_GL_IDX_S,
+			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
 
 		req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+		req->vfid = vport->vport_id;
 
 		if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
 			req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;
-- 
cgit 


From 139e8792537b327252a9676591a78e6408d50a85 Mon Sep 17 00:00:00 2001
From: Lipeng <lipeng321@huawei.com>
Date: Tue, 19 Sep 2017 17:17:13 +0100
Subject: net: hns3: Fixes the initialization of MAC address in hardware

This patch fixes the initialization of MAC address, fetched from HNS3
firmware i.e. when it is not randomly generated, to the HNS3 hardware.

Fixes: ca60906d2795 ("net: hns3: Add support of HNS3 Ethernet Driver for
hip08 SoC")
Signed-off-by: Lipeng <lipeng321@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 1c3e29447891..4d68d6ea5143 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -2705,10 +2705,11 @@ static void hns3_init_mac_addr(struct net_device *netdev)
 		eth_hw_addr_random(netdev);
 		dev_warn(priv->dev, "using random MAC address %pM\n",
 			 netdev->dev_addr);
-		/* Also copy this new MAC address into hdev */
-		if (h->ae_algo->ops->set_mac_addr)
-			h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr);
 	}
+
+	if (h->ae_algo->ops->set_mac_addr)
+		h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr);
+
 }
 
 static void hns3_nic_set_priv_ops(struct net_device *netdev)
-- 
cgit 


From fbbb1536b220eb4f4f95cbceae6579489a8adad5 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Tue, 19 Sep 2017 17:17:14 +0100
Subject: net: hns3: Fixes the ether address copy with appropriate API

This patch replaces the ethernet address copy instance with more
appropriate ether_addr_copy() function.

Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine &
Compatibility Layer Support")
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index eafd9c678162..8e172afd4876 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1063,8 +1063,7 @@ static int hclge_configure(struct hclge_dev *hdev)
 	hdev->base_tqp_pid = 0;
 	hdev->rss_size_max = 1;
 	hdev->rx_buf_len = cfg.rx_buf_len;
-	for (i = 0; i < ETH_ALEN; i++)
-		hdev->hw.mac.mac_addr[i] = cfg.mac_addr[i];
+	ether_addr_copy(hdev->hw.mac.mac_addr, cfg.mac_addr);
 	hdev->hw.mac.media_type = cfg.media_type;
 	hdev->hw.mac.phy_addr = cfg.phy_addr;
 	hdev->num_desc = cfg.tqp_desc_num;
-- 
cgit 


From 5e43aef8491ae3b5feb79cd15260faf39303ef33 Mon Sep 17 00:00:00 2001
From: Lipeng <lipeng321@huawei.com>
Date: Tue, 19 Sep 2017 17:17:15 +0100
Subject: net: hns3: Fixes the default VLAN-id of PF

When there is no vlan id in the packets, hardware will treat the vlan id
as 0 and look for the mac_vlan table. This patch set the default vlan id
of PF as 0. Without this config, it will fail when look for mac_vlan
table, and hardware will drop packets.

Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine &
Compatibility Layer Support")
Signed-off-by: Mingguang Qu <qumingguang@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 8e172afd4876..74008ef23169 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3673,6 +3673,7 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
 {
 #define HCLGE_VLAN_TYPE_VF_TABLE   0
 #define HCLGE_VLAN_TYPE_PORT_TABLE 1
+	struct hnae3_handle *handle;
 	int ret;
 
 	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_VF_TABLE,
@@ -3682,8 +3683,11 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
 
 	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_PORT_TABLE,
 					 true);
+	if (ret)
+		return ret;
 
-	return ret;
+	handle = &hdev->vport[0].nic;
+	return hclge_set_port_vlan_filter(handle, htons(ETH_P_8021Q), 0, false);
 }
 
 static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
-- 
cgit 


From 90f7b11a5a0081feb7041fcc795c9a131a62a725 Mon Sep 17 00:00:00 2001
From: Lipeng <lipeng321@huawei.com>
Date: Tue, 19 Sep 2017 17:17:16 +0100
Subject: net: hns3: Fixes the premature exit of loop when matching clients

When register/unregister ae_dev, ae_dev should match all client
in the client_list. Enet and roce can co-exists together so we
should continue checking for enet and roce presence together.
So break should not be there.

Above caused problems in loading and unloading of modules.

Fixes: 38eddd126772 ("net: hns3: Add support of the HNAE3 framework")
Signed-off-by: Lipeng <lipeng321@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.c | 43 ++++++-----------------------
 1 file changed, 9 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
index 59efbd605416..5bcb2238acb2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
@@ -37,20 +37,15 @@ static bool hnae3_client_match(enum hnae3_client_type client_type,
 }
 
 static int hnae3_match_n_instantiate(struct hnae3_client *client,
-				     struct hnae3_ae_dev *ae_dev,
-				     bool is_reg, bool *matched)
+				     struct hnae3_ae_dev *ae_dev, bool is_reg)
 {
 	int ret;
 
-	*matched = false;
-
 	/* check if this client matches the type of ae_dev */
 	if (!(hnae3_client_match(client->type, ae_dev->dev_type) &&
 	      hnae_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) {
 		return 0;
 	}
-	/* there is a match of client and dev */
-	*matched = true;
 
 	/* now, (un-)instantiate client by calling lower layer */
 	if (is_reg) {
@@ -69,7 +64,6 @@ int hnae3_register_client(struct hnae3_client *client)
 {
 	struct hnae3_client *client_tmp;
 	struct hnae3_ae_dev *ae_dev;
-	bool matched;
 	int ret = 0;
 
 	mutex_lock(&hnae3_common_lock);
@@ -86,7 +80,7 @@ int hnae3_register_client(struct hnae3_client *client)
 		/* if the client could not be initialized on current port, for
 		 * any error reasons, move on to next available port
 		 */
-		ret = hnae3_match_n_instantiate(client, ae_dev, true, &matched);
+		ret = hnae3_match_n_instantiate(client, ae_dev, true);
 		if (ret)
 			dev_err(&ae_dev->pdev->dev,
 				"match and instantiation failed for port\n");
@@ -102,12 +96,11 @@ EXPORT_SYMBOL(hnae3_register_client);
 void hnae3_unregister_client(struct hnae3_client *client)
 {
 	struct hnae3_ae_dev *ae_dev;
-	bool matched;
 
 	mutex_lock(&hnae3_common_lock);
 	/* un-initialize the client on every matched port */
 	list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
-		hnae3_match_n_instantiate(client, ae_dev, false, &matched);
+		hnae3_match_n_instantiate(client, ae_dev, false);
 	}
 
 	list_del(&client->node);
@@ -124,7 +117,6 @@ int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
 	const struct pci_device_id *id;
 	struct hnae3_ae_dev *ae_dev;
 	struct hnae3_client *client;
-	bool matched;
 	int ret = 0;
 
 	mutex_lock(&hnae3_common_lock);
@@ -151,13 +143,10 @@ int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
 		 * initialize the figure out client instance
 		 */
 		list_for_each_entry(client, &hnae3_client_list, node) {
-			ret = hnae3_match_n_instantiate(client, ae_dev, true,
-							&matched);
+			ret = hnae3_match_n_instantiate(client, ae_dev, true);
 			if (ret)
 				dev_err(&ae_dev->pdev->dev,
 					"match and instantiation failed\n");
-			if (matched)
-				break;
 		}
 	}
 
@@ -175,7 +164,6 @@ void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo)
 	const struct pci_device_id *id;
 	struct hnae3_ae_dev *ae_dev;
 	struct hnae3_client *client;
-	bool matched;
 
 	mutex_lock(&hnae3_common_lock);
 	/* Check if there are matched ae_dev */
@@ -187,12 +175,8 @@ void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo)
 		/* check the client list for the match with this ae_dev type and
 		 * un-initialize the figure out client instance
 		 */
-		list_for_each_entry(client, &hnae3_client_list, node) {
-			hnae3_match_n_instantiate(client, ae_dev, false,
-						  &matched);
-			if (matched)
-				break;
-		}
+		list_for_each_entry(client, &hnae3_client_list, node)
+			hnae3_match_n_instantiate(client, ae_dev, false);
 
 		ae_algo->ops->uninit_ae_dev(ae_dev);
 		hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
@@ -212,7 +196,6 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
 	const struct pci_device_id *id;
 	struct hnae3_ae_algo *ae_algo;
 	struct hnae3_client *client;
-	bool matched;
 	int ret = 0;
 
 	mutex_lock(&hnae3_common_lock);
@@ -246,13 +229,10 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
 	 * initialize the figure out client instance
 	 */
 	list_for_each_entry(client, &hnae3_client_list, node) {
-		ret = hnae3_match_n_instantiate(client, ae_dev, true,
-						&matched);
+		ret = hnae3_match_n_instantiate(client, ae_dev, true);
 		if (ret)
 			dev_err(&ae_dev->pdev->dev,
 				"match and instantiation failed\n");
-		if (matched)
-			break;
 	}
 
 out_err:
@@ -270,7 +250,6 @@ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev)
 	const struct pci_device_id *id;
 	struct hnae3_ae_algo *ae_algo;
 	struct hnae3_client *client;
-	bool matched;
 
 	mutex_lock(&hnae3_common_lock);
 	/* Check if there are matched ae_algo */
@@ -279,12 +258,8 @@ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev)
 		if (!id)
 			continue;
 
-		list_for_each_entry(client, &hnae3_client_list, node) {
-			hnae3_match_n_instantiate(client, ae_dev, false,
-						  &matched);
-			if (matched)
-				break;
-		}
+		list_for_each_entry(client, &hnae3_client_list, node)
+			hnae3_match_n_instantiate(client, ae_dev, false);
 
 		ae_algo->ops->uninit_ae_dev(ae_dev);
 		hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
-- 
cgit 


From b5b7db8d680464b1d631fd016f5e093419f0bfd9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 19 Sep 2017 10:05:57 -0700
Subject: tcp: fastopen: fix on syn-data transmit failure

Our recent change exposed a bug in TCP Fastopen Client that syzkaller
found right away [1]

When we prepare skb with SYN+DATA, we attempt to transmit it,
and we update socket state as if the transmit was a success.

In socket RTX queue we have two skbs, one with the SYN alone,
and a second one containing the DATA.

When (malicious) ACK comes in, we now complain that second one had no
skb_mstamp.

The proper fix is to make sure that if the transmit failed, we do not
pretend we sent the DATA skb, and make it our send_head.

When 3WHS completes, we can now send the DATA right away, without having
to wait for a timeout.

[1]
WARNING: CPU: 0 PID: 100189 at net/ipv4/tcp_input.c:3117 tcp_clean_rtx_queue+0x2057/0x2ab0 net/ipv4/tcp_input.c:3117()

 WARN_ON_ONCE(last_ackt == 0);

Modules linked in:
CPU: 0 PID: 100189 Comm: syz-executor1 Not tainted
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
 0000000000000000 ffff8800b35cb1d8 ffffffff81cad00d 0000000000000000
 ffffffff828a4347 ffff88009f86c080 ffffffff8316eb20 0000000000000d7f
 ffff8800b35cb220 ffffffff812c33c2 ffff8800baad2440 00000009d46575c0
Call Trace:
 [<ffffffff81cad00d>] __dump_stack
 [<ffffffff81cad00d>] dump_stack+0xc1/0x124
 [<ffffffff812c33c2>] warn_slowpath_common+0xe2/0x150
 [<ffffffff812c361e>] warn_slowpath_null+0x2e/0x40
 [<ffffffff828a4347>] tcp_clean_rtx_queue+0x2057/0x2ab0 n
 [<ffffffff828ae6fd>] tcp_ack+0x151d/0x3930
 [<ffffffff828baa09>] tcp_rcv_state_process+0x1c69/0x4fd0
 [<ffffffff828efb7f>] tcp_v4_do_rcv+0x54f/0x7c0
 [<ffffffff8258aacb>] sk_backlog_rcv
 [<ffffffff8258aacb>] __release_sock+0x12b/0x3a0
 [<ffffffff8258ad9e>] release_sock+0x5e/0x1c0
 [<ffffffff8294a785>] inet_wait_for_connect
 [<ffffffff8294a785>] __inet_stream_connect+0x545/0xc50
 [<ffffffff82886f08>] tcp_sendmsg_fastopen
 [<ffffffff82886f08>] tcp_sendmsg+0x2298/0x35a0
 [<ffffffff82952515>] inet_sendmsg+0xe5/0x520
 [<ffffffff8257152f>] sock_sendmsg_nosec
 [<ffffffff8257152f>] sock_sendmsg+0xcf/0x110

Fixes: 8c72c65b426b ("tcp: update skb->skb_mstamp more carefully")
Fixes: 783237e8daf1 ("net-tcp: Fast Open client - sending SYN-data")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 517d737059d1..0bc9e46a5369 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3389,6 +3389,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 		goto done;
 	}
 
+	/* data was not sent, this is our new send_head */
+	sk->sk_send_head = syn_data;
+	tp->packets_out -= tcp_skb_pcount(syn_data);
+
 fallback:
 	/* Send a regular SYN with Fast Open cookie request option */
 	if (fo->cookie.len > 0)
@@ -3441,6 +3445,11 @@ int tcp_connect(struct sock *sk)
 	 */
 	tp->snd_nxt = tp->write_seq;
 	tp->pushed_seq = tp->write_seq;
+	buff = tcp_send_head(sk);
+	if (unlikely(buff)) {
+		tp->snd_nxt	= TCP_SKB_CB(buff)->seq;
+		tp->pushed_seq	= TCP_SKB_CB(buff)->seq;
+	}
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
-- 
cgit 


From f55956065ec94e3e9371463d693a1029c4cc3007 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Tue, 19 Sep 2017 19:35:18 +0200
Subject: net: emac: Fix napi poll list corruption

This patch is pretty much a carbon copy of
commit 3079c652141f ("caif: Fix napi poll list corruption")
with "caif" replaced by "emac".

The commit d75b1ade567f ("net: less interrupt masking in NAPI")
breaks emac.

It is now required that if the entire budget is consumed when poll
returns, the napi poll_list must remain empty.  However, like some
other drivers emac tries to do a last-ditch check and if there is
more work it will call napi_reschedule and then immediately process
some of this new work.  Should the entire budget be consumed while
processing such new work then we will violate the new caller
contract.

This patch fixes this by not touching any work when we reschedule
in emac.

Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/emac/mal.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index 2c74baa2398a..fff09dcf9e34 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -402,7 +402,7 @@ static int mal_poll(struct napi_struct *napi, int budget)
 	unsigned long flags;
 
 	MAL_DBG2(mal, "poll(%d)" NL, budget);
- again:
+
 	/* Process TX skbs */
 	list_for_each(l, &mal->poll_list) {
 		struct mal_commac *mc =
@@ -451,7 +451,6 @@ static int mal_poll(struct napi_struct *napi, int budget)
 			spin_lock_irqsave(&mal->lock, flags);
 			mal_disable_eob_irq(mal);
 			spin_unlock_irqrestore(&mal->lock, flags);
-			goto again;
 		}
 		mc->ops->poll_tx(mc->dev);
 	}
-- 
cgit 


From 7c30013133964aaa2f45c17d6e9782ac6cfd7f5f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 20 Sep 2017 00:44:21 +0200
Subject: bpf: fix ri->map_owner pointer on bpf_prog_realloc

Commit 109980b894e9 ("bpf: don't select potentially stale
ri->map from buggy xdp progs") passed the pointer to the prog
itself to be loaded into r4 prior on bpf_redirect_map() helper
call, so that we can store the owner into ri->map_owner out of
the helper.

Issue with that is that the actual address of the prog is still
subject to change when subsequent rewrites occur that require
slow path in bpf_prog_realloc() to alloc more memory, e.g. from
patching inlining helper functions or constant blinding. Thus,
we really need to take prog->aux as the address we're holding,
which also works with prog clones as they share the same aux
object.

Instead of then fetching aux->prog during runtime, which could
potentially incur cache misses due to false sharing, we are
going to just use aux for comparison on the map owner. This
will also keep the patchlet of the same size, and later check
in xdp_map_invalid() only accesses read-only aux pointer from
the prog, it's also in the same cacheline already from prior
access when calling bpf_func.

Fixes: 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c |  7 ++++++-
 net/core/filter.c     | 24 +++++++++++++++---------
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 799b2451ef2d..b914fbe1383e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4205,7 +4205,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 		}
 
 		if (insn->imm == BPF_FUNC_redirect_map) {
-			u64 addr = (unsigned long)prog;
+			/* Note, we cannot use prog directly as imm as subsequent
+			 * rewrites would still change the prog pointer. The only
+			 * stable address we can use is aux, which also works with
+			 * prog clones during blinding.
+			 */
+			u64 addr = (unsigned long)prog->aux;
 			struct bpf_insn r4_ld[] = {
 				BPF_LD_IMM64(BPF_REG_4, addr),
 				*insn,
diff --git a/net/core/filter.c b/net/core/filter.c
index 24dd33dd9f04..82edad58d066 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1794,7 +1794,7 @@ struct redirect_info {
 	u32 flags;
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
-	const struct bpf_prog *map_owner;
+	unsigned long   map_owner;
 };
 
 static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -2500,11 +2500,17 @@ void xdp_do_flush_map(void)
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush_map);
 
+static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
+				   unsigned long aux)
+{
+	return (unsigned long)xdp_prog->aux != aux;
+}
+
 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 			       struct bpf_prog *xdp_prog)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
-	const struct bpf_prog *map_owner = ri->map_owner;
+	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
 	struct net_device *fwd = NULL;
 	u32 index = ri->ifindex;
@@ -2512,9 +2518,9 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 
 	ri->ifindex = 0;
 	ri->map = NULL;
-	ri->map_owner = NULL;
+	ri->map_owner = 0;
 
-	if (unlikely(map_owner != xdp_prog)) {
+	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
 		err = -EFAULT;
 		map = NULL;
 		goto err;
@@ -2574,7 +2580,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 			    struct bpf_prog *xdp_prog)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
-	const struct bpf_prog *map_owner = ri->map_owner;
+	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
 	struct net_device *fwd = NULL;
 	u32 index = ri->ifindex;
@@ -2583,10 +2589,10 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 
 	ri->ifindex = 0;
 	ri->map = NULL;
-	ri->map_owner = NULL;
+	ri->map_owner = 0;
 
 	if (map) {
-		if (unlikely(map_owner != xdp_prog)) {
+		if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
 			err = -EFAULT;
 			map = NULL;
 			goto err;
@@ -2632,7 +2638,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 	ri->ifindex = ifindex;
 	ri->flags = flags;
 	ri->map = NULL;
-	ri->map_owner = NULL;
+	ri->map_owner = 0;
 
 	return XDP_REDIRECT;
 }
@@ -2646,7 +2652,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 };
 
 BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
-	   const struct bpf_prog *, map_owner)
+	   unsigned long, map_owner)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 
-- 
cgit 


From 6819a14ecbe2e089e5c5bb74edecafdde2028a00 Mon Sep 17 00:00:00 2001
From: Mike Manning <mmanning@brocade.com>
Date: Mon, 4 Sep 2017 15:52:55 +0100
Subject: net: ipv6: fix regression of no RTM_DELADDR sent after DAD failure

Commit f784ad3d79e5 ("ipv6: do not send RTM_DELADDR for tentative
addresses") incorrectly assumes that no RTM_NEWADDR are sent for
addresses in tentative state, as this does happen for the standard
IPv6 use-case of DAD failure, see the call to ipv6_ifa_notify() in
addconf_dad_stop(). So as a result of this change, no RTM_DELADDR is
sent after DAD failure for a link-local when strict DAD (accept_dad=2)
is configured, or on the next admin down in other cases. The absence
of this notification breaks backwards compatibility and causes problems
after DAD failure if this notification was being relied on. The
solution is to allow RTM_DELADDR to still be sent after DAD failure.

Fixes: f784ad3d79e5 ("ipv6: do not send RTM_DELADDR for tentative addresses")
Signed-off-by: Mike Manning <mmanning@brocade.com>
Cc: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c2e2a78787ec..d7dbcc8eda10 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4940,9 +4940,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 
 	/* Don't send DELADDR notification for TENTATIVE address,
 	 * since NEWADDR notification is sent only after removing
-	 * TENTATIVE flag.
+	 * TENTATIVE flag, if DAD has not failed.
 	 */
-	if (ifa->flags & IFA_F_TENTATIVE && event == RTM_DELADDR)
+	if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) &&
+	    event == RTM_DELADDR)
 		return;
 
 	skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
-- 
cgit 


From 35e015e1f5773417952fe91ce8790baf9b4237a2 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@redhat.com>
Date: Tue, 12 Sep 2017 17:46:37 +0200
Subject: ipv6: fix net.ipv6.conf.all interface DAD handlers

Currently, writing into
net.ipv6.conf.all.{accept_dad,use_optimistic,optimistic_dad} has no effect.
Fix handling of these flags by:

- using the maximum of global and per-interface values for the
  accept_dad flag. That is, if at least one of the two values is
  non-zero, enable DAD on the interface. If at least one value is
  set to 2, enable DAD and disable IPv6 operation on the interface if
  MAC-based link-local address was found

- using the logical OR of global and per-interface values for the
  optimistic_dad flag. If at least one of them is set to one, optimistic
  duplicate address detection (RFC 4429) is enabled on the interface

- using the logical OR of global and per-interface values for the
  use_optimistic flag. If at least one of them is set to one,
  optimistic addresses won't be marked as deprecated during source address
  selection on the interface.

While at it, as we're modifying the prototype for ipv6_use_optimistic_addr(),
drop inline, and let the compiler decide.

Fixes: 7fd2561e4ebd ("net: ipv6: Add a sysctl to make optimistic addresses useful candidates")
Signed-off-by: Matteo Croce <mcroce@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 18 ++++++++++++++----
 net/ipv6/addrconf.c                    | 27 ++++++++++++++++++++-------
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index b3345d0fe0a6..77f4de59dc9c 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1680,6 +1680,9 @@ accept_dad - INTEGER
 	2: Enable DAD, and disable IPv6 operation if MAC-based duplicate
 	   link-local address has been found.
 
+	DAD operation and mode on a given interface will be selected according
+	to the maximum value of conf/{all,interface}/accept_dad.
+
 force_tllao - BOOLEAN
 	Enable sending the target link-layer address option even when
 	responding to a unicast neighbor solicitation.
@@ -1727,16 +1730,23 @@ suppress_frag_ndisc - INTEGER
 
 optimistic_dad - BOOLEAN
 	Whether to perform Optimistic Duplicate Address Detection (RFC 4429).
-		0: disabled (default)
-		1: enabled
+	0: disabled (default)
+	1: enabled
+
+	Optimistic Duplicate Address Detection for the interface will be enabled
+	if at least one of conf/{all,interface}/optimistic_dad is set to 1,
+	it will be disabled otherwise.
 
 use_optimistic - BOOLEAN
 	If enabled, do not classify optimistic addresses as deprecated during
 	source address selection.  Preferred addresses will still be chosen
 	before optimistic addresses, subject to other ranking in the source
 	address selection algorithm.
-		0: disabled (default)
-		1: enabled
+	0: disabled (default)
+	1: enabled
+
+	This will be enabled if at least one of
+	conf/{all,interface}/use_optimistic is set to 1, disabled otherwise.
 
 stable_secret - IPv6 address
 	This IPv6 address will be used as a secret to generate IPv6
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d7dbcc8eda10..96861c702c06 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1399,10 +1399,18 @@ static inline int ipv6_saddr_preferred(int type)
 	return 0;
 }
 
-static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev)
+static bool ipv6_use_optimistic_addr(struct net *net,
+				     struct inet6_dev *idev)
 {
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-	return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic;
+	if (!idev)
+		return false;
+	if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+		return false;
+	if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic)
+		return false;
+
+	return true;
 #else
 	return false;
 #endif
@@ -1472,7 +1480,7 @@ static int ipv6_get_saddr_eval(struct net *net,
 		/* Rule 3: Avoid deprecated and optimistic addresses */
 		u8 avoid = IFA_F_DEPRECATED;
 
-		if (!ipv6_use_optimistic_addr(score->ifa->idev))
+		if (!ipv6_use_optimistic_addr(net, score->ifa->idev))
 			avoid |= IFA_F_OPTIMISTIC;
 		ret = ipv6_saddr_preferred(score->addr_type) ||
 		      !(score->ifa->flags & avoid);
@@ -2460,7 +2468,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 		int max_addresses = in6_dev->cnf.max_addresses;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-		if (in6_dev->cnf.optimistic_dad &&
+		if ((net->ipv6.devconf_all->optimistic_dad ||
+		     in6_dev->cnf.optimistic_dad) &&
 		    !net->ipv6.devconf_all->forwarding && sllao)
 			addr_flags |= IFA_F_OPTIMISTIC;
 #endif
@@ -3051,7 +3060,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
 	u32 addr_flags = flags | IFA_F_PERMANENT;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-	if (idev->cnf.optimistic_dad &&
+	if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad ||
+	     idev->cnf.optimistic_dad) &&
 	    !dev_net(idev->dev)->ipv6.devconf_all->forwarding)
 		addr_flags |= IFA_F_OPTIMISTIC;
 #endif
@@ -3810,6 +3820,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 		goto out;
 
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
+	    dev_net(dev)->ipv6.devconf_all->accept_dad < 1 ||
 	    idev->cnf.accept_dad < 1 ||
 	    !(ifp->flags&IFA_F_TENTATIVE) ||
 	    ifp->flags & IFA_F_NODAD) {
@@ -3841,7 +3852,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 	 */
 	if (ifp->flags & IFA_F_OPTIMISTIC) {
 		ip6_ins_rt(ifp->rt);
-		if (ipv6_use_optimistic_addr(idev)) {
+		if (ipv6_use_optimistic_addr(dev_net(dev), idev)) {
 			/* Because optimistic nodes can use this address,
 			 * notify listeners. If DAD fails, RTM_DELADDR is sent.
 			 */
@@ -3897,7 +3908,9 @@ static void addrconf_dad_work(struct work_struct *w)
 		action = DAD_ABORT;
 		ifp->state = INET6_IFADDR_STATE_POSTDAD;
 
-		if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 &&
+		if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 ||
+		     idev->cnf.accept_dad > 1) &&
+		    !idev->cnf.disable_ipv6 &&
 		    !(ifp->flags & IFA_F_STABLE_PRIVACY)) {
 			struct in6_addr addr;
 
-- 
cgit 


From 23586b66d84ba3184b8820277f3fc42761640f87 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Mon, 18 Sep 2017 18:18:45 -0500
Subject: Fix SMB3.1.1 guest authentication to Samba

Samba rejects SMB3.1.1 dialect (vers=3.1.1) negotiate requests from
the kernel client due to the two byte pad at the end of the negotiate
contexts.

CC: Stable <stable@vger.kernel.org>
Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/smb2pdu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5c16591a128e..b0eaebe627e9 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -439,7 +439,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req)
 	build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
 	req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
 	req->NegotiateContextCount = cpu_to_le16(2);
-	inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2
+	inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context)
 			+ sizeof(struct smb2_encryption_neg_context)); /* calculate hash */
 }
 #else
-- 
cgit 


From 04fc52fb222d35e1f7a0d5d85b19a676ea1e10e8 Mon Sep 17 00:00:00 2001
From: Maciej Purski <m.purski@samsung.com>
Date: Tue, 5 Sep 2017 14:23:02 +0200
Subject: drm/exynos/hdmi: Fix unsafe list iteration

Function hdmi_mode_fixup() used bare list_for_each entry, which was
unsafe and caused memory corruption detected by kasan.
It now uses drm_for_each_connector_iter macro, which is now recommended
by the documentation and safe.

Signed-off-by: Maciej Purski <m.purski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_hdmi.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 214fa5e51963..0109ff40b1db 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -944,22 +944,27 @@ static bool hdmi_mode_fixup(struct drm_encoder *encoder,
 	struct drm_device *dev = encoder->dev;
 	struct drm_connector *connector;
 	struct drm_display_mode *m;
+	struct drm_connector_list_iter conn_iter;
 	int mode_ok;
 
 	drm_mode_set_crtcinfo(adjusted_mode, 0);
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &conn_iter);
+	drm_for_each_connector_iter(connector, &conn_iter) {
 		if (connector->encoder == encoder)
 			break;
 	}
+	if (connector)
+		drm_connector_get(connector);
+	drm_connector_list_iter_end(&conn_iter);
 
-	if (connector->encoder != encoder)
+	if (!connector)
 		return true;
 
 	mode_ok = hdmi_mode_valid(connector, adjusted_mode);
 
 	if (mode_ok == MODE_OK)
-		return true;
+		goto cleanup;
 
 	/*
 	 * Find the most suitable mode and copy it to adjusted_mode.
@@ -979,6 +984,9 @@ static bool hdmi_mode_fixup(struct drm_encoder *encoder,
 		}
 	}
 
+cleanup:
+	drm_connector_put(connector);
+
 	return true;
 }
 
-- 
cgit 


From 8632ec8cdcaead67c019f1ba36760d1bdd0c5d23 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 23 Aug 2017 15:37:53 +1000
Subject: powerpc/configs: Update for CONFIG_SND changes

Commit eb3b705aaed9 ("ALSA: Make CONFIG_SND_OSSEMUL user-selectable")
means we need to set CONFIG_SND_OSSEMUL in our configs, otherwise we
lose some of the SND symbols.

And commit 0181307abc1d ("ALSA: seq: Reorganize kconfig and build")
reorganised things, which causes the churn.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/configs/g5_defconfig       | 5 +++--
 arch/powerpc/configs/gamecube_defconfig | 5 +++--
 arch/powerpc/configs/pasemi_defconfig   | 3 ++-
 arch/powerpc/configs/pmac32_defconfig   | 7 ++++---
 arch/powerpc/configs/ppc64_defconfig    | 7 ++++---
 arch/powerpc/configs/ppc64e_defconfig   | 7 ++++---
 arch/powerpc/configs/ppc6xx_defconfig   | 7 ++++---
 arch/powerpc/configs/wii_defconfig      | 5 +++--
 8 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index e084fa548d73..063817fee61c 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -138,10 +138,11 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
 CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQUENCER_OSS=m
 CONFIG_SND_POWERMAC=m
 CONFIG_SND_AOA=m
 CONFIG_SND_AOA_FABRIC_LAYOUT=m
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index 79bbc8238b32..805b0f87653c 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -64,11 +64,12 @@ CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_CLUT224 is not set
 CONFIG_SOUND=y
 CONFIG_SND=y
-CONFIG_SND_SEQUENCER=y
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=y
 CONFIG_SND_PCM_OSS=y
-CONFIG_SND_SEQUENCER_OSS=y
 # CONFIG_SND_VERBOSE_PROCFS is not set
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_SEQUENCER_OSS=y
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=y
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index 8cf4a46bef86..6daa56f8895c 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -115,9 +115,10 @@ CONFIG_VGACON_SOFT_SCROLLBACK=y
 CONFIG_LOGO=y
 CONFIG_SOUND=y
 CONFIG_SND=y
-CONFIG_SND_SEQUENCER=y
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=y
 CONFIG_SND_PCM_OSS=y
+CONFIG_SND_SEQUENCER=y
 CONFIG_SND_SEQUENCER_OSS=y
 CONFIG_SND_USB_AUDIO=y
 CONFIG_SND_USB_USX2Y=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 8e798b1fbc99..1aab9a62a681 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -227,11 +227,12 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
 CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
 CONFIG_SND_DUMMY=m
 CONFIG_SND_POWERMAC=m
 CONFIG_SND_AOA=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 791db775a09c..6ddca80c52c3 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -222,11 +222,12 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
 CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
 CONFIG_SND_POWERMAC=m
 CONFIG_SND_AOA=m
 CONFIG_SND_AOA_FABRIC_LAYOUT=m
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index d0fe0f8f77c2..41d85cb3c9a2 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -141,11 +141,12 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
 CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
 CONFIG_HID_DRAGONRISE=y
 CONFIG_HID_GYRATION=y
 CONFIG_HID_TWINHAN=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index ae6eba482d75..da0e8d535eb8 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -789,17 +789,18 @@ CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_VGA16 is not set
 CONFIG_SOUND=m
 CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
 CONFIG_SND_DYNAMIC_MINORS=y
 # CONFIG_SND_SUPPORT_OLD_API is not set
 CONFIG_SND_VERBOSE_PRINTK=y
 CONFIG_SND_DEBUG=y
 CONFIG_SND_DEBUG_VERBOSE=y
 CONFIG_SND_PCM_XRUN_DEBUG=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
 CONFIG_SND_DUMMY=m
 CONFIG_SND_VIRMIDI=m
 CONFIG_SND_MTPAV=m
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index aef41b17a8bc..9c7400a19e9d 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -79,11 +79,12 @@ CONFIG_FB=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_SOUND=y
 CONFIG_SND=y
-CONFIG_SND_SEQUENCER=y
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=y
 CONFIG_SND_PCM_OSS=y
-CONFIG_SND_SEQUENCER_OSS=y
 # CONFIG_SND_VERBOSE_PROCFS is not set
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_SEQUENCER_OSS=y
 CONFIG_HID_APPLE=m
 CONFIG_HID_WACOM=m
 CONFIG_MMC=y
-- 
cgit 


From 4917fcb58cc73f6b81455e3c5f960144809ddf1a Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Tue, 19 Sep 2017 11:47:06 +0530
Subject: powerpc/sysrq: Fix oops whem ppmu is not registered

Kernel crashes if power pmu is not registered and user tries to dump
regs with 'echo p > /proc/sysrq-trigger'. Sample log:

  Unable to handle kernel paging request for data at address 0x00000008
  Faulting instruction address: 0xc0000000000d52f0

  NIP [c0000000000d52f0] perf_event_print_debug+0x10/0x230
  LR [c00000000058a938] sysrq_handle_showregs+0x38/0x50
  Call Trace:
   printk+0x38/0x4c (unreliable)
   __handle_sysrq+0xe4/0x270
   write_sysrq_trigger+0x64/0x80
   proc_reg_write+0x80/0xd0
   __vfs_write+0x40/0x200
   vfs_write+0xc8/0x240
   SyS_write+0x60/0x110
   system_call+0x58/0x6c

Fixes: 5f6d0380c640 ("powerpc/perf: Define perf_event_print_debug() to print PMU register values")
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Reviewed-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/core-book3s.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 2e3eb7431571..9e3da168d54c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -793,6 +793,11 @@ void perf_event_print_debug(void)
 	u32 pmcs[MAX_HWEVENTS];
 	int i;
 
+	if (!ppmu) {
+		pr_info("Performance monitor hardware not registered.\n");
+		return;
+	}
+
 	if (!ppmu->n_counter)
 		return;
 
-- 
cgit 


From c1fa0768a8713b135848f78fd43ffc208d8ded70 Mon Sep 17 00:00:00 2001
From: Gustavo Romero <gromero@linux.vnet.ibm.com>
Date: Wed, 13 Sep 2017 22:13:48 -0400
Subject: powerpc/tm: Flush TM only if CPU has TM feature

Commit cd63f3c ("powerpc/tm: Fix saving of TM SPRs in core dump")
added code to access TM SPRs in flush_tmregs_to_thread(). However
flush_tmregs_to_thread() does not check if TM feature is available on
CPU before trying to access TM SPRs in order to copy live state to
thread structures. flush_tmregs_to_thread() is indeed guarded by
CONFIG_PPC_TRANSACTIONAL_MEM but it might be the case that kernel
was compiled with CONFIG_PPC_TRANSACTIONAL_MEM enabled and ran on
a CPU without TM feature available, thus rendering the execution
of TM instructions that are treated by the CPU as illegal instructions.

The fix is just to add proper checking in flush_tmregs_to_thread()
if CPU has the TM feature before accessing any TM-specific resource,
returning immediately if TM is no available on the CPU. Adding
that checking in flush_tmregs_to_thread() instead of in places
where it is called, like in vsr_get() and vsr_set(), is better because
avoids the same problem cropping up elsewhere.

Cc: stable@vger.kernel.org # v4.13+
Fixes: cd63f3c ("powerpc/tm: Fix saving of TM SPRs in core dump")
Signed-off-by: Gustavo Romero <gromero@linux.vnet.ibm.com>
Reviewed-by: Cyril Bur <cyrilbur@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 07cd22e35405..f52ad5bb7109 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -131,7 +131,7 @@ static void flush_tmregs_to_thread(struct task_struct *tsk)
 	 * in the appropriate thread structures from live.
 	 */
 
-	if (tsk != current)
+	if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current))
 		return;
 
 	if (MSR_TM_SUSPENDED(mfmsr())) {
-- 
cgit 


From ad47ff3e33503e0969db2d4f9a40942aa6414598 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 19 Sep 2017 20:45:52 +1000
Subject: powerpc/sstep: Fix issues with set_cr0()

set_cr0() broke when we changed analyse_instr() to not modify the
register state. Instead of looking at regs->gpr[x] which has not
been updated yet, we need to look at op->val.

Fixes: 3cdfcbfd32b9 ("powerpc: Change analyse_instr so it doesn't modify *regs")
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/sstep.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index fb9f58b868e7..9d72e5900320 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -944,9 +944,9 @@ NOKPROBE_SYMBOL(emulate_dcbz);
 		: "r" (addr), "i" (-EFAULT), "0" (err))
 
 static nokprobe_inline void set_cr0(const struct pt_regs *regs,
-				    struct instruction_op *op, int rd)
+				    struct instruction_op *op)
 {
-	long val = regs->gpr[rd];
+	long val = op->val;
 
 	op->type |= SETCC;
 	op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
@@ -1326,7 +1326,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 	case 13:	/* addic. */
 		imm = (short) instr;
 		add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
-		set_cr0(regs, op, rd);
+		set_cr0(regs, op);
 		return 1;
 
 	case 14:	/* addi */
@@ -1397,13 +1397,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 
 	case 28:	/* andi. */
 		op->val = regs->gpr[rd] & (unsigned short) instr;
-		set_cr0(regs, op, ra);
+		set_cr0(regs, op);
 		goto logical_done_nocc;
 
 	case 29:	/* andis. */
 		imm = (unsigned short) instr;
 		op->val = regs->gpr[rd] & (imm << 16);
-		set_cr0(regs, op, ra);
+		set_cr0(regs, op);
 		goto logical_done_nocc;
 
 #ifdef __powerpc64__
@@ -2526,7 +2526,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 
  logical_done:
 	if (instr & 1)
-		set_cr0(regs, op, ra);
+		set_cr0(regs, op);
  logical_done_nocc:
 	op->reg = ra;
 	op->type |= SETREG;
@@ -2534,7 +2534,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 
  arith_done:
 	if (instr & 1)
-		set_cr0(regs, op, rd);
+		set_cr0(regs, op);
  compute_done:
 	op->reg = rd;
 	op->type |= SETREG;
-- 
cgit 


From 5bcaa4cc41923871777c3d13906267e812775094 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 19 Sep 2017 20:45:53 +1000
Subject: powerpc/sstep: Fix issues with mcrf

mcrf broke when we changed analyse_instr() to not modify the register
state. The instruction writes to the CR, so we need to store the result
in op->ccval, not op->val.

Fixes: 3cdfcbfd32b9 ("powerpc: Change analyse_instr so it doesn't modify *regs")
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/sstep.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 9d72e5900320..c4cda1afb49d 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1513,10 +1513,10 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 			op->type = COMPUTE + SETCC;
 			imm = 0xf0000000UL;
 			val = regs->gpr[rd];
-			op->val = regs->ccr;
+			op->ccval = regs->ccr;
 			for (sh = 0; sh < 8; ++sh) {
 				if (instr & (0x80000 >> sh))
-					op->val = (op->val & ~imm) |
+					op->ccval = (op->ccval & ~imm) |
 						(val & imm);
 				imm >>= 4;
 			}
-- 
cgit 


From 1575fe06f6b1d156ed31fb22c8631d49d79751d8 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Wed, 20 Sep 2017 09:32:19 +1000
Subject: powerpc/sstep: mullw should calculate a 64 bit signed result

mullw should do a 32 bit signed multiply and create a 64 bit signed
result. It currently truncates the result to 32 bits.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/sstep.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index c4cda1afb49d..5e8418c28bd8 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1651,8 +1651,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 			goto arith_done;
 
 		case 235:	/* mullw */
-			op->val = (unsigned int) regs->gpr[ra] *
-				(unsigned int) regs->gpr[rb];
+			op->val = (long)(int) regs->gpr[ra] *
+				(int) regs->gpr[rb];
+
 			goto arith_done;
 
 		case 266:	/* add */
-- 
cgit 


From 5d298baa41883fc421acfd932799c0f4177249ae Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Date: Thu, 31 Aug 2017 17:17:41 +0530
Subject: powerpc/powernv: Clear LPCR[PECE1] via stop-api only for deep state
 offline

Commit 24be85a23d1f ("powerpc/powernv: Clear PECE1 in LPCR via
stop-api only on Hotplug") clears the PECE1 bit of the LPCR via
stop-api during CPU-Hotplug to prevent wakeup due to a decrementer on
an offlined CPU which is in a deep stop state.

In the case where the stop-api support is found to be lacking, the
commit 785a12afdb4a ("powerpc/powernv/idle: Disable LOSE_FULL_CONTEXT
states when stop-api fails") disables deep states that lose hypervisor
context. Thus in this case, the offlined CPU will be put to some
shallow idle state.

However, we currently unconditionally clear the PECE1 in LPCR via
stop-api during CPU-Hotplug even when deep states are disabled due to
stop-api failure.

Fix this by clearing PECE1 of LPCR via stop-api during CPU-Hotplug
*only* when the offlined CPU will be put to a deep state that loses
hypervisor context.

Fixes: 24be85a23d1f ("powerpc/powernv: Clear PECE1 in LPCR via stop-api only on Hotplug")
Reported-by: Pavithra Prakash <pavirampu@linux.vnet.ibm.com>
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Tested-by: Pavithra Prakash <pavrampu@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/idle.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 9f59041a172b..443d5ca71995 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -393,7 +393,13 @@ static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
 	u64 pir = get_hard_smp_processor_id(cpu);
 
 	mtspr(SPRN_LPCR, lpcr_val);
-	opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+
+	/*
+	 * Program the LPCR via stop-api only if the deepest stop state
+	 * can lose hypervisor context.
+	 */
+	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
+		opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
 }
 
 /*
-- 
cgit 


From 590d08d3da45e9fed423b08ab38d71886c07abc8 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Tue, 19 Sep 2017 11:43:47 -0500
Subject: SMB3: Fix endian warning

Multi-dialect negotiate patch had a minor endian error.

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
CC: Stable <stable@vger.kernel.org> # 4.13+
---
 fs/cifs/smb2pdu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index b0eaebe627e9..b4c58a1db1ae 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -570,10 +570,11 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 			/* ops set to 3.0 by default for default so update */
 			ses->server->ops = &smb21_operations;
 		}
-	} else if (rsp->DialectRevision != ses->server->vals->protocol_id) {
+	} else if (le16_to_cpu(rsp->DialectRevision) !=
+				ses->server->vals->protocol_id) {
 		/* if requested single dialect ensure returned dialect matched */
 		cifs_dbg(VFS, "Illegal 0x%x dialect returned: not requested\n",
-			cpu_to_le16(rsp->DialectRevision));
+			le16_to_cpu(rsp->DialectRevision));
 		return -EIO;
 	}
 
-- 
cgit 


From c721c38957fb19982416f6be71aae7b30630d83b Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Tue, 19 Sep 2017 18:40:03 -0500
Subject: SMB3: Warn user if trying to sign connection that authenticated as
 guest

It can be confusing if user ends up authenticated as guest but they
requested signing (server will return error validating signed packets)
so add log message for this.

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/smb2pdu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index b4c58a1db1ae..d499ce265c3b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1176,6 +1176,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
 	while (sess_data->func)
 		sess_data->func(sess_data);
 
+	if ((ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) && (ses->sign))
+		cifs_dbg(VFS, "signing requested but authenticated as guest\n");
 	rc = sess_data->result;
 out:
 	kfree(sess_data);
-- 
cgit 


From 6e82e929d98552ed5156919e0f532eeba2da704b Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 20 Sep 2017 13:09:23 +1000
Subject: cifs: show 'soft' in the mount options for hard mounts

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsfs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 180b3356ff86..a864e6575309 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -461,6 +461,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 		seq_puts(s, ",nocase");
 	if (tcon->retry)
 		seq_puts(s, ",hard");
+	else
+		seq_puts(s, ",soft");
 	if (tcon->use_persistent)
 		seq_puts(s, ",persistenthandles");
 	else if (tcon->use_resilient)
-- 
cgit 


From fd0b19ed5389187829b854900511c9195875bb42 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Tue, 19 Sep 2017 22:07:18 -0700
Subject: MIPS: Fix perf event init

Commit c311c797998c ("cpumask: make "nr_cpumask_bits" unsigned")
modified mipspmu_event_init() to cast the struct perf_event cpu field to
an unsigned integer before it is compared with nr_cpumask_bits (and
*ahem* did so without copying the linux-mips mailing list or any MIPS
developers...). This is broken because the cpu field may be -1 for
events which follow a process rather than being affine to a particular
CPU. When this is the case the cast to an unsigned int results in a
value equal to ULONG_MAX, which is always greater than nr_cpumask_bits
so we always fail mipspmu_event_init() and return -ENODEV.

The check against nr_cpumask_bits seems nonsensical anyway, so this
patch simply removes it. The cpu field is going to either be -1 or a
valid CPU number. Comparing it with nr_cpumask_bits is effectively
checking that it's a valid cpu number, but it seems safe to rely on the
core perf events code to ensure that's the case.

The end result is that this fixes use of perf on MIPS when not
constraining events to a particular CPU, and fixes the "perf list hw"
command which fails to list any events without this.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Fixes: c311c797998c ("cpumask: make "nr_cpumask_bits" unsigned")
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mips@linux-mips.org
Cc: stable <stable@vger.kernel.org> # v4.12+
Patchwork: https://patchwork.linux-mips.org/patch/17323/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/perf_event_mipsxx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 9e6c74bf66c4..6668f67a61c3 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -618,8 +618,7 @@ static int mipspmu_event_init(struct perf_event *event)
 		return -ENOENT;
 	}
 
-	if ((unsigned int)event->cpu >= nr_cpumask_bits ||
-	    (event->cpu >= 0 && !cpu_online(event->cpu)))
+	if (event->cpu >= 0 && !cpu_online(event->cpu))
 		return -ENODEV;
 
 	if (!atomic_inc_not_zero(&active_events)) {
-- 
cgit 


From bd6227a150fdb56e7bb734976ef6e53a2c1cb334 Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Thu, 14 Sep 2017 17:10:28 +0200
Subject: crypto: drbg - fix freeing of resources

During the change to use aligned buffers, the deallocation code path was
not updated correctly. The current code tries to free the aligned buffer
pointer and not the original buffer pointer as it is supposed to.

Thus, the code is updated to free the original buffer pointer and set
the aligned buffer pointer that is used throughout the code to NULL.

Fixes: 3cfc3b9721123 ("crypto: drbg - use aligned buffers")
CC: <stable@vger.kernel.org>
CC: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/drbg.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crypto/drbg.c b/crypto/drbg.c
index 633a88e93ab0..70018397e59a 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -1133,10 +1133,10 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg)
 {
 	if (!drbg)
 		return;
-	kzfree(drbg->V);
-	drbg->Vbuf = NULL;
-	kzfree(drbg->C);
-	drbg->Cbuf = NULL;
+	kzfree(drbg->Vbuf);
+	drbg->V = NULL;
+	kzfree(drbg->Cbuf);
+	drbg->C = NULL;
 	kzfree(drbg->scratchpadbuf);
 	drbg->scratchpadbuf = NULL;
 	drbg->reseed_ctr = 0;
-- 
cgit 


From 569f11c9f788959b640116b5bbd6d8a1f07326da Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:00 -0500
Subject: crypto: x86/blowfish - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R12 instead of RBP.  R12 can't be used as the RT0 register because
of x86 instruction encoding limitations.  So use R12 for CTX and RDI for
CTX.  This means that CTX is no longer an implicit function argument.
Instead it needs to be explicitly copied from RDI.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/blowfish-x86_64-asm_64.S | 48 +++++++++++++++++---------------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 246c67006ed0..8c1fcb6bad21 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -33,7 +33,7 @@
 #define s3	((16 + 2 + (3 * 256)) * 4)
 
 /* register macros */
-#define CTX %rdi
+#define CTX %r12
 #define RIO %rsi
 
 #define RX0 %rax
@@ -56,12 +56,12 @@
 #define RX2bh %ch
 #define RX3bh %dh
 
-#define RT0 %rbp
+#define RT0 %rdi
 #define RT1 %rsi
 #define RT2 %r8
 #define RT3 %r9
 
-#define RT0d %ebp
+#define RT0d %edi
 #define RT1d %esi
 #define RT2d %r8d
 #define RT3d %r9d
@@ -120,13 +120,14 @@
 
 ENTRY(__blowfish_enc_blk)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 *	%rcx: bool, if true: xor output
 	 */
-	movq %rbp, %r11;
+	movq %r12, %r11;
 
+	movq %rdi, CTX;
 	movq %rsi, %r10;
 	movq %rdx, RIO;
 
@@ -142,7 +143,7 @@ ENTRY(__blowfish_enc_blk)
 	round_enc(14);
 	add_roundkey_enc(16);
 
-	movq %r11, %rbp;
+	movq %r11, %r12;
 
 	movq %r10, RIO;
 	test %cl, %cl;
@@ -157,12 +158,13 @@ ENDPROC(__blowfish_enc_blk)
 
 ENTRY(blowfish_dec_blk)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
-	movq %rbp, %r11;
+	movq %r12, %r11;
 
+	movq %rdi, CTX;
 	movq %rsi, %r10;
 	movq %rdx, RIO;
 
@@ -181,7 +183,7 @@ ENTRY(blowfish_dec_blk)
 	movq %r10, RIO;
 	write_block();
 
-	movq %r11, %rbp;
+	movq %r11, %r12;
 
 	ret;
 ENDPROC(blowfish_dec_blk)
@@ -298,20 +300,21 @@ ENDPROC(blowfish_dec_blk)
 
 ENTRY(__blowfish_enc_blk_4way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 *	%rcx: bool, if true: xor output
 	 */
-	pushq %rbp;
+	pushq %r12;
 	pushq %rbx;
 	pushq %rcx;
 
-	preload_roundkey_enc(0);
-
+	movq %rdi, CTX
 	movq %rsi, %r11;
 	movq %rdx, RIO;
 
+	preload_roundkey_enc(0);
+
 	read_block4();
 
 	round_enc4(0);
@@ -324,39 +327,40 @@ ENTRY(__blowfish_enc_blk_4way)
 	round_enc4(14);
 	add_preloaded_roundkey4();
 
-	popq %rbp;
+	popq %r12;
 	movq %r11, RIO;
 
-	test %bpl, %bpl;
+	test %r12b, %r12b;
 	jnz .L__enc_xor4;
 
 	write_block4();
 
 	popq %rbx;
-	popq %rbp;
+	popq %r12;
 	ret;
 
 .L__enc_xor4:
 	xor_block4();
 
 	popq %rbx;
-	popq %rbp;
+	popq %r12;
 	ret;
 ENDPROC(__blowfish_enc_blk_4way)
 
 ENTRY(blowfish_dec_blk_4way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
-	pushq %rbp;
+	pushq %r12;
 	pushq %rbx;
-	preload_roundkey_dec(17);
 
-	movq %rsi, %r11;
+	movq %rdi, CTX;
+	movq %rsi, %r11
 	movq %rdx, RIO;
 
+	preload_roundkey_dec(17);
 	read_block4();
 
 	round_dec4(17);
@@ -373,7 +377,7 @@ ENTRY(blowfish_dec_blk_4way)
 	write_block4();
 
 	popq %rbx;
-	popq %rbp;
+	popq %r12;
 
 	ret;
 ENDPROC(blowfish_dec_blk_4way)
-- 
cgit 


From b46c9d717645529417ca9045cfdbf59f84922573 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:01 -0500
Subject: crypto: x86/camellia - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R12 instead of RBP.  Both are callee-saved registers, so the
substitution is straightforward.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/camellia-x86_64-asm_64.S | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 310319c601ed..95ba6956a7f6 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -75,17 +75,17 @@
 #define RCD1bh %dh
 
 #define RT0 %rsi
-#define RT1 %rbp
+#define RT1 %r12
 #define RT2 %r8
 
 #define RT0d %esi
-#define RT1d %ebp
+#define RT1d %r12d
 #define RT2d %r8d
 
 #define RT2bl %r8b
 
 #define RXOR %r9
-#define RRBP %r10
+#define RR12 %r10
 #define RDST %r11
 
 #define RXORd %r9d
@@ -197,7 +197,7 @@ ENTRY(__camellia_enc_blk)
 	 *	%rdx: src
 	 *	%rcx: bool xor
 	 */
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 
 	movq %rcx, RXOR;
 	movq %rsi, RDST;
@@ -227,13 +227,13 @@ ENTRY(__camellia_enc_blk)
 
 	enc_outunpack(mov, RT1);
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	ret;
 
 .L__enc_xor:
 	enc_outunpack(xor, RT1);
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	ret;
 ENDPROC(__camellia_enc_blk)
 
@@ -248,7 +248,7 @@ ENTRY(camellia_dec_blk)
 	movl $24, RXORd;
 	cmovel RXORd, RT2d; /* max */
 
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rsi, RDST;
 	movq %rdx, RIO;
 
@@ -271,7 +271,7 @@ ENTRY(camellia_dec_blk)
 
 	dec_outunpack();
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	ret;
 ENDPROC(camellia_dec_blk)
 
@@ -433,7 +433,7 @@ ENTRY(__camellia_enc_blk_2way)
 	 */
 	pushq %rbx;
 
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rcx, RXOR;
 	movq %rsi, RDST;
 	movq %rdx, RIO;
@@ -461,14 +461,14 @@ ENTRY(__camellia_enc_blk_2way)
 
 	enc_outunpack2(mov, RT2);
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	popq %rbx;
 	ret;
 
 .L__enc2_xor:
 	enc_outunpack2(xor, RT2);
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	popq %rbx;
 	ret;
 ENDPROC(__camellia_enc_blk_2way)
@@ -485,7 +485,7 @@ ENTRY(camellia_dec_blk_2way)
 	cmovel RXORd, RT2d; /* max */
 
 	movq %rbx, RXOR;
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rsi, RDST;
 	movq %rdx, RIO;
 
@@ -508,7 +508,7 @@ ENTRY(camellia_dec_blk_2way)
 
 	dec_outunpack2();
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	movq RXOR, %rbx;
 	ret;
 ENDPROC(camellia_dec_blk_2way)
-- 
cgit 


From 4b15606664a2f8d7c4f0092fb0305fe1c7c65b7b Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:02 -0500
Subject: crypto: x86/cast5 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R15 instead of RBP.  R15 can't be used as the RID1 register because
of x86 instruction encoding limitations.  So use R15 for CTX and RDI for
CTX.  This means that CTX is no longer an implicit function argument.
Instead it needs to be explicitly copied from RDI.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 47 ++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index b4a8806234ea..86107c961bb4 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -47,7 +47,7 @@
 /**********************************************************************
   16-way AVX cast5
  **********************************************************************/
-#define CTX %rdi
+#define CTX %r15
 
 #define RL1 %xmm0
 #define RR1 %xmm1
@@ -70,8 +70,8 @@
 
 #define RTMP %xmm15
 
-#define RID1  %rbp
-#define RID1d %ebp
+#define RID1  %rdi
+#define RID1d %edi
 #define RID2  %rsi
 #define RID2d %esi
 
@@ -226,7 +226,7 @@
 .align 16
 __cast5_enc_blk16:
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	RL1: blocks 1 and 2
 	 *	RR1: blocks 3 and 4
 	 *	RL2: blocks 5 and 6
@@ -246,9 +246,11 @@ __cast5_enc_blk16:
 	 *	RR4: encrypted blocks 15 and 16
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -283,7 +285,7 @@ __cast5_enc_blk16:
 
 .L__skip_enc:
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	vmovdqa .Lbswap_mask, RKM;
 
@@ -298,7 +300,7 @@ ENDPROC(__cast5_enc_blk16)
 .align 16
 __cast5_dec_blk16:
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	RL1: encrypted blocks 1 and 2
 	 *	RR1: encrypted blocks 3 and 4
 	 *	RL2: encrypted blocks 5 and 6
@@ -318,9 +320,11 @@ __cast5_dec_blk16:
 	 *	RR4: decrypted blocks 15 and 16
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -356,7 +360,7 @@ __cast5_dec_blk16:
 
 	vmovdqa .Lbswap_mask, RKM;
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
 	outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
@@ -372,12 +376,14 @@ ENDPROC(__cast5_dec_blk16)
 
 ENTRY(cast5_ecb_enc_16way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	vmovdqu (0*4*4)(%rdx), RL1;
@@ -400,18 +406,22 @@ ENTRY(cast5_ecb_enc_16way)
 	vmovdqu RR4, (6*4*4)(%r11);
 	vmovdqu RL4, (7*4*4)(%r11);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast5_ecb_enc_16way)
 
 ENTRY(cast5_ecb_dec_16way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 
 	FRAME_BEGIN
+	pushq %r15;
+
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	vmovdqu (0*4*4)(%rdx), RL1;
@@ -434,20 +444,22 @@ ENTRY(cast5_ecb_dec_16way)
 	vmovdqu RR4, (6*4*4)(%r11);
 	vmovdqu RL4, (7*4*4)(%r11);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast5_ecb_dec_16way)
 
 ENTRY(cast5_cbc_dec_16way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -483,23 +495,24 @@ ENTRY(cast5_cbc_dec_16way)
 	vmovdqu RR4, (6*16)(%r11);
 	vmovdqu RL4, (7*16)(%r11);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast5_cbc_dec_16way)
 
 ENTRY(cast5_ctr_16way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 *	%rcx: iv (big endian, 64bit)
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -558,8 +571,8 @@ ENTRY(cast5_ctr_16way)
 	vmovdqu RR4, (6*16)(%r11);
 	vmovdqu RL4, (7*16)(%r11);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast5_ctr_16way)
-- 
cgit 


From c66cc3be2951fad4d7d7f799baf57c8c5cc8d655 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:03 -0500
Subject: crypto: x86/cast6 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R15 instead of RBP.  R15 can't be used as the RID1 register because
of x86 instruction encoding limitations.  So use R15 for CTX and RDI for
CTX.  This means that CTX is no longer an implicit function argument.
Instead it needs to be explicitly copied from RDI.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 50 +++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 952d3156a933..7f30b6f0d72c 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -47,7 +47,7 @@
 /**********************************************************************
   8-way AVX cast6
  **********************************************************************/
-#define CTX %rdi
+#define CTX %r15
 
 #define RA1 %xmm0
 #define RB1 %xmm1
@@ -70,8 +70,8 @@
 
 #define RTMP %xmm15
 
-#define RID1  %rbp
-#define RID1d %ebp
+#define RID1  %rdi
+#define RID1d %edi
 #define RID2  %rsi
 #define RID2d %esi
 
@@ -264,15 +264,17 @@
 .align 8
 __cast6_enc_blk8:
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
 	 * output:
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -297,7 +299,7 @@ __cast6_enc_blk8:
 	QBAR(11);
 
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	vmovdqa .Lbswap_mask, RKM;
 
@@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8)
 .align 8
 __cast6_dec_blk8:
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
 	 * output:
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -343,7 +347,7 @@ __cast6_dec_blk8:
 	QBAR(0);
 
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	vmovdqa .Lbswap_mask, RKM;
 	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
@@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8)
 
 ENTRY(cast6_ecb_enc_8way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way)
 
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_ecb_enc_8way)
 
 ENTRY(cast6_ecb_dec_8way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way)
 
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_ecb_dec_8way)
 
 ENTRY(cast6_cbc_dec_8way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way)
 
 	store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast6_cbc_dec_8way)
@@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way)
 	 *	%rcx: iv (little endian, 128bit)
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way)
 
 	store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast6_ctr_8way)
@@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way)
 	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX
 	movq %rsi, %r11;
 
 	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -464,6 +478,7 @@ ENTRY(cast6_xts_enc_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_xts_enc_8way)
@@ -476,7 +491,9 @@ ENTRY(cast6_xts_dec_8way)
 	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX
 	movq %rsi, %r11;
 
 	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -488,6 +505,7 @@ ENTRY(cast6_xts_dec_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_xts_dec_8way)
-- 
cgit 


From 3ed7b4d67c6745300c9b5c6baa55da1161b57f60 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:04 -0500
Subject: crypto: x86/des3_ede - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use RSI instead of RBP for RT1.  Since RSI is also used as a the 'dst'
function argument, it needs to be saved on the stack until the argument
is needed.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/des3_ede-asm_64.S | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
index f3e91647ca27..8e49ce117494 100644
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -64,12 +64,12 @@
 #define RW2bh %ch
 
 #define RT0 %r15
-#define RT1 %rbp
+#define RT1 %rsi
 #define RT2 %r14
 #define RT3 %rdx
 
 #define RT0d %r15d
-#define RT1d %ebp
+#define RT1d %esi
 #define RT2d %r14d
 #define RT3d %edx
 
@@ -177,13 +177,14 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
-	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
 	pushq %r13;
 	pushq %r14;
 	pushq %r15;
 
+	pushq %rsi; /* dst */
+
 	read_block(%rdx, RL0, RR0);
 	initial_permutation(RL0, RR0);
 
@@ -241,6 +242,8 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	round1(32+15, RL0, RR0, dummy2);
 
 	final_permutation(RR0, RL0);
+
+	popq %rsi /* dst */
 	write_block(%rsi, RR0, RL0);
 
 	popq %r15;
@@ -248,7 +251,6 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	popq %r13;
 	popq %r12;
 	popq %rbx;
-	popq %rbp;
 
 	ret;
 ENDPROC(des3_ede_x86_64_crypt_blk)
@@ -432,13 +434,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	 *	%rdx: src (3 blocks)
 	 */
 
-	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
 	pushq %r13;
 	pushq %r14;
 	pushq %r15;
 
+	pushq %rsi /* dst */
+
 	/* load input */
 	movl 0 * 4(%rdx), RL0d;
 	movl 1 * 4(%rdx), RR0d;
@@ -520,6 +523,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	bswapl RR2d;
 	bswapl RL2d;
 
+	popq %rsi /* dst */
 	movl RR0d, 0 * 4(%rsi);
 	movl RL0d, 1 * 4(%rsi);
 	movl RR1d, 2 * 4(%rsi);
@@ -532,7 +536,6 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	popq %r13;
 	popq %r12;
 	popq %rbx;
-	popq %rbp;
 
 	ret;
 ENDPROC(des3_ede_x86_64_crypt_blk_3way)
-- 
cgit 


From d7b1722c72aa915283ada27709c6feeb392f6038 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:05 -0500
Subject: crypto: x86/sha1-avx2 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R11 instead of RBP.  Since R11 isn't a callee-saved register, it
doesn't need to be saved and restored on the stack.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha1_avx2_x86_64_asm.S | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1eab79c9ac48..9f712a7dfd79 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -89,7 +89,7 @@
 #define	REG_RE	%rdx
 #define	REG_RTA	%r12
 #define	REG_RTB	%rbx
-#define	REG_T1	%ebp
+#define	REG_T1	%r11d
 #define	xmm_mov	vmovups
 #define	avx2_zeroupper	vzeroupper
 #define	RND_F1	1
@@ -637,7 +637,6 @@ _loop3:
 	ENTRY(\name)
 
 	push	%rbx
-	push	%rbp
 	push	%r12
 	push	%r13
 	push	%r14
@@ -673,7 +672,6 @@ _loop3:
 	pop	%r14
 	pop	%r13
 	pop	%r12
-	pop	%rbp
 	pop	%rbx
 
 	ret
-- 
cgit 


From 6488bce756861b94810e54f83416d5e74c0f18bf Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:06 -0500
Subject: crypto: x86/sha1-ssse3 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Swap the usages of R12 and RBP.  Use R12 for the REG_D register, and use
RBP to store the pre-aligned stack pointer.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha1_ssse3_asm.S | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index a4109506a5e8..6204bd53528c 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -37,7 +37,7 @@
 #define REG_A	%ecx
 #define REG_B	%esi
 #define REG_C	%edi
-#define REG_D	%ebp
+#define REG_D	%r12d
 #define REG_E	%edx
 
 #define REG_T1	%eax
@@ -74,10 +74,10 @@
 	ENTRY(\name)
 
 	push	%rbx
-	push	%rbp
 	push	%r12
+	push	%rbp
+	mov	%rsp, %rbp
 
-	mov	%rsp, %r12
 	sub	$64, %rsp		# allocate workspace
 	and	$~15, %rsp		# align stack
 
@@ -99,10 +99,9 @@
 	xor	%rax, %rax
 	rep stosq
 
-	mov	%r12, %rsp		# deallocate workspace
-
-	pop	%r12
+	mov	%rbp, %rsp		# deallocate workspace
 	pop	%rbp
+	pop	%r12
 	pop	%rbx
 	ret
 
-- 
cgit 


From 673ac6fbc74f835e2125df9ee39e8a2a423832e2 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:07 -0500
Subject: crypto: x86/sha256-avx - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Swap the usages of R12 and RBP.  Use R12 for the TBL register, and use
RBP to store the pre-aligned stack pointer.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha256-avx-asm.S | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index e08888a1a5f2..001bbcf93c79 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -103,7 +103,7 @@ SRND = %rsi       # clobbers INP
 c = %ecx
 d = %r8d
 e = %edx
-TBL = %rbp
+TBL = %r12
 a = %eax
 b = %ebx
 
@@ -350,13 +350,13 @@ a = TMP_
 ENTRY(sha256_transform_avx)
 .align 32
 	pushq   %rbx
-	pushq   %rbp
+	pushq   %r12
 	pushq   %r13
 	pushq   %r14
 	pushq   %r15
-	pushq   %r12
+	pushq	%rbp
+	movq	%rsp, %rbp
 
-	mov	%rsp, %r12
 	subq    $STACK_SIZE, %rsp	# allocate stack space
 	and	$~15, %rsp		# align stack pointer
 
@@ -452,13 +452,12 @@ loop2:
 
 done_hash:
 
-	mov	%r12, %rsp
-
-	popq	%r12
+	mov	%rbp, %rsp
+	popq	%rbp
 	popq    %r15
 	popq    %r14
 	popq    %r13
-	popq    %rbp
+	popq	%r12
 	popq    %rbx
 	ret
 ENDPROC(sha256_transform_avx)
-- 
cgit 


From d3dfbfe2e6e7ecd620531d5201314ad14c4ed5b3 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:08 -0500
Subject: crypto: x86/sha256-avx2 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

There's no need to use RBP as a temporary register for the TBL value,
because it always stores the same value: the address of the K256 table.
Instead just reference the address of K256 directly.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha256-avx2-asm.S | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 89c8f09787d2..1420db15dcdd 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -98,8 +98,6 @@ d	= %r8d
 e       = %edx	# clobbers NUM_BLKS
 y3	= %esi	# clobbers INP
 
-
-TBL	= %rbp
 SRND	= CTX	# SRND is same register as CTX
 
 a = %eax
@@ -531,7 +529,6 @@ STACK_SIZE	= _RSP      + _RSP_SIZE
 ENTRY(sha256_transform_rorx)
 .align 32
 	pushq	%rbx
-	pushq	%rbp
 	pushq	%r12
 	pushq	%r13
 	pushq	%r14
@@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx)
 	mov	CTX, _CTX(%rsp)
 
 loop0:
-	lea     K256(%rip), TBL
-
 	## Load first 16 dwords from two blocks
 	VMOVDQ	0*32(INP),XTMP0
 	VMOVDQ	1*32(INP),XTMP1
@@ -597,19 +592,19 @@ last_block_enter:
 
 .align 16
 loop1:
-	vpaddd	0*32(TBL, SRND), X0, XFER
+	vpaddd	K256+0*32(SRND), X0, XFER
 	vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 0*32
 
-	vpaddd	1*32(TBL, SRND), X0, XFER
+	vpaddd	K256+1*32(SRND), X0, XFER
 	vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 1*32
 
-	vpaddd	2*32(TBL, SRND), X0, XFER
+	vpaddd	K256+2*32(SRND), X0, XFER
 	vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 2*32
 
-	vpaddd	3*32(TBL, SRND), X0, XFER
+	vpaddd	K256+3*32(SRND), X0, XFER
 	vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 3*32
 
@@ -619,10 +614,11 @@ loop1:
 
 loop2:
 	## Do last 16 rounds with no scheduling
-	vpaddd	0*32(TBL, SRND), X0, XFER
+	vpaddd	K256+0*32(SRND), X0, XFER
 	vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
 	DO_4ROUNDS	_XFER + 0*32
-	vpaddd	1*32(TBL, SRND), X1, XFER
+
+	vpaddd	K256+1*32(SRND), X1, XFER
 	vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
 	DO_4ROUNDS	_XFER + 1*32
 	add	$2*32, SRND
@@ -676,9 +672,6 @@ loop3:
 	ja	done_hash
 
 do_last_block:
-	#### do last block
-	lea	K256(%rip), TBL
-
 	VMOVDQ	0*16(INP),XWORD0
 	VMOVDQ	1*16(INP),XWORD1
 	VMOVDQ	2*16(INP),XWORD2
@@ -718,7 +711,6 @@ done_hash:
 	popq	%r14
 	popq	%r13
 	popq	%r12
-	popq	%rbp
 	popq	%rbx
 	ret
 ENDPROC(sha256_transform_rorx)
-- 
cgit 


From 539012dcbdd1ff028268764385ed1f6d600812a7 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:09 -0500
Subject: crypto: x86/sha256-ssse3 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Swap the usages of R12 and RBP.  Use R12 for the TBL register, and use
RBP to store the pre-aligned stack pointer.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha256-ssse3-asm.S | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index 39b83c93e7fd..c6c05ed2c16a 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -95,7 +95,7 @@ SRND = %rsi       # clobbers INP
 c = %ecx
 d = %r8d
 e = %edx
-TBL = %rbp
+TBL = %r12
 a = %eax
 b = %ebx
 
@@ -356,13 +356,13 @@ a = TMP_
 ENTRY(sha256_transform_ssse3)
 .align 32
 	pushq   %rbx
-	pushq   %rbp
+	pushq   %r12
 	pushq   %r13
 	pushq   %r14
 	pushq   %r15
-	pushq   %r12
+	pushq   %rbp
+	mov	%rsp, %rbp
 
-	mov	%rsp, %r12
 	subq    $STACK_SIZE, %rsp
 	and	$~15, %rsp
 
@@ -462,13 +462,12 @@ loop2:
 
 done_hash:
 
-	mov	%r12, %rsp
-
-	popq    %r12
+	mov	%rbp, %rsp
+	popq	%rbp
 	popq    %r15
 	popq    %r14
 	popq    %r13
-	popq    %rbp
+	popq    %r12
 	popq    %rbx
 
 	ret
-- 
cgit 


From ca04c823763e5b82c237cabe0c17f547ecdc6271 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:10 -0500
Subject: crypto: sha512-avx2 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Mix things up a little bit to get rid of the RBP usage, without hurting
performance too much.  Use RDI instead of RBP for the TBL pointer.  That
will clobber CTX, so spill CTX onto the stack and use R12 to read it in
the outer loop.  R12 is used as a non-persistent temporary variable
elsewhere, so it's safe to use.

Also remove the unused y4 variable.

Reported-by: Eric Biggers <ebiggers3@gmail.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha512-avx2-asm.S | 75 ++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 36 deletions(-)

diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 7f5f6c6ec72e..b16d56005162 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -69,8 +69,9 @@ XFER  = YTMP0
 
 BYTE_FLIP_MASK  = %ymm9
 
-# 1st arg
-CTX         = %rdi
+# 1st arg is %rdi, which is saved to the stack and accessed later via %r12
+CTX1        = %rdi
+CTX2        = %r12
 # 2nd arg
 INP         = %rsi
 # 3rd arg
@@ -81,7 +82,7 @@ d           = %r8
 e           = %rdx
 y3          = %rsi
 
-TBL   = %rbp
+TBL   = %rdi # clobbers CTX1
 
 a     = %rax
 b     = %rbx
@@ -91,26 +92,26 @@ g     = %r10
 h     = %r11
 old_h = %r11
 
-T1    = %r12
+T1    = %r12 # clobbers CTX2
 y0    = %r13
 y1    = %r14
 y2    = %r15
 
-y4    = %r12
-
 # Local variables (stack frame)
 XFER_SIZE = 4*8
 SRND_SIZE = 1*8
 INP_SIZE = 1*8
 INPEND_SIZE = 1*8
+CTX_SIZE = 1*8
 RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 6*8
+GPRSAVE_SIZE = 5*8
 
 frame_XFER = 0
 frame_SRND = frame_XFER + XFER_SIZE
 frame_INP = frame_SRND + SRND_SIZE
 frame_INPEND = frame_INP + INP_SIZE
-frame_RSPSAVE = frame_INPEND + INPEND_SIZE
+frame_CTX = frame_INPEND + INPEND_SIZE
+frame_RSPSAVE = frame_CTX + CTX_SIZE
 frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
 frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 
@@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx)
 	mov	%rax, frame_RSPSAVE(%rsp)
 
 	# Save GPRs
-	mov	%rbp, frame_GPRSAVE(%rsp)
-	mov	%rbx, 8*1+frame_GPRSAVE(%rsp)
-	mov	%r12, 8*2+frame_GPRSAVE(%rsp)
-	mov	%r13, 8*3+frame_GPRSAVE(%rsp)
-	mov	%r14, 8*4+frame_GPRSAVE(%rsp)
-	mov	%r15, 8*5+frame_GPRSAVE(%rsp)
+	mov	%rbx, 8*0+frame_GPRSAVE(%rsp)
+	mov	%r12, 8*1+frame_GPRSAVE(%rsp)
+	mov	%r13, 8*2+frame_GPRSAVE(%rsp)
+	mov	%r14, 8*3+frame_GPRSAVE(%rsp)
+	mov	%r15, 8*4+frame_GPRSAVE(%rsp)
 
 	shl	$7, NUM_BLKS	# convert to bytes
 	jz	done_hash
@@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx)
 	mov	NUM_BLKS, frame_INPEND(%rsp)
 
 	## load initial digest
-	mov	8*0(CTX),a
-	mov	8*1(CTX),b
-	mov	8*2(CTX),c
-	mov	8*3(CTX),d
-	mov	8*4(CTX),e
-	mov	8*5(CTX),f
-	mov	8*6(CTX),g
-	mov	8*7(CTX),h
+	mov	8*0(CTX1), a
+	mov	8*1(CTX1), b
+	mov	8*2(CTX1), c
+	mov	8*3(CTX1), d
+	mov	8*4(CTX1), e
+	mov	8*5(CTX1), f
+	mov	8*6(CTX1), g
+	mov	8*7(CTX1), h
+
+	# save %rdi (CTX) before it gets clobbered
+	mov	%rdi, frame_CTX(%rsp)
 
 	vmovdqa	PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
 
@@ -652,14 +655,15 @@ loop2:
 	subq	$1, frame_SRND(%rsp)
 	jne	loop2
 
-	addm	8*0(CTX),a
-	addm	8*1(CTX),b
-	addm	8*2(CTX),c
-	addm	8*3(CTX),d
-	addm	8*4(CTX),e
-	addm	8*5(CTX),f
-	addm	8*6(CTX),g
-	addm	8*7(CTX),h
+	mov	frame_CTX(%rsp), CTX2
+	addm	8*0(CTX2), a
+	addm	8*1(CTX2), b
+	addm	8*2(CTX2), c
+	addm	8*3(CTX2), d
+	addm	8*4(CTX2), e
+	addm	8*5(CTX2), f
+	addm	8*6(CTX2), g
+	addm	8*7(CTX2), h
 
 	mov	frame_INP(%rsp), INP
 	add	$128, INP
@@ -669,12 +673,11 @@ loop2:
 done_hash:
 
 # Restore GPRs
-	mov	frame_GPRSAVE(%rsp)     ,%rbp
-	mov	8*1+frame_GPRSAVE(%rsp) ,%rbx
-	mov	8*2+frame_GPRSAVE(%rsp) ,%r12
-	mov	8*3+frame_GPRSAVE(%rsp) ,%r13
-	mov	8*4+frame_GPRSAVE(%rsp) ,%r14
-	mov	8*5+frame_GPRSAVE(%rsp) ,%r15
+	mov	8*0+frame_GPRSAVE(%rsp), %rbx
+	mov	8*1+frame_GPRSAVE(%rsp), %r12
+	mov	8*2+frame_GPRSAVE(%rsp), %r13
+	mov	8*3+frame_GPRSAVE(%rsp), %r14
+	mov	8*4+frame_GPRSAVE(%rsp), %r15
 
 	# Restore Stack Pointer
 	mov	frame_RSPSAVE(%rsp), %rsp
-- 
cgit 


From 8f182f845d0fa26ecc18d3bdcc3d1077a0ea3a31 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:11 -0500
Subject: crypto: x86/twofish - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R13 instead of RBP.  Both are callee-saved registers, so the
substitution is straightforward.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index b3f49d286348..73b471da3622 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -76,8 +76,8 @@
 #define RT %xmm14
 #define RR %xmm15
 
-#define RID1  %rbp
-#define RID1d %ebp
+#define RID1  %r13
+#define RID1d %r13d
 #define RID2  %rsi
 #define RID2d %esi
 
@@ -259,7 +259,7 @@ __twofish_enc_blk8:
 
 	vmovdqu w(CTX), RK1;
 
-	pushq %rbp;
+	pushq %r13;
 	pushq %rbx;
 	pushq %rcx;
 
@@ -282,7 +282,7 @@ __twofish_enc_blk8:
 
 	popq %rcx;
 	popq %rbx;
-	popq %rbp;
+	popq %r13;
 
 	outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
 	outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
@@ -301,7 +301,7 @@ __twofish_dec_blk8:
 
 	vmovdqu (w+4*4)(CTX), RK1;
 
-	pushq %rbp;
+	pushq %r13;
 	pushq %rbx;
 
 	inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
@@ -322,7 +322,7 @@ __twofish_dec_blk8:
 	vmovdqu (w)(CTX), RK1;
 
 	popq %rbx;
-	popq %rbp;
+	popq %r13;
 
 	outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
 	outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
-- 
cgit 


From afd62fa26343be6445479e75de9f07092a061459 Mon Sep 17 00:00:00 2001
From: LEROY Christophe <christophe.leroy@c-s.fr>
Date: Wed, 13 Sep 2017 12:44:51 +0200
Subject: crypto: talitos - fix sha224

Kernel crypto tests report the following error at startup

[    2.752626] alg: hash: Test 4 failed for sha224-talitos
[    2.757907] 00000000: 30 e2 86 e2 e7 8a dd 0d d7 eb 9f d5 83 fe f1 b0
00000010: 2d 5a 6c a5 f9 55 ea fd 0e 72 05 22

This patch fixes it

Cc: <stable@vger.kernel.org>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/talitos.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 79791c690858..6596761bc0c8 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1756,9 +1756,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 		req_ctx->swinit = 0;
 	} else {
 		desc->ptr[1] = zero_entry;
-		/* Indicate next op is not the first. */
-		req_ctx->first = 0;
 	}
+	/* Indicate next op is not the first. */
+	req_ctx->first = 0;
 
 	/* HMAC key */
 	if (ctx->keylen)
-- 
cgit 


From 886a27c0fc8a34633aadb0986dba11d8c150ae2e Mon Sep 17 00:00:00 2001
From: LEROY Christophe <christophe.leroy@c-s.fr>
Date: Wed, 13 Sep 2017 12:44:57 +0200
Subject: crypto: talitos - fix hashing

md5sum on some files gives wrong result

Exemple:

With the md5sum from libkcapi:
c15115c05bad51113f81bdaee735dd09  test

With the original md5sum:
bbdf41d80ba7e8b2b7be3a0772be76cb  test

This patch fixes this issue

Cc: <stable@vger.kernel.org>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/talitos.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 6596761bc0c8..8a48fc4f3642 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1769,7 +1769,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 
 	sg_count = edesc->src_nents ?: 1;
 	if (is_sec1 && sg_count > 1)
-		sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length);
+		sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length);
 	else
 		sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count,
 				      DMA_TO_DEVICE);
-- 
cgit 


From 56136631573baa537a15e0012055ffe8cfec1a33 Mon Sep 17 00:00:00 2001
From: LEROY Christophe <christophe.leroy@c-s.fr>
Date: Tue, 12 Sep 2017 11:03:39 +0200
Subject: crypto: talitos - Don't provide setkey for non hmac hashing algs.

Today, md5sum fails with error -ENOKEY because a setkey
function is set for non hmac hashing algs, see strace output below:

mmap(NULL, 378880, PROT_READ, MAP_SHARED, 6, 0) = 0x77f50000
accept(3, 0, NULL)                      = 7
vmsplice(5, [{"bin/\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 378880}], 1, SPLICE_F_MORE|SPLICE_F_GIFT) = 262144
splice(4, NULL, 7, NULL, 262144, SPLICE_F_MORE) = -1 ENOKEY (Required key not available)
write(2, "Generation of hash for file kcap"..., 50) = 50
munmap(0x77f50000, 378880)              = 0

This patch ensures that setkey() function is set only
for hmac hashing.

Cc: <stable@vger.kernel.org>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/talitos.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 8a48fc4f3642..dff88838dce7 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -3057,7 +3057,8 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
 		t_alg->algt.alg.hash.final = ahash_final;
 		t_alg->algt.alg.hash.finup = ahash_finup;
 		t_alg->algt.alg.hash.digest = ahash_digest;
-		t_alg->algt.alg.hash.setkey = ahash_setkey;
+		if (!strncmp(alg->cra_name, "hmac", 4))
+			t_alg->algt.alg.hash.setkey = ahash_setkey;
 		t_alg->algt.alg.hash.import = ahash_import;
 		t_alg->algt.alg.hash.export = ahash_export;
 
-- 
cgit 


From 3e1166b94e661ed51af1fe1fe5f74bd83450b50f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 12 Sep 2017 12:12:16 +0200
Subject: crypto: inside-secure - fix gcc-4.9 warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All older compiler versions up to gcc-4.9 produce these
harmless warnings:

drivers/crypto/inside-secure/safexcel_cipher.c:389:9: warning: missing braces around initializer [-Wmissing-braces]
drivers/crypto/inside-secure/safexcel_cipher.c:389:9: warning: (near initialization for ‘result.completion’) [-Wmissing-braces]
drivers/crypto/inside-secure/safexcel_hash.c:422:9: warning: missing braces around initializer [-Wmissing-braces]
drivers/crypto/inside-secure/safexcel_hash.c:422:9: warning: (near initialization for ‘result.completion’) [-Wmissing-braces]

This changes the syntax to something that works on all versions
without warnings.

Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/inside-secure/safexcel_cipher.c | 2 +-
 drivers/crypto/inside-secure/safexcel_hash.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index d2207ac5ba19..5438552bc6d7 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -386,7 +386,7 @@ static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct skcipher_request req;
-	struct safexcel_inv_result result = { 0 };
+	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
 
 	memset(&req, 0, sizeof(struct skcipher_request));
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 3f819399cd95..3980f946874f 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -419,7 +419,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
 	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct ahash_request req;
-	struct safexcel_inv_result result = { 0 };
+	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
 
 	memset(&req, 0, sizeof(struct ahash_request));
-- 
cgit 


From c056d910f08029662080a01b4ce2110e2c9a27b6 Mon Sep 17 00:00:00 2001
From: Horia Geantă <horia.geanta@nxp.com>
Date: Fri, 1 Sep 2017 17:12:59 +0300
Subject: crypto: caam - fix LS1021A support on ARMv7 multiplatform kernel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When built using multi_v7_defconfig, driver does not work on LS1021A:
[...]
caam 1700000.crypto: can't identify CAAM ipg clk: -2
caam: probe of 1700000.crypto failed with error -2
[...]

It turns out we have to detect at runtime whether driver is running
on an i.MX platform or not.

Cc: <stable@vger.kernel.org>
Fixes: 6c3af9559352 ("crypto: caam - add support for LS1021A")
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/Kconfig |  5 +---
 drivers/crypto/caam/ctrl.c  | 19 ++++++++-------
 drivers/crypto/caam/regs.h  | 59 +++++++++++++++++++++------------------------
 3 files changed, 39 insertions(+), 44 deletions(-)

diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index e36aeacd7635..1eb852765469 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -1,6 +1,7 @@
 config CRYPTO_DEV_FSL_CAAM
 	tristate "Freescale CAAM-Multicore driver backend"
 	depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE
+	select SOC_BUS
 	help
 	  Enables the driver module for Freescale's Cryptographic Accelerator
 	  and Assurance Module (CAAM), also known as the SEC version 4 (SEC4).
@@ -141,10 +142,6 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API
 	  To compile this as a module, choose M here: the module
 	  will be called caamrng.
 
-config CRYPTO_DEV_FSL_CAAM_IMX
-	def_bool SOC_IMX6 || SOC_IMX7D
-	depends on CRYPTO_DEV_FSL_CAAM
-
 config CRYPTO_DEV_FSL_CAAM_DEBUG
 	bool "Enable debug output in CAAM driver"
 	depends on CRYPTO_DEV_FSL_CAAM
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index dacb53fb690e..027e121c6f70 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -7,6 +7,7 @@
 #include <linux/device.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/sys_soc.h>
 
 #include "compat.h"
 #include "regs.h"
@@ -19,6 +20,8 @@ bool caam_little_end;
 EXPORT_SYMBOL(caam_little_end);
 bool caam_dpaa2;
 EXPORT_SYMBOL(caam_dpaa2);
+bool caam_imx;
+EXPORT_SYMBOL(caam_imx);
 
 #ifdef CONFIG_CAAM_QI
 #include "qi.h"
@@ -28,19 +31,11 @@ EXPORT_SYMBOL(caam_dpaa2);
  * i.MX targets tend to have clock control subsystems that can
  * enable/disable clocking to our device.
  */
-#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
 static inline struct clk *caam_drv_identify_clk(struct device *dev,
 						char *clk_name)
 {
-	return devm_clk_get(dev, clk_name);
+	return caam_imx ? devm_clk_get(dev, clk_name) : NULL;
 }
-#else
-static inline struct clk *caam_drv_identify_clk(struct device *dev,
-						char *clk_name)
-{
-	return NULL;
-}
-#endif
 
 /*
  * Descriptor to instantiate RNG State Handle 0 in normal mode and
@@ -430,6 +425,10 @@ static int caam_probe(struct platform_device *pdev)
 {
 	int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN;
 	u64 caam_id;
+	static const struct soc_device_attribute imx_soc[] = {
+		{.family = "Freescale i.MX"},
+		{},
+	};
 	struct device *dev;
 	struct device_node *nprop, *np;
 	struct caam_ctrl __iomem *ctrl;
@@ -451,6 +450,8 @@ static int caam_probe(struct platform_device *pdev)
 	dev_set_drvdata(dev, ctrlpriv);
 	nprop = pdev->dev.of_node;
 
+	caam_imx = (bool)soc_device_match(imx_soc);
+
 	/* Enable clocking */
 	clk = caam_drv_identify_clk(&pdev->dev, "ipg");
 	if (IS_ERR(clk)) {
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 2b5efff9ec3c..17cfd23a38fa 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -67,6 +67,7 @@
  */
 
 extern bool caam_little_end;
+extern bool caam_imx;
 
 #define caam_to_cpu(len)				\
 static inline u##len caam##len ## _to_cpu(u##len val)	\
@@ -154,13 +155,10 @@ static inline u64 rd_reg64(void __iomem *reg)
 #else /* CONFIG_64BIT */
 static inline void wr_reg64(void __iomem *reg, u64 data)
 {
-#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
-	if (caam_little_end) {
+	if (!caam_imx && caam_little_end) {
 		wr_reg32((u32 __iomem *)(reg) + 1, data >> 32);
 		wr_reg32((u32 __iomem *)(reg), data);
-	} else
-#endif
-	{
+	} else {
 		wr_reg32((u32 __iomem *)(reg), data >> 32);
 		wr_reg32((u32 __iomem *)(reg) + 1, data);
 	}
@@ -168,41 +166,40 @@ static inline void wr_reg64(void __iomem *reg, u64 data)
 
 static inline u64 rd_reg64(void __iomem *reg)
 {
-#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
-	if (caam_little_end)
+	if (!caam_imx && caam_little_end)
 		return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 |
 			(u64)rd_reg32((u32 __iomem *)(reg)));
-	else
-#endif
-		return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 |
-			(u64)rd_reg32((u32 __iomem *)(reg) + 1));
+
+	return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 |
+		(u64)rd_reg32((u32 __iomem *)(reg) + 1));
 }
 #endif /* CONFIG_64BIT  */
 
+static inline u64 cpu_to_caam_dma64(dma_addr_t value)
+{
+	if (caam_imx)
+		return (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) |
+			 (u64)cpu_to_caam32(upper_32_bits(value)));
+
+	return cpu_to_caam64(value);
+}
+
+static inline u64 caam_dma64_to_cpu(u64 value)
+{
+	if (caam_imx)
+		return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) |
+			 (u64)caam32_to_cpu(upper_32_bits(value)));
+
+	return caam64_to_cpu(value);
+}
+
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-#ifdef CONFIG_SOC_IMX7D
-#define cpu_to_caam_dma(value) \
-		(((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
-		  (u64)cpu_to_caam32(upper_32_bits(value)))
-#define caam_dma_to_cpu(value) \
-		(((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \
-		  (u64)caam32_to_cpu(upper_32_bits(value)))
-#else
-#define cpu_to_caam_dma(value) cpu_to_caam64(value)
-#define caam_dma_to_cpu(value) caam64_to_cpu(value)
-#endif /* CONFIG_SOC_IMX7D */
+#define cpu_to_caam_dma(value) cpu_to_caam_dma64(value)
+#define caam_dma_to_cpu(value) caam_dma64_to_cpu(value)
 #else
 #define cpu_to_caam_dma(value) cpu_to_caam32(value)
 #define caam_dma_to_cpu(value) caam32_to_cpu(value)
-#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT  */
-
-#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
-#define cpu_to_caam_dma64(value) \
-		(((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
-		 (u64)cpu_to_caam32(upper_32_bits(value)))
-#else
-#define cpu_to_caam_dma64(value) cpu_to_caam64(value)
-#endif
+#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
 
 /*
  * jr_outentry
-- 
cgit 


From e117765a117da3ece15689cb8a759d16c415b08c Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Wed, 30 Aug 2017 09:17:39 +0200
Subject: crypto: af_alg - update correct dst SGL entry

When two adjacent TX SGL are processed and parts of both TX SGLs
are pulled into the per-request TX SGL, the wrong per-request
TX SGL entries were updated.

This fixes a NULL pointer dereference when a cipher implementation walks
the TX SGL where some of the SGL entries were NULL.

Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory...")
Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/af_alg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index ffa9f4ccd9b4..337cf382718e 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -619,14 +619,14 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
 	struct af_alg_ctx *ctx = ask->private;
 	struct af_alg_tsgl *sgl;
 	struct scatterlist *sg;
-	unsigned int i, j;
+	unsigned int i, j = 0;
 
 	while (!list_empty(&ctx->tsgl_list)) {
 		sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl,
 				       list);
 		sg = sgl->sg;
 
-		for (i = 0, j = 0; i < sgl->cur; i++) {
+		for (i = 0; i < sgl->cur; i++) {
 			size_t plen = min_t(size_t, used, sg[i].length);
 			struct page *page = sg_page(sg + i);
 
-- 
cgit 


From 8afafa6fba7809c0785018b77c95b19e58b35b94 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Fri, 15 Sep 2017 15:38:21 +0530
Subject: powerpc/kprobes: Update optprobes to use emulate_update_regs()

Optprobes depended on an updated regs->nip from analyse_instr() to
identify the location to branch back from the optprobes trampoline.
However, since commit 3cdfcbfd32b9d ("powerpc: Change analyse_instr so
it doesn't modify *regs"), analyse_instr() doesn't update the registers
anymore.  Due to this, we end up branching back from the optprobes
trampoline to the same branch into the trampoline resulting in a loop.

Fix this by calling out to emulate_update_regs() before using the nip.
Additionally, explicitly compare the return value from analyse_instr()
to 1, rather than just checking for !0 so as to guard against any
future changes to analyse_instr() that may result in -1 being returned
in more scenarios.

Fixes: 3cdfcbfd32b9d ("powerpc: Change analyse_instr so it doesn't modify *regs")
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/optprobes.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 6f8273f5e988..91e037ab20a1 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -104,8 +104,10 @@ static unsigned long can_optimize(struct kprobe *p)
 	 * and that can be emulated.
 	 */
 	if (!is_conditional_branch(*p->ainsn.insn) &&
-			analyse_instr(&op, &regs, *p->ainsn.insn))
+			analyse_instr(&op, &regs, *p->ainsn.insn) == 1) {
+		emulate_update_regs(&regs, &op);
 		nip = regs.nip;
+	}
 
 	return nip;
 }
-- 
cgit 


From 1b25fda0533462c9cee3a22e8a7bea68fa670af2 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 19 Sep 2017 12:52:22 +0200
Subject: s390/topology: alternative topology for topology-less machines

If running on machines that do not provide topology information we
currently generate a "fake" topology which defines the maximum
distance between each cpu: each cpu will be put into an own drawer.

Historically this used to be the best option for (virtual) machines in
overcommited hypervisors.

For some workloads however it is better to generate a different
topology where all cpus are siblings within a package (all cpus are
core siblings). This shows performance improvements of up to 10%,
depending on the workload.

In order to keep the current behaviour, but also allow to switch to
the different core sibling topology use the existing "topology="
kernel parameter:

Specifying "topology=on" on machines without topology information will
generate the core siblings (fake) topology information, instead of the
default topology information where all cpus have the maximum distance.

On machines which provide topology information specifying
"topology=on" does not have any effect.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/early.c    | 12 --------
 arch/s390/kernel/topology.c | 72 ++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 61 insertions(+), 23 deletions(-)

diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index ca8cd80e8feb..60181caf8e8a 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -404,18 +404,6 @@ static inline void save_vector_registers(void)
 #endif
 }
 
-static int __init topology_setup(char *str)
-{
-	bool enabled;
-	int rc;
-
-	rc = kstrtobool(str, &enabled);
-	if (!rc && !enabled)
-		S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY;
-	return rc;
-}
-early_param("topology", topology_setup);
-
 static int __init disable_vector_extension(char *str)
 {
 	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index bb47c92476f0..a0ce9c83f589 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -29,12 +29,20 @@
 #define PTF_VERTICAL	(1UL)
 #define PTF_CHECK	(2UL)
 
+enum {
+	TOPOLOGY_MODE_HW,
+	TOPOLOGY_MODE_SINGLE,
+	TOPOLOGY_MODE_PACKAGE,
+	TOPOLOGY_MODE_UNINITIALIZED
+};
+
 struct mask_info {
 	struct mask_info *next;
 	unsigned char id;
 	cpumask_t mask;
 };
 
+static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
 static void set_topology_timer(void);
 static void topology_work_fn(struct work_struct *work);
 static struct sysinfo_15_1_x *tl_info;
@@ -59,11 +67,26 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
 	cpumask_t mask;
 
 	cpumask_copy(&mask, cpumask_of(cpu));
-	if (!MACHINE_HAS_TOPOLOGY)
-		return mask;
-	for (; info; info = info->next) {
-		if (cpumask_test_cpu(cpu, &info->mask))
-			return info->mask;
+	switch (topology_mode) {
+	case TOPOLOGY_MODE_HW:
+		while (info) {
+			if (cpumask_test_cpu(cpu, &info->mask)) {
+				mask = info->mask;
+				break;
+			}
+			info = info->next;
+		}
+		if (cpumask_empty(&mask))
+			cpumask_copy(&mask, cpumask_of(cpu));
+		break;
+	case TOPOLOGY_MODE_PACKAGE:
+		cpumask_copy(&mask, cpu_present_mask);
+		break;
+	default:
+		/* fallthrough */
+	case TOPOLOGY_MODE_SINGLE:
+		cpumask_copy(&mask, cpumask_of(cpu));
+		break;
 	}
 	return mask;
 }
@@ -74,7 +97,7 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
 	int i;
 
 	cpumask_copy(&mask, cpumask_of(cpu));
-	if (!MACHINE_HAS_TOPOLOGY)
+	if (topology_mode != TOPOLOGY_MODE_HW)
 		return mask;
 	cpu -= cpu % (smp_cpu_mtid + 1);
 	for (i = 0; i <= smp_cpu_mtid; i++)
@@ -223,7 +246,7 @@ int topology_set_cpu_management(int fc)
 static void update_cpu_masks(void)
 {
 	struct cpu_topology_s390 *topo;
-	int cpu;
+	int cpu, id;
 
 	for_each_possible_cpu(cpu) {
 		topo = &cpu_topology[cpu];
@@ -231,12 +254,13 @@ static void update_cpu_masks(void)
 		topo->core_mask = cpu_group_map(&socket_info, cpu);
 		topo->book_mask = cpu_group_map(&book_info, cpu);
 		topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
-		if (!MACHINE_HAS_TOPOLOGY) {
+		if (topology_mode != TOPOLOGY_MODE_HW) {
+			id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
 			topo->thread_id = cpu;
 			topo->core_id = cpu;
-			topo->socket_id = cpu;
-			topo->book_id = cpu;
-			topo->drawer_id = cpu;
+			topo->socket_id = id;
+			topo->book_id = id;
+			topo->drawer_id = id;
 			if (cpu_present(cpu))
 				cpumask_set_cpu(cpu, &cpus_with_topology);
 		}
@@ -459,6 +483,12 @@ void __init topology_init_early(void)
 	struct sysinfo_15_1_x *info;
 
 	set_sched_topology(s390_topology);
+	if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
+		if (MACHINE_HAS_TOPOLOGY)
+			topology_mode = TOPOLOGY_MODE_HW;
+		else
+			topology_mode = TOPOLOGY_MODE_SINGLE;
+	}
 	if (!MACHINE_HAS_TOPOLOGY)
 		goto out;
 	tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE);
@@ -474,6 +504,26 @@ out:
 	__arch_update_cpu_topology();
 }
 
+static inline int topology_get_mode(int enabled)
+{
+	if (!enabled)
+		return TOPOLOGY_MODE_SINGLE;
+	return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
+}
+
+static int __init topology_setup(char *str)
+{
+	bool enabled;
+	int rc;
+
+	rc = kstrtobool(str, &enabled);
+	if (rc)
+		return rc;
+	topology_mode = topology_get_mode(enabled);
+	return 0;
+}
+early_param("topology", topology_setup);
+
 static int __init topology_init(void)
 {
 	if (MACHINE_HAS_TOPOLOGY)
-- 
cgit 


From 51dce3867c6c63c7500332e5448c2ba76808d6b5 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 14 Sep 2017 14:42:32 +0200
Subject: s390/topology: enable / disable topology dynamically

Add a new sysctl file /proc/sys/s390/topology which displays if
topology is on (1) or off (0) as specified by the "topology=" kernel
parameter.

This allows to change topology information during runtime and
configuring it via /etc/sysctl.conf instead of using the kernel line
parameter.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/topology.c | 76 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index a0ce9c83f589..ed0bdd220e1a 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -8,6 +8,8 @@
 
 #include <linux/workqueue.h>
 #include <linux/bootmem.h>
+#include <linux/uaccess.h>
+#include <linux/sysctl.h>
 #include <linux/cpuset.h>
 #include <linux/device.h>
 #include <linux/export.h>
@@ -207,10 +209,8 @@ static void topology_update_polarization_simple(void)
 {
 	int cpu;
 
-	mutex_lock(&smp_cpu_state_mutex);
 	for_each_possible_cpu(cpu)
 		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
-	mutex_unlock(&smp_cpu_state_mutex);
 }
 
 static int ptf(unsigned long fc)
@@ -278,6 +278,7 @@ static int __arch_update_cpu_topology(void)
 	struct sysinfo_15_1_x *info = tl_info;
 	int rc = 0;
 
+	mutex_lock(&smp_cpu_state_mutex);
 	cpumask_clear(&cpus_with_topology);
 	if (MACHINE_HAS_TOPOLOGY) {
 		rc = 1;
@@ -287,6 +288,7 @@ static int __arch_update_cpu_topology(void)
 	update_cpu_masks();
 	if (!MACHINE_HAS_TOPOLOGY)
 		topology_update_polarization_simple();
+	mutex_unlock(&smp_cpu_state_mutex);
 	return rc;
 }
 
@@ -313,6 +315,11 @@ void topology_schedule_update(void)
 	schedule_work(&topology_work);
 }
 
+static void topology_flush_work(void)
+{
+	flush_work(&topology_work);
+}
+
 static void topology_timer_fn(unsigned long ignored)
 {
 	if (ptf(PTF_CHECK))
@@ -511,6 +518,11 @@ static inline int topology_get_mode(int enabled)
 	return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
 }
 
+static inline int topology_is_enabled(void)
+{
+	return topology_mode != TOPOLOGY_MODE_SINGLE;
+}
+
 static int __init topology_setup(char *str)
 {
 	bool enabled;
@@ -524,12 +536,72 @@ static int __init topology_setup(char *str)
 }
 early_param("topology", topology_setup);
 
+static int topology_ctl_handler(struct ctl_table *ctl, int write,
+				void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	unsigned int len;
+	int new_mode;
+	char buf[2];
+
+	if (!*lenp || *ppos) {
+		*lenp = 0;
+		return 0;
+	}
+	if (!write) {
+		strncpy(buf, topology_is_enabled() ? "1\n" : "0\n",
+			ARRAY_SIZE(buf));
+		len = strnlen(buf, ARRAY_SIZE(buf));
+		if (len > *lenp)
+			len = *lenp;
+		if (copy_to_user(buffer, buf, len))
+			return -EFAULT;
+		goto out;
+	}
+	len = *lenp;
+	if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len))
+		return -EFAULT;
+	if (buf[0] != '0' && buf[0] != '1')
+		return -EINVAL;
+	mutex_lock(&smp_cpu_state_mutex);
+	new_mode = topology_get_mode(buf[0] == '1');
+	if (topology_mode != new_mode) {
+		topology_mode = new_mode;
+		topology_schedule_update();
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	topology_flush_work();
+out:
+	*lenp = len;
+	*ppos += len;
+	return 0;
+}
+
+static struct ctl_table topology_ctl_table[] = {
+	{
+		.procname	= "topology",
+		.mode		= 0644,
+		.proc_handler	= topology_ctl_handler,
+	},
+	{ },
+};
+
+static struct ctl_table topology_dir_table[] = {
+	{
+		.procname	= "s390",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= topology_ctl_table,
+	},
+	{ },
+};
+
 static int __init topology_init(void)
 {
 	if (MACHINE_HAS_TOPOLOGY)
 		set_topology_timer();
 	else
 		topology_update_polarization_simple();
+	register_sysctl_table(topology_dir_table);
 	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
 }
 device_initcall(topology_init);
-- 
cgit 


From 9e09007486c883de4aa78a384e1d54c3fe6e42d5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 18 Sep 2017 18:12:36 +0900
Subject: kbuild: rpm-pkg: delete firmware_install to fix build error

Commit 5620a0d1aacd ("firmware: delete in-kernel firmware") deleted
in-kernel firmware support, including "make firmware_install".

Since then, "make rpm-pkg" / "make binrpm-pkg" fails to build with
the error:

  make[2]: *** No rule to make target `firmware_install'.  Stop.

Commit df85b2d767aa ("firmware: Restore support for built-in firmware")
restored the build infrastructure for CONFIG_EXTRA_FIRMWARE, but this
is out of the scope of "make firmware_install".  So, the right thing to
do is to kill the use of "make firmware_install".

Fixes: 5620a0d1aacd ("firmware: delete in-kernel firmware")
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/package/mkspec | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index bb43f153fd8e..bf3e9abb2846 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -88,11 +88,8 @@ echo 'mkdir -p $RPM_BUILD_ROOT/boot/efi $RPM_BUILD_ROOT/lib/modules'
 echo "%else"
 echo 'mkdir -p $RPM_BUILD_ROOT/boot $RPM_BUILD_ROOT/lib/modules'
 echo "%endif"
-echo 'mkdir -p $RPM_BUILD_ROOT'"/lib/firmware/$KERNELRELEASE"
 
-echo 'INSTALL_MOD_PATH=$RPM_BUILD_ROOT make %{?_smp_mflags} KBUILD_SRC= mod-fw= modules_install'
-echo 'INSTALL_FW_PATH=$RPM_BUILD_ROOT'"/lib/firmware/$KERNELRELEASE"
-echo 'make INSTALL_FW_PATH=$INSTALL_FW_PATH' firmware_install
+echo 'INSTALL_MOD_PATH=$RPM_BUILD_ROOT make %{?_smp_mflags} KBUILD_SRC= modules_install'
 echo "%ifarch ia64"
 echo 'cp $KBUILD_IMAGE $RPM_BUILD_ROOT'"/boot/efi/vmlinuz-$KERNELRELEASE"
 echo 'ln -s '"efi/vmlinuz-$KERNELRELEASE" '$RPM_BUILD_ROOT'"/boot/"
@@ -119,7 +116,7 @@ if ! $PREBUILT; then
 echo 'rm -f $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE/build"
 echo 'rm -f $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE/source"
 echo "mkdir -p "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNELRELEASE"
-echo "EXCLUDES=\"$RCS_TAR_IGNORE --exclude .tmp_versions --exclude=*vmlinux* --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation --exclude=firmware --exclude .config.old --exclude .missing-syscalls.d\""
+echo "EXCLUDES=\"$RCS_TAR_IGNORE --exclude .tmp_versions --exclude=*vmlinux* --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation --exclude .config.old --exclude .missing-syscalls.d\""
 echo "tar "'$EXCLUDES'" -cf- . | (cd "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNELRELEASE;tar xvf -)"
 echo 'cd $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE"
 echo "ln -sf /usr/src/kernels/$KERNELRELEASE build"
@@ -154,7 +151,6 @@ echo '%defattr (-, root, root)'
 echo "/lib/modules/$KERNELRELEASE"
 echo "%exclude /lib/modules/$KERNELRELEASE/build"
 echo "%exclude /lib/modules/$KERNELRELEASE/source"
-echo "/lib/firmware/$KERNELRELEASE"
 echo "/boot/*"
 echo ""
 echo "%files headers"
-- 
cgit 


From cc18abbe449aafc013831a8e0440afc336ae1cba Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 18 Sep 2017 18:22:19 +0900
Subject: kbuild: deb-pkg: remove firmware package support

Commit 5620a0d1aacd ("firmware: delete in-kernel firmware") deleted
in-kernel firmware support, including the firmware install command.

So, the firmware package does not make sense any more.  Remove it.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Riku Voipio <riku.voipio@linaro.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/package/builddeb | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index aad67000e4dd..0bc87473f68f 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -92,12 +92,10 @@ else
 fi
 sourcename=$KDEB_SOURCENAME
 tmpdir="$objtree/debian/tmp"
-fwdir="$objtree/debian/fwtmp"
 kernel_headers_dir="$objtree/debian/hdrtmp"
 libc_headers_dir="$objtree/debian/headertmp"
 dbg_dir="$objtree/debian/dbgtmp"
 packagename=linux-image-$version
-fwpackagename=linux-firmware-image-$version
 kernel_headers_packagename=linux-headers-$version
 libc_headers_packagename=linux-libc-dev
 dbg_packagename=$packagename-dbg
@@ -126,10 +124,9 @@ esac
 BUILD_DEBUG="$(grep -s '^CONFIG_DEBUG_INFO=y' $KCONFIG_CONFIG || true)"
 
 # Setup the directory structure
-rm -rf "$tmpdir" "$fwdir" "$kernel_headers_dir" "$libc_headers_dir" "$dbg_dir" $objtree/debian/files
+rm -rf "$tmpdir" "$kernel_headers_dir" "$libc_headers_dir" "$dbg_dir" $objtree/debian/files
 mkdir -m 755 -p "$tmpdir/DEBIAN"
 mkdir -p "$tmpdir/lib" "$tmpdir/boot"
-mkdir -p "$fwdir/lib/firmware/$version/"
 mkdir -p "$kernel_headers_dir/lib/modules/$version/"
 
 # Build and install the kernel
@@ -306,7 +303,6 @@ else
 	cat <<EOF >> debian/control
 
 Package: $packagename
-Suggests: $fwpackagename
 Architecture: any
 Description: Linux kernel, version $version
  This package contains the Linux kernel, modules and corresponding other
@@ -345,22 +341,6 @@ Description: Linux kernel headers for $KERNELRELEASE on \${kernel:debarch}
  This is useful for people who need to build external modules
 EOF
 
-# Do we have firmware? Move it out of the way and build it into a package.
-if [ -e "$tmpdir/lib/firmware" ]; then
-	mv "$tmpdir/lib/firmware"/* "$fwdir/lib/firmware/$version/"
-	rmdir "$tmpdir/lib/firmware"
-
-	cat <<EOF >> debian/control
-
-Package: $fwpackagename
-Architecture: all
-Description: Linux kernel firmware, version $version
- This package contains firmware from the Linux kernel, version $version.
-EOF
-
-	create_package "$fwpackagename" "$fwdir"
-fi
-
 cat <<EOF >> debian/control
 
 Package: $libc_headers_packagename
-- 
cgit 


From 25b080bd53f2873785fa049f570ac1b361c11d72 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 20 Sep 2017 22:01:26 +0900
Subject: kbuild: rpm-pkg: fix version number handling

The "Release:" field of the spec file is determined based on the
.version file.

However, the .version file is not copied to the source tar file.
So, when we build the kernel from the source package, the UTS_VERSION
always indicates #1.  This does not match with "rpm -q".

The kernel UTS_VERSION and "rpm -q" do not agree for binrpm-pkg, either.
Please note the kernel has already been built before the spec file is
created.  Currently, mkspec invokes mkversion.  This script returns an
incremented version.  So, the "Release:" field of the spec file is
greater than the version in the kernel by one.

For the source package build (where .version file is missing), we can
give KBUILD_BUILD_VERSION=%{release} to the build command.

For the binary package build, we can simply read out the .version file
because it contains the version number that was used for building the
kernel image.

We can remove scripts/mkversion because scripts/package/Makefile need
not touch the .version file.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/mkversion        | 6 ------
 scripts/package/Makefile | 5 -----
 scripts/package/mkspec   | 6 ++----
 3 files changed, 2 insertions(+), 15 deletions(-)
 delete mode 100644 scripts/mkversion

diff --git a/scripts/mkversion b/scripts/mkversion
deleted file mode 100644
index c12addc9c7ef..000000000000
--- a/scripts/mkversion
+++ /dev/null
@@ -1,6 +0,0 @@
-if [ ! -f .version ]
-then
-    echo 1
-else
-    expr 0`cat .version` + 1
-fi
diff --git a/scripts/package/Makefile b/scripts/package/Makefile
index 71b4a8af9d4d..73f9f3192b9f 100644
--- a/scripts/package/Makefile
+++ b/scripts/package/Makefile
@@ -50,8 +50,6 @@ rpm-pkg rpm: FORCE
 	$(MAKE) clean
 	$(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec
 	$(call cmd,src_tar,$(KERNELPATH),kernel.spec)
-	$(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version
-	mv -f $(objtree)/.tmp_version $(objtree)/.version
 	rpmbuild $(RPMOPTS) --target $(UTS_MACHINE) -ta $(KERNELPATH).tar.gz
 	rm $(KERNELPATH).tar.gz kernel.spec
 
@@ -60,9 +58,6 @@ rpm-pkg rpm: FORCE
 binrpm-pkg: FORCE
 	$(MAKE) KBUILD_SRC=
 	$(CONFIG_SHELL) $(MKSPEC) prebuilt > $(objtree)/binkernel.spec
-	$(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version
-	mv -f $(objtree)/.tmp_version $(objtree)/.version
-
 	rpmbuild $(RPMOPTS) --define "_builddir $(objtree)" --target \
 		$(UTS_MACHINE) -bb $(objtree)/binkernel.spec
 	rm binkernel.spec
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index bf3e9abb2846..f47f17aae135 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -27,9 +27,7 @@ __KERNELRELEASE=`echo $KERNELRELEASE | sed -e "s/-/_/g"`
 echo "Name: kernel"
 echo "Summary: The Linux Kernel"
 echo "Version: $__KERNELRELEASE"
-# we need to determine the NEXT version number so that uname and
-# rpm -q will agree
-echo "Release: `. $srctree/scripts/mkversion`"
+echo "Release: $(cat .version 2>/dev/null || echo 1)"
 echo "License: GPL"
 echo "Group: System Environment/Kernel"
 echo "Vendor: The Linux Community"
@@ -77,7 +75,7 @@ fi
 echo "%build"
 
 if ! $PREBUILT; then
-echo "make clean && make %{?_smp_mflags}"
+echo "make clean && make %{?_smp_mflags} KBUILD_BUILD_VERSION=%{release}"
 echo ""
 fi
 
-- 
cgit 


From 35f3c984548524be5eb4c3f5295cf58d909e8746 Mon Sep 17 00:00:00 2001
From: Frank Rowand <frank.rowand@sony.com>
Date: Mon, 18 Sep 2017 17:18:44 -0700
Subject: scripts/dtc: dtx_diff - 2nd update of include dts paths to match
 build

Update dtx_diff include paths in the same manner as:
commit b12869a8d519 ("of: remove drivers/of/testcase-data from
include search path for CPP"), commit 5ffa2aed389c ("of: remove
arch/$(SRCARCH)/boot/dts from include search path for CPP"), and
commit 50f9ddaf64e1 ("of: search scripts/dtc/include-prefixes path
for both CPP and DTC").

Remove proposed include path kernel/dts/, which was never implemented
for the dtb build.

For the diff case, each source file is compiled separately.  For
each of those compiles, provide the location of the source file
as an include path, not the location of both source files.

Signed-off-by: Frank Rowand <frank.rowand@sony.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 scripts/dtc/dtx_diff | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/scripts/dtc/dtx_diff b/scripts/dtc/dtx_diff
index f9a3d8d23c64..8c4fbad2055e 100755
--- a/scripts/dtc/dtx_diff
+++ b/scripts/dtc/dtx_diff
@@ -86,6 +86,7 @@ eod
 compile_to_dts() {
 
 	dtx="$1"
+	dtc_include="$2"
 
 	if [ -d "${dtx}" ] ; then
 
@@ -113,7 +114,7 @@ compile_to_dts() {
 		# -----  input is DTS (source)
 
 		if ( cpp ${cpp_flags} -x assembler-with-cpp ${dtx} \
-			| ${DTC} -I dts ) ; then
+			| ${DTC} ${dtc_include} -I dts ) ; then
 			return
 		fi
 
@@ -320,18 +321,13 @@ fi
 
 cpp_flags="\
 	-nostdinc                                  \
-	-I${srctree}/arch/${ARCH}/boot/dts         \
 	-I${srctree}/scripts/dtc/include-prefixes  \
-	-I${srctree}/drivers/of/testcase-data      \
 	-undef -D__DTS__"
 
-dtc_flags="\
-	-i ${srctree}/arch/${ARCH}/boot/dts/ \
-	-i ${srctree}/kernel/dts             \
-	${dtx_path_1_dtc_include}            \
-	${dtx_path_2_dtc_include}"
-
-DTC="${DTC} ${dtc_flags} -O dts -qq -f ${dtc_sort} -o -"
+DTC="\
+	${DTC}                                     \
+	-i ${srctree}/scripts/dtc/include-prefixes \
+	-O dts -qq -f ${dtc_sort} -o -"
 
 
 # -----  do the diff or decompile
@@ -339,11 +335,11 @@ DTC="${DTC} ${dtc_flags} -O dts -qq -f ${dtc_sort} -o -"
 if (( ${cmd_diff} )) ; then
 
 	diff ${diff_flags} --label "${dtx_file_1}" --label "${dtx_file_2}" \
-		<(compile_to_dts "${dtx_file_1}") \
-		<(compile_to_dts "${dtx_file_2}")
+		<(compile_to_dts "${dtx_file_1}" "${dtx_path_1_dtc_include}") \
+		<(compile_to_dts "${dtx_file_2}" "${dtx_path_2_dtc_include}")
 
 else
 
-	compile_to_dts "${dtx_file_1}"
+	compile_to_dts "${dtx_file_1}" "${dtx_path_1_dtc_include}"
 
 fi
-- 
cgit 


From 749aaf3372b8b56b8743c3e27700d64c8bd06921 Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Wed, 20 Sep 2017 13:56:06 -0500
Subject: PCI: endpoint: Use correct "end of test" interrupt

pci_epf_test_raise_irq() reads the interrupt to use for the response from
reg->command, but this has been cleared at the beginning of the command
handler so the value is always zero at this point.

Instead, extract the interrupt index before handling the command and then
pass the requested interrupt into pci_epf_test_raise_irq().  This allows us
to remove the specific code to extract the interrupt for
COMMAND_RAISE_MSI_IRQ since it is now handled in common code.

Fixes: 3ecf3232c54c ("PCI: endpoint: Do not reset *command* inadvertently")
Signed-off-by: John Keeping <john@metanate.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/pci/endpoint/functions/pci-epf-test.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index 4ddc6e8f9fe7..f9308c2f22e6 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -251,9 +251,8 @@ err:
 	return ret;
 }
 
-static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test)
+static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test, u8 irq)
 {
-	u8 irq;
 	u8 msi_count;
 	struct pci_epf *epf = epf_test->epf;
 	struct pci_epc *epc = epf->epc;
@@ -262,7 +261,6 @@ static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test)
 
 	reg->status |= STATUS_IRQ_RAISED;
 	msi_count = pci_epc_get_msi(epc);
-	irq = (reg->command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT;
 	if (irq > msi_count || msi_count <= 0)
 		pci_epc_raise_irq(epc, PCI_EPC_IRQ_LEGACY, 0);
 	else
@@ -289,6 +287,8 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 	reg->command = 0;
 	reg->status = 0;
 
+	irq = (command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT;
+
 	if (command & COMMAND_RAISE_LEGACY_IRQ) {
 		reg->status = STATUS_IRQ_RAISED;
 		pci_epc_raise_irq(epc, PCI_EPC_IRQ_LEGACY, 0);
@@ -301,7 +301,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 			reg->status |= STATUS_WRITE_FAIL;
 		else
 			reg->status |= STATUS_WRITE_SUCCESS;
-		pci_epf_test_raise_irq(epf_test);
+		pci_epf_test_raise_irq(epf_test, irq);
 		goto reset_handler;
 	}
 
@@ -311,7 +311,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 			reg->status |= STATUS_READ_SUCCESS;
 		else
 			reg->status |= STATUS_READ_FAIL;
-		pci_epf_test_raise_irq(epf_test);
+		pci_epf_test_raise_irq(epf_test, irq);
 		goto reset_handler;
 	}
 
@@ -321,13 +321,12 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 			reg->status |= STATUS_COPY_SUCCESS;
 		else
 			reg->status |= STATUS_COPY_FAIL;
-		pci_epf_test_raise_irq(epf_test);
+		pci_epf_test_raise_irq(epf_test, irq);
 		goto reset_handler;
 	}
 
 	if (command & COMMAND_RAISE_MSI_IRQ) {
 		msi_count = pci_epc_get_msi(epc);
-		irq = (command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT;
 		if (irq > msi_count || msi_count <= 0)
 			goto reset_handler;
 		reg->status = STATUS_IRQ_RAISED;
-- 
cgit 


From 008ba2a13f2d04c947adc536d19debb8fe66f110 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 14 Sep 2017 17:14:41 -0400
Subject: packet: hold bind lock when rebinding to fanout hook

Packet socket bind operations must hold the po->bind_lock. This keeps
po->running consistent with whether the socket is actually on a ptype
list to receive packets.

fanout_add unbinds a socket and its packet_rcv/tpacket_rcv call, then
binds the fanout object to receive through packet_rcv_fanout.

Make it hold the po->bind_lock when testing po->running and rebinding.
Else, it can race with other rebind operations, such as that in
packet_set_ring from packet_rcv to tpacket_rcv. Concurrent updates
can result in a socket being added to a fanout group twice, causing
use-after-free KASAN bug reports, among others.

Reported independently by both trinity and syzkaller.
Verified that the syzkaller reproducer passes after this patch.

Fixes: dc99f600698d ("packet: Add fanout support.")
Reported-by: nixioaming <nixiaoming@huawei.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c26172995511..d288f52c53f7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1684,10 +1684,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 
 	mutex_lock(&fanout_mutex);
 
-	err = -EINVAL;
-	if (!po->running)
-		goto out;
-
 	err = -EALREADY;
 	if (po->fanout)
 		goto out;
@@ -1749,7 +1745,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		list_add(&match->list, &fanout_list);
 	}
 	err = -EINVAL;
-	if (match->type == type &&
+
+	spin_lock(&po->bind_lock);
+	if (po->running &&
+	    match->type == type &&
 	    match->prot_hook.type == po->prot_hook.type &&
 	    match->prot_hook.dev == po->prot_hook.dev) {
 		err = -ENOSPC;
@@ -1761,6 +1760,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 			err = 0;
 		}
 	}
+	spin_unlock(&po->bind_lock);
+
+	if (err && !refcount_read(&match->sk_ref)) {
+		list_del(&match->list);
+		kfree(match);
+	}
+
 out:
 	if (err && rollover) {
 		kfree(rollover);
-- 
cgit 


From ec9dd352d591f0c90402ec67a317c1ed4fb2e638 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 18 Sep 2017 16:38:36 -0700
Subject: bpf: one perf event close won't free bpf program attached by another
 perf event

This patch fixes a bug exhibited by the following scenario:
  1. fd1 = perf_event_open with attr.config = ID1
  2. attach bpf program prog1 to fd1
  3. fd2 = perf_event_open with attr.config = ID1
     <this will be successful>
  4. user program closes fd2 and prog1 is detached from the tracepoint.
  5. user program with fd1 does not work properly as tracepoint
     no output any more.

The issue happens at step 4. Multiple perf_event_open can be called
successfully, but only one bpf prog pointer in the tp_event. In the
current logic, any fd release for the same tp_event will free
the tp_event->prog.

The fix is to free tp_event->prog only when the closing fd
corresponds to the one which registered the program.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/trace_events.h | 1 +
 kernel/events/core.c         | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 7f11050746ae..2e0f22298fe9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -272,6 +272,7 @@ struct trace_event_call {
 	int				perf_refcount;
 	struct hlist_head __percpu	*perf_events;
 	struct bpf_prog			*prog;
+	struct perf_event		*bpf_prog_owner;
 
 	int	(*perf_perm)(struct trace_event_call *,
 			     struct perf_event *);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3e691b75b2db..6bc21e202ae4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8171,6 +8171,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 		}
 	}
 	event->tp_event->prog = prog;
+	event->tp_event->bpf_prog_owner = event;
 
 	return 0;
 }
@@ -8185,7 +8186,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
 		return;
 
 	prog = event->tp_event->prog;
-	if (prog) {
+	if (prog && event->tp_event->bpf_prog_owner == event) {
 		event->tp_event->prog = NULL;
 		bpf_prog_put(prog);
 	}
-- 
cgit 


From 1fa089ec6d68849bfe60abd141375dc9b21b0ee8 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Wed, 20 Sep 2017 16:46:49 -0500
Subject: [SMB3] Update session and share information displayed for debugging
 SMB2/SMB3

We were not displaying some key fields (session status and capabilities and
whether guest authenticated) for SMB2/SMB3 session in /proc/fs/cifs/DebugData.

This is needed for real world triage of problems with the (now much more
common) SMB3 mounts.

Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifs_debug.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 9727e1dcacd5..cbb9534b89b4 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -160,8 +160,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 			if ((ses->serverDomain == NULL) ||
 				(ses->serverOS == NULL) ||
 				(ses->serverNOS == NULL)) {
-				seq_printf(m, "\n%d) entry for %s not fully "
-					   "displayed\n\t", i, ses->serverName);
+				seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d\t",
+					i, ses->serverName, ses->ses_count,
+					ses->capabilities, ses->status);
+				if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
+					seq_printf(m, "Guest\t");
+				else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
+					seq_printf(m, "Anonymous\t");
 			} else {
 				seq_printf(m,
 				    "\n%d) Name: %s  Domain: %s Uses: %d OS:"
-- 
cgit 


From b22666febf6fc67776d49782057fe4dd06f41552 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Wed, 20 Sep 2017 18:10:17 -0400
Subject: drm/amdkfd: Fix incorrect destroy_mqd parameter

When uninitializing a kernel queue.

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 0649dd43e780..a4ca1133482c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -184,7 +184,7 @@ static void uninitialize(struct kernel_queue *kq)
 	if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
 		kq->mqd->destroy_mqd(kq->mqd,
 					kq->queue->mqd,
-					false,
+					KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
 					QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
 					kq->queue->pipe,
 					kq->queue->queue);
-- 
cgit 


From cb1d9967461cdf3b6aac6317c8d954a14f842571 Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Wed, 20 Sep 2017 18:10:21 -0400
Subject: drm/amdkfd: Fix kernel-queue wrapping bugs

Avoid intermediate negative numbers when doing calculations with a mix
of signed and unsigned variables where implicit conversions can lead
to unexpected results.

When kernel queue buffer wraps around to 0, we need to check that rptr
won't be overwritten by the new packet.

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index a4ca1133482c..ed71ad40e8f7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -210,6 +210,11 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
 	uint32_t wptr, rptr;
 	unsigned int *queue_address;
 
+	/* When rptr == wptr, the buffer is empty.
+	 * When rptr == wptr + 1, the buffer is full.
+	 * It is always rptr that advances to the position of wptr, rather than
+	 * the opposite. So we can only use up to queue_size_dwords - 1 dwords.
+	 */
 	rptr = *kq->rptr_kernel;
 	wptr = *kq->wptr_kernel;
 	queue_address = (unsigned int *)kq->pq_kernel_addr;
@@ -219,11 +224,10 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
 	pr_debug("wptr: %d\n", wptr);
 	pr_debug("queue_address 0x%p\n", queue_address);
 
-	available_size = (rptr - 1 - wptr + queue_size_dwords) %
+	available_size = (rptr + queue_size_dwords - 1 - wptr) %
 							queue_size_dwords;
 
-	if (packet_size_in_dwords >= queue_size_dwords ||
-			packet_size_in_dwords >= available_size) {
+	if (packet_size_in_dwords > available_size) {
 		/*
 		 * make sure calling functions know
 		 * acquire_packet_buffer() failed
@@ -233,6 +237,14 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
 	}
 
 	if (wptr + packet_size_in_dwords >= queue_size_dwords) {
+		/* make sure after rolling back to position 0, there is
+		 * still enough space.
+		 */
+		if (packet_size_in_dwords >= rptr) {
+			*buffer_ptr = NULL;
+			return -ENOMEM;
+		}
+		/* fill nops, roll back and start at position 0 */
 		while (wptr > 0) {
 			queue_address[wptr] = kq->nop_packet;
 			wptr = (wptr + 1) % queue_size_dwords;
-- 
cgit 


From c986169fdec992c464c84d0a3abdbfc846ed3df9 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Wed, 20 Sep 2017 18:10:22 -0400
Subject: drm/amdkfd: Print event limit messages only once per process

To avoid spamming the log.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 5 ++++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 5979158c3f7b..944abfad39c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -292,7 +292,10 @@ static int create_signal_event(struct file *devkfd,
 				struct kfd_event *ev)
 {
 	if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
-		pr_warn("Signal event wasn't created because limit was reached\n");
+		if (!p->signal_event_limit_reached) {
+			pr_warn("Signal event wasn't created because limit was reached\n");
+			p->signal_event_limit_reached = true;
+		}
 		return -ENOMEM;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index b397ec726400..b87e96cee5fa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -521,6 +521,7 @@ struct kfd_process {
 	struct list_head signal_event_pages;
 	u32 next_nonsignal_event_id;
 	size_t signal_event_count;
+	bool signal_event_limit_reached;
 };
 
 /**
-- 
cgit 


From c2a64bb9fcd31c39feddf30748b4ee8d82e53c6a Mon Sep 17 00:00:00 2001
From: Meng Xu <mengxu.gatech@gmail.com>
Date: Tue, 19 Sep 2017 13:19:13 -0400
Subject: net: compat: assert the size of cmsg copied in is as expected

The actual length of cmsg fetched in during the second loop
(i.e., kcmsg - kcmsg_base) could be different from what we
get from the first loop (i.e., kcmlen).

The main reason is that the two get_user() calls in the two
loops (i.e., get_user(ucmlen, &ucmsg->cmsg_len) and
__get_user(ucmlen, &ucmsg->cmsg_len)) could cause ucmlen
to have different values even they fetch from the same userspace
address, as user can race to change the memory content in
&ucmsg->cmsg_len across fetches.

Although in the second loop, the sanity check
if ((char *)kcmsg_base + kcmlen - (char *)kcmsg < CMSG_ALIGN(tmp))
is inplace, it only ensures that the cmsg fetched in during the
second loop does not exceed the length of kcmlen, but not
necessarily equal to kcmlen. But indicated by the assignment
kmsg->msg_controllen = kcmlen, we should enforce that.

This patch adds this additional sanity check and ensures that
what is recorded in kmsg->msg_controllen is the actual cmsg length.

Signed-off-by: Meng Xu <mengxu.gatech@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/compat.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/compat.c b/net/compat.c
index 6ded6c821d7a..22381719718c 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -185,6 +185,13 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
 		ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
 	}
 
+	/*
+	 * check the length of messages copied in is the same as the
+	 * what we get from the first loop
+	 */
+	if ((char *)kcmsg - (char *)kcmsg_base != kcmlen)
+		goto Einval;
+
 	/* Ok, looks like we made it.  Hook it up and return success. */
 	kmsg->msg_control = kcmsg_base;
 	kmsg->msg_controllen = kcmlen;
-- 
cgit 


From 92dd5452c1be873a1193561f4f691763103d22ac Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Tue, 19 Sep 2017 18:45:56 +0100
Subject: net: change skb->mac_header when Generic XDP calls adjust_head

Since XDP's view of the packet includes the MAC header, moving the start-
 of-packet with bpf_xdp_adjust_head needs to also update the offset of the
 MAC header (which is relative to skb->head, not to the skb->data that was
 changed).
Without this, tcpdump sees packets starting from the old MAC header rather
 than the new one, at least in my tests on the loopback device.

Fixes: b5cdae3291f7 ("net: Generic XDP")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index fb766d906148..9a2254f9802f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3892,6 +3892,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 		__skb_pull(skb, off);
 	else if (off < 0)
 		__skb_push(skb, -off);
+	skb->mac_header += off;
 
 	switch (act) {
 	case XDP_REDIRECT:
-- 
cgit 


From 5e62d98c4bbc243f3dca18e73a754b629839fc5c Mon Sep 17 00:00:00 2001
From: Troy Kisky <troy.kisky@boundarydevices.com>
Date: Tue, 19 Sep 2017 17:33:07 -0700
Subject: net: fec: only check queue 0 if RXF_0/TXF_0 interrupt is set

Before queue 0 was always checked if any queue caused an interrupt.
It is better to just mark queue 0 if queue 0 has caused an interrupt.

Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
Acked-by: Fugang Duan <Fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 56f56d6ada9c..464055fb33d5 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1559,14 +1559,14 @@ fec_enet_collect_events(struct fec_enet_private *fep, uint int_events)
 	if (int_events == 0)
 		return false;
 
-	if (int_events & FEC_ENET_RXF)
+	if (int_events & FEC_ENET_RXF_0)
 		fep->work_rx |= (1 << 2);
 	if (int_events & FEC_ENET_RXF_1)
 		fep->work_rx |= (1 << 0);
 	if (int_events & FEC_ENET_RXF_2)
 		fep->work_rx |= (1 << 1);
 
-	if (int_events & FEC_ENET_TXF)
+	if (int_events & FEC_ENET_TXF_0)
 		fep->work_tx |= (1 << 2);
 	if (int_events & FEC_ENET_TXF_1)
 		fep->work_tx |= (1 << 0);
-- 
cgit 


From 7063c163cd4a20184b3bbada503dab8f254a8c56 Mon Sep 17 00:00:00 2001
From: Troy Kisky <troy.kisky@boundarydevices.com>
Date: Tue, 19 Sep 2017 17:33:08 -0700
Subject: net: fec: remove unused interrupt FEC_ENET_TS_TIMER

FEC_ENET_TS_TIMER is not checked in the interrupt routine
so there is no need to enable it.

Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
Acked-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 38c7b21e5d63..ede1876a9a19 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -374,8 +374,8 @@ struct bufdesc_ex {
 #define FEC_ENET_TS_AVAIL       ((uint)0x00010000)
 #define FEC_ENET_TS_TIMER       ((uint)0x00008000)
 
-#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII | FEC_ENET_TS_TIMER)
-#define FEC_NAPI_IMASK	(FEC_ENET_MII | FEC_ENET_TS_TIMER)
+#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)
+#define FEC_NAPI_IMASK	FEC_ENET_MII
 #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))
 
 /* ENET interrupt coalescing macro define */
-- 
cgit 


From e24ee2780a1d6786b94139ebf95b44c9ae62bf13 Mon Sep 17 00:00:00 2001
From: Troy Kisky <troy.kisky@boundarydevices.com>
Date: Tue, 19 Sep 2017 17:33:09 -0700
Subject: net: fec: return IRQ_HANDLED if fec_ptp_check_pps_event handled it

fec_ptp_check_pps_event will return 1 if FEC_T_TF_MASK caused
an interrupt. Don't return IRQ_NONE in this case.

Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
Acked-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 464055fb33d5..3dc2d771a222 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1604,8 +1604,8 @@ fec_enet_interrupt(int irq, void *dev_id)
 	}
 
 	if (fep->ptp_clock)
-		fec_ptp_check_pps_event(fep);
-
+		if (fec_ptp_check_pps_event(fep))
+			ret = IRQ_HANDLED;
 	return ret;
 }
 
-- 
cgit 


From 02388bf87f72e1d47174cd8f81c34443920eb5a0 Mon Sep 17 00:00:00 2001
From: Meng Xu <mengxu.gatech@gmail.com>
Date: Tue, 19 Sep 2017 21:49:55 -0400
Subject: isdn/i4l: fetch the ppp_write buffer in one shot

In isdn_ppp_write(), the header (i.e., protobuf) of the buffer is
fetched twice from userspace. The first fetch is used to peek at the
protocol of the message and reset the huptimer if necessary; while the
second fetch copies in the whole buffer. However, given that buf resides
in userspace memory, a user process can race to change its memory content
across fetches. By doing so, we can either avoid resetting the huptimer
for any type of packets (by first setting proto to PPP_LCP and later
change to the actual type) or force resetting the huptimer for LCP
packets.

This patch changes this double-fetch behavior into two single fetches
decided by condition (lp->isdn_device < 0 || lp->isdn_channel <0).
A more detailed discussion can be found at
https://marc.info/?l=linux-kernel&m=150586376926123&w=2

Signed-off-by: Meng Xu <mengxu.gatech@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_ppp.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 6c44609fd83a..cd2b3c69771a 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -825,7 +825,6 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count)
 	isdn_net_local *lp;
 	struct ippp_struct *is;
 	int proto;
-	unsigned char protobuf[4];
 
 	is = file->private_data;
 
@@ -839,24 +838,28 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count)
 	if (!lp)
 		printk(KERN_DEBUG "isdn_ppp_write: lp == NULL\n");
 	else {
-		/*
-		 * Don't reset huptimer for
-		 * LCP packets. (Echo requests).
-		 */
-		if (copy_from_user(protobuf, buf, 4))
-			return -EFAULT;
-		proto = PPP_PROTOCOL(protobuf);
-		if (proto != PPP_LCP)
-			lp->huptimer = 0;
+		if (lp->isdn_device < 0 || lp->isdn_channel < 0) {
+			unsigned char protobuf[4];
+			/*
+			 * Don't reset huptimer for
+			 * LCP packets. (Echo requests).
+			 */
+			if (copy_from_user(protobuf, buf, 4))
+				return -EFAULT;
+
+			proto = PPP_PROTOCOL(protobuf);
+			if (proto != PPP_LCP)
+				lp->huptimer = 0;
 
-		if (lp->isdn_device < 0 || lp->isdn_channel < 0)
 			return 0;
+		}
 
 		if ((dev->drv[lp->isdn_device]->flags & DRV_FLAG_RUNNING) &&
 		    lp->dialstate == 0 &&
 		    (lp->flags & ISDN_NET_CONNECTED)) {
 			unsigned short hl;
 			struct sk_buff *skb;
+			unsigned char *cpy_buf;
 			/*
 			 * we need to reserve enough space in front of
 			 * sk_buff. old call to dev_alloc_skb only reserved
@@ -869,11 +872,21 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count)
 				return count;
 			}
 			skb_reserve(skb, hl);
-			if (copy_from_user(skb_put(skb, count), buf, count))
+			cpy_buf = skb_put(skb, count);
+			if (copy_from_user(cpy_buf, buf, count))
 			{
 				kfree_skb(skb);
 				return -EFAULT;
 			}
+
+			/*
+			 * Don't reset huptimer for
+			 * LCP packets. (Echo requests).
+			 */
+			proto = PPP_PROTOCOL(cpy_buf);
+			if (proto != PPP_LCP)
+				lp->huptimer = 0;
+
 			if (is->debug & 0x40) {
 				printk(KERN_DEBUG "ppp xmit: len %d\n", (int) skb->len);
 				isdn_ppp_frame_log("xmit", skb->data, skb->len, 32, is->unit, lp->ppp_slot);
-- 
cgit 


From e92a0843795779678397ac0790a76de20f79cc13 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:50 +0800
Subject: net: hns3: Cleanup for ROCE capability flag in ae_dev

This patch add the ROCE supported flag in the driver_data
field of pci_device_id, delete roce_pci_tbl and change
HNAE_DEV_SUPPORT_ROCE_B to HNAE3_DEV_SUPPORT_ROCE_B.
This cleanup is done in order to support adding capability
in pci_device_id and to fix initialization failure when
cmd is not supported.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |  5 ++++-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 25 ++++------------------
 .../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 16 +++++++++-----
 3 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index b2f28ae81273..0f7b61a92f44 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -49,7 +49,10 @@
 #define HNAE3_CLASS_NAME_SIZE 16
 
 #define HNAE3_DEV_INITED_B			0x0
-#define HNAE_DEV_SUPPORT_ROCE_B			0x1
+#define HNAE3_DEV_SUPPORT_ROCE_B		0x1
+
+#define hnae3_dev_roce_supported(hdev) \
+	hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)
 
 #define ring_ptr_move_fw(ring, p) \
 	((ring)->p = ((ring)->p + 1) % (ring)->desc_num)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 74008ef23169..6953d19c6475 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -46,17 +46,7 @@ static const struct pci_device_id ae_algo_pci_tbl[] = {
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
-	/* Required last entry */
-	{0, }
-};
-
-static const struct pci_device_id roce_pci_tbl[] = {
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
-	/* Required last entry */
+	/* required last entry */
 	{0, }
 };
 
@@ -894,7 +884,7 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
 	hdev->num_tqps = __le16_to_cpu(req->tqp_num);
 	hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S;
 
-	if (hnae_get_bit(hdev->ae_dev->flag, HNAE_DEV_SUPPORT_ROCE_B)) {
+	if (hnae3_dev_roce_supported(hdev)) {
 		hdev->num_roce_msix =
 		hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
 			       HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
@@ -3932,8 +3922,7 @@ static int hclge_init_client_instance(struct hnae3_client *client,
 				goto err;
 
 			if (hdev->roce_client &&
-			    hnae_get_bit(hdev->ae_dev->flag,
-					 HNAE_DEV_SUPPORT_ROCE_B)) {
+			    hnae3_dev_roce_supported(hdev)) {
 				struct hnae3_client *rc = hdev->roce_client;
 
 				ret = hclge_init_roce_base_info(vport);
@@ -3956,8 +3945,7 @@ static int hclge_init_client_instance(struct hnae3_client *client,
 
 			break;
 		case HNAE3_CLIENT_ROCE:
-			if (hnae_get_bit(hdev->ae_dev->flag,
-					 HNAE_DEV_SUPPORT_ROCE_B)) {
+			if (hnae3_dev_roce_supported(hdev)) {
 				hdev->roce_client = client;
 				vport->roce.client = client;
 			}
@@ -4069,7 +4057,6 @@ static void hclge_pci_uninit(struct hclge_dev *hdev)
 static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 {
 	struct pci_dev *pdev = ae_dev->pdev;
-	const struct pci_device_id *id;
 	struct hclge_dev *hdev;
 	int ret;
 
@@ -4084,10 +4071,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 	hdev->ae_dev = ae_dev;
 	ae_dev->priv = hdev;
 
-	id = pci_match_id(roce_pci_tbl, ae_dev->pdev);
-	if (id)
-		hnae_set_bit(ae_dev->flag, HNAE_DEV_SUPPORT_ROCE_B, 1);
-
 	ret = hclge_pci_init(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "PCI init failed\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 4d68d6ea5143..94d8bb5b92f0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -41,11 +41,16 @@ static struct hnae3_client client;
 static const struct pci_device_id hns3_pci_tbl[] = {
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA),
+	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC),
+	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA),
+	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC),
+	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC),
+	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
 	/* required last entry */
 	{0, }
 };
@@ -1348,6 +1353,7 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	ae_dev->pdev = pdev;
+	ae_dev->flag = ent->driver_data;
 	ae_dev->dev_type = HNAE3_DEV_KNIC;
 	pci_set_drvdata(pdev, ae_dev);
 
-- 
cgit 


From 2daf4a6536f3109ed0ed758cec14743e0e5c20ea Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:51 +0800
Subject: net: hns3: Fix initialization when cmd is not supported

When ae_dev doesn't support DCB, rx_priv_wl_config,
common_thrd_config and tm_qs_bp_cfg can't be called, otherwise
cmd return fail, which causes the hclge module initialization
process to fail.
This patch fix it by adding a DCB capability flag to check if
the ae_dev support DCB.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |  7 ++++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 26 +++++++++++++---------
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c  |  4 ++++
 .../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 10 ++++-----
 4 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 0f7b61a92f44..ad685f5aa6d1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -50,10 +50,17 @@
 
 #define HNAE3_DEV_INITED_B			0x0
 #define HNAE3_DEV_SUPPORT_ROCE_B		0x1
+#define HNAE3_DEV_SUPPORT_DCB_B			0x2
+
+#define HNAE3_DEV_SUPPORT_ROCE_DCB_BITS (BIT(HNAE3_DEV_SUPPORT_DCB_B) |\
+		BIT(HNAE3_DEV_SUPPORT_ROCE_B))
 
 #define hnae3_dev_roce_supported(hdev) \
 	hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)
 
+#define hnae3_dev_dcb_supported(hdev) \
+	hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_DCB_B)
+
 #define ring_ptr_move_fw(ring, p) \
 	((ring)->p = ((ring)->p + 1) % (ring)->desc_num)
 #define ring_ptr_move_bw(ring, p) \
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 6953d19c6475..903f43a8c2a1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1772,18 +1772,22 @@ int hclge_buffer_alloc(struct hclge_dev *hdev)
 		return ret;
 	}
 
-	ret = hclge_rx_priv_wl_config(hdev);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"could not configure rx private waterline %d\n", ret);
-		return ret;
-	}
+	if (hnae3_dev_dcb_supported(hdev)) {
+		ret = hclge_rx_priv_wl_config(hdev);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"could not configure rx private waterline %d\n",
+				ret);
+			return ret;
+		}
 
-	ret = hclge_common_thrd_config(hdev);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"could not configure common threshold %d\n", ret);
-		return ret;
+		ret = hclge_common_thrd_config(hdev);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"could not configure common threshold %d\n",
+				ret);
+			return ret;
+		}
 	}
 
 	ret = hclge_common_wl_config(hdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 1c577d268f00..c91dbf19c4b1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -976,6 +976,10 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev)
 	if (ret)
 		return ret;
 
+	/* Only DCB-supported dev supports qset back pressure setting */
+	if (!hnae3_dev_dcb_supported(hdev))
+		return 0;
+
 	for (i = 0; i < hdev->tm_info.num_tc; i++) {
 		ret = hclge_tm_qs_bp_cfg(hdev, i);
 		if (ret)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 94d8bb5b92f0..35369e1c8036 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -42,15 +42,15 @@ static const struct pci_device_id hns3_pci_tbl[] = {
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA),
-	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	 HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC),
-	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	 HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA),
-	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	 HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC),
-	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	 HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC),
-	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	 HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
 	/* required last entry */
 	{0, }
 };
-- 
cgit 


From d221df4e0faae2b9cc8ad78f3e5e777461b6b542 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:52 +0800
Subject: net: hns3: Fix for DEFAULT_DV when dev doesn't support DCB

When ae_dev doesn't support DCB, DEFAULT_DV must be set to
a lower value, otherwise the buffer allocation process will
fail.
This patch fix it by setting it to 30K bytes.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  | 1 +
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index c2b613b40509..30e2ad5ac0da 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -688,6 +688,7 @@ struct hclge_reset_tqp_queue {
 #define HCLGE_DEFAULT_TX_BUF		0x4000	 /* 16k  bytes */
 #define HCLGE_TOTAL_PKT_BUF		0x108000 /* 1.03125M bytes */
 #define HCLGE_DEFAULT_DV		0xA000	 /* 40k byte */
+#define HCLGE_DEFAULT_NON_DCB_DV	0x7800	/* 30K byte */
 
 #define HCLGE_TYPE_CRQ			0
 #define HCLGE_TYPE_CSQ			1
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 903f43a8c2a1..796370adf99c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1444,7 +1444,11 @@ static bool  hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
 	tc_num = hclge_get_tc_num(hdev);
 	pfc_enable_num = hclge_get_pfc_enalbe_num(hdev);
 
-	shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_DV;
+	if (hnae3_dev_dcb_supported(hdev))
+		shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_DV;
+	else
+		shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_NON_DCB_DV;
+
 	shared_buf_tc = pfc_enable_num * hdev->mps +
 			(tc_num - pfc_enable_num) * hdev->mps / 2 +
 			hdev->mps;
-- 
cgit 


From bb1fe9ea6371e075d3d1448cd3ff6441d31307be Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:53 +0800
Subject: net: hns3: Fix for not setting rx private buffer size to zero

When rx private buffer is disabled, there may be some case that
the rx private buffer is not set to zero, which may cause buffer
allocation process to fail.
This patch fixes this problem by setting priv->enable to 0 and
priv->buf_size to zero when rx private buffer is disabled.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 796370adf99c..a7d8fb1e15f6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1504,6 +1504,11 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
 				priv->wl.high = 2 * hdev->mps;
 				priv->buf_size = priv->wl.high;
 			}
+		} else {
+			priv->enable = 0;
+			priv->wl.low = 0;
+			priv->wl.high = 0;
+			priv->buf_size = 0;
 		}
 	}
 
@@ -1516,8 +1521,15 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
 		priv = &hdev->priv_buf[i];
 
-		if (hdev->hw_tc_map & BIT(i))
-			priv->enable = 1;
+		priv->enable = 0;
+		priv->wl.low = 0;
+		priv->wl.high = 0;
+		priv->buf_size = 0;
+
+		if (!(hdev->hw_tc_map & BIT(i)))
+			continue;
+
+		priv->enable = 1;
 
 		if (hdev->tm_info.hw_pfc_map & BIT(i)) {
 			priv->wl.low = 128;
-- 
cgit 


From b8c8bf47da5576657370798da6f18a8cb0245d5b Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:54 +0800
Subject: net: hns3: Fix for rx_priv_buf_alloc not setting rx shared buffer

rx_priv_buf_alloc is used to tell hardware how much buffer is
used for rx direction, right now only the private buffer is
assigned.
For ae_dev that doesn't support DCB, private rx buffer is assigned
to zero, only shared rx buffer is used. So not setting the shared
rx buffer cause dropping of packet in SSU.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  | 3 ++-
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 30e2ad5ac0da..758cf3948131 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -270,7 +270,8 @@ struct hclge_tx_buff_alloc {
 
 struct hclge_rx_priv_buff {
 	__le16 buf_num[HCLGE_TC_NUM];
-	u8 rsv[8];
+	__le16 shared_buf;
+	u8 rsv[6];
 };
 
 struct hclge_query_version {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a7d8fb1e15f6..e313552bb23d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1622,6 +1622,10 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
 			cpu_to_le16(true << HCLGE_TC0_PRI_BUF_EN_B);
 	}
 
+	req->shared_buf =
+		cpu_to_le16((hdev->s_buf.buf_size >> HCLGE_BUF_UNIT_S) |
+			    (1 << HCLGE_TC0_PRI_BUF_EN_B));
+
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-- 
cgit 


From d602a52540c9b92e0dd152cfe1d0848c23f08894 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:55 +0800
Subject: net: hns3: Fix for rx priv buf allocation when DCB is not supported

When hdev doesn't support DCB, rx private buffer is not allocated,
otherwise there is not enough buffer for rx shared buffer, causing
buffer allocation process to fail.
This patch fixes by checking the dcb capability in
hclge_rx_buffer_calc.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index e313552bb23d..c660f0caf709 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1489,6 +1489,16 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
 	struct hclge_priv_buf *priv;
 	int i;
 
+	/* When DCB is not supported, rx private
+	 * buffer is not allocated.
+	 */
+	if (!hnae3_dev_dcb_supported(hdev)) {
+		if (!hclge_is_rx_buf_ok(hdev, rx_all))
+			return -ENOMEM;
+
+		return 0;
+	}
+
 	/* step 1, try to alloc private buffer for all enabled tc */
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
 		priv = &hdev->priv_buf[i];
-- 
cgit 


From c4726338d928c824f56c27734d837b8244132705 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:56 +0800
Subject: net: hns3: Fix typo error for feild in hclge_tm

This patch fixes a typo error for feild, which should be field.

Fixes: 848440544b41f ("net: hns3: Add support of TX Scheduler & Shaper to HNS3 driver")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c    | 20 ++++++++++----------
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h    |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index c91dbf19c4b1..fe659f752237 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -280,11 +280,11 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
 
 	shap_cfg_cmd->pg_id = pg_id;
 
-	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b);
-	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u);
-	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s);
-	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b);
-	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s);
+	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b);
+	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u);
+	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s);
+	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b);
+	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s);
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
@@ -307,11 +307,11 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev,
 
 	shap_cfg_cmd->pri_id = pri_id;
 
-	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b);
-	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u);
-	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s);
-	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b);
-	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s);
+	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b);
+	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u);
+	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s);
+	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b);
+	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s);
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 7e67337dfaf2..85158b0d73fe 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -94,10 +94,10 @@ struct hclge_bp_to_qs_map_cmd {
 	u32 rsvd1;
 };
 
-#define hclge_tm_set_feild(dest, string, val) \
+#define hclge_tm_set_field(dest, string, val) \
 			hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \
 				       (HCLGE_TM_SHAP_##string##_LSH), val)
-#define hclge_tm_get_feild(src, string) \
+#define hclge_tm_get_field(src, string) \
 			hnae_get_field((src), (HCLGE_TM_SHAP_##string##_MSK), \
 				       (HCLGE_TM_SHAP_##string##_LSH))
 
-- 
cgit 


From 68ece54efd417d415462adbaa2700cba50de3ff6 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:57 +0800
Subject: net: hns3: Fix for setting rss_size incorrectly

rss_size is 1, 2, 4, 8, 16, 32, 64, 128, but acutal tc queue
size can be any u16 less than 128. If tc queue size is 5, we
set the rss_size to 8, indirection table will be used to limit
the size of actual queue size.
It may cause dropping of receiving packet in hardware if
rss_size is not set correctly.
For now, each TC has the same rss size.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 76 ++++++++++------------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  1 +
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c  |  1 +
 3 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c660f0caf709..e0685e630afe 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2606,6 +2606,7 @@ static int hclge_rss_init_hw(struct hclge_dev *hdev)
 	u16 tc_valid[HCLGE_MAX_TC_NUM];
 	u16 tc_size[HCLGE_MAX_TC_NUM];
 	u32 *rss_indir = NULL;
+	u16 rss_size = 0, roundup_size;
 	const u8 *key;
 	int i, ret, j;
 
@@ -2620,7 +2621,13 @@ static int hclge_rss_init_hw(struct hclge_dev *hdev)
 	for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
 		for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) {
 			vport[j].rss_indirection_tbl[i] =
-				i % hdev->rss_size_max;
+				i % vport[j].alloc_rss_size;
+
+			/* vport 0 is for PF */
+			if (j != 0)
+				continue;
+
+			rss_size = vport[j].alloc_rss_size;
 			rss_indir[i] = vport[j].rss_indirection_tbl[i];
 		}
 	}
@@ -2637,42 +2644,31 @@ static int hclge_rss_init_hw(struct hclge_dev *hdev)
 	if (ret)
 		goto err;
 
+	/* Each TC have the same queue size, and tc_size set to hardware is
+	 * the log2 of roundup power of two of rss_size, the acutal queue
+	 * size is limited by indirection table.
+	 */
+	if (rss_size > HCLGE_RSS_TC_SIZE_7 || rss_size == 0) {
+		dev_err(&hdev->pdev->dev,
+			"Configure rss tc size failed, invalid TC_SIZE = %d\n",
+			rss_size);
+		return -EINVAL;
+	}
+
+	roundup_size = roundup_pow_of_two(rss_size);
+	roundup_size = ilog2(roundup_size);
+
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
-		if (hdev->hw_tc_map & BIT(i))
-			tc_valid[i] = 1;
-		else
-			tc_valid[i] = 0;
+		tc_valid[i] = 0;
 
-		switch (hdev->rss_size_max) {
-		case HCLGE_RSS_TC_SIZE_0:
-			tc_size[i] = 0;
-			break;
-		case HCLGE_RSS_TC_SIZE_1:
-			tc_size[i] = 1;
-			break;
-		case HCLGE_RSS_TC_SIZE_2:
-			tc_size[i] = 2;
-			break;
-		case HCLGE_RSS_TC_SIZE_3:
-			tc_size[i] = 3;
-			break;
-		case HCLGE_RSS_TC_SIZE_4:
-			tc_size[i] = 4;
-			break;
-		case HCLGE_RSS_TC_SIZE_5:
-			tc_size[i] = 5;
-			break;
-		case HCLGE_RSS_TC_SIZE_6:
-			tc_size[i] = 6;
-			break;
-		case HCLGE_RSS_TC_SIZE_7:
-			tc_size[i] = 7;
-			break;
-		default:
-			break;
-		}
-		tc_offset[i] = hdev->rss_size_max * i;
+		if (!(hdev->hw_tc_map & BIT(i)))
+			continue;
+
+		tc_valid[i] = 1;
+		tc_size[i] = roundup_size;
+		tc_offset[i] = rss_size * i;
 	}
+
 	ret = hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
 
 err:
@@ -4167,12 +4163,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
-	ret = hclge_rss_init_hw(hdev);
-	if (ret) {
-		dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
-		return  ret;
-	}
-
 	ret = hclge_init_vlan_config(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret);
@@ -4185,6 +4175,12 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
+	ret = hclge_rss_init_hw(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
+		return ret;
+	}
+
 	setup_timer(&hdev->service_timer, hclge_service_timer,
 		    (unsigned long)hdev);
 	INIT_WORK(&hdev->service_task, hclge_service_task);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index edb10ad075eb..7f8dd129c10d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -477,6 +477,7 @@ struct hclge_vport {
 	u8  rss_hash_key[HCLGE_RSS_KEY_SIZE]; /* User configured hash keys */
 	/* User configured lookup table entries */
 	u8  rss_indirection_tbl[HCLGE_RSS_IND_TBL_SIZE];
+	u16 alloc_rss_size;
 
 	u16 qs_offset;
 	u16 bw_limit;		/* VSI BW Limit (0 = disabled) */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index fe659f752237..b7ba7aa66620 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -397,6 +397,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
 			kinfo->num_tqps / kinfo->num_tc);
 	vport->qs_offset = hdev->tm_info.num_tc * vport->vport_id;
 	vport->dwrr = 100;  /* 100 percent as init */
+	vport->alloc_rss_size = kinfo->rss_size;
 
 	for (i = 0; i < kinfo->num_tc; i++) {
 		if (hdev->hw_tc_map & BIT(i)) {
-- 
cgit 


From c5795c5308af81568d1573598716091120c85a38 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:58 +0800
Subject: net: hns3: Fix for pri to tc mapping in TM

Current mapping between pri and tc is one to one,
so user can't map multi priorities to the same tc.
This patch changes the mapping to many to one.

Fixes: 848440544b41f ("net: hns3: Add support of TX Scheduler & Shaper to HNS3 driver")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h             |  3 ++-
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c   | 16 +++++++++-------
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index ad685f5aa6d1..1a01cadfe5f3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -376,12 +376,12 @@ struct hnae3_ae_algo {
 struct hnae3_tc_info {
 	u16	tqp_offset;	/* TQP offset from base TQP */
 	u16	tqp_count;	/* Total TQPs */
-	u8	up;		/* user priority */
 	u8	tc;		/* TC index */
 	bool	enable;		/* If this TC is enable or not */
 };
 
 #define HNAE3_MAX_TC		8
+#define HNAE3_MAX_USER_PRIO	8
 struct hnae3_knic_private_info {
 	struct net_device *netdev; /* Set by KNIC client when init instance */
 	u16 rss_size;		   /* Allocated RSS queues */
@@ -389,6 +389,7 @@ struct hnae3_knic_private_info {
 	u16 num_desc;
 
 	u8 num_tc;		   /* Total number of enabled TCs */
+	u8 prio_tc[HNAE3_MAX_USER_PRIO];  /* TC indexed by prio */
 	struct hnae3_tc_info tc_info[HNAE3_MAX_TC]; /* Idx of array is HW TC */
 
 	u16 num_tqps;		  /* total number of TQPs in this handle */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 7f8dd129c10d..9fcfd9395424 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -176,7 +176,6 @@ struct hclge_pg_info {
 struct hclge_tc_info {
 	u8 tc_id;
 	u8 tc_sch_mode;		/* 0: sp; 1: dwrr */
-	u8 up;
 	u8 pgid;
 	u32 bw_limit;
 };
@@ -197,6 +196,7 @@ struct hclge_tm_info {
 	u8 num_tc;
 	u8 num_pg;      /* It must be 1 if vNET-Base schd */
 	u8 pg_dwrr[HCLGE_PG_NUM];
+	u8 prio_tc[HNAE3_MAX_USER_PRIO];
 	struct hclge_pg_info pg_info[HCLGE_PG_NUM];
 	struct hclge_tc_info tc_info[HNAE3_MAX_TC];
 	enum hclge_fc_mode fc_mode;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index b7ba7aa66620..73a75d7cc551 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -128,9 +128,7 @@ static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id)
 {
 	u8 tc;
 
-	for (tc = 0; tc < hdev->tm_info.num_tc; tc++)
-		if (hdev->tm_info.tc_info[tc].up == pri_id)
-			break;
+	tc = hdev->tm_info.prio_tc[pri_id];
 
 	if (tc >= hdev->tm_info.num_tc)
 		return -EINVAL;
@@ -158,7 +156,7 @@ static int hclge_up_to_tc_map(struct hclge_dev *hdev)
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PRI_TO_TC_MAPPING, false);
 
-	for (pri_id = 0; pri_id < hdev->tm_info.num_tc; pri_id++) {
+	for (pri_id = 0; pri_id < HNAE3_MAX_USER_PRIO; pri_id++) {
 		ret = hclge_fill_pri_array(hdev, pri, pri_id);
 		if (ret)
 			return ret;
@@ -405,16 +403,17 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
 			kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size;
 			kinfo->tc_info[i].tqp_count = kinfo->rss_size;
 			kinfo->tc_info[i].tc = i;
-			kinfo->tc_info[i].up = hdev->tm_info.tc_info[i].up;
 		} else {
 			/* Set to default queue if TC is disable */
 			kinfo->tc_info[i].enable = false;
 			kinfo->tc_info[i].tqp_offset = 0;
 			kinfo->tc_info[i].tqp_count = 1;
 			kinfo->tc_info[i].tc = 0;
-			kinfo->tc_info[i].up = 0;
 		}
 	}
+
+	memcpy(kinfo->prio_tc, hdev->tm_info.prio_tc,
+	       FIELD_SIZEOF(struct hnae3_knic_private_info, prio_tc));
 }
 
 static void hclge_tm_vport_info_update(struct hclge_dev *hdev)
@@ -436,12 +435,15 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
 	for (i = 0; i < hdev->tm_info.num_tc; i++) {
 		hdev->tm_info.tc_info[i].tc_id = i;
 		hdev->tm_info.tc_info[i].tc_sch_mode = HCLGE_SCH_MODE_DWRR;
-		hdev->tm_info.tc_info[i].up = i;
 		hdev->tm_info.tc_info[i].pgid = 0;
 		hdev->tm_info.tc_info[i].bw_limit =
 			hdev->tm_info.pg_info[0].bw_limit;
 	}
 
+	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++)
+		hdev->tm_info.prio_tc[i] =
+			(i >= hdev->tm_info.num_tc) ? 0 : i;
+
 	hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
 }
 
-- 
cgit 


From 4d61eda812041ef9c820d7f147884133fd3307bc Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 19 Sep 2017 16:27:39 +0100
Subject: CIFS: make arrays static const, reduces object code size

Don't populate the read-only arrays types[] on the stack, instead make
them both static const.  Makes the object code smaller by over 200 bytes:

Before:
   text	   data	    bss	    dec	    hex	filename
 111503	  37696	    448	 149647	  2488f	fs/cifs/file.o

After:
   text	   data	    bss	    dec	    hex	filename
 111140	  37856	    448	 149444	  247c4	fs/cifs/file.o

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/file.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0786f19d288f..822311919163 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1102,8 +1102,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 	struct cifs_tcon *tcon;
 	unsigned int num, max_num, max_buf;
 	LOCKING_ANDX_RANGE *buf, *cur;
-	int types[] = {LOCKING_ANDX_LARGE_FILES,
-		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
+	static const int types[] = {
+		LOCKING_ANDX_LARGE_FILES,
+		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
+	};
 	int i;
 
 	xid = get_xid();
@@ -1434,8 +1436,10 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 		  unsigned int xid)
 {
 	int rc = 0, stored_rc;
-	int types[] = {LOCKING_ANDX_LARGE_FILES,
-		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
+	static const int types[] = {
+		LOCKING_ANDX_LARGE_FILES,
+		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
+	};
 	unsigned int i;
 	unsigned int max_num, num, max_buf;
 	LOCKING_ANDX_RANGE *buf, *cur;
-- 
cgit 


From 94183331e815617246b1baa97e0916f358c794bb Mon Sep 17 00:00:00 2001
From: Shu Wang <shuwang@redhat.com>
Date: Thu, 7 Sep 2017 16:03:27 +0800
Subject: cifs: release cifs root_cred after exit_cifs

memory leak was found by kmemleak. exit_cifs_spnego
should be called before cifs module removed, or
cifs root_cred will not be released.

kmemleak report:
unreferenced object 0xffff880070a3ce40 (size 192):
  backtrace:
     kmemleak_alloc+0x4a/0xa0
     kmem_cache_alloc+0xc7/0x1d0
     prepare_kernel_cred+0x20/0x120
     init_cifs_spnego+0x2d/0x170 [cifs]
     0xffffffffc07801f3
     do_one_initcall+0x51/0x1b0
     do_init_module+0x60/0x1fd
     load_module+0x161e/0x1b60
     SYSC_finit_module+0xa9/0x100
     SyS_finit_module+0xe/0x10

Signed-off-by: Shu Wang <shuwang@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/cifsfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a864e6575309..8c8b75d33f31 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1449,7 +1449,7 @@ exit_cifs(void)
 	exit_cifs_idmap();
 #endif
 #ifdef CONFIG_CIFS_UPCALL
-	unregister_key_type(&cifs_spnego_key_type);
+	exit_cifs_spnego();
 #endif
 	cifs_destroy_request_bufs();
 	cifs_destroy_mids();
-- 
cgit 


From f5c4ba816315d3b813af16f5571f86c8d4e897bd Mon Sep 17 00:00:00 2001
From: Shu Wang <shuwang@redhat.com>
Date: Fri, 8 Sep 2017 18:48:33 +0800
Subject: cifs: release auth_key.response for reconnect.

There is a race that cause cifs reconnect in cifs_mount,
- cifs_mount
  - cifs_get_tcp_session
    - [ start thread cifs_demultiplex_thread
      - cifs_read_from_socket: -ECONNABORTED
        - DELAY_WORK smb2_reconnect_server ]
  - cifs_setup_session
  - [ smb2_reconnect_server ]

auth_key.response was allocated in cifs_setup_session, and
will release when the session destoried. So when session re-
connect, auth_key.response should be check and released.

Tested with my system:
CIFS VFS: Free previous auth_key.response = ffff8800320bbf80

A simple auth_key.response allocation call trace:
- cifs_setup_session
- SMB2_sess_setup
- SMB2_sess_auth_rawntlmssp_authenticate
- build_ntlmssp_auth_blob
- setup_ntlmv2_rsp

Signed-off-by: Shu Wang <shuwang@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
CC: Stable <stable@vger.kernel.org>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/connect.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8d38b22afb2b..0bfc2280436d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -4154,6 +4154,14 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
 	cifs_dbg(FYI, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d\n",
 		 server->sec_mode, server->capabilities, server->timeAdj);
 
+	if (ses->auth_key.response) {
+		cifs_dbg(VFS, "Free previous auth_key.response = %p\n",
+			 ses->auth_key.response);
+		kfree(ses->auth_key.response);
+		ses->auth_key.response = NULL;
+		ses->auth_key.len = 0;
+	}
+
 	if (server->ops->sess_setup)
 		rc = server->ops->sess_setup(xid, ses, nls_info);
 
-- 
cgit 


From 0603c96f3af50e2f9299fa410c224ab1d465e0f9 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Wed, 20 Sep 2017 19:57:18 -0500
Subject: SMB: Validate negotiate (to protect against downgrade) even if
 signing off

As long as signing is supported (ie not a guest user connection) and
connection is SMB3 or SMB3.02, then validate negotiate (protect
against man in the middle downgrade attacks).  We had been doing this
only when signing was required, not when signing was just enabled,
but this more closely matches recommended SMB3 behavior and is
better security.  Suggested by Metze.

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Jeremy Allison <jra@samba.org>
Acked-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/smb2pdu.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index d499ce265c3b..6f0e6343c15e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -656,15 +656,22 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
 
 	/*
 	 * validation ioctl must be signed, so no point sending this if we
-	 * can not sign it.  We could eventually change this to selectively
+	 * can not sign it (ie are not known user).  Even if signing is not
+	 * required (enabled but not negotiated), in those cases we selectively
 	 * sign just this, the first and only signed request on a connection.
-	 * This is good enough for now since a user who wants better security
-	 * would also enable signing on the mount. Having validation of
-	 * negotiate info for signed connections helps reduce attack vectors
+	 * Having validation of negotiate info  helps reduce attack vectors.
 	 */
-	if (tcon->ses->server->sign == false)
+	if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
 		return 0; /* validation requires signing */
 
+	if (tcon->ses->user_name == NULL) {
+		cifs_dbg(FYI, "Can't validate negotiate: null user mount\n");
+		return 0; /* validation requires signing */
+	}
+
+	if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
+		cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n");
+
 	vneg_inbuf.Capabilities =
 			cpu_to_le32(tcon->ses->server->vals->req_capabilities);
 	memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid,
-- 
cgit 


From a90bcb86ae700c12432446c4aa1819e7b8e172ec Mon Sep 17 00:00:00 2001
From: Petar Penkov <ppenkov@google.com>
Date: Tue, 29 Aug 2017 11:20:32 -0700
Subject: iov_iter: fix page_copy_sane for compound pages

Issue is that if the data crosses a page boundary inside a compound
page, this check will incorrectly trigger a WARN_ON.

To fix this, compute the order using the head of the compound page and
adjust the offset to be relative to that head.

Fixes: 72e809ed81ed ("iov_iter: sanity checks for copy to/from page
primitives")

Signed-off-by: Petar Penkov <ppenkov@google.com>
CC: Al Viro <viro@zeniv.linux.org.uk>
CC: Eric Dumazet <edumazet@google.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 lib/iov_iter.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 52c8dd6d8e82..1c1c06ddc20a 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -687,8 +687,10 @@ EXPORT_SYMBOL(_copy_from_iter_full_nocache);
 
 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
 {
-	size_t v = n + offset;
-	if (likely(n <= v && v <= (PAGE_SIZE << compound_order(page))))
+	struct page *head = compound_head(page);
+	size_t v = n + offset + page_address(page) - page_address(head);
+
+	if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
 		return true;
 	WARN_ON(1);
 	return false;
-- 
cgit 


From 58aff0af757356065f33290d96a9cd46dfbcae88 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 18 Sep 2017 17:47:38 +0100
Subject: ipc/shm: Fix order of parameters when calling
 copy_compat_shmid_to_user

Commit 553f770ef71b ("ipc: move compat shmctl to native") moved the
compat IPC syscall handling into ipc/shm.c and refactored the struct
accessors in the process. Unfortunately, the call to
copy_compat_shmid_to_user when handling a compat {IPC,SHM}_STAT command
gets the arguments the wrong way round, passing a kernel stack address
as the user buffer (destination) and the user buffer as the kernel stack
address (source).

This patch fixes the parameter ordering so the buffers are accessed
correctly.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 ipc/shm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ipc/shm.c b/ipc/shm.c
index 1b3adfe3c60e..1e2b1692ba2c 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1237,7 +1237,7 @@ COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
 		err = shmctl_stat(ns, shmid, cmd, &sem64);
 		if (err < 0)
 			return err;
-		if (copy_compat_shmid_to_user(&sem64, uptr, version))
+		if (copy_compat_shmid_to_user(uptr, &sem64, version))
 			err = -EFAULT;
 		return err;
 
-- 
cgit 


From 3e77adeea3c5393c9b624832f65441e92867f618 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 7 Sep 2017 16:35:40 +1000
Subject: powerpc/eeh: Create PHB PEs after EEH is initialized

Otherwise we end up not yet having computed the right diag data size
on powernv where EEH initialization is delayed, thus causing memory
corruption later on when calling OPAL.

Fixes: 5cb1f8fdddb7 ("powerpc/powernv/pci: Dynamically allocate PHB diag data")
Cc: stable@vger.kernel.org # v4.13+
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Russell Currey <ruscur@russell.cc>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh.c     |  4 ++++
 arch/powerpc/kernel/eeh_dev.c | 18 ------------------
 2 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 9e816787c0d4..116000b45531 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1019,6 +1019,10 @@ int eeh_init(void)
 	} else if ((ret = eeh_ops->init()))
 		return ret;
 
+	/* Initialize PHB PEs */
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+		eeh_dev_phb_init_dynamic(hose);
+
 	/* Initialize EEH event */
 	ret = eeh_event_init();
 	if (ret)
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index ad04ecd63c20..a34e6912c15e 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -78,21 +78,3 @@ void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
 	/* EEH PE for PHB */
 	eeh_phb_pe_create(phb);
 }
-
-/**
- * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
- *
- * Scan all the existing PHBs and create EEH devices for their OF
- * nodes and their children OF nodes
- */
-static int __init eeh_dev_phb_init(void)
-{
-	struct pci_controller *phb, *tmp;
-
-	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
-		eeh_dev_phb_init_dynamic(phb);
-
-	return 0;
-}
-
-core_initcall(eeh_dev_phb_init);
-- 
cgit 


From 087ff6a5ae3052bb2835e191094b793789cb8817 Mon Sep 17 00:00:00 2001
From: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Date: Wed, 20 Sep 2017 17:02:51 -0400
Subject: powerpc/pseries: Fix "OF: ERROR: Bad of_node_put() on /cpus" during
 DLPAR

Commit 215ee763f8cb ("powerpc: pseries: remove dlpar_attach_node
dependency on full path") reworked dlpar_attach_node() to no longer
look up the parent node "/cpus", but instead to have the parent node
passed by the caller in the function parameter list.

As a result dlpar_attach_node() is no longer responsible for freeing
the reference to the parent node. However, commit 215ee763f8cb failed
to remove the of_node_put(parent) call in dlpar_attach_node(), or to
take into account that the reference to the parent in the caller
dlpar_cpu_add() needs to be held until after dlpar_attach_node()
returns.

As a result doing repeated cpu add/remove dlpar operations will
eventually result in the following error:

  OF: ERROR: Bad of_node_put() on /cpus
  CPU: 0 PID: 10896 Comm: drmgr Not tainted 4.13.0-autotest #1
  Call Trace:
   dump_stack+0x15c/0x1f8 (unreliable)
   of_node_release+0x1a4/0x1c0
   kobject_put+0x1a8/0x310
   kobject_del+0xbc/0xf0
   __of_detach_node_sysfs+0x144/0x210
   of_detach_node+0xf0/0x180
   dlpar_detach_node+0xc4/0x120
   dlpar_cpu_remove+0x280/0x560
   dlpar_cpu_release+0xbc/0x1b0
   arch_cpu_release+0x6c/0xb0
   cpu_release_store+0xa0/0x100
   dev_attr_store+0x68/0xa0
   sysfs_kf_write+0xa8/0xf0
   kernfs_fop_write+0x2cc/0x400
   __vfs_write+0x5c/0x340
   vfs_write+0x1a8/0x3d0
   SyS_write+0xa8/0x1a0
   system_call+0x58/0x6c

Fix the issue by removing the of_node_put(parent) call from
dlpar_attach_node(), and ensuring that the reference to the parent
node is properly held and released by the caller dlpar_cpu_add().

Fixes: 215ee763f8cb ("powerpc: pseries: remove dlpar_attach_node dependency on full path")
Signed-off-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Reported-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
[mpe: Add a comment in the code and frob the change log slightly]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/dlpar.c       | 1 -
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 6 +++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 783f36364690..e45b5f10645a 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -266,7 +266,6 @@ int dlpar_attach_node(struct device_node *dn, struct device_node *parent)
 		return rc;
 	}
 
-	of_node_put(dn->parent);
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index fc0d8f97c03a..fadb95efbb9e 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -462,15 +462,19 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 	}
 
 	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
-	of_node_put(parent);
 	if (!dn) {
 		pr_warn("Failed call to configure-connector, drc index: %x\n",
 			drc_index);
 		dlpar_release_drc(drc_index);
+		of_node_put(parent);
 		return -EINVAL;
 	}
 
 	rc = dlpar_attach_node(dn, parent);
+
+	/* Regardless we are done with parent now */
+	of_node_put(parent);
+
 	if (rc) {
 		saved_rc = rc;
 		pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
-- 
cgit 


From b537ca6fede69a281dc524983e5e633d79a10a08 Mon Sep 17 00:00:00 2001
From: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Date: Wed, 20 Sep 2017 17:02:52 -0400
Subject: powerpc/pseries: Fix parent_dn reference leak in add_dt_node()

A reference to the parent device node is held by add_dt_node() for the
node to be added. If the call to dlpar_configure_connector() fails
add_dt_node() returns ENOENT and that reference is not freed.

Add a call to of_node_put(parent_dn) prior to bailing out after a
failed dlpar_configure_connector() call.

Fixes: 8d5ff320766f ("powerpc/pseries: Make dlpar_configure_connector parent node aware")
Cc: stable@vger.kernel.org # v3.12+
Signed-off-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/mobility.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 210ce632d63e..f7042ad492ba 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -226,8 +226,10 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
 		return -ENOENT;
 
 	dn = dlpar_configure_connector(drc_index, parent_dn);
-	if (!dn)
+	if (!dn) {
+		of_node_put(parent_dn);
 		return -ENOENT;
+	}
 
 	rc = dlpar_attach_node(dn, parent_dn);
 	if (rc)
-- 
cgit 


From 0551968add53777fddd18f4ffb4e3bbc1f646d79 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 21 Sep 2017 11:54:44 +0200
Subject: Revert "genirq: Restrict effective affinity to interrupts actually
 using it"

This reverts commit 74def747bcd09692bdbf8c6a15350795b0f11ca8.

The change to the helper function is only correct for the /proc/irq/
readout usage, but breaks the existing x86 usage of that function.

Reported-by: Yanko Kaneti <yaneti@declera.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irq.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index b99a784635ff..d4728bf6a537 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -783,10 +783,7 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
 static inline
 struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
 {
-	if (!cpumask_empty(d->common->effective_affinity))
-		return d->common->effective_affinity;
-
-	return d->common->affinity;
+	return d->common->effective_affinity;
 }
 static inline void irq_data_update_effective_affinity(struct irq_data *d,
 						      const struct cpumask *m)
-- 
cgit 


From 2bc84526d174a2a89c76438f049fc03ac259a159 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Wed, 6 Sep 2017 16:44:35 -0600
Subject: Makefile: kselftest and kselftest-clean fail for make O=dir case

kselftest and kselftest-clean targets fail when object directory is
specified to relocate objects. Fix it so it can find the source tree
to build from.

make O=/tmp/kselftest_top kselftest

make[1]: Entering directory '/tmp/kselftest_top'
make[2]: Entering directory '/tmp/kselftest_top'
make[2]: *** tools/testing/selftests: No such file or directory.  Stop.
make[2]: Leaving directory '/tmp/kselftest_top'
./linux-kselftest/Makefile:1185: recipe for target
'kselftest' failed
make[1]: *** [kselftest] Error 2
make[1]: Leaving directory '/tmp/kselftest_top'
Makefile:145: recipe for target 'sub-make' failed
make: *** [sub-make] Error 2

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Acked-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 64cbc66cebca..9a70578922d3 100644
--- a/Makefile
+++ b/Makefile
@@ -1172,11 +1172,11 @@ headers_check: headers_install
 
 PHONY += kselftest
 kselftest:
-	$(Q)$(MAKE) -C tools/testing/selftests run_tests
+	$(Q)$(MAKE) -C $(srctree)/tools/testing/selftests run_tests
 
 PHONY += kselftest-clean
 kselftest-clean:
-	$(Q)$(MAKE) -C tools/testing/selftests clean
+	$(Q)$(MAKE) -C $(srctree)/tools/testing/selftests clean
 
 PHONY += kselftest-merge
 kselftest-merge:
-- 
cgit 


From 8050ef2b83a18f628f9501af958fbff39443d58d Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Wed, 6 Sep 2017 18:36:22 -0600
Subject: selftests: lib.mk: kselftest and kselftest-clean fail for make O=dir
 case

kselftest and kselftest-clean targets fail when object directory is
specified to relocate objects. Main Makefile make O= path clears the
built-in defines LINK.c, COMPILE.S, LINK.S, and RM that are used in
lib.mk to build and clean targets. Define them.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 4665463779f5..266d3ed4bb41 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -7,6 +7,7 @@ OUTPUT := $(shell pwd)
 endif
 
 TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
+TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
 TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
 
 all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
@@ -62,6 +63,11 @@ endef
 emit_tests:
 	$(EMIT_TESTS)
 
+# define if isn't already. It is undefined in make O= case.
+ifeq ($(RM),)
+RM := rm -f
+endif
+
 define CLEAN
 	$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
 endef
@@ -69,6 +75,15 @@ endef
 clean:
 	$(CLEAN)
 
+# When make O= with kselftest target from main level
+# the following aren't defined.
+#
+ifneq ($(KBUILD_SRC),)
+LINK.c = $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
+LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+endif
+
 $(OUTPUT)/%:%.c
 	$(LINK.c) $^ $(LDLIBS) -o $@
 
-- 
cgit 


From 52fd1d082398b928a86d4fdf33c9f3abe1bf7914 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 7 Sep 2017 19:57:43 -0600
Subject: selftests: Makefile: clear LDFLAGS for make O=dir use-case

kselftest target fails when object directory is specified to relocate
objects. Inherited "LDFLAGS = -m" fails the test builds. Clear it.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 26ce4f7168be..f4368db011ea 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -52,6 +52,10 @@ override LDFLAGS =
 override MAKEFLAGS =
 endif
 
+ifneq ($(KBUILD_SRC),)
+override LDFLAGS =
+endif
+
 BUILD := $(O)
 ifndef BUILD
   BUILD := $(KBUILD_OUTPUT)
-- 
cgit 


From e0a5696a23290c31c5ac2c76f0e7fe50a12c1fc6 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 7 Sep 2017 20:04:26 -0600
Subject: selftests: lib.mk: fix test executable status check to use full path

Fix test executable status check to use full path for make O=dir case,m
when tests are relocated to user specified object directory. Without the
full path, this check fails to find the file and fails the test.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 266d3ed4bb41..fd1cbbbca8d7 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -21,7 +21,7 @@ define RUN_TESTS
 		test_num=`echo $$test_num+1 | bc`;	\
 		echo "selftests: $$BASENAME_TEST";	\
 		echo "========================================";	\
-		if [ ! -x $$BASENAME_TEST ]; then	\
+		if [ ! -x $$TEST ]; then	\
 			echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
 			echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
 		else					\
-- 
cgit 


From e2fb65594cae9a016fab4639a5d22a914f1e16c8 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Fri, 8 Sep 2017 16:05:50 -0600
Subject: selftests: watchdog: fix to use TEST_GEN_PROGS and remove clean

TEST_PROGS should be used for test scripts that don't ned to be built.
Use TEST_GEN_PROGS instead which is intended for test executables.

Remove clean target and let the common clean take care of cleaning.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/watchdog/Makefile | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tools/testing/selftests/watchdog/Makefile b/tools/testing/selftests/watchdog/Makefile
index f863c664e3d1..ee068511fd0b 100644
--- a/tools/testing/selftests/watchdog/Makefile
+++ b/tools/testing/selftests/watchdog/Makefile
@@ -1,8 +1,3 @@
-TEST_PROGS := watchdog-test
-
-all: $(TEST_PROGS)
+TEST_GEN_PROGS := watchdog-test
 
 include ../lib.mk
-
-clean:
-	rm -fr $(TEST_PROGS)
-- 
cgit 


From be16a244c199983656e58ddb10d80c67197e502f Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 11 Sep 2017 13:06:33 -0600
Subject: selftests: lib.mk: add TEST_CUSTOM_PROGS to allow custom test
 run/install

Some tests such as sync can't use generic build rules in lib.mk and require
custom rules. Currently there is no provision to allow custom builds and
test such as sync use TEST_PROGS which is reserved for test shell scripts.
Add TEST_CUSTOM_PROGS variable to lib.mk to run and install custom tests
built by individual test make files.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index fd1cbbbca8d7..b4699c71aee4 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -6,6 +6,12 @@ ifeq (0,$(MAKELEVEL))
 OUTPUT := $(shell pwd)
 endif
 
+# The following are built by lib.mk common compile rules.
+# TEST_CUSTOM_PROGS should be used by tests that require
+# custom build rule and prevent common build rule use.
+# TEST_PROGS are for test shell scripts.
+# TEST_CUSTOM_PROGS and TEST_PROGS will be run by common run_tests
+# and install targets. Common clean doesn't touch them.
 TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
 TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
 TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
@@ -31,7 +37,7 @@ define RUN_TESTS
 endef
 
 run_tests: all
-	$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_PROGS))
+	$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
 
 define INSTALL_RULE
 	@if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then					\
@@ -39,10 +45,10 @@ define INSTALL_RULE
 		echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/";	\
 		rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/;		\
 	fi
-	@if [ "X$(TEST_GEN_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then					\
+	@if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then					\
 		mkdir -p ${INSTALL_PATH};										\
-		echo "rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/";	\
-		rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/;		\
+		echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/";	\
+		rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/;		\
 	fi
 endef
 
@@ -54,7 +60,7 @@ else
 endif
 
 define EMIT_TESTS
-	@for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
+	@for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
 		BASENAME_TEST=`basename $$TEST`;	\
 		echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
 	done;
-- 
cgit 


From 38f7251852a0cd47f34af4a6f84df0fadafa8ac7 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 11 Sep 2017 13:46:44 -0600
Subject: selftests: sync: use TEST_CUSTOM_PROGS instead of TEST_PROGS

lib.mk var TEST_CUSTOM_PROGS is for tests that need custom build
rules. TEST_PROGS is used for test shell scripts. Fix it to use
TEST_CUSTOM_PROGS. lib.mk will run and install them.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/sync/Makefile | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile
index 4981c6b6d050..43db80b71e80 100644
--- a/tools/testing/selftests/sync/Makefile
+++ b/tools/testing/selftests/sync/Makefile
@@ -2,9 +2,11 @@ CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra
 CFLAGS += -I../../../../usr/include/
 LDFLAGS += -pthread
 
-TEST_PROGS = sync_test
+# lib.mk TEST_CUSTOM_PROGS var is for custome tests that need special
+# build rules. lib.mk will run and install them.
+TEST_CUSTOM_PROGS = sync_test
 
-all: $(TEST_PROGS)
+all: $(TEST_CUSTOM_PROGS)
 
 include ../lib.mk
 
-- 
cgit 


From b2fc6ade9fe2118591e7f51e21b51f65e36f138f Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 11 Sep 2017 14:14:41 -0600
Subject: selftests: sync: kselftest and kselftest-clean fail for make O=dir
 case

sync test fails to build when object directory is specified to relocate
object files. Fix it to specify the correct path. Fix clean target to
remove objects. Also include simplified logic to use TEST_CUSTOM_PROGS
in build and clean targets instead of hard-coding the test name each
time.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/sync/Makefile | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile
index 43db80b71e80..8e04d0afcbd7 100644
--- a/tools/testing/selftests/sync/Makefile
+++ b/tools/testing/selftests/sync/Makefile
@@ -2,14 +2,16 @@ CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra
 CFLAGS += -I../../../../usr/include/
 LDFLAGS += -pthread
 
-# lib.mk TEST_CUSTOM_PROGS var is for custome tests that need special
+.PHONY: all clean
+
+include ../lib.mk
+
+# lib.mk TEST_CUSTOM_PROGS var is for custom tests that need special
 # build rules. lib.mk will run and install them.
-TEST_CUSTOM_PROGS = sync_test
 
+TEST_CUSTOM_PROGS := $(OUTPUT)/sync_test
 all: $(TEST_CUSTOM_PROGS)
 
-include ../lib.mk
-
 OBJS = sync_test.o sync.o
 
 TESTS += sync_alloc.o
@@ -20,6 +22,16 @@ TESTS += sync_stress_parallelism.o
 TESTS += sync_stress_consumer.o
 TESTS += sync_stress_merge.o
 
-sync_test: $(OBJS) $(TESTS)
+OBJS := $(patsubst %,$(OUTPUT)/%,$(OBJS))
+TESTS := $(patsubst %,$(OUTPUT)/%,$(TESTS))
+
+$(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
+	$(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
+
+$(OBJS): $(OUTPUT)/%.o: %.c
+	$(CC) -c $^ -o $@
+
+$(TESTS): $(OUTPUT)/%.o: %.c
+	$(CC) -c $^ -o $@
 
-EXTRA_CLEAN := sync_test $(OBJS) $(TESTS)
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS)
-- 
cgit 


From 1a940687e424080a6ed285f2de8b2f0d018edf3e Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 11 Sep 2017 19:03:11 -0600
Subject: selftests: lib.mk: copy test scripts and test files for make O=dir
 run

For make O=dir run_tests to work, test scripts, test files, and other
dependencies need to be copied over to the object directory. Running
tests from the object directory is necessary to avoid making the source
tree dirty.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index b4699c71aee4..f65886af7c0c 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -37,7 +37,18 @@ define RUN_TESTS
 endef
 
 run_tests: all
+ifneq ($(KBUILD_SRC),)
+	@if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then
+		@rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT)
+	fi
+	@if [ "X$(TEST_PROGS)" != "X" ]; then
+		$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS))
+	else
+		$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS))
+	fi
+else
 	$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
+endif
 
 define INSTALL_RULE
 	@if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then					\
-- 
cgit 


From 9bbe7dc05c1f80200621a626ec2266987f4b42b3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 18 Sep 2017 17:55:24 +0200
Subject: MIPS: MSP71xx: Include asm/setup.h

msp71xx_defconfig can not be built at the in v4.14-rc1

arch/mips/pmcs-msp71xx/msp_smp.c:72:2: error: implicit declaration of function 'set_vi_handler' [-Werror=implicit-function-declaration]

I don't know what caused the regression, but including the right
header is the obvious fix.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/17309/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/pmcs-msp71xx/msp_smp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/pmcs-msp71xx/msp_smp.c b/arch/mips/pmcs-msp71xx/msp_smp.c
index ffa0f7101a97..2b08242ade62 100644
--- a/arch/mips/pmcs-msp71xx/msp_smp.c
+++ b/arch/mips/pmcs-msp71xx/msp_smp.c
@@ -22,6 +22,8 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 
+#include <asm/setup.h>
+
 #ifdef CONFIG_MIPS_MT_SMP
 #define MIPS_CPU_IPI_RESCHED_IRQ 0	/* SW int 0 for resched */
 #define MIPS_CPU_IPI_CALL_IRQ 1		/* SW int 1 for call */
-- 
cgit 


From c22c8043105591a8b74142cf837604087cdba40b Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Tue, 19 Sep 2017 14:11:22 +0100
Subject: MIPS: Fix input modify in __write_64bit_c0_split()

The inline asm in __write_64bit_c0_split() modifies the 64-bit input
operand by shifting the high register left by 32, and constructing the
full 64-bit value in the low register (even on a 32-bit kernel), so if
that value is used again it could cause breakage as GCC would assume the
registers haven't changed when they have.

To quote the GCC extended asm documentation:
> Warning: Do not modify the contents of input-only operands (except for
> inputs tied to outputs). The compiler assumes that on exit from the
> asm statement these operands contain the same values as they had
> before executing the statement.

Avoid modifying the input by using a temporary variable as an output
which is modified instead of the input and not otherwise used. The asm
is always __volatile__ so GCC shouldn't optimise it out. The low
register of the temporary output is written before the high register of
the input is read, so we have two constraint alternatives, one where
both use the same registers (for when the input value isn't subsequently
used), and one with an early clobber on the output in case the low
output uses the same register as the high input. This allows the
resulting assembly to remain mostly unchanged.

A diff of a MIPS32r6 kernel reveals only three differences, two in
relation to write_c0_r10k_diag() in cpu_probe() (register allocation
rearranged slightly but otherwise identical), and one in relation to
write_c0_cvmmemctl2() in kvm_vz_local_flush_guesttlb_all(), but the
octeon CPU is only supported on 64-bit kernels where
__write_64bit_c0_split() isn't used so that shouldn't matter in
practice. So there currently doesn't appear to be anything broken by
this bug.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/17315/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mipsregs.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index e4ed1bc9a734..a6810923b3f0 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -1377,29 +1377,32 @@ do {									\
 
 #define __write_64bit_c0_split(source, sel, val)			\
 do {									\
+	unsigned long long __tmp;					\
 	unsigned long __flags;						\
 									\
 	local_irq_save(__flags);					\
 	if (sel == 0)							\
 		__asm__ __volatile__(					\
 			".set\tmips64\n\t"				\
-			"dsll\t%L0, %L0, 32\n\t"			\
+			"dsll\t%L0, %L1, 32\n\t"			\
 			"dsrl\t%L0, %L0, 32\n\t"			\
-			"dsll\t%M0, %M0, 32\n\t"			\
+			"dsll\t%M0, %M1, 32\n\t"			\
 			"or\t%L0, %L0, %M0\n\t"				\
 			"dmtc0\t%L0, " #source "\n\t"			\
 			".set\tmips0"					\
-			: : "r" (val));					\
+			: "=&r,r" (__tmp)				\
+			: "r,0" (val));					\
 	else								\
 		__asm__ __volatile__(					\
 			".set\tmips64\n\t"				\
-			"dsll\t%L0, %L0, 32\n\t"			\
+			"dsll\t%L0, %L1, 32\n\t"			\
 			"dsrl\t%L0, %L0, 32\n\t"			\
-			"dsll\t%M0, %M0, 32\n\t"			\
+			"dsll\t%M0, %M1, 32\n\t"			\
 			"or\t%L0, %L0, %M0\n\t"				\
 			"dmtc0\t%L0, " #source ", " #sel "\n\t"		\
 			".set\tmips0"					\
-			: : "r" (val));					\
+			: "=&r,r" (__tmp)				\
+			: "r,0" (val));					\
 	local_irq_restore(__flags);					\
 } while (0)
 
-- 
cgit 


From 8eba3651f1dad49c83bb7f8d672301dac4c6add6 Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@gmail.com>
Date: Tue, 12 Sep 2017 20:36:28 +0200
Subject: MIPS: PCI: fix pcibios_map_irq section mismatch

Drop  the __init from pcibios_map_irq() to make this section mis-
match go away:

WARNING: vmlinux.o(.text+0x56acd4): Section mismatch in reference from the function pcibios_scanbus() to the function .init.text:pcibios_map_irq()
The function pcibios_scanbus() references
the function __init pcibios_map_irq().
This is often because pcibios_scanbus lacks a __init
annotation or the annotation of pcibios_map_irq is wrong.

Run-Tested only on Alchemy.

Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/17267/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/pci/fixup-capcella.c  | 2 +-
 arch/mips/pci/fixup-cobalt.c    | 2 +-
 arch/mips/pci/fixup-emma2rh.c   | 2 +-
 arch/mips/pci/fixup-fuloong2e.c | 2 +-
 arch/mips/pci/fixup-ip32.c      | 2 +-
 arch/mips/pci/fixup-lantiq.c    | 2 +-
 arch/mips/pci/fixup-lemote2f.c  | 2 +-
 arch/mips/pci/fixup-loongson3.c | 2 +-
 arch/mips/pci/fixup-malta.c     | 2 +-
 arch/mips/pci/fixup-mpc30x.c    | 2 +-
 arch/mips/pci/fixup-pmcmsp.c    | 2 +-
 arch/mips/pci/fixup-sni.c       | 2 +-
 arch/mips/pci/fixup-tb0219.c    | 2 +-
 arch/mips/pci/fixup-tb0226.c    | 2 +-
 arch/mips/pci/fixup-tb0287.c    | 2 +-
 arch/mips/pci/pci-alchemy.c     | 2 +-
 arch/mips/pci/pci-bcm47xx.c     | 2 +-
 arch/mips/pci/pci-lasat.c       | 2 +-
 arch/mips/pci/pci-mt7620.c      | 2 +-
 arch/mips/pci/pci-octeon.c      | 4 ++--
 arch/mips/pci/pci-rt2880.c      | 2 +-
 arch/mips/pci/pci-rt3883.c      | 2 +-
 arch/mips/pci/pci-xlp.c         | 2 +-
 arch/mips/pci/pci-xlr.c         | 2 +-
 24 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/arch/mips/pci/fixup-capcella.c b/arch/mips/pci/fixup-capcella.c
index 1c02f5737367..35a671595a37 100644
--- a/arch/mips/pci/fixup-capcella.c
+++ b/arch/mips/pci/fixup-capcella.c
@@ -38,7 +38,7 @@ static char irq_tab_capcella[][5] __initdata = {
  [14] = { -1, INTA, INTB, INTC, INTD }
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return irq_tab_capcella[slot][pin];
 }
diff --git a/arch/mips/pci/fixup-cobalt.c b/arch/mips/pci/fixup-cobalt.c
index b3ab59318d91..62810d3fe99b 100644
--- a/arch/mips/pci/fixup-cobalt.c
+++ b/arch/mips/pci/fixup-cobalt.c
@@ -174,7 +174,7 @@ static char irq_tab_raq2[] __initdata = {
   [COBALT_PCICONF_ETH1]	   = ETH1_IRQ
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (cobalt_board_id <= COBALT_BRD_ID_QUBE1)
 		return irq_tab_qube1[slot];
diff --git a/arch/mips/pci/fixup-emma2rh.c b/arch/mips/pci/fixup-emma2rh.c
index 19caf775c206..4832ac9f118a 100644
--- a/arch/mips/pci/fixup-emma2rh.c
+++ b/arch/mips/pci/fixup-emma2rh.c
@@ -85,7 +85,7 @@ static void emma2rh_pci_host_fixup(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_EMMA2RH,
 			 emma2rh_pci_host_fixup);
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return irq_map[slot][pin];
 }
diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c
index 50da773faede..b47c2771dc99 100644
--- a/arch/mips/pci/fixup-fuloong2e.c
+++ b/arch/mips/pci/fixup-fuloong2e.c
@@ -19,7 +19,7 @@
 /* South bridge slot number is set by the pci probe process */
 static u8 sb_slot = 5;
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = 0;
 
diff --git a/arch/mips/pci/fixup-ip32.c b/arch/mips/pci/fixup-ip32.c
index 133685e215ee..ea29f5450be3 100644
--- a/arch/mips/pci/fixup-ip32.c
+++ b/arch/mips/pci/fixup-ip32.c
@@ -39,7 +39,7 @@ static char irq_tab_mace[][5] __initdata = {
  * irqs.  I suppose a device without a pin A will thank us for doing it
  * right if there exists such a broken piece of crap.
  */
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return irq_tab_mace[slot][pin];
 }
diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c
index 2b5427d3f35c..81530a13b349 100644
--- a/arch/mips/pci/fixup-lantiq.c
+++ b/arch/mips/pci/fixup-lantiq.c
@@ -23,7 +23,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	return 0;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return of_irq_parse_and_map_pci(dev, slot, pin);
 }
diff --git a/arch/mips/pci/fixup-lemote2f.c b/arch/mips/pci/fixup-lemote2f.c
index 95ab9a1bd010..7e5991e0e323 100644
--- a/arch/mips/pci/fixup-lemote2f.c
+++ b/arch/mips/pci/fixup-lemote2f.c
@@ -51,7 +51,7 @@ static char irq_tab[][5] __initdata = {
 	{0, 0, 0, 0, 0},	/*  27: Unused */
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int virq;
 
diff --git a/arch/mips/pci/fixup-loongson3.c b/arch/mips/pci/fixup-loongson3.c
index 2b6d5e196f99..8a741c2c6685 100644
--- a/arch/mips/pci/fixup-loongson3.c
+++ b/arch/mips/pci/fixup-loongson3.c
@@ -32,7 +32,7 @@ static void print_fixup_info(const struct pci_dev *pdev)
 			pdev->vendor, pdev->device, pdev->irq);
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	print_fixup_info(dev);
 	return dev->irq;
diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c
index 40e920c653cc..1f5f25e39590 100644
--- a/arch/mips/pci/fixup-malta.c
+++ b/arch/mips/pci/fixup-malta.c
@@ -38,7 +38,7 @@ static char irq_tab[][5] __initdata = {
 	{0,	PCID,	PCIA,	PCIB,	PCIC }	/* 21: PCI Slot 4 */
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int virq;
 	virq = irq_tab[slot][pin];
diff --git a/arch/mips/pci/fixup-mpc30x.c b/arch/mips/pci/fixup-mpc30x.c
index 8e4f8288eca2..5da62c76e271 100644
--- a/arch/mips/pci/fixup-mpc30x.c
+++ b/arch/mips/pci/fixup-mpc30x.c
@@ -34,7 +34,7 @@ static const int irq_tab_mpc30x[] __initconst = {
  [29] = MQ200_IRQ,
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (slot == 30)
 		return internal_func_irqs[PCI_FUNC(dev->devfn)];
diff --git a/arch/mips/pci/fixup-pmcmsp.c b/arch/mips/pci/fixup-pmcmsp.c
index fab405c21c2f..f2b7b1e4395b 100644
--- a/arch/mips/pci/fixup-pmcmsp.c
+++ b/arch/mips/pci/fixup-pmcmsp.c
@@ -202,7 +202,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
  *  RETURNS:	 IRQ number
  *
  ****************************************************************************/
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 #if !defined(CONFIG_PMC_MSP7120_GW) && !defined(CONFIG_PMC_MSP7120_EVAL)
 	printk(KERN_WARNING "PCI: unknown board, no PCI IRQs assigned.\n");
diff --git a/arch/mips/pci/fixup-sni.c b/arch/mips/pci/fixup-sni.c
index f67ebeeb4200..309e1c562959 100644
--- a/arch/mips/pci/fixup-sni.c
+++ b/arch/mips/pci/fixup-sni.c
@@ -130,7 +130,7 @@ static inline int is_rm300_revd(void)
 	return (csmsr & 0xa0) == 0x20;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	switch (sni_brd_type) {
 	case SNI_BRD_PCI_TOWER_CPLUS:
diff --git a/arch/mips/pci/fixup-tb0219.c b/arch/mips/pci/fixup-tb0219.c
index d0b0083fbd27..cc581535f257 100644
--- a/arch/mips/pci/fixup-tb0219.c
+++ b/arch/mips/pci/fixup-tb0219.c
@@ -23,7 +23,7 @@
 
 #include <asm/vr41xx/tb0219.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = -1;
 
diff --git a/arch/mips/pci/fixup-tb0226.c b/arch/mips/pci/fixup-tb0226.c
index 4196ccf3ea3d..b827b5cad5fd 100644
--- a/arch/mips/pci/fixup-tb0226.c
+++ b/arch/mips/pci/fixup-tb0226.c
@@ -23,7 +23,7 @@
 #include <asm/vr41xx/giu.h>
 #include <asm/vr41xx/tb0226.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = -1;
 
diff --git a/arch/mips/pci/fixup-tb0287.c b/arch/mips/pci/fixup-tb0287.c
index 8c5039ed75d7..98f26285f2e3 100644
--- a/arch/mips/pci/fixup-tb0287.c
+++ b/arch/mips/pci/fixup-tb0287.c
@@ -22,7 +22,7 @@
 
 #include <asm/vr41xx/tb0287.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	unsigned char bus;
 	int irq = -1;
diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c
index e99ca7702d8a..f15ec98de2de 100644
--- a/arch/mips/pci/pci-alchemy.c
+++ b/arch/mips/pci/pci-alchemy.c
@@ -522,7 +522,7 @@ static int __init alchemy_pci_init(void)
 arch_initcall(alchemy_pci_init);
 
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	struct alchemy_pci_context *ctx = dev->sysdata;
 	if (ctx && ctx->board_map_irq)
diff --git a/arch/mips/pci/pci-bcm47xx.c b/arch/mips/pci/pci-bcm47xx.c
index 76f16eaed0ad..230d7dd273e2 100644
--- a/arch/mips/pci/pci-bcm47xx.c
+++ b/arch/mips/pci/pci-bcm47xx.c
@@ -28,7 +28,7 @@
 #include <linux/bcma/bcma.h>
 #include <bcm47xx.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return 0;
 }
diff --git a/arch/mips/pci/pci-lasat.c b/arch/mips/pci/pci-lasat.c
index 40d2797d2bc4..47f4ee6bbb3b 100644
--- a/arch/mips/pci/pci-lasat.c
+++ b/arch/mips/pci/pci-lasat.c
@@ -61,7 +61,7 @@ arch_initcall(lasat_pci_setup);
 #define LASAT_IRQ_PCIC	 (LASAT_IRQ_BASE + 7)
 #define LASAT_IRQ_PCID	 (LASAT_IRQ_BASE + 8)
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	switch (slot) {
 	case 1:
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
index 4e633c1e7ff3..90fba9bf98da 100644
--- a/arch/mips/pci/pci-mt7620.c
+++ b/arch/mips/pci/pci-mt7620.c
@@ -361,7 +361,7 @@ static int mt7620_pci_probe(struct platform_device *pdev)
 	return 0;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	u16 cmd;
 	u32 val;
diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c
index 9ee01936862e..771f5de6362d 100644
--- a/arch/mips/pci/pci-octeon.c
+++ b/arch/mips/pci/pci-octeon.c
@@ -59,7 +59,7 @@ union octeon_pci_address {
 	} s;
 };
 
-int __initconst (*octeon_pcibios_map_irq)(const struct pci_dev *dev,
+int (*octeon_pcibios_map_irq)(const struct pci_dev *dev,
 					 u8 slot, u8 pin);
 enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID;
 
@@ -74,7 +74,7 @@ enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID;
  *		 as it goes through each bridge.
  * Returns Interrupt number for the device
  */
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (octeon_pcibios_map_irq)
 		return octeon_pcibios_map_irq(dev, slot, pin);
diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c
index d6360fe73d05..711cdccdf65b 100644
--- a/arch/mips/pci/pci-rt2880.c
+++ b/arch/mips/pci/pci-rt2880.c
@@ -181,7 +181,7 @@ static inline void rt2880_pci_write_u32(unsigned long reg, u32 val)
 	spin_unlock_irqrestore(&rt2880_pci_lock, flags);
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	u16 cmd;
 	int irq = -1;
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index 04f8ea953297..958899ffe99c 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -564,7 +564,7 @@ err_put_intc_node:
 	return err;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return of_irq_parse_and_map_pci(dev, slot, pin);
 }
diff --git a/arch/mips/pci/pci-xlp.c b/arch/mips/pci/pci-xlp.c
index 7babf01600cb..9eff9137f78e 100644
--- a/arch/mips/pci/pci-xlp.c
+++ b/arch/mips/pci/pci-xlp.c
@@ -205,7 +205,7 @@ int xlp_socdev_to_node(const struct pci_dev *lnkdev)
 		return PCI_SLOT(lnkdev->devfn) / 8;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	struct pci_dev *lnkdev;
 	int lnkfunc, node;
diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c
index 26d2dabef281..2a1c81a129ba 100644
--- a/arch/mips/pci/pci-xlr.c
+++ b/arch/mips/pci/pci-xlr.c
@@ -315,7 +315,7 @@ static void xls_pcie_ack_b(struct irq_data *d)
 	}
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return get_irq_vector(dev);
 }
-- 
cgit 


From 51a9a8284e43642fc3e85810fd54f4c245d23a14 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 18 Sep 2017 10:03:12 +0100
Subject: x86/xen: clean up clang build warning

In the case where sizeof(maddr) != sizeof(long) p is initialized and
never read and clang throws a warning on this.  Move declaration of
p to clean up the clang build warning:

warning: Value stored to 'p' during its initialization is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 arch/x86/include/asm/xen/hypercall.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9606688caa4b..e089c1675a7c 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -552,13 +552,13 @@ static inline void
 MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
 			struct desc_struct desc)
 {
-	u32 *p = (u32 *) &desc;
-
 	mcl->op = __HYPERVISOR_update_descriptor;
 	if (sizeof(maddr) == sizeof(long)) {
 		mcl->args[0] = maddr;
 		mcl->args[1] = *(unsigned long *)&desc;
 	} else {
+		u32 *p = (u32 *)&desc;
+
 		mcl->args[0] = maddr;
 		mcl->args[1] = maddr >> 32;
 		mcl->args[2] = *p++;
-- 
cgit 


From c8e1812960eeae42e2183154927028511c4bc566 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Wed, 20 Sep 2017 15:45:36 +0300
Subject: net_sched: always reset qdisc backlog in qdisc_reset()

SKB stored in qdisc->gso_skb also counted into backlog.

Some qdiscs don't reset backlog to zero in ->reset(),
for example sfq just dequeue and free all queued skb.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 92237e75dbbc..bf8c81e07c70 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -685,6 +685,7 @@ void qdisc_reset(struct Qdisc *qdisc)
 		qdisc->gso_skb = NULL;
 	}
 	qdisc->q.qlen = 0;
+	qdisc->qstats.backlog = 0;
 }
 EXPORT_SYMBOL(qdisc_reset);
 
-- 
cgit 


From 21f4d5cc25ec0e6e8eb8420dd2c399e6d2fc7d14 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Wed, 20 Sep 2017 15:46:11 +0300
Subject: net_sched/hfsc: fix curve activation in hfsc_change_class()

If real-time or fair-share curves are enabled in hfsc_change_class()
class isn't inserted into rb-trees yet. Thus init_ed() and init_vf()
must be called in place of update_ed() and update_vf().

Remove isn't required because for now curves cannot be disabled.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_hfsc.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index daaf214e5201..3f88b75488b0 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -958,6 +958,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	}
 
 	if (cl != NULL) {
+		int old_flags;
+
 		if (parentid) {
 			if (cl->cl_parent &&
 			    cl->cl_parent->cl_common.classid != parentid)
@@ -978,6 +980,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		}
 
 		sch_tree_lock(sch);
+		old_flags = cl->cl_flags;
+
 		if (rsc != NULL)
 			hfsc_change_rsc(cl, rsc, cur_time);
 		if (fsc != NULL)
@@ -986,10 +990,21 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			hfsc_change_usc(cl, usc, cur_time);
 
 		if (cl->qdisc->q.qlen != 0) {
-			if (cl->cl_flags & HFSC_RSC)
-				update_ed(cl, qdisc_peek_len(cl->qdisc));
-			if (cl->cl_flags & HFSC_FSC)
-				update_vf(cl, 0, cur_time);
+			int len = qdisc_peek_len(cl->qdisc);
+
+			if (cl->cl_flags & HFSC_RSC) {
+				if (old_flags & HFSC_RSC)
+					update_ed(cl, len);
+				else
+					init_ed(cl, len);
+			}
+
+			if (cl->cl_flags & HFSC_FSC) {
+				if (old_flags & HFSC_FSC)
+					update_vf(cl, 0, cur_time);
+				else
+					init_vf(cl, len);
+			}
 		}
 		sch_tree_unlock(sch);
 
-- 
cgit 


From fe2502e49b58606580c77b3d84e42f946de182d8 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Wed, 20 Sep 2017 09:18:45 -0700
Subject: net_sched: remove cls_flower idr on failure

Fixes: c15ab236d69d ("net/sched: Change cls_flower to use IDR")
Cc: Chris Mi <chrism@mellanox.com>
Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1a267e77c6de..d230cb4c8094 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -922,28 +922,28 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 
 		if (!tc_flags_valid(fnew->flags)) {
 			err = -EINVAL;
-			goto errout;
+			goto errout_idr;
 		}
 	}
 
 	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
 	if (err)
-		goto errout;
+		goto errout_idr;
 
 	err = fl_check_assign_mask(head, &mask);
 	if (err)
-		goto errout;
+		goto errout_idr;
 
 	if (!tc_skip_sw(fnew->flags)) {
 		if (!fold && fl_lookup(head, &fnew->mkey)) {
 			err = -EEXIST;
-			goto errout;
+			goto errout_idr;
 		}
 
 		err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
 					     head->ht_params);
 		if (err)
-			goto errout;
+			goto errout_idr;
 	}
 
 	if (!tc_skip_hw(fnew->flags)) {
@@ -952,7 +952,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 					   &mask.key,
 					   fnew);
 		if (err)
-			goto errout;
+			goto errout_idr;
 	}
 
 	if (!tc_in_hw(fnew->flags))
@@ -981,6 +981,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	kfree(tb);
 	return 0;
 
+errout_idr:
+	if (fnew->handle)
+		idr_remove_ext(&head->handle_idr, fnew->handle);
 errout:
 	tcf_exts_destroy(&fnew->exts);
 	kfree(fnew);
-- 
cgit 


From 0ab09befdbb7ca9b969d6206108629ddff43876e Mon Sep 17 00:00:00 2001
From: Alex Ng <alexng@microsoft.com>
Date: Wed, 20 Sep 2017 11:17:35 -0700
Subject: hv_netvsc: fix send buffer failure on MTU change
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If MTU is changed the host would reject the send buffer change.
This problem is result of recent change to allow changing send
buffer size.

Every time we change the MTU, we store the previous net_device section
count before destroying the buffer, but we don’t store the previous
section size. When we reinitialize the buffer, its size is calculated
by multiplying the previous count and previous size. Since we
continuously increase the MTU, the host returns us a decreasing count
value while the section size is reinitialized to 1728 bytes every
time.

This eventually leads to a condition where the calculated buf_size is
so small that the host rejects it.

Fixes: 8b5327975ae1 ("netvsc: allow controlling send/recv buffer size")
Signed-off-by: Alex Ng <alexng@microsoft.com>
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/hyperv_net.h | 2 ++
 drivers/net/hyperv/netvsc.c     | 7 ++-----
 drivers/net/hyperv/netvsc_drv.c | 8 ++++++++
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index d98cdfb1536b..5176be76ca7d 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -150,6 +150,8 @@ struct netvsc_device_info {
 	u32  num_chn;
 	u32  send_sections;
 	u32  recv_sections;
+	u32  send_section_size;
+	u32  recv_section_size;
 };
 
 enum rndis_device_state {
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index a5511b7326af..8d5077fb0492 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -76,9 +76,6 @@ static struct netvsc_device *alloc_net_device(void)
 	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
 	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
 
-	net_device->recv_section_size = NETVSC_RECV_SECTION_SIZE;
-	net_device->send_section_size = NETVSC_SEND_SECTION_SIZE;
-
 	init_completion(&net_device->channel_init_wait);
 	init_waitqueue_head(&net_device->subchan_open);
 	INIT_WORK(&net_device->subchan_work, rndis_set_subchannel);
@@ -262,7 +259,7 @@ static int netvsc_init_buf(struct hv_device *device,
 	int ret = 0;
 
 	/* Get receive buffer area. */
-	buf_size = device_info->recv_sections * net_device->recv_section_size;
+	buf_size = device_info->recv_sections * device_info->recv_section_size;
 	buf_size = roundup(buf_size, PAGE_SIZE);
 
 	net_device->recv_buf = vzalloc(buf_size);
@@ -344,7 +341,7 @@ static int netvsc_init_buf(struct hv_device *device,
 		goto cleanup;
 
 	/* Now setup the send buffer. */
-	buf_size = device_info->send_sections * net_device->send_section_size;
+	buf_size = device_info->send_sections * device_info->send_section_size;
 	buf_size = round_up(buf_size, PAGE_SIZE);
 
 	net_device->send_buf = vzalloc(buf_size);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index d4902ee5f260..a32ae02e1b6c 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -848,7 +848,9 @@ static int netvsc_set_channels(struct net_device *net,
 	device_info.num_chn = count;
 	device_info.ring_size = ring_size;
 	device_info.send_sections = nvdev->send_section_cnt;
+	device_info.send_section_size = nvdev->send_section_size;
 	device_info.recv_sections = nvdev->recv_section_cnt;
+	device_info.recv_section_size = nvdev->recv_section_size;
 
 	rndis_filter_device_remove(dev, nvdev);
 
@@ -963,7 +965,9 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 	device_info.ring_size = ring_size;
 	device_info.num_chn = nvdev->num_chn;
 	device_info.send_sections = nvdev->send_section_cnt;
+	device_info.send_section_size = nvdev->send_section_size;
 	device_info.recv_sections = nvdev->recv_section_cnt;
+	device_info.recv_section_size = nvdev->recv_section_size;
 
 	rndis_filter_device_remove(hdev, nvdev);
 
@@ -1485,7 +1489,9 @@ static int netvsc_set_ringparam(struct net_device *ndev,
 	device_info.num_chn = nvdev->num_chn;
 	device_info.ring_size = ring_size;
 	device_info.send_sections = new_tx;
+	device_info.send_section_size = nvdev->send_section_size;
 	device_info.recv_sections = new_rx;
+	device_info.recv_section_size = nvdev->recv_section_size;
 
 	netif_device_detach(ndev);
 	was_opened = rndis_filter_opened(nvdev);
@@ -1934,7 +1940,9 @@ static int netvsc_probe(struct hv_device *dev,
 	device_info.ring_size = ring_size;
 	device_info.num_chn = VRSS_CHANNEL_DEFAULT;
 	device_info.send_sections = NETVSC_DEFAULT_TX;
+	device_info.send_section_size = NETVSC_SEND_SECTION_SIZE;
 	device_info.recv_sections = NETVSC_DEFAULT_RX;
+	device_info.recv_section_size = NETVSC_RECV_SECTION_SIZE;
 
 	nvdev = rndis_filter_device_add(dev, &device_info);
 	if (IS_ERR(nvdev)) {
-- 
cgit 


From 4a7a3860caac1a8779e8c459d8abe21b111798d6 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@codeaurora.org>
Date: Wed, 20 Sep 2017 15:32:53 -0500
Subject: net: qcom/emac: add software control for pause frame mode

The EMAC has the option of sending only a single pause frame when
flow control is enabled and the RX queue is full.  Although sending
only one pause frame has little value, this would allow admins to
enable automatic flow control without having to worry about the EMAC
flooding nearby switches with pause frames if the kernel hangs.

The option is enabled by using the single-pause-mode private flag.

Signed-off-by: Timur Tabi <timur@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/emac/emac-ethtool.c | 30 +++++++++++++++++++++++
 drivers/net/ethernet/qualcomm/emac/emac-mac.c     | 22 +++++++++++++++++
 drivers/net/ethernet/qualcomm/emac/emac.c         |  3 +++
 drivers/net/ethernet/qualcomm/emac/emac.h         |  3 +++
 4 files changed, 58 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c
index bbe24639aa5a..c8c6231b87f3 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c
@@ -88,6 +88,8 @@ static void emac_set_msglevel(struct net_device *netdev, u32 data)
 static int emac_get_sset_count(struct net_device *netdev, int sset)
 {
 	switch (sset) {
+	case ETH_SS_PRIV_FLAGS:
+		return 1;
 	case ETH_SS_STATS:
 		return EMAC_STATS_LEN;
 	default:
@@ -100,6 +102,10 @@ static void emac_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 	unsigned int i;
 
 	switch (stringset) {
+	case ETH_SS_PRIV_FLAGS:
+		strcpy(data, "single-pause-mode");
+		break;
+
 	case ETH_SS_STATS:
 		for (i = 0; i < EMAC_STATS_LEN; i++) {
 			strlcpy(data, emac_ethtool_stat_strings[i],
@@ -230,6 +236,27 @@ static int emac_get_regs_len(struct net_device *netdev)
 	return EMAC_MAX_REG_SIZE * sizeof(u32);
 }
 
+#define EMAC_PRIV_ENABLE_SINGLE_PAUSE	BIT(0)
+
+static int emac_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct emac_adapter *adpt = netdev_priv(netdev);
+
+	adpt->single_pause_mode = !!(flags & EMAC_PRIV_ENABLE_SINGLE_PAUSE);
+
+	if (netif_running(netdev))
+		return emac_reinit_locked(adpt);
+
+	return 0;
+}
+
+static u32 emac_get_priv_flags(struct net_device *netdev)
+{
+	struct emac_adapter *adpt = netdev_priv(netdev);
+
+	return adpt->single_pause_mode ? EMAC_PRIV_ENABLE_SINGLE_PAUSE : 0;
+}
+
 static const struct ethtool_ops emac_ethtool_ops = {
 	.get_link_ksettings = phy_ethtool_get_link_ksettings,
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
@@ -253,6 +280,9 @@ static const struct ethtool_ops emac_ethtool_ops = {
 
 	.get_regs_len    = emac_get_regs_len,
 	.get_regs        = emac_get_regs,
+
+	.set_priv_flags = emac_set_priv_flags,
+	.get_priv_flags = emac_get_priv_flags,
 };
 
 void emac_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index bcd4708b3745..0ea3ca09c689 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -551,6 +551,28 @@ static void emac_mac_start(struct emac_adapter *adpt)
 	mac &= ~(HUGEN | VLAN_STRIP | TPAUSE | SIMR | HUGE | MULTI_ALL |
 		 DEBUG_MODE | SINGLE_PAUSE_MODE);
 
+	/* Enable single-pause-frame mode if requested.
+	 *
+	 * If enabled, the EMAC will send a single pause frame when the RX
+	 * queue is full.  This normally leads to packet loss because
+	 * the pause frame disables the remote MAC only for 33ms (the quanta),
+	 * and then the remote MAC continues sending packets even though
+	 * the RX queue is still full.
+	 *
+	 * If disabled, the EMAC sends a pause frame every 31ms until the RX
+	 * queue is no longer full.  Normally, this is the preferred
+	 * method of operation.  However, when the system is hung (e.g.
+	 * cores are halted), the EMAC interrupt handler is never called
+	 * and so the RX queue fills up quickly and stays full.  The resuling
+	 * non-stop "flood" of pause frames sometimes has the effect of
+	 * disabling nearby switches.  In some cases, other nearby switches
+	 * are also affected, shutting down the entire network.
+	 *
+	 * The user can enable or disable single-pause-frame mode
+	 * via ethtool.
+	 */
+	mac |= adpt->single_pause_mode ? SINGLE_PAUSE_MODE : 0;
+
 	writel_relaxed(csr1, adpt->csr + EMAC_EMAC_WRAPPER_CSR1);
 
 	writel_relaxed(mac, adpt->base + EMAC_MAC_CTRL);
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 60850bfa3d32..759543512117 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -443,6 +443,9 @@ static void emac_init_adapter(struct emac_adapter *adpt)
 
 	/* default to automatic flow control */
 	adpt->automatic = true;
+
+	/* Disable single-pause-frame mode by default */
+	adpt->single_pause_mode = false;
 }
 
 /* Get the clock */
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.h b/drivers/net/ethernet/qualcomm/emac/emac.h
index 8ee4ec6aef2e..d7c9f44209d4 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.h
+++ b/drivers/net/ethernet/qualcomm/emac/emac.h
@@ -363,6 +363,9 @@ struct emac_adapter {
 	bool				tx_flow_control;
 	bool				rx_flow_control;
 
+	/* True == use single-pause-frame mode. */
+	bool				single_pause_mode;
+
 	/* Ring parameter */
 	u8				tpd_burst;
 	u8				rfd_burst;
-- 
cgit 


From 19cab8872692960535aa6d12e3a295ac51d1a648 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 20 Sep 2017 15:52:13 -0700
Subject: net: ethtool: Add back transceiver type

Commit 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API")
deprecated the ethtool_cmd::transceiver field, which was fine in
premise, except that the PHY library was actually using it to report the
type of transceiver: internal or external.

Use the first word of the reserved field to put this __u8 transceiver
field back in. It is made read-only, and we don't expect the
ETHTOOL_xLINKSETTINGS API to be doing anything with this anyway, so this
is mostly for the legacy path where we do:

ethtool_get_settings()
-> dev->ethtool_ops->get_link_ksettings()
   -> convert_link_ksettings_to_legacy_settings()

to have no information loss compared to the legacy get_settings API.

Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 6 +++++-
 net/core/ethtool.c           | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9c041dae8e2c..5bd1b1de4ea0 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1753,6 +1753,8 @@ enum ethtool_reset_flags {
  *	%ethtool_link_mode_bit_indices for the link modes, and other
  *	link features that the link partner advertised through
  *	autonegotiation; 0 if unknown or not applicable.  Read-only.
+ * @transceiver: Used to distinguish different possible PHY types,
+ *	reported consistently by PHYLIB.  Read-only.
  *
  * If autonegotiation is disabled, the speed and @duplex represent the
  * fixed link mode and are writable if the driver supports multiple
@@ -1804,7 +1806,9 @@ struct ethtool_link_settings {
 	__u8	eth_tp_mdix;
 	__u8	eth_tp_mdix_ctrl;
 	__s8	link_mode_masks_nwords;
-	__u32	reserved[8];
+	__u8	transceiver;
+	__u8	reserved1[3];
+	__u32	reserved[7];
 	__u32	link_mode_masks[0];
 	/* layout of link_mode_masks fields:
 	 * __u32 map_supported[link_mode_masks_nwords];
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6a582ae4c5d9..3228411ada0f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -525,6 +525,8 @@ convert_link_ksettings_to_legacy_settings(
 		= link_ksettings->base.eth_tp_mdix;
 	legacy_settings->eth_tp_mdix_ctrl
 		= link_ksettings->base.eth_tp_mdix_ctrl;
+	legacy_settings->transceiver
+		= link_ksettings->base.transceiver;
 	return retval;
 }
 
-- 
cgit 


From ceb628134a75564d7bfa8e4ef902e6e588339e11 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 20 Sep 2017 15:52:14 -0700
Subject: net: phy: Keep reporting transceiver type

With commit 2d55173e71b0 ("phy: add generic function to support
ksetting support"), we lost the ability to report the transceiver type
like we used to. Now that we have added back the transceiver type to
ethtool_link_settings, we can report it back like we used to and have no
loss of information.

Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API")
Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index e842d2cd1ee7..2b1e67bc1e73 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -373,7 +373,8 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev,
 		cmd->base.port = PORT_BNC;
 	else
 		cmd->base.port = PORT_MII;
-
+	cmd->base.transceiver = phy_is_internal(phydev) ?
+				XCVR_INTERNAL : XCVR_EXTERNAL;
 	cmd->base.phy_address = phydev->mdio.addr;
 	cmd->base.autoneg = phydev->autoneg;
 	cmd->base.eth_tp_mdix_ctrl = phydev->mdix_ctrl;
-- 
cgit 


From 8a7ffeb795f864dd605b579c05934cba95dc8ad3 Mon Sep 17 00:00:00 2001
From: Nisar Sayed <Nisar.Sayed@microchip.com>
Date: Thu, 21 Sep 2017 02:36:36 +0530
Subject: lan78xx: Fix for eeprom read/write when device auto suspend

Fix for eeprom read/write when device auto suspend

Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Signed-off-by: Nisar Sayed <Nisar.Sayed@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index b99a7fb09f8e..fcf85ae37435 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1265,30 +1265,46 @@ static int lan78xx_ethtool_get_eeprom(struct net_device *netdev,
 				      struct ethtool_eeprom *ee, u8 *data)
 {
 	struct lan78xx_net *dev = netdev_priv(netdev);
+	int ret;
+
+	ret = usb_autopm_get_interface(dev->intf);
+	if (ret)
+		return ret;
 
 	ee->magic = LAN78XX_EEPROM_MAGIC;
 
-	return lan78xx_read_raw_eeprom(dev, ee->offset, ee->len, data);
+	ret = lan78xx_read_raw_eeprom(dev, ee->offset, ee->len, data);
+
+	usb_autopm_put_interface(dev->intf);
+
+	return ret;
 }
 
 static int lan78xx_ethtool_set_eeprom(struct net_device *netdev,
 				      struct ethtool_eeprom *ee, u8 *data)
 {
 	struct lan78xx_net *dev = netdev_priv(netdev);
+	int ret;
+
+	ret = usb_autopm_get_interface(dev->intf);
+	if (ret)
+		return ret;
 
 	/* Allow entire eeprom update only */
 	if ((ee->magic == LAN78XX_EEPROM_MAGIC) &&
 	    (ee->offset == 0) &&
 	    (ee->len == 512) &&
 	    (data[0] == EEPROM_INDICATOR))
-		return lan78xx_write_raw_eeprom(dev, ee->offset, ee->len, data);
+		ret = lan78xx_write_raw_eeprom(dev, ee->offset, ee->len, data);
 	else if ((ee->magic == LAN78XX_OTP_MAGIC) &&
 		 (ee->offset == 0) &&
 		 (ee->len == 512) &&
 		 (data[0] == OTP_INDICATOR_1))
-		return lan78xx_write_raw_otp(dev, ee->offset, ee->len, data);
+		ret = lan78xx_write_raw_otp(dev, ee->offset, ee->len, data);
 
-	return -EINVAL;
+	usb_autopm_put_interface(dev->intf);
+
+	return ret;
 }
 
 static void lan78xx_get_strings(struct net_device *netdev, u32 stringset,
-- 
cgit 


From c077682282dd34c75e8477c22dffe7c9aebc6e98 Mon Sep 17 00:00:00 2001
From: Nisar Sayed <Nisar.Sayed@microchip.com>
Date: Thu, 21 Sep 2017 02:36:37 +0530
Subject: lan78xx: Allow EEPROM write for less than MAX_EEPROM_SIZE

Allow EEPROM write for less than MAX_EEPROM_SIZE

Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Signed-off-by: Nisar Sayed <Nisar.Sayed@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index fcf85ae37435..f8c63eec8353 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1290,11 +1290,10 @@ static int lan78xx_ethtool_set_eeprom(struct net_device *netdev,
 	if (ret)
 		return ret;
 
-	/* Allow entire eeprom update only */
-	if ((ee->magic == LAN78XX_EEPROM_MAGIC) &&
-	    (ee->offset == 0) &&
-	    (ee->len == 512) &&
-	    (data[0] == EEPROM_INDICATOR))
+	/* Invalid EEPROM_INDICATOR at offset zero will result in a failure
+	 * to load data from EEPROM
+	 */
+	if (ee->magic == LAN78XX_EEPROM_MAGIC)
 		ret = lan78xx_write_raw_eeprom(dev, ee->offset, ee->len, data);
 	else if ((ee->magic == LAN78XX_OTP_MAGIC) &&
 		 (ee->offset == 0) &&
-- 
cgit 


From e365280521029c9366bab038915274ddaa1b7195 Mon Sep 17 00:00:00 2001
From: Nisar Sayed <Nisar.Sayed@microchip.com>
Date: Thu, 21 Sep 2017 02:36:38 +0530
Subject: lan78xx: Use default values loaded from EEPROM/OTP after reset

Use default value of auto duplex and auto speed values loaded
from EEPROM/OTP after reset. The LAN78xx allows platform
configurations to be loaded from EEPROM/OTP.
Ex: When external phy is connected, the MAC can be configured to
have correct auto speed, auto duplex, auto polarity configured
from the EEPROM/OTP.

Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Signed-off-by: Nisar Sayed <Nisar.Sayed@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index f8c63eec8353..0161f77641fa 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2449,7 +2449,6 @@ static int lan78xx_reset(struct lan78xx_net *dev)
 	/* LAN7801 only has RGMII mode */
 	if (dev->chipid == ID_REV_CHIP_ID_7801_)
 		buf &= ~MAC_CR_GMII_EN_;
-	buf |= MAC_CR_AUTO_DUPLEX_ | MAC_CR_AUTO_SPEED_;
 	ret = lan78xx_write_reg(dev, MAC_CR, buf);
 
 	ret = lan78xx_read_reg(dev, MAC_TX, &buf);
-- 
cgit 


From f0ef1f4f2b772c0a1c8b35a6ae3edf974cc110dd Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Thu, 21 Sep 2017 08:24:27 +0200
Subject: net: stmmac: Cocci spatch "of_table"

Make sure (of/i2c/platform)_device_id tables are NULL terminated.
Found by coccinelle spatch "misc/of_table.cocci"

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index a366b3747eeb..8a280b48e3a9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -315,6 +315,7 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat,
 		{ .compatible = "allwinner,sun8i-h3-emac" },
 		{ .compatible = "allwinner,sun8i-v3s-emac" },
 		{ .compatible = "allwinner,sun50i-a64-emac" },
+		{},
 	};
 
 	/* If phy-handle property is passed from DT, use it as the PHY */
-- 
cgit 


From 09579ac803a3638344b8544b5940793d5358673e Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:26 +0200
Subject: net/smc: add missing dev_put

In the infiniband part, SMC currently uses get_netdev which calls
dev_hold on the returned net device. However, the SMC code never calls
dev_put on that net device resulting in a wrong reference count.

This patch adds a dev_put after the usage of the net device to fix the
issue.

Signed-off-by: Hans Wippel <hwippel@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_ib.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 547e0e113b17..0b5852299158 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -380,6 +380,7 @@ static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
 	ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport);
 	if (ndev) {
 		memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN);
+		dev_put(ndev);
 	} else if (!rc) {
 		memcpy(&smcibdev->mac[ibport - 1][0],
 		       &smcibdev->gid[ibport - 1].raw[8], 3);
-- 
cgit 


From 846e344eb7229018457d6d6fc1ab0cc0a167692f Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:27 +0200
Subject: net/smc: add receive timeout check

The SMC receive function currently lacks a timeout check under the
condition that no data were received and no data are available. This
patch adds such a check.

Signed-off-by: Hans Wippel <hwippel@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_rx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index b17a333e9bb0..3e631ae4b6b6 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -148,6 +148,8 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
 				read_done = sock_intr_errno(timeo);
 				break;
 			}
+			if (!timeo)
+				return -EAGAIN;
 		}
 
 		if (!atomic_read(&conn->bytes_to_rcv)) {
-- 
cgit 


From 731b008560e6dfaf5fb297543f17bbe9bb868f3c Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:28 +0200
Subject: net/smc: take RCU read lock for routing cache lookup

smc_netinfo_by_tcpsk() looks up the routing cache. Such a lookup requires
protection by an RCU read lock.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 8c6d24b2995d..2e8d2dabac0c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -282,6 +282,7 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
 			 __be32 *subnet, u8 *prefix_len)
 {
 	struct dst_entry *dst = sk_dst_get(clcsock->sk);
+	struct in_device *in_dev;
 	struct sockaddr_in addr;
 	int rc = -ENOENT;
 	int len;
@@ -298,14 +299,17 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
 	/* get address to which the internal TCP socket is bound */
 	kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
 	/* analyze IPv4 specific data of net_device belonging to TCP socket */
-	for_ifa(dst->dev->ip_ptr) {
-		if (ifa->ifa_address != addr.sin_addr.s_addr)
+	rcu_read_lock();
+	in_dev = __in_dev_get_rcu(dst->dev);
+	for_ifa(in_dev) {
+		if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
 			continue;
 		*prefix_len = inet_mask_len(ifa->ifa_mask);
 		*subnet = ifa->ifa_address & ifa->ifa_mask;
 		rc = 0;
 		break;
-	} endfor_ifa(dst->dev->ip_ptr);
+	} endfor_ifa(in_dev);
+	rcu_read_unlock();
 
 out_rel:
 	dst_release(dst);
-- 
cgit 


From a6832c3acdb2ceb099ec3c385777fbaa6d5a5fd6 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:29 +0200
Subject: net/smc: adjust net_device refcount

smc_pnet_fill_entry() uses dev_get_by_name() adding a refcount to ndev.
The following smc_pnet_enter() has to reduce the refcount if the entry
to be added exists already in the pnet table.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_pnet.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 78f7af28ae4f..31f8453c25c5 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -181,8 +181,10 @@ static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
 			     sizeof(new_pnetelem->ndev->name)) ||
 		    smc_pnet_same_ibname(pnetelem,
 					 new_pnetelem->smcibdev->ibdev->name,
-					 new_pnetelem->ib_port))
+					 new_pnetelem->ib_port)) {
+			dev_put(pnetelem->ndev);
 			goto found;
+		}
 	}
 	list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
 	rc = 0;
-- 
cgit 


From 8301fa44b41b1ca46a0547809eb173112559dc51 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:30 +0200
Subject: net/smc: adapt send request completion notification

The solicited flag is meaningful for the receive completion queue.
Ask for next work completion of any type on the send queue.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_wr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index ab56bda66783..525d91e0d57e 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -244,7 +244,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
 	int rc;
 
 	ib_req_notify_cq(link->smcibdev->roce_cq_send,
-			 IB_CQ_SOLICITED_MASK | IB_CQ_REPORT_MISSED_EVENTS);
+			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
 	pend = container_of(priv, struct smc_wr_tx_pend, priv);
 	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
 			  &failed_wr);
-- 
cgit 


From 5bc11ddbdf7fc6681db5c3f9a92cdee0f19cee1e Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:31 +0200
Subject: net/smc: longer delay for client link group removal

Client link group creation always follows the server linkgroup creation.
If peer creates a new server link group, client has to create a new
client link group. If peer reuses a server link group for a new
connection, client has to reuse its client link group as well. This
patch introduces a longer delay for client link group removal to make
sure this link group still exists, once the peer decides to reuse a
server link group. This avoids out-of-sync conditions for link groups.
If already scheduled, modify the delay.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 1a16d51e2330..20b66e79c5d6 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -25,8 +25,9 @@
 #include "smc_cdc.h"
 #include "smc_close.h"
 
-#define SMC_LGR_NUM_INCR	256
-#define SMC_LGR_FREE_DELAY	(600 * HZ)
+#define SMC_LGR_NUM_INCR		256
+#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
+#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10)
 
 static u32 smc_lgr_num;			/* unique link group number */
 
@@ -107,8 +108,15 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
 		__smc_lgr_unregister_conn(conn);
 	}
 	write_unlock_bh(&lgr->conns_lock);
-	if (reduced && !lgr->conns_num)
-		schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY);
+	if (!reduced || lgr->conns_num)
+		return;
+	/* client link group creation always follows the server link group
+	 * creation. For client use a somewhat higher removal delay time,
+	 * otherwise there is a risk of out-of-sync link groups.
+	 */
+	mod_delayed_work(system_wq, &lgr->free_work,
+			 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
+						 SMC_LGR_FREE_DELAY_SERV);
 }
 
 static void smc_lgr_free_work(struct work_struct *work)
-- 
cgit 


From bfbedfd38378c1ad9df84469403d69c17b074b66 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:32 +0200
Subject: net/smc: terminate link group if out-of-sync is received

An out-of-sync condition can just be detected by the client.
If the server receives a CLC DECLINE message indicating an out-of-sync
condition for the link groups, the server must clean up the out-of-sync
link group.
There is no need for an extra third parameter in smc_clc_send_decline().

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c  |  6 ++----
 net/smc/smc_clc.c | 10 +++++-----
 net/smc/smc_clc.h |  3 +--
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 2e8d2dabac0c..745f145d4c4d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -513,7 +513,7 @@ decline_rdma:
 	/* RDMA setup failed, switch back to TCP */
 	smc->use_fallback = true;
 	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
-		rc = smc_clc_send_decline(smc, reason_code, 0);
+		rc = smc_clc_send_decline(smc, reason_code);
 		if (rc < sizeof(struct smc_clc_msg_decline))
 			goto out_err;
 	}
@@ -808,8 +808,6 @@ static void smc_listen_work(struct work_struct *work)
 		rc = local_contact;
 		if (rc == -ENOMEM)
 			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
-		else if (rc == -ENOLINK)
-			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
 		goto decline_rdma;
 	}
 	link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
@@ -903,7 +901,7 @@ decline_rdma:
 	smc_conn_free(&new_smc->conn);
 	new_smc->use_fallback = true;
 	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
-		rc = smc_clc_send_decline(new_smc, reason_code, 0);
+		rc = smc_clc_send_decline(new_smc, reason_code);
 		if (rc < sizeof(struct smc_clc_msg_decline))
 			goto out_err;
 	}
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 3934913ab835..b7dd2743fb5c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -95,9 +95,10 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 	}
 	if (clcm->type == SMC_CLC_DECLINE) {
 		reason_code = SMC_CLC_DECL_REPLY;
-		if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis)
-			== SMC_CLC_DECL_SYNCERR)
+		if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
 			smc->conn.lgr->sync_err = true;
+			smc_lgr_terminate(smc->conn.lgr);
+		}
 	}
 
 out:
@@ -105,8 +106,7 @@ out:
 }
 
 /* send CLC DECLINE message across internal TCP socket */
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
-			 u8 out_of_sync)
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
 {
 	struct smc_clc_msg_decline dclc;
 	struct msghdr msg;
@@ -118,7 +118,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
 	dclc.hdr.type = SMC_CLC_DECLINE;
 	dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
 	dclc.hdr.version = SMC_CLC_V1;
-	dclc.hdr.flag = out_of_sync ? 1 : 0;
+	dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
 	memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
 	dclc.peer_diagnosis = htonl(peer_diag_info);
 	memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 13db8ce177c9..1c55414041d4 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -106,8 +106,7 @@ struct smc_ib_device;
 
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 		     u8 expected_type);
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
-			 u8 out_of_sync);
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
 int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev,
 			  u8 ibport);
 int smc_clc_send_confirm(struct smc_sock *smc);
-- 
cgit 


From 18e537cd58e8d6932719bfa79cb96a1fbc639199 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:33 +0200
Subject: net/smc: introduce a delay

The number of outstanding work requests is limited. If all work
requests are in use, tx processing is postponed to another scheduling
of the tx worker. Switch to a delayed worker to have a gap for tx
completion queue events before the next retry.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc.h       |  2 +-
 net/smc/smc_close.c | 12 +++++++-----
 net/smc/smc_tx.c    | 12 ++++++++----
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/net/smc/smc.h b/net/smc/smc.h
index 6e44313e4467..0ccd6fa387ad 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -149,7 +149,7 @@ struct smc_connection {
 	atomic_t		sndbuf_space;	/* remaining space in sndbuf */
 	u16			tx_cdc_seq;	/* sequence # for CDC send */
 	spinlock_t		send_lock;	/* protect wr_sends */
-	struct work_struct	tx_work;	/* retry of smc_cdc_msg_send */
+	struct delayed_work	tx_work;	/* retry of smc_cdc_msg_send */
 
 	struct smc_host_cdc_msg	local_rx_ctrl;	/* filled during event_handl.
 						 * .prod cf. TCP rcv_nxt
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 3c2e166b5d22..5201bc103bd8 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -208,7 +208,7 @@ again:
 	case SMC_ACTIVE:
 		smc_close_stream_wait(smc, timeout);
 		release_sock(sk);
-		cancel_work_sync(&conn->tx_work);
+		cancel_delayed_work_sync(&conn->tx_work);
 		lock_sock(sk);
 		if (sk->sk_state == SMC_ACTIVE) {
 			/* send close request */
@@ -234,7 +234,7 @@ again:
 		if (!smc_cdc_rxed_any_close(conn))
 			smc_close_stream_wait(smc, timeout);
 		release_sock(sk);
-		cancel_work_sync(&conn->tx_work);
+		cancel_delayed_work_sync(&conn->tx_work);
 		lock_sock(sk);
 		if (sk->sk_err != ECONNABORTED) {
 			/* confirm close from peer */
@@ -263,7 +263,9 @@ again:
 		/* peer sending PeerConnectionClosed will cause transition */
 		break;
 	case SMC_PROCESSABORT:
-		cancel_work_sync(&conn->tx_work);
+		release_sock(sk);
+		cancel_delayed_work_sync(&conn->tx_work);
+		lock_sock(sk);
 		smc_close_abort(conn);
 		sk->sk_state = SMC_CLOSED;
 		smc_close_wait_tx_pends(smc);
@@ -425,7 +427,7 @@ again:
 	case SMC_ACTIVE:
 		smc_close_stream_wait(smc, timeout);
 		release_sock(sk);
-		cancel_work_sync(&conn->tx_work);
+		cancel_delayed_work_sync(&conn->tx_work);
 		lock_sock(sk);
 		/* send close wr request */
 		rc = smc_close_wr(conn);
@@ -439,7 +441,7 @@ again:
 		if (!smc_cdc_rxed_any_close(conn))
 			smc_close_stream_wait(smc, timeout);
 		release_sock(sk);
-		cancel_work_sync(&conn->tx_work);
+		cancel_delayed_work_sync(&conn->tx_work);
 		lock_sock(sk);
 		/* confirm close from peer */
 		rc = smc_close_wr(conn);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 3c656beb8820..3866573288dd 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -24,6 +24,8 @@
 #include "smc_cdc.h"
 #include "smc_tx.h"
 
+#define SMC_TX_WORK_DELAY	HZ
+
 /***************************** sndbuf producer *******************************/
 
 /* callback implementation for sk.sk_write_space()
@@ -406,7 +408,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
 				goto out_unlock;
 			}
 			rc = 0;
-			schedule_work(&conn->tx_work);
+			schedule_delayed_work(&conn->tx_work,
+					      SMC_TX_WORK_DELAY);
 		}
 		goto out_unlock;
 	}
@@ -430,7 +433,7 @@ out_unlock:
  */
 static void smc_tx_work(struct work_struct *work)
 {
-	struct smc_connection *conn = container_of(work,
+	struct smc_connection *conn = container_of(to_delayed_work(work),
 						   struct smc_connection,
 						   tx_work);
 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
@@ -468,7 +471,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
 		if (!rc)
 			rc = smc_cdc_msg_send(conn, wr_buf, pend);
 		if (rc < 0) {
-			schedule_work(&conn->tx_work);
+			schedule_delayed_work(&conn->tx_work,
+					      SMC_TX_WORK_DELAY);
 			return;
 		}
 		smc_curs_write(&conn->rx_curs_confirmed,
@@ -487,6 +491,6 @@ void smc_tx_consumer_update(struct smc_connection *conn)
 void smc_tx_init(struct smc_sock *smc)
 {
 	smc->sk.sk_write_space = smc_tx_write_space;
-	INIT_WORK(&smc->conn.tx_work, smc_tx_work);
+	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
 	spin_lock_init(&smc->conn.send_lock);
 }
-- 
cgit 


From 8c96feeeb39ba0b89c6121f71e8f7aa2a4d46671 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:34 +0200
Subject: net/smc: no close wait in case of process shut down

Usually socket closing is delayed if there is still data available in
the send buffer to be transmitted. If a process is killed, the delay
should be avoided.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_close.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 5201bc103bd8..f0d16fb825f7 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -174,15 +174,15 @@ int smc_close_active(struct smc_sock *smc)
 {
 	struct smc_cdc_conn_state_flags *txflags =
 		&smc->conn.local_tx_ctrl.conn_state_flags;
-	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
 	struct smc_connection *conn = &smc->conn;
 	struct sock *sk = &smc->sk;
 	int old_state;
+	long timeout;
 	int rc = 0;
 
-	if (sock_flag(sk, SOCK_LINGER) &&
-	    !(current->flags & PF_EXITING))
-		timeout = sk->sk_lingertime;
+	timeout = current->flags & PF_EXITING ?
+		  0 : sock_flag(sk, SOCK_LINGER) ?
+		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
 
 again:
 	old_state = sk->sk_state;
@@ -413,13 +413,14 @@ void smc_close_sock_put_work(struct work_struct *work)
 int smc_close_shutdown_write(struct smc_sock *smc)
 {
 	struct smc_connection *conn = &smc->conn;
-	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
 	struct sock *sk = &smc->sk;
 	int old_state;
+	long timeout;
 	int rc = 0;
 
-	if (sock_flag(sk, SOCK_LINGER))
-		timeout = sk->sk_lingertime;
+	timeout = current->flags & PF_EXITING ?
+		  0 : sock_flag(sk, SOCK_LINGER) ?
+		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
 
 again:
 	old_state = sk->sk_state;
-- 
cgit 


From e8b95728f724797f958912fd9b765a695595d3a6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 1 Sep 2017 17:13:43 -0700
Subject: Input: uinput - avoid FF flush when destroying device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Normally, when input device supporting force feedback effects is being
destroyed, we try to "flush" currently playing effects, so that the
physical device does not continue vibrating (or executing other effects).
Unfortunately this does not work well for uinput as flushing of the effects
deadlocks with the destroy action:

- if device is being destroyed because the file descriptor is being closed,
  then there is noone to even service FF requests;

- if device is being destroyed because userspace sent UI_DEV_DESTROY,
  while theoretically it could be possible to service FF requests,
  userspace is unlikely to do so (they'd need to make sure FF handling
  happens on a separate thread) even if kernel solves the issue with FF
  ioctls deadlocking with UI_DEV_DESTROY ioctl on udev->mutex.

To avoid lockups like the one below, let's install a custom input device
flush handler, and avoid trying to flush force feedback effects when we
destroying the device, and instead rely on uinput to shut off the device
properly.

NMI watchdog: Watchdog detected hard LOCKUP on cpu 3
...
 <<EOE>>  [<ffffffff817a0307>] _raw_spin_lock_irqsave+0x37/0x40
 [<ffffffff810e633d>] complete+0x1d/0x50
 [<ffffffffa00ba08c>] uinput_request_done+0x3c/0x40 [uinput]
 [<ffffffffa00ba587>] uinput_request_submit.part.7+0x47/0xb0 [uinput]
 [<ffffffffa00bb62b>] uinput_dev_erase_effect+0x5b/0x76 [uinput]
 [<ffffffff815d91ad>] erase_effect+0xad/0xf0
 [<ffffffff815d929d>] flush_effects+0x4d/0x90
 [<ffffffff815d4cc0>] input_flush_device+0x40/0x60
 [<ffffffff815daf1c>] evdev_cleanup+0xac/0xc0
 [<ffffffff815daf5b>] evdev_disconnect+0x2b/0x60
 [<ffffffff815d74ac>] __input_unregister_device+0xac/0x150
 [<ffffffff815d75f7>] input_unregister_device+0x47/0x70
 [<ffffffffa00bac45>] uinput_destroy_device+0xb5/0xc0 [uinput]
 [<ffffffffa00bb2de>] uinput_ioctl_handler.isra.9+0x65e/0x740 [uinput]
 [<ffffffff811231ab>] ? do_futex+0x12b/0xad0
 [<ffffffffa00bb3f8>] uinput_ioctl+0x18/0x20 [uinput]
 [<ffffffff81241248>] do_vfs_ioctl+0x298/0x480
 [<ffffffff81337553>] ? security_file_ioctl+0x43/0x60
 [<ffffffff812414a9>] SyS_ioctl+0x79/0x90
 [<ffffffff817a04ee>] entry_SYSCALL_64_fastpath+0x12/0x71

Reported-by: Rodrigo Rivas Costa <rodrigorivascosta@gmail.com>
Reported-by: Clément VUCHENER <clement.vuchener@gmail.com>
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=193741
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/ff-core.c     | 13 ++++++++++---
 drivers/input/misc/uinput.c | 18 ++++++++++++++++++
 include/linux/input.h       |  1 +
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/input/ff-core.c b/drivers/input/ff-core.c
index 8f2042432c85..66a46c84e28f 100644
--- a/drivers/input/ff-core.c
+++ b/drivers/input/ff-core.c
@@ -237,9 +237,15 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file)
 EXPORT_SYMBOL_GPL(input_ff_erase);
 
 /*
- * flush_effects - erase all effects owned by a file handle
+ * input_ff_flush - erase all effects owned by a file handle
+ * @dev: input device to erase effect from
+ * @file: purported owner of the effects
+ *
+ * This function erases all force-feedback effects associated with
+ * the given owner from specified device. Note that @file may be %NULL,
+ * in which case all effects will be erased.
  */
-static int flush_effects(struct input_dev *dev, struct file *file)
+int input_ff_flush(struct input_dev *dev, struct file *file)
 {
 	struct ff_device *ff = dev->ff;
 	int i;
@@ -255,6 +261,7 @@ static int flush_effects(struct input_dev *dev, struct file *file)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(input_ff_flush);
 
 /**
  * input_ff_event() - generic handler for force-feedback events
@@ -343,7 +350,7 @@ int input_ff_create(struct input_dev *dev, unsigned int max_effects)
 	mutex_init(&ff->mutex);
 
 	dev->ff = ff;
-	dev->flush = flush_effects;
+	dev->flush = input_ff_flush;
 	dev->event = input_ff_event;
 	__set_bit(EV_FF, dev->evbit);
 
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 022be0e22eba..2cff40be8860 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -230,6 +230,18 @@ static int uinput_dev_erase_effect(struct input_dev *dev, int effect_id)
 	return uinput_request_submit(udev, &request);
 }
 
+static int uinput_dev_flush(struct input_dev *dev, struct file *file)
+{
+	/*
+	 * If we are called with file == NULL that means we are tearing
+	 * down the device, and therefore we can not handle FF erase
+	 * requests: either we are handling UI_DEV_DESTROY (and holding
+	 * the udev->mutex), or the file descriptor is closed and there is
+	 * nobody on the other side anymore.
+	 */
+	return file ? input_ff_flush(dev, file) : 0;
+}
+
 static void uinput_destroy_device(struct uinput_device *udev)
 {
 	const char *name, *phys;
@@ -297,6 +309,12 @@ static int uinput_create_device(struct uinput_device *udev)
 		dev->ff->playback = uinput_dev_playback;
 		dev->ff->set_gain = uinput_dev_set_gain;
 		dev->ff->set_autocenter = uinput_dev_set_autocenter;
+		/*
+		 * The standard input_ff_flush() implementation does
+		 * not quite work for uinput as we can't reasonably
+		 * handle FF requests during device teardown.
+		 */
+		dev->flush = uinput_dev_flush;
 	}
 
 	error = input_register_device(udev->dev);
diff --git a/include/linux/input.h b/include/linux/input.h
index a65e3b24fb18..fb5e23c7ed98 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -529,6 +529,7 @@ int input_ff_event(struct input_dev *dev, unsigned int type, unsigned int code,
 
 int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct file *file);
 int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file);
+int input_ff_flush(struct input_dev *dev, struct file *file);
 
 int input_ff_create_memless(struct input_dev *dev, void *data,
 		int (*play_effect)(struct input_dev *, void *, struct ff_effect *));
-- 
cgit 


From 6b4877c7bdc6ae39ce03716df7caeecf204697eb Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 6 Sep 2017 16:22:59 -0700
Subject: Input: uinput - avoid crash when sending FF request to device going
 away

If FF request comes in while uinput device is going away,
uinput_request_send() will fail with -ENODEV, and uinput_request_submit()
will attempt to mark the slot as unused by calling uinput_request_done().
Unfortunately in this case we haven't initialized request->done completion
yet, and we get a crash:

[   39.402036] BUG: spinlock bad magic on CPU#1, fftest/3108
[   39.402046]  lock: 0xffff88006a93bb00, .magic: 00000000, .owner: /39, .owner_cpu: 1217155072
[   39.402055] CPU: 1 PID: 3108 Comm: fftest Tainted: G        W 4.13.0+ #15
[   39.402059] Hardware name: LENOVO 20HQS0EG02/20HQS0EG02, BIOS N1MET37W (1.22 ) 07/04/2017
[   39.402064]  0000000000000086 f0fad82f3ceaa120 ffff88006a93b9a0 ffffffff9de941bb
[   39.402077]  ffff88026df8ae00 ffff88006a93bb00 ffff88006a93b9c0 ffffffff9dca62b7
[   39.402088]  ffff88006a93bb00 ffff88006a93baf8 ffff88006a93b9e0 ffffffff9dca62e7
[   39.402099] Call Trace:
[   39.402112]  [<ffffffff9de941bb>] dump_stack+0x4d/0x63
[   39.402123]  [<ffffffff9dca62b7>] spin_dump+0x97/0x9c
[   39.402130]  [<ffffffff9dca62e7>] spin_bug+0x2b/0x2d
[   39.402138]  [<ffffffff9dca6373>] do_raw_spin_lock+0x28/0xfd
[   39.402147]  [<ffffffff9e3055cd>] _raw_spin_lock_irqsave+0x19/0x1f
[   39.402154]  [<ffffffff9dca05b7>] complete+0x1d/0x48
[   39.402162]  [<ffffffffc04f30af>] 0xffffffffc04f30af
[   39.402167]  [<ffffffffc04f468c>] 0xffffffffc04f468c
[   39.402177]  [<ffffffff9dd59c16>] ? __slab_free+0x22f/0x359
[   39.402184]  [<ffffffff9dcc13e9>] ? tk_clock_read+0xc/0xe
[   39.402189]  [<ffffffffc04f471f>] 0xffffffffc04f471f
[   39.402195]  [<ffffffff9dc9ffe5>] ? __wake_up+0x44/0x4b
[   39.402200]  [<ffffffffc04f3240>] ? 0xffffffffc04f3240
[   39.402207]  [<ffffffff9e0f57f3>] erase_effect+0xa1/0xd2
[   39.402214]  [<ffffffff9e0f58c6>] input_ff_flush+0x43/0x5c
[   39.402219]  [<ffffffffc04f32ad>] 0xffffffffc04f32ad
[   39.402227]  [<ffffffff9e0f174f>] input_flush_device+0x3d/0x51
[   39.402234]  [<ffffffff9e0f69ae>] evdev_flush+0x49/0x5c
[   39.402243]  [<ffffffff9dd62d6e>] filp_close+0x3f/0x65
[   39.402253]  [<ffffffff9dd7dcf7>] put_files_struct+0x66/0xc1
[   39.402261]  [<ffffffff9dd7ddeb>] exit_files+0x47/0x4e
[   39.402270]  [<ffffffff9dc6b329>] do_exit+0x483/0x969
[   39.402278]  [<ffffffff9dc73211>] ? recalc_sigpending_tsk+0x3d/0x44
[   39.402285]  [<ffffffff9dc6c7a2>] do_group_exit+0x42/0xb0
[   39.402293]  [<ffffffff9dc767e1>] get_signal+0x58d/0x5bf
[   39.402300]  [<ffffffff9dc03701>] do_signal+0x37/0x53e
[   39.402307]  [<ffffffff9e0f8401>] ? evdev_ioctl_handler+0xac8/0xb04
[   39.402314]  [<ffffffff9e0f8464>] ? evdev_ioctl+0x10/0x12
[   39.402321]  [<ffffffff9dd74cfa>] ? do_vfs_ioctl+0x42e/0x501
[   39.402328]  [<ffffffff9dc0170e>] prepare_exit_to_usermode+0x66/0x90
[   39.402333]  [<ffffffff9dc0181b>] syscall_return_slowpath+0xe3/0xec
[   39.402339]  [<ffffffff9e305b7b>] int_ret_from_sys_call+0x25/0x8f

While we could solve this by simply initializing the completion earlier, we
are better off rearranging the code a bit so we avoid calling complete() on
requests that we did not send out. This patch consolidates marking request
slots as free in one place (in uinput_request_submit(), the same place
where we acquire them) and having everyone else simply signal completion
of the requests.

Fixes: 00ce756ce53a ("Input: uinput - mark failed submission requests as free")
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/uinput.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 2cff40be8860..443151de90c6 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -98,14 +98,15 @@ static int uinput_request_reserve_slot(struct uinput_device *udev,
 					uinput_request_alloc_id(udev, request));
 }
 
-static void uinput_request_done(struct uinput_device *udev,
-				struct uinput_request *request)
+static void uinput_request_release_slot(struct uinput_device *udev,
+					unsigned int id)
 {
 	/* Mark slot as available */
-	udev->requests[request->id] = NULL;
-	wake_up(&udev->requests_waitq);
+	spin_lock(&udev->requests_lock);
+	udev->requests[id] = NULL;
+	spin_unlock(&udev->requests_lock);
 
-	complete(&request->done);
+	wake_up(&udev->requests_waitq);
 }
 
 static int uinput_request_send(struct uinput_device *udev,
@@ -138,20 +139,22 @@ static int uinput_request_send(struct uinput_device *udev,
 static int uinput_request_submit(struct uinput_device *udev,
 				 struct uinput_request *request)
 {
-	int error;
+	int retval;
 
-	error = uinput_request_reserve_slot(udev, request);
-	if (error)
-		return error;
+	retval = uinput_request_reserve_slot(udev, request);
+	if (retval)
+		return retval;
 
-	error = uinput_request_send(udev, request);
-	if (error) {
-		uinput_request_done(udev, request);
-		return error;
-	}
+	retval = uinput_request_send(udev, request);
+	if (retval)
+		goto out;
 
 	wait_for_completion(&request->done);
-	return request->retval;
+	retval = request->retval;
+
+ out:
+	uinput_request_release_slot(udev, request->id);
+	return retval;
 }
 
 /*
@@ -169,7 +172,7 @@ static void uinput_flush_requests(struct uinput_device *udev)
 		request = udev->requests[i];
 		if (request) {
 			request->retval = -ENODEV;
-			uinput_request_done(udev, request);
+			complete(&request->done);
 		}
 	}
 
@@ -957,7 +960,7 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 			}
 
 			req->retval = ff_up.retval;
-			uinput_request_done(udev, req);
+			complete(&req->done);
 			goto out;
 
 		case UI_END_FF_ERASE:
@@ -973,7 +976,7 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 			}
 
 			req->retval = ff_erase.retval;
-			uinput_request_done(udev, req);
+			complete(&req->done);
 			goto out;
 	}
 
-- 
cgit 


From 127b8e2692b6c145955bb71e482ca1bc6ce10027 Mon Sep 17 00:00:00 2001
From: Gabriel Fernandez <gabriel.fernandez@st.com>
Date: Tue, 19 Sep 2017 13:44:06 +0200
Subject: dt-bindings: clk: stm32h7: fix clock-cell size

The clock-cell size is 1 on stm32h7 plaform.

Signed-off-by: Gabriel Fernandez <gabriel.fernandez@st.com>
Fixes: 3e4d618b0722 ("clk: stm32h7: Add stm32h743 clock driver")
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt b/Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt
index a135504c7d57..cac24ee10b72 100644
--- a/Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt
+++ b/Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt
@@ -32,7 +32,7 @@ Example:
 		compatible = "st,stm32h743-rcc", "st,stm32-rcc";
 		reg = <0x58024400 0x400>;
 		#reset-cells = <1>;
-		#clock-cells = <2>;
+		#clock-cells = <1>;
 		clocks = <&clk_hse>, <&clk_lse>, <&clk_i2s_ckin>;
 
 		st,syscfg = <&pwrcfg>;
-- 
cgit 


From 059fbe8b5171960bd5c2371bb327ac18733773de Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 21 Sep 2017 13:27:02 +0200
Subject: net: phy: Fix truncation of large IRQ numbers in phy_attached_print()

Given NR_IRQS is 2048 on sparc64, and even 32784 on alpha, 3 digits is
not enough to represent interrupt numbers on all architectures.  Hence
PHY interrupt numbers may be truncated during printing.

Increase the buffer size from 4 to 8 bytes to fix this.

Fixes: 5e369aefdce4818c ("net: stmmac: Delete dead code for MDIO registration")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8cf0c5901f95..67f25ac29025 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -879,7 +879,7 @@ void phy_attached_print(struct phy_device *phydev, const char *fmt, ...)
 {
 	const char *drv_name = phydev->drv ? phydev->drv->name : "unbound";
 	char *irq_str;
-	char irq_num[4];
+	char irq_num[8];
 
 	switch(phydev->irq) {
 	case PHY_POLL:
-- 
cgit 


From 222d7dbd258dad4cd5241c43ef818141fad5a87a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 21 Sep 2017 09:15:46 -0700
Subject: net: prevent dst uses after free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In linux-4.13, Wei worked hard to convert dst to a traditional
refcounted model, removing GC.

We now want to make sure a dst refcount can not transition from 0 back
to 1.

The problem here is that input path attached a not refcounted dst to an
skb. Then later, because packet is forwarded and hits skb_dst_force()
before exiting RCU section, we might try to take a refcount on one dst
that is about to be freed, if another cpu saw 1 -> 0 transition in
dst_release() and queued the dst for freeing after one RCU grace period.

Lets unify skb_dst_force() and skb_dst_force_safe(), since we should
always perform the complete check against dst refcount, and not assume
it is not zero.

Bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=197005

[  989.919496]  skb_dst_force+0x32/0x34
[  989.919498]  __dev_queue_xmit+0x1ad/0x482
[  989.919501]  ? eth_header+0x28/0xc6
[  989.919502]  dev_queue_xmit+0xb/0xd
[  989.919504]  neigh_connected_output+0x9b/0xb4
[  989.919507]  ip_finish_output2+0x234/0x294
[  989.919509]  ? ipt_do_table+0x369/0x388
[  989.919510]  ip_finish_output+0x12c/0x13f
[  989.919512]  ip_output+0x53/0x87
[  989.919513]  ip_forward_finish+0x53/0x5a
[  989.919515]  ip_forward+0x2cb/0x3e6
[  989.919516]  ? pskb_trim_rcsum.part.9+0x4b/0x4b
[  989.919518]  ip_rcv_finish+0x2e2/0x321
[  989.919519]  ip_rcv+0x26f/0x2eb
[  989.919522]  ? vlan_do_receive+0x4f/0x289
[  989.919523]  __netif_receive_skb_core+0x467/0x50b
[  989.919526]  ? tcp_gro_receive+0x239/0x239
[  989.919529]  ? inet_gro_receive+0x226/0x238
[  989.919530]  __netif_receive_skb+0x4d/0x5f
[  989.919532]  netif_receive_skb_internal+0x5c/0xaf
[  989.919533]  napi_gro_receive+0x45/0x81
[  989.919536]  ixgbe_poll+0xc8a/0xf09
[  989.919539]  ? kmem_cache_free_bulk+0x1b6/0x1f7
[  989.919540]  net_rx_action+0xf4/0x266
[  989.919543]  __do_softirq+0xa8/0x19d
[  989.919545]  irq_exit+0x5d/0x6b
[  989.919546]  do_IRQ+0x9c/0xb5
[  989.919548]  common_interrupt+0x93/0x93
[  989.919548]  </IRQ>

Similarly dst_clone() can use dst_hold() helper to have additional
debugging, as a follow up to commit 44ebe79149ff ("net: add debug
atomic_inc_not_zero() in dst_hold()")

In net-next we will convert dst atomic_t to refcount_t for peace of
mind.

Fixes: a4c2fd7f7891 ("net: remove DST_NOCACHE flag")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Wei Wang <weiwan@google.com>
Reported-by: Paweł Staszewski <pstaszewski@itcare.pl>
Bisected-by: Paweł Staszewski <pstaszewski@itcare.pl>
Acked-by: Wei Wang <weiwan@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h   | 22 ++++------------------
 include/net/route.h |  2 +-
 include/net/sock.h  |  2 +-
 3 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 93568bd0a352..06a6765da074 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
 static inline struct dst_entry *dst_clone(struct dst_entry *dst)
 {
 	if (dst)
-		atomic_inc(&dst->__refcnt);
+		dst_hold(dst);
 	return dst;
 }
 
@@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb
 	__skb_dst_copy(nskb, oskb->_skb_refdst);
 }
 
-/**
- * skb_dst_force - makes sure skb dst is refcounted
- * @skb: buffer
- *
- * If dst is not yet refcounted, let's do it
- */
-static inline void skb_dst_force(struct sk_buff *skb)
-{
-	if (skb_dst_is_noref(skb)) {
-		WARN_ON(!rcu_read_lock_held());
-		skb->_skb_refdst &= ~SKB_DST_NOREF;
-		dst_clone(skb_dst(skb));
-	}
-}
-
 /**
  * dst_hold_safe - Take a reference on a dst if possible
  * @dst: pointer to dst entry
@@ -339,16 +324,17 @@ static inline bool dst_hold_safe(struct dst_entry *dst)
 }
 
 /**
- * skb_dst_force_safe - makes sure skb dst is refcounted
+ * skb_dst_force - makes sure skb dst is refcounted
  * @skb: buffer
  *
  * If dst is not yet refcounted and not destroyed, grab a ref on it.
  */
-static inline void skb_dst_force_safe(struct sk_buff *skb)
+static inline void skb_dst_force(struct sk_buff *skb)
 {
 	if (skb_dst_is_noref(skb)) {
 		struct dst_entry *dst = skb_dst(skb);
 
+		WARN_ON(!rcu_read_lock_held());
 		if (!dst_hold_safe(dst))
 			dst = NULL;
 
diff --git a/include/net/route.h b/include/net/route.h
index 1b09a9368c68..57dfc6850d37 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -190,7 +190,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
 	rcu_read_lock();
 	err = ip_route_input_noref(skb, dst, src, tos, devin);
 	if (!err) {
-		skb_dst_force_safe(skb);
+		skb_dst_force(skb);
 		if (!skb_dst(skb))
 			err = -EINVAL;
 	}
diff --git a/include/net/sock.h b/include/net/sock.h
index 03a362568357..a6b9a8d1a6df 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -856,7 +856,7 @@ void sk_stream_write_space(struct sock *sk);
 static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	/* dont let skb dst not refcounted, we are going to leave rcu lock */
-	skb_dst_force_safe(skb);
+	skb_dst_force(skb);
 
 	if (!sk->sk_backlog.tail)
 		sk->sk_backlog.head = skb;
-- 
cgit 


From 4ba72fc080ad44a5c1e93449ec070cd4d331803f Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Thu, 21 Sep 2017 22:34:54 +0200
Subject: drm/sun4i: cec: Enable back CEC-pin framework

Now that the cec-pin framework has been merged, we can remove the safeguard
that were preventing the CEC part of the sun4i HDMI driver and actually
start to use it.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/gpu/drm/sun4i/Kconfig      | 2 +-
 drivers/gpu/drm/sun4i/sun4i_hdmi.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig
index 06f05302ee75..882d85db9053 100644
--- a/drivers/gpu/drm/sun4i/Kconfig
+++ b/drivers/gpu/drm/sun4i/Kconfig
@@ -26,7 +26,7 @@ config DRM_SUN4I_HDMI_CEC
        bool "Allwinner A10 HDMI CEC Support"
        depends on DRM_SUN4I_HDMI
        select CEC_CORE
-       depends on CEC_PIN
+       select CEC_PIN
        help
 	  Choose this option if you have an Allwinner SoC with an HDMI
 	  controller and want to use CEC.
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi.h b/drivers/gpu/drm/sun4i/sun4i_hdmi.h
index 1457750988da..a1f8cba251a2 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi.h
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi.h
@@ -15,7 +15,7 @@
 #include <drm/drm_connector.h>
 #include <drm/drm_encoder.h>
 
-#include <media/cec.h>
+#include <media/cec-pin.h>
 
 #define SUN4I_HDMI_CTRL_REG		0x004
 #define SUN4I_HDMI_CTRL_ENABLE			BIT(31)
-- 
cgit 


From 44889942b6eb356eab27ce25fe10701adfec7776 Mon Sep 17 00:00:00 2001
From: Ladi Prosek <lprosek@redhat.com>
Date: Fri, 22 Sep 2017 07:53:15 +0200
Subject: KVM: nVMX: fix HOST_CR3/HOST_CR4 cache

For nested virt we maintain multiple VMCS that can run on a vCPU. So it is
incorrect to keep vmcs_host_cr3 and vmcs_host_cr4, whose purpose is caching
the value of the rarely changing HOST_CR3 and HOST_CR4 VMCS fields, in
vCPU-wide data structures.

Hyper-V nested on KVM runs into this consistently for me with PCID enabled.
CR3 is updated with a new value, unlikely(cr3 != vmx->host_state.vmcs_host_cr3)
fires, and the currently loaded VMCS is updated. Then we switch from L2 to
L1 and the next exit reverts CR3 to its old value.

Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant")
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0726ca7a1b02..c83d28b0ab05 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -200,6 +200,8 @@ struct loaded_vmcs {
 	int cpu;
 	bool launched;
 	bool nmi_known_unmasked;
+	unsigned long vmcs_host_cr3;	/* May not match real cr3 */
+	unsigned long vmcs_host_cr4;	/* May not match real cr4 */
 	struct list_head loaded_vmcss_on_cpu_link;
 };
 
@@ -600,8 +602,6 @@ struct vcpu_vmx {
 		int           gs_ldt_reload_needed;
 		int           fs_reload_needed;
 		u64           msr_host_bndcfgs;
-		unsigned long vmcs_host_cr3;	/* May not match real cr3 */
-		unsigned long vmcs_host_cr4;	/* May not match real cr4 */
 	} host_state;
 	struct {
 		int vm86_active;
@@ -5178,12 +5178,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	 */
 	cr3 = __read_cr3();
 	vmcs_writel(HOST_CR3, cr3);		/* 22.2.3  FIXME: shadow tables */
-	vmx->host_state.vmcs_host_cr3 = cr3;
+	vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
 
 	/* Save the most likely value for this task's CR4 in the VMCS. */
 	cr4 = cr4_read_shadow();
 	vmcs_writel(HOST_CR4, cr4);			/* 22.2.3, 22.2.5 */
-	vmx->host_state.vmcs_host_cr4 = cr4;
+	vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
 
 	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
 #ifdef CONFIG_X86_64
@@ -9274,15 +9274,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
 	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
+	if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
 		vmcs_writel(HOST_CR3, cr3);
-		vmx->host_state.vmcs_host_cr3 = cr3;
+		vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
 	}
 
 	cr4 = cr4_read_shadow();
-	if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+	if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
-		vmx->host_state.vmcs_host_cr4 = cr4;
+		vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
 	}
 
 	/* When single-stepping over STI and MOV SS, we must clear the
-- 
cgit 


From e001fa78d44d0b5c7b1498d1e4a038740efa3b1e Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Fri, 15 Sep 2017 15:26:14 +1000
Subject: KVM: PPC: Book3S HV: Check for updated HDSISR on P9 HDSI exception

On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
Interrupt (HDSI) the HDSISR is not be updated at all.

To work around this we put a canary value into the HDSISR before
returning to a guest and then check for this canary when we take a
HDSI. If we find the canary on a HDSI, we know the hardware didn't
update the HDSISR. In this case we return to the guest to retake the
HDSI which should correctly update the HDSISR the second time HDSI
entry.

After talking to Paulus we've applied this workaround to all POWER9
CPUs. The workaround of returning to the guest shouldn't ever be
triggered on well behaving CPU. The extra instructions should have
negligible performance impact.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 17936f82d3c7..ec69fa45d5a2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1121,6 +1121,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 BEGIN_FTR_SECTION
 	mtspr	SPRN_PPR, r0
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+/* Move canary into DSISR to check for later */
+BEGIN_FTR_SECTION
+	li	r0, 0x7fff
+	mtspr	SPRN_HDSISR, r0
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
 	ld	r0, VCPU_GPR(R0)(r4)
 	ld	r4, VCPU_GPR(R4)(r4)
 
@@ -1956,9 +1963,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 kvmppc_hdsi:
 	ld	r3, VCPU_KVM(r9)
 	lbz	r0, KVM_RADIX(r3)
-	cmpwi	r0, 0
 	mfspr	r4, SPRN_HDAR
 	mfspr	r6, SPRN_HDSISR
+BEGIN_FTR_SECTION
+	/* Look for DSISR canary. If we find it, retry instruction */
+	cmpdi	r6, 0x7fff
+	beq	6f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+	cmpwi	r0, 0
 	bne	.Lradix_hdsi		/* on radix, just save DAR/DSISR/ASDR */
 	/* HPTE not found fault or protection fault? */
 	andis.	r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
-- 
cgit 


From e87be9b29c22852ec300826e3b1d551b00c1eb7a Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Thu, 14 Sep 2017 14:30:43 +0200
Subject: mmc: tmio: remove broken and noisy debug macro

Some change for v4.14 broke the debug output for TMIO. But since it was
not helpful to me and too noisy for my taste anyhow, let's just remove
it instead of fixing it. We'll find something better if we'd need it...

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/tmio_mmc_core.c | 47 ----------------------------------------
 1 file changed, 47 deletions(-)

diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index 12cf8288d663..a7293e186e03 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -129,50 +129,6 @@ static int tmio_mmc_next_sg(struct tmio_mmc_host *host)
 
 #define CMDREQ_TIMEOUT	5000
 
-#ifdef CONFIG_MMC_DEBUG
-
-#define STATUS_TO_TEXT(a, status, i) \
-	do { \
-		if ((status) & TMIO_STAT_##a) { \
-			if ((i)++) \
-				printk(KERN_DEBUG " | "); \
-			printk(KERN_DEBUG #a); \
-		} \
-	} while (0)
-
-static void pr_debug_status(u32 status)
-{
-	int i = 0;
-
-	pr_debug("status: %08x = ", status);
-	STATUS_TO_TEXT(CARD_REMOVE, status, i);
-	STATUS_TO_TEXT(CARD_INSERT, status, i);
-	STATUS_TO_TEXT(SIGSTATE, status, i);
-	STATUS_TO_TEXT(WRPROTECT, status, i);
-	STATUS_TO_TEXT(CARD_REMOVE_A, status, i);
-	STATUS_TO_TEXT(CARD_INSERT_A, status, i);
-	STATUS_TO_TEXT(SIGSTATE_A, status, i);
-	STATUS_TO_TEXT(CMD_IDX_ERR, status, i);
-	STATUS_TO_TEXT(STOPBIT_ERR, status, i);
-	STATUS_TO_TEXT(ILL_FUNC, status, i);
-	STATUS_TO_TEXT(CMD_BUSY, status, i);
-	STATUS_TO_TEXT(CMDRESPEND, status, i);
-	STATUS_TO_TEXT(DATAEND, status, i);
-	STATUS_TO_TEXT(CRCFAIL, status, i);
-	STATUS_TO_TEXT(DATATIMEOUT, status, i);
-	STATUS_TO_TEXT(CMDTIMEOUT, status, i);
-	STATUS_TO_TEXT(RXOVERFLOW, status, i);
-	STATUS_TO_TEXT(TXUNDERRUN, status, i);
-	STATUS_TO_TEXT(RXRDY, status, i);
-	STATUS_TO_TEXT(TXRQ, status, i);
-	STATUS_TO_TEXT(ILL_ACCESS, status, i);
-	printk("\n");
-}
-
-#else
-#define pr_debug_status(s)  do { } while (0)
-#endif
-
 static void tmio_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
 {
 	struct tmio_mmc_host *host = mmc_priv(mmc);
@@ -762,9 +718,6 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid)
 	status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS);
 	ireg = status & TMIO_MASK_IRQ & ~host->sdcard_irq_mask;
 
-	pr_debug_status(status);
-	pr_debug_status(ireg);
-
 	/* Clear the status except the interrupt status */
 	sd_ctrl_write32_as_16_and_16(host, CTL_STATUS, TMIO_MASK_IRQ);
 
-- 
cgit 


From 6ae033689d7b1a419def78e8e990b0eab8bb6419 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 18 Sep 2017 15:16:08 +0300
Subject: mmc: sdhci-pci: Fix voltage switch for some Intel host controllers

Some Intel host controllers (e.g. CNP) use an ACPI device-specific method
to ensure correct voltage switching. Fix voltage switch for those, by
adding a call to the DSM.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pci-core.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index bbaddf18a1b3..d0ccc6729fd2 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -392,6 +392,7 @@ static const struct sdhci_pci_fixes sdhci_intel_pch_sdio = {
 
 enum {
 	INTEL_DSM_FNS		=  0,
+	INTEL_DSM_V18_SWITCH	=  3,
 	INTEL_DSM_DRV_STRENGTH	=  9,
 	INTEL_DSM_D3_RETUNE	= 10,
 };
@@ -557,6 +558,19 @@ static void intel_hs400_enhanced_strobe(struct mmc_host *mmc,
 	sdhci_writel(host, val, INTEL_HS400_ES_REG);
 }
 
+static void sdhci_intel_voltage_switch(struct sdhci_host *host)
+{
+	struct sdhci_pci_slot *slot = sdhci_priv(host);
+	struct intel_host *intel_host = sdhci_pci_priv(slot);
+	struct device *dev = &slot->chip->pdev->dev;
+	u32 result = 0;
+	int err;
+
+	err = intel_dsm(intel_host, dev, INTEL_DSM_V18_SWITCH, &result);
+	pr_debug("%s: %s DSM error %d result %u\n",
+		 mmc_hostname(host->mmc), __func__, err, result);
+}
+
 static const struct sdhci_ops sdhci_intel_byt_ops = {
 	.set_clock		= sdhci_set_clock,
 	.set_power		= sdhci_intel_set_power,
@@ -565,6 +579,7 @@ static const struct sdhci_ops sdhci_intel_byt_ops = {
 	.reset			= sdhci_reset,
 	.set_uhs_signaling	= sdhci_set_uhs_signaling,
 	.hw_reset		= sdhci_pci_hw_reset,
+	.voltage_switch		= sdhci_intel_voltage_switch,
 };
 
 static void byt_read_dsm(struct sdhci_pci_slot *slot)
-- 
cgit 


From c0d05cde2a685cd6486390c62be684ce456d84d6 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 21 Sep 2017 11:20:58 +0100
Subject: iommu/of: Remove PCI host bridge node check

of_pci_iommu_init() tries to be clever and stop its alias walk at the
device represented by master_np, in case of weird PCI topologies where
the bridge to the IOMMU and the rest of the system is not at the root.
It turns out this is a bit short-sighted, since there are plenty of
other callers of pci_for_each_dma_alias() which would also need the same
behaviour in that situation, and the only platform so far with such a
topology (Cavium ThunderX2) already solves it more generally via a PCI
quirk. As this check is effectively redundant, and returning a boolean
value as an int is a bit broken anyway, let's just get rid of it.

Reported-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Fixes: d87beb749281 ("iommu/of: Handle PCI aliases properly")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/of_iommu.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index e60e3dba85a0..50947ebb6d17 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -157,10 +157,7 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
 
 	err = of_iommu_xlate(info->dev, &iommu_spec);
 	of_node_put(iommu_spec.np);
-	if (err)
-		return err;
-
-	return info->np == pdev->bus->dev.of_node;
+	return err;
 }
 
 const struct iommu_ops *of_iommu_configure(struct device *dev,
-- 
cgit 


From a88dc7ba15cd4f4ef5102b5185f8fa7ff86e54e1 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Wed, 20 Sep 2017 12:26:38 +0530
Subject: drivers/perf: arm_pmu_acpi: Release memory obtained by kasprintf

Free memory region, if arm_pmu_acpi_probe is not successful.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/perf/arm_pmu_acpi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 0a9b78705ee8..3303dd8d8eb5 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -235,6 +235,7 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
 		ret = armpmu_register(pmu);
 		if (ret) {
 			pr_warn("Failed to register PMU for CPU%d\n", cpu);
+			kfree(pmu->name);
 			return ret;
 		}
 	}
-- 
cgit 


From e6f9bc34d3779cb7b6a337afed5de8be3f0fab77 Mon Sep 17 00:00:00 2001
From: Alex Estrin <alex.estrin@intel.com>
Date: Thu, 31 Aug 2017 09:30:34 -0700
Subject: IB/core: Fix for core panic

Build with the latest patches resulted in panic:
11384.486289] BUG: unable to handle kernel NULL pointer dereference at
         (null)
[11384.486293] IP:           (null)
[11384.486295] PGD 0
[11384.486295] P4D 0
[11384.486296]
[11384.486299] Oops: 0010 [#1] SMP
......... snip ......
[11384.486401] CPU: 0 PID: 968 Comm: kworker/0:1H Tainted: G        W  O
    4.13.0-a-stream-20170825 #1
[11384.486402] Hardware name: Intel Corporation S2600WT2R/S2600WT2R,
BIOS SE5C610.86B.01.01.0014.121820151719 12/18/2015
[11384.486418] Workqueue: ib-comp-wq ib_cq_poll_work [ib_core]
[11384.486419] task: ffff880850579680 task.stack: ffffc90007fec000
[11384.486420] RIP: 0010:          (null)
[11384.486420] RSP: 0018:ffffc90007fef970 EFLAGS: 00010206
[11384.486421] RAX: ffff88084cfe8000 RBX: ffff88084dce4000 RCX:
ffffc90007fef978
[11384.486422] RDX: 0000000000000000 RSI: 0000000000000001 RDI:
ffff88084cfe8000
[11384.486422] RBP: ffffc90007fefab0 R08: 0000000000000000 R09:
ffff88084dce4080
[11384.486423] R10: ffffffffa02d7f60 R11: 0000000000000000 R12:
ffff88105af65a00
[11384.486423] R13: ffff88084dce4000 R14: 000000000000c000 R15:
000000000000c000
[11384.486424] FS:  0000000000000000(0000) GS:ffff88085f400000(0000)
knlGS:0000000000000000
[11384.486425] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[11384.486425] CR2: 0000000000000000 CR3: 0000000001c09000 CR4:
00000000001406f0
[11384.486426] Call Trace:
[11384.486431]  ? is_valid_mcast_lid.isra.21+0xfb/0x110 [ib_core]
[11384.486436]  ib_attach_mcast+0x6f/0xa0 [ib_core]
[11384.486441]  ipoib_mcast_attach+0x81/0x190 [ib_ipoib]
[11384.486443]  ipoib_mcast_join_complete+0x354/0xb40 [ib_ipoib]
[11384.486448]  mcast_work_handler+0x330/0x6c0 [ib_core]
[11384.486452]  join_handler+0x101/0x220 [ib_core]
[11384.486455]  ib_sa_mcmember_rec_callback+0x54/0x80 [ib_core]
[11384.486459]  recv_handler+0x3a/0x60 [ib_core]
[11384.486462]  ib_mad_recv_done+0x423/0x9b0 [ib_core]
[11384.486466]  __ib_process_cq+0x5d/0xb0 [ib_core]
[11384.486469]  ib_cq_poll_work+0x20/0x60 [ib_core]
[11384.486472]  process_one_work+0x149/0x360
[11384.486474]  worker_thread+0x4d/0x3c0
[11384.486487]  kthread+0x109/0x140
[11384.486488]  ? rescuer_thread+0x380/0x380
[11384.486489]  ? kthread_park+0x60/0x60
[11384.486490]  ? kthread_park+0x60/0x60
[11384.486493]  ret_from_fork+0x25/0x30
[11384.486493] Code:  Bad RIP value.
[11384.486493] Code:  Bad RIP value.
[11384.486496] RIP:           (null) RSP: ffffc90007fef970
[11384.486497] CR2: 0000000000000000
[11384.486531] ---[ end trace b1acec6fb4ff6e75 ]---
[11384.532133] Kernel panic - not syncing: Fatal exception
[11384.536541] Kernel Offset: disabled
[11384.969491] ---[ end Kernel panic - not syncing: Fatal exception
[11384.976875] sched: Unexpected reschedule of offline CPU#1!
[11384.983646] ------------[ cut here ]------------

Rdma device driver may not have implemented (*get_link_layer)()
so it can not be called directly. Should use appropriate helper function.

Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Fixes: 523633359224 ("IB/core: Fix the validations of a multicast LID in attach or detach operations")
Cc: stable@kernel.org # 4.13
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Alex Estrin <alex.estrin@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/verbs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index ee9e27dc799b..de57d6c11a25 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1646,7 +1646,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
 	 */
 	if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
 		if (attr.qp_state >= IB_QPS_INIT) {
-			if (qp->device->get_link_layer(qp->device, attr.port_num) !=
+			if (rdma_port_get_link_layer(qp->device, attr.port_num) !=
 			    IB_LINK_LAYER_INFINIBAND)
 				return true;
 			goto lid_check;
@@ -1655,7 +1655,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
 
 	/* Can't get a quick answer, iterate over all ports */
 	for (port = 0; port < qp->device->phys_port_cnt; port++)
-		if (qp->device->get_link_layer(qp->device, port) !=
+		if (rdma_port_get_link_layer(qp->device, port) !=
 		    IB_LINK_LAYER_INFINIBAND)
 			num_eth_ports++;
 
-- 
cgit 


From 3d318605f5e32ff44fb290d9b67573b34213c4c8 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Wed, 13 Sep 2017 09:52:32 -0700
Subject: iw_cxgb4: put ep reference in pass_accept_req()

The listening endpoint should always be dereferenced at the end of
pass_accept_req().

Fixes: f86fac79afec ("RDMA/iw_cxgb4: atomic find and reference for listening endpoints")

Cc: stable@vger.kernel.org
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/cm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index ceaa2fa54d32..83322dbc4711 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -2594,9 +2594,9 @@ fail:
 	c4iw_put_ep(&child_ep->com);
 reject:
 	reject_cr(dev, hwtid, skb);
+out:
 	if (parent_ep)
 		c4iw_put_ep(&parent_ep->com);
-out:
 	return 0;
 }
 
-- 
cgit 


From 3c8415cc7aff467faba25841fb859660ac14a04e Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Tue, 5 Sep 2017 11:52:33 -0700
Subject: iw_cxgb4: drop listen destroy replies if no ep found

If the thread waiting for a CLOSE_LISTSRV_RPL times out and bails,
then we need to handle a subsequent CPL if it arrives and the stid has
been released.  In this case silently drop it.

Cc: stable@vger.kernel.org
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/cm.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 83322dbc4711..19297e408820 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -2333,9 +2333,14 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 	unsigned int stid = GET_TID(rpl);
 	struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
 
+	if (!ep) {
+		pr_debug("%s stid %d lookup failure!\n", __func__, stid);
+		goto out;
+	}
 	pr_debug("%s ep %p\n", __func__, ep);
 	c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
 	c4iw_put_ep(&ep->com);
+out:
 	return 0;
 }
 
-- 
cgit 


From 8b1bbf36b7452c4acb20e91948eaa5e225ea6978 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Tue, 5 Sep 2017 11:52:34 -0700
Subject: iw_cxgb4: remove the stid on listen create failure

If a listen create fails, then the server tid (stid) is incorrectly left
in the stid idr table, which can cause a touch-after-free if the stid
is looked up and the already freed endpoint is touched.  So make sure
and remove it in the error path.

Cc: stable@vger.kernel.org
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/cm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 19297e408820..daf7a56e5d7e 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3462,7 +3462,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
 		cm_id->provider_data = ep;
 		goto out;
 	}
-
+	remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
 	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
 			ep->com.local_addr.ss_family);
 fail2:
-- 
cgit 


From 05f5c38576475439f3124a3e6d62db68de83f7be Mon Sep 17 00:00:00 2001
From: KT Liao <kt.liao@emc.com.tw>
Date: Fri, 22 Sep 2017 10:00:57 -0700
Subject: Input: elan_i2c - extend Flash-Write delay

The original 20ms delay is only marginally enough delay after a block write
operation during firmware update. Let's increase the delay to ensure that
the controller finishes up storing the page to avoid failures in the
firmware updates.

Signed-off-by: KT Liao <kt.liao@emc.com.tw>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/elan_i2c_i2c.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c
index 15b1330606c1..e19eb60b3d2f 100644
--- a/drivers/input/mouse/elan_i2c_i2c.c
+++ b/drivers/input/mouse/elan_i2c_i2c.c
@@ -598,7 +598,7 @@ static int elan_i2c_write_fw_block(struct i2c_client *client,
 	}
 
 	/* Wait for F/W to update one page ROM data. */
-	msleep(20);
+	msleep(35);
 
 	error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CTRL_CMD, val);
 	if (error) {
-- 
cgit 


From af3c79beff48fb2bd1c79d02688004cf57215acf Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Date: Thu, 7 Sep 2017 13:38:18 +0300
Subject: IB/ipoib: Suppress the retry related completion errors

IPoIB doesn't support transport/rnr retry schemes as per
RFC so those errors are expected. No need to flood the
log files with them.

Tested-by: Michael Nowak <michael.nowak@oracle.com>
Tested-by: Rafael Alejandro Peralez <rafael.peralez@oracle.com>
Tested-by: Liwen Huang <liwen.huang@oracle.com>
Tested-by: Hong Liu <hong.x.liu@oracle.com>
Reviewed-by: Mukesh Kacker <mukesh.kacker@oracle.com>
Reported-by: Rajiv Raja <rajiv.raja@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 14b62f7472b4..7774654c2ccb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -823,12 +823,18 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 	    wc->status != IB_WC_WR_FLUSH_ERR) {
 		struct ipoib_neigh *neigh;
 
-		if (wc->status != IB_WC_RNR_RETRY_EXC_ERR)
-			ipoib_warn(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n",
-				   wc->status, wr_id, wc->vendor_err);
+		/* IB_WC[_RNR]_RETRY_EXC_ERR error is part of the life cycle,
+		 * so don't make waves.
+		 */
+		if (wc->status == IB_WC_RNR_RETRY_EXC_ERR ||
+		    wc->status == IB_WC_RETRY_EXC_ERR)
+			ipoib_dbg(priv,
+				  "%s: failed cm send event (status=%d, wrid=%d vend_err 0x%x)\n",
+				   __func__, wc->status, wr_id, wc->vendor_err);
 		else
-			ipoib_dbg(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n",
-				  wc->status, wr_id, wc->vendor_err);
+			ipoib_warn(priv,
+				    "%s: failed cm send event (status=%d, wrid=%d vend_err 0x%x)\n",
+				   __func__, wc->status, wr_id, wc->vendor_err);
 
 		spin_lock_irqsave(&priv->lock, flags);
 		neigh = tx->neigh;
-- 
cgit 


From 06564f60859bdf7e73d70ae35d7e285e96ae9c46 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 11 Sep 2017 17:03:13 +0100
Subject: IB/ocrdma: fix incorrect fall-through on switch statement

In the case where mbox_status is OCRDMA_MBX_STATUS_FAILED and
add_status is OCRDMA_MBX_STATUS_FAILED err_num is assigned -EAGAIN
however the case OCRDMA_MBX_STATUS_FAILED is missing a break and
falls through to the default case which then re-assigns err_num
to -EFAULT.   Fix this so that err_num is assigned to -EAGAIN
for the add_status OCRDMA_MBX_STATUS_FAILED case and -EFAULT
otherwise.

Detected by CoverityScan CID#703125 ("Missing break in switch")

Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index dcb5942f9fb5..65b166cc7437 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -252,7 +252,10 @@ static int ocrdma_get_mbx_errno(u32 status)
 		case OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES:
 			err_num = -EAGAIN;
 			break;
+		default:
+			err_num = -EFAULT;
 		}
+		break;
 	default:
 		err_num = -EFAULT;
 	}
-- 
cgit 


From cbafad87e1507044c7d442087d41d5e3d432cc4e Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Mon, 18 Sep 2017 12:28:48 +0100
Subject: IB/mlx5: fix debugfs cleanup

If delay_drop_debugfs_init() fails in any of the operations to create
debugfs, it is calling delay_drop_debugfs_cleanup() as part of its
cleanup. But delay_drop_debugfs_cleanup() checks for 'dbg' and since
we have not yet pointed 'dbg' to the debugfs we need to cleanup, the
cleanup fails and we are left with stray debugfs elements and also a
memory leak.

Fixes: 4a5fd5d2965c ("IB/mlx5: Add necessary delay drop assignment")
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Acked-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index ab3c562d5ba7..05fb4bdff6a0 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3837,11 +3837,13 @@ static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
 	if (!dbg)
 		return -ENOMEM;
 
+	dev->delay_drop.dbg = dbg;
+
 	dbg->dir_debugfs =
 		debugfs_create_dir("delay_drop",
 				   dev->mdev->priv.dbg_root);
 	if (!dbg->dir_debugfs)
-		return -ENOMEM;
+		goto out_debugfs;
 
 	dbg->events_cnt_debugfs =
 		debugfs_create_atomic_t("num_timeout_events", 0400,
@@ -3865,8 +3867,6 @@ static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
 	if (!dbg->timeout_debugfs)
 		goto out_debugfs;
 
-	dev->delay_drop.dbg = dbg;
-
 	return 0;
 
 out_debugfs:
-- 
cgit 


From e13547bc181ae6c279adf0df054717787f24ee89 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon@kernel.org>
Date: Tue, 19 Sep 2017 13:22:13 +0300
Subject: IB/bnxt_re: Fix frame stack compilation warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reduce stack size by dynamically allocating memory instead
of declaring large struct on the stack:

drivers/infiniband/hw/bnxt_re/ib_verbs.c: In function ‘bnxt_re_query_qp’:
drivers/infiniband/hw/bnxt_re/ib_verbs.c:1600:1: warning: the frame size of 1216 bytes is larger than 1024 bytes [-Wframe-larger-than=]
 }
 ^

Cc: Selvin Xavier <selvin.xavier@broadcom.com>
Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver")
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Acked-by: Selvin Xavier <selvin.xavier@broadcom.com>
Reviewed-by: Jonathan Toppins <jtoppins@redhat.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 67 +++++++++++++++++---------------
 1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 01eee15bbd65..03caf48af8e2 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1551,43 +1551,46 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 {
 	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
 	struct bnxt_re_dev *rdev = qp->rdev;
-	struct bnxt_qplib_qp qplib_qp;
+	struct bnxt_qplib_qp *qplib_qp;
 	int rc;
 
-	memset(&qplib_qp, 0, sizeof(struct bnxt_qplib_qp));
-	qplib_qp.id = qp->qplib_qp.id;
-	qplib_qp.ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index;
+	qplib_qp = kzalloc(sizeof(*qplib_qp), GFP_KERNEL);
+	if (!qplib_qp)
+		return -ENOMEM;
+
+	qplib_qp->id = qp->qplib_qp.id;
+	qplib_qp->ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index;
 
-	rc = bnxt_qplib_query_qp(&rdev->qplib_res, &qplib_qp);
+	rc = bnxt_qplib_query_qp(&rdev->qplib_res, qplib_qp);
 	if (rc) {
 		dev_err(rdev_to_dev(rdev), "Failed to query HW QP");
-		return rc;
+		goto out;
 	}
-	qp_attr->qp_state = __to_ib_qp_state(qplib_qp.state);
-	qp_attr->en_sqd_async_notify = qplib_qp.en_sqd_async_notify ? 1 : 0;
-	qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp.access);
-	qp_attr->pkey_index = qplib_qp.pkey_index;
-	qp_attr->qkey = qplib_qp.qkey;
+	qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state);
+	qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0;
+	qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access);
+	qp_attr->pkey_index = qplib_qp->pkey_index;
+	qp_attr->qkey = qplib_qp->qkey;
 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
-	rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp.ah.flow_label,
-			qplib_qp.ah.host_sgid_index,
-			qplib_qp.ah.hop_limit,
-			qplib_qp.ah.traffic_class);
-	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, qplib_qp.ah.dgid.data);
-	rdma_ah_set_sl(&qp_attr->ah_attr, qplib_qp.ah.sl);
-	ether_addr_copy(qp_attr->ah_attr.roce.dmac, qplib_qp.ah.dmac);
-	qp_attr->path_mtu = __to_ib_mtu(qplib_qp.path_mtu);
-	qp_attr->timeout = qplib_qp.timeout;
-	qp_attr->retry_cnt = qplib_qp.retry_cnt;
-	qp_attr->rnr_retry = qplib_qp.rnr_retry;
-	qp_attr->min_rnr_timer = qplib_qp.min_rnr_timer;
-	qp_attr->rq_psn = qplib_qp.rq.psn;
-	qp_attr->max_rd_atomic = qplib_qp.max_rd_atomic;
-	qp_attr->sq_psn = qplib_qp.sq.psn;
-	qp_attr->max_dest_rd_atomic = qplib_qp.max_dest_rd_atomic;
-	qp_init_attr->sq_sig_type = qplib_qp.sig_type ? IB_SIGNAL_ALL_WR :
-							IB_SIGNAL_REQ_WR;
-	qp_attr->dest_qp_num = qplib_qp.dest_qpn;
+	rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp->ah.flow_label,
+			qplib_qp->ah.host_sgid_index,
+			qplib_qp->ah.hop_limit,
+			qplib_qp->ah.traffic_class);
+	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, qplib_qp->ah.dgid.data);
+	rdma_ah_set_sl(&qp_attr->ah_attr, qplib_qp->ah.sl);
+	ether_addr_copy(qp_attr->ah_attr.roce.dmac, qplib_qp->ah.dmac);
+	qp_attr->path_mtu = __to_ib_mtu(qplib_qp->path_mtu);
+	qp_attr->timeout = qplib_qp->timeout;
+	qp_attr->retry_cnt = qplib_qp->retry_cnt;
+	qp_attr->rnr_retry = qplib_qp->rnr_retry;
+	qp_attr->min_rnr_timer = qplib_qp->min_rnr_timer;
+	qp_attr->rq_psn = qplib_qp->rq.psn;
+	qp_attr->max_rd_atomic = qplib_qp->max_rd_atomic;
+	qp_attr->sq_psn = qplib_qp->sq.psn;
+	qp_attr->max_dest_rd_atomic = qplib_qp->max_dest_rd_atomic;
+	qp_init_attr->sq_sig_type = qplib_qp->sig_type ? IB_SIGNAL_ALL_WR :
+							 IB_SIGNAL_REQ_WR;
+	qp_attr->dest_qp_num = qplib_qp->dest_qpn;
 
 	qp_attr->cap.max_send_wr = qp->qplib_qp.sq.max_wqe;
 	qp_attr->cap.max_send_sge = qp->qplib_qp.sq.max_sge;
@@ -1596,7 +1599,9 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 	qp_attr->cap.max_inline_data = qp->qplib_qp.max_inline_data;
 	qp_init_attr->cap = qp_attr->cap;
 
-	return 0;
+out:
+	kfree(qplib_qp);
+	return rc;
 }
 
 /* Routine for sending QP1 packets for RoCE V1 an V2
-- 
cgit 


From 01df7f5a77b926c2d790cf66d942d2a68d4ad5de Mon Sep 17 00:00:00 2001
From: Adit Ranadive <aditr@vmware.com>
Date: Thu, 21 Sep 2017 15:56:21 -0700
Subject: RDMA/vmw_pvrdma: Fix reporting correct opcodes for completion

Since the IB_WC_BIND_MW opcode has been dropped, set the correct
IB WC opcode explicitly.

Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver")
Reviewed-by: Aditya Sarwade <asarwade@vmware.com>
Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: Adit Ranadive <aditr@vmware.com>
Signed-off-by: Bryan Tan <bryantan@vmware.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 663a0c301c43..984aa3484928 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -416,9 +416,34 @@ static inline enum ib_wc_status pvrdma_wc_status_to_ib(
 	return (enum ib_wc_status)status;
 }
 
-static inline int pvrdma_wc_opcode_to_ib(int opcode)
-{
-	return opcode;
+static inline int pvrdma_wc_opcode_to_ib(unsigned int opcode)
+{
+	switch (opcode) {
+	case PVRDMA_WC_SEND:
+		return IB_WC_SEND;
+	case PVRDMA_WC_RDMA_WRITE:
+		return IB_WC_RDMA_WRITE;
+	case PVRDMA_WC_RDMA_READ:
+		return IB_WC_RDMA_READ;
+	case PVRDMA_WC_COMP_SWAP:
+		return IB_WC_COMP_SWAP;
+	case PVRDMA_WC_FETCH_ADD:
+		return IB_WC_FETCH_ADD;
+	case PVRDMA_WC_LOCAL_INV:
+		return IB_WC_LOCAL_INV;
+	case PVRDMA_WC_FAST_REG_MR:
+		return IB_WC_REG_MR;
+	case PVRDMA_WC_MASKED_COMP_SWAP:
+		return IB_WC_MASKED_COMP_SWAP;
+	case PVRDMA_WC_MASKED_FETCH_ADD:
+		return IB_WC_MASKED_FETCH_ADD;
+	case PVRDMA_WC_RECV:
+		return IB_WC_RECV;
+	case PVRDMA_WC_RECV_RDMA_WITH_IMM:
+		return IB_WC_RECV_RDMA_WITH_IMM;
+	default:
+		return IB_WC_SEND;
+	}
 }
 
 static inline int pvrdma_wc_flags_to_ib(int flags)
-- 
cgit 


From cd9100ca9e78eb03bf5a0ee2ba98ebdc8cc5880e Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Tue, 19 Sep 2017 09:19:09 -0500
Subject: i40iw: Fail open if there are no available MSI-X vectors

Check number of available MSI-X vectors for i40iw.
If there are no available vectors, fail the open.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_main.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index cc742c3132c6..5965b59a2f83 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1400,6 +1400,11 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
 	u32 i;
 	u32 size;
 
+	if (!ldev->msix_count) {
+		i40iw_pr_err("No MSI-X vectors\n");
+		return I40IW_ERR_CONFIG;
+	}
+
 	iwdev->msix_count = ldev->msix_count;
 
 	size = sizeof(struct i40iw_msix_vector) * iwdev->msix_count;
@@ -1550,7 +1555,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
 
 	status = i40iw_save_msix_info(iwdev, ldev);
 	if (status)
-		goto exit;
+		return status;
 	iwdev->hw.dev_context = (void *)ldev->pcidev;
 	iwdev->hw.hw_addr = ldev->hw_addr;
 	status = i40iw_allocate_dma_mem(&iwdev->hw,
-- 
cgit 


From 47fb3c1610b28dd435b892762280eebf04fe9adf Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Tue, 19 Sep 2017 09:19:10 -0500
Subject: i40iw: Prevent multiple netdev event notifier registrations

Netdev event notifier registration/de-registration is not
synchronized with a lock and there is a possibility of a
duplicate registration of notifier before the unregister
completes.

Register netdev event notifiers during module init and
de-register them at module exit.

This avoids the need to tie the registration to first netdev
client interface open and de-registration to last client
interface close and the synchronization to achieve it.

This also fixes a crash due to duplicate registration.

BUG: unable to handle kernel paging request at ffffffffa0d60388
IP: [<ffffffff8160f75d>] notifier_call_chain+0x3d/0x70
PGD 190d067 PUD 190e063 PMD 76c840067 PTE 0
Oops: 0000 [#1] SMP
Modules linked in: i40e(OF-) fuse btrfs zlib_deflate raid6_pq xor vfat msdos
[..]
e1000e vxlan ip_tunnel ptp pps_core i2c_core video [last unloaded: i40iw]
CPU: 1 PID: 27101 Comm: modprobe Tainted: GF       W  O--------------   3.10.0-229.el7.x86_64 #1
Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./Q87M-D2H, BIOS F7 01/17/2014
task: ffff88076e8a96c0 ti: ffff8806959c8000 task.ti: ffff8806959c8000
RIP: 0010:[<ffffffff8160f75d>]  [<ffffffff8160f75d>] notifier_call_chain+0x3d/0x70
RSP: 0018:ffff8806959cbb38  EFLAGS: 00010282
RAX: ffffffffa0d60380 RBX: 00000000fffffffd RCX: 0000000000000000
0708] RDX: 0000000000000000 RSI: ffff88081227a000 RDI: 0000000000000002
RBP: ffff8806959cbb60 R08: 0000000000000246 R09: 000000000000700c
R10: ffff88080e16ea40 R11: 00000000000ae8df R12: ffffffffa0d60380
R13: 0000000000000002 R14: ffff88076e738800 R15: 0000000000000000
FS:  00007f604ef4a740(0000) GS:ffff88083e240000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffffffa0d60388 CR3: 0000000753cd2000 CR4: 00000000001407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
ffffffff819e73a0 0000000000000000 0000000000000002 ffff88076e738800
00000000ffffffff ffff8806959cbba0 ffffffff8109d61d 0000000000000000
0000000000000000 ffff88076e738800 0000000000000000 ffff88076e738800
Call Trace:
[<ffffffff8109d61d>] __blocking_notifier_call_chain+0x4d/0x70
[<ffffffff8109d656>] blocking_notifier_call_chain+0x16/0x20
[<ffffffff8156b9e4>] __inet_del_ifa+0x154/0x2b0
[<ffffffff8156d102>] inetdev_event+0x182/0x530
[<ffffffff8160f76c>] notifier_call_chain+0x4c/0x70
[<ffffffff8109d446>] raw_notifier_call_chain+0x16/0x20
[<ffffffff814f71fd>] call_netdevice_notifiers+0x2d/0x60
[<ffffffff814f8845>] rollback_registered_many+0x105/0x220
[<ffffffff814f89a0>] rollback_registered+0x40/0x70
[<ffffffff814f9c88>] unregister_netdevice_queue+0x48/0x80
[<ffffffff814f9cdc>] unregister_netdev+0x1c/0x30
[<ffffffffa0067139>] i40e_vsi_release+0x2a9/0x2b0 [i40e]
[<ffffffffa00674e8>] i40e_remove+0x128/0x2b0 [i40e]
[<ffffffff813092db>] pci_device_remove+0x3b/0xb0
[<ffffffff813d26ef>] __device_release_driver+0x7f/0xf0
[<ffffffff813d3068>] driver_detach+0xb8/0xc0
[<ffffffff813d22db>] bus_remove_driver+0x9b/0x120
[<ffffffff813d36dc>] driver_unregister+0x2c/0x50
[<ffffffff81307d4c>] pci_unregister_driver+0x2c/0x90
[<ffffffffa008f9d0>] i40e_exit_module+0x10/0x23 [i40e]
[<ffffffff810dad0b>] SyS_delete_module+0x16b/0x2d0
[<ffffffff81013b0c>] ? do_notify_resume+0x9c/0xb0
[<ffffffff81613da9>] system_call_fastpath+0x16/0x1b
Code: e5 41 57 4d 89 c7 41 56 49 89 d6 41 55 49 89 f5 41 54 53 89 cb
75 14 eb 3d 0f 1f 44 00 00 83 eb 01 74 25 4d 85 e4 74 20 4c 89 e0 <4c>
8b 60 08 4c 89 f2 4c 89 ee 48 89 c7 ff 10 4d 85 ff 74 04 41
RIP  [<ffffffff8160f75d>] notifier_call_chain+0x3d/0x70

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw.h       |  1 -
 drivers/infiniband/hw/i40iw/i40iw_main.c  | 32 ++++++++++++++++---------------
 drivers/infiniband/hw/i40iw/i40iw_utils.c |  6 +++---
 3 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 9b1566468744..a65e4cbdce2f 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -201,7 +201,6 @@ enum init_completion_state {
 	CEQ_CREATED,
 	ILQ_CREATED,
 	IEQ_CREATED,
-	INET_NOTIFIER,
 	IP_ADDR_REGISTERED,
 	RDMA_DEV_REGISTERED
 };
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 5965b59a2f83..27590ae21881 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -99,8 +99,6 @@ static struct notifier_block i40iw_net_notifier = {
 	.notifier_call = i40iw_net_event
 };
 
-static atomic_t i40iw_notifiers_registered;
-
 /**
  * i40iw_find_i40e_handler - find a handler given a client info
  * @ldev: pointer to a client info
@@ -1376,11 +1374,20 @@ error:
  */
 static void i40iw_register_notifiers(void)
 {
-	if (atomic_inc_return(&i40iw_notifiers_registered) == 1) {
-		register_inetaddr_notifier(&i40iw_inetaddr_notifier);
-		register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
-		register_netevent_notifier(&i40iw_net_notifier);
-	}
+	register_inetaddr_notifier(&i40iw_inetaddr_notifier);
+	register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+	register_netevent_notifier(&i40iw_net_notifier);
+}
+
+/**
+ * i40iw_unregister_notifiers - unregister tcp ip notifiers
+ */
+
+static void i40iw_unregister_notifiers(void)
+{
+	unregister_netevent_notifier(&i40iw_net_notifier);
+	unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
+	unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
 }
 
 /**
@@ -1467,12 +1474,6 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev)
 		if (!iwdev->reset)
 			i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
 		/* fallthrough */
-	case INET_NOTIFIER:
-		if (!atomic_dec_return(&i40iw_notifiers_registered)) {
-			unregister_netevent_notifier(&i40iw_net_notifier);
-			unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
-			unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
-		}
 		/* fallthrough */
 	case PBLE_CHUNK_MEM:
 		i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
@@ -1672,8 +1673,6 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
 			break;
 		iwdev->init_state = PBLE_CHUNK_MEM;
 		iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM);
-		i40iw_register_notifiers();
-		iwdev->init_state = INET_NOTIFIER;
 		status = i40iw_add_mac_ip(iwdev);
 		if (status)
 			break;
@@ -2023,6 +2022,8 @@ static int __init i40iw_init_module(void)
 	i40iw_client.type = I40E_CLIENT_IWARP;
 	spin_lock_init(&i40iw_handler_lock);
 	ret = i40e_register_client(&i40iw_client);
+	i40iw_register_notifiers();
+
 	return ret;
 }
 
@@ -2034,6 +2035,7 @@ static int __init i40iw_init_module(void)
  */
 static void __exit i40iw_exit_module(void)
 {
+	i40iw_unregister_notifiers();
 	i40e_unregister_client(&i40iw_client);
 }
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 62f1f45b8737..e52dbbb4165e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -160,7 +160,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
 		return NOTIFY_DONE;
 
 	iwdev = &hdl->device;
-	if (iwdev->init_state < INET_NOTIFIER)
+	if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
 		return NOTIFY_DONE;
 
 	netdev = iwdev->ldev->netdev;
@@ -217,7 +217,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
 		return NOTIFY_DONE;
 
 	iwdev = &hdl->device;
-	if (iwdev->init_state < INET_NOTIFIER)
+	if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
 		return NOTIFY_DONE;
 
 	netdev = iwdev->ldev->netdev;
@@ -266,7 +266,7 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *
 		if (!iwhdl)
 			return NOTIFY_DONE;
 		iwdev = &iwhdl->device;
-		if (iwdev->init_state < INET_NOTIFIER)
+		if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
 			return NOTIFY_DONE;
 		p = (__be32 *)neigh->primary_key;
 		i40iw_copy_ip_ntohl(local_ipaddr, p);
-- 
cgit 


From 471b370d52a4a461bf855ff542b544f3e4a5cb3a Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Tue, 19 Sep 2017 09:19:11 -0500
Subject: i40iw: Call i40iw_cm_disconn on modify QP to disconnect

If QP modify to closing/terminate/error fails, connection is
not torn down as there is no corresponding asynchronous
event that will initiate the teardown.

Add explicit call to i40iw_cm_disconn if not waiting in
modify QP, otherwise schedule it in CM timer.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_verbs.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 1aa411034a27..28b3d02d511b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -1027,7 +1027,19 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 				iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
 				iwqp->last_aeq = I40IW_AE_RESET_SENT;
 				spin_unlock_irqrestore(&iwqp->lock, flags);
+				i40iw_cm_disconn(iwqp);
 			}
+		} else {
+			spin_lock_irqsave(&iwqp->lock, flags);
+			if (iwqp->cm_id) {
+				if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
+					iwqp->cm_id->add_ref(iwqp->cm_id);
+					i40iw_schedule_cm_timer(iwqp->cm_node,
+								(struct i40iw_puda_buf *)iwqp,
+								 I40IW_TIMER_TYPE_CLOSE, 1, 0);
+				}
+			}
+			spin_unlock_irqrestore(&iwqp->lock, flags);
 		}
 	}
 	return 0;
-- 
cgit 


From dfc612b3407e88913a58db00b3bca93685d4f4f9 Mon Sep 17 00:00:00 2001
From: Mustafa Ismail <mustafa.ismail@intel.com>
Date: Tue, 19 Sep 2017 09:19:12 -0500
Subject: i40iw: Add missing VLAN priority

Set the VLAN priority which is in the upper 3 bits of the VLAN
tag field in the QP context.

Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_cm.c | 3 ++-
 drivers/infiniband/hw/i40iw/i40iw_cm.h | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 14f36ba4e5be..b7215448bb63 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -3255,7 +3255,8 @@ static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node,
 	tcp_info->snd_mss = cpu_to_le32(((u32)cm_node->tcp_cntxt.mss));
 	if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
 		tcp_info->insert_vlan_tag = true;
-		tcp_info->vlan_tag = cpu_to_le16(cm_node->vlan_id);
+		tcp_info->vlan_tag = cpu_to_le16(((u16)cm_node->user_pri << I40IW_VLAN_PRIO_SHIFT) |
+						  cm_node->vlan_id);
 	}
 	if (cm_node->ipv4) {
 		tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index 2e52e38ffcf3..8626e7f1fdd3 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -71,6 +71,8 @@
 #define	I40IW_HW_IRD_SETTING_32	32
 #define	I40IW_HW_IRD_SETTING_64	64
 
+#define I40IW_VLAN_PRIO_SHIFT   13
+
 enum ietf_mpa_flags {
 	IETF_MPA_FLAGS_MARKERS = 0x80,	/* receive Markers */
 	IETF_MPA_FLAGS_CRC = 0x40,	/* receive Markers */
-- 
cgit 


From f16dc0aa5ea20a2cf173e82ade5f05bfecaa850a Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Tue, 19 Sep 2017 09:19:13 -0500
Subject: i40iw: Add support for port reuse on active side connections

During OpenMPI scale up testing, we observe rdma_connect
failures if ports are reused on multiple connections.
This is because the Control Queue-Pair (CQP) command to add
the reused port to Accelerated Port Bit VectorTable (APBVT)
fails as there already exists an entry.

Check for duplicate port before invoking the CQP command
to add APBVT entry and delete the entry only if the port
is not in use.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_cm.c | 151 ++++++++++++++++-----------------
 drivers/infiniband/hw/i40iw/i40iw_cm.h |   3 +
 2 files changed, 78 insertions(+), 76 deletions(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index b7215448bb63..5230dd3c938c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -1504,23 +1504,40 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
 }
 
 /**
- * listen_port_in_use - determine if port is in use
- * @port: Listen port number
+ * i40iw_port_in_use - determine if port is in use
+ * @port: port number
+ * @active_side: flag for listener side vs active side
  */
-static bool i40iw_listen_port_in_use(struct i40iw_cm_core *cm_core, u16 port)
+static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool active_side)
 {
 	struct i40iw_cm_listener *listen_node;
+	struct i40iw_cm_node *cm_node;
 	unsigned long flags;
 	bool ret = false;
 
-	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
-	list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
-		if (listen_node->loc_port == port) {
-			ret = true;
-			break;
+	if (active_side) {
+		/* search connected node list */
+		spin_lock_irqsave(&cm_core->ht_lock, flags);
+		list_for_each_entry(cm_node, &cm_core->connected_nodes, list) {
+			if (cm_node->loc_port == port) {
+				ret = true;
+				break;
+			}
+		}
+			if (!ret)
+				clear_bit(port, cm_core->active_side_ports);
+		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+	} else {
+		spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+		list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+			if (listen_node->loc_port == port) {
+				ret = true;
+				break;
+			}
 		}
+		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
 	}
-	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
 	return ret;
 }
 
@@ -1868,7 +1885,7 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
 		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
 
 		if (listener->iwdev) {
-			if (apbvt_del && !i40iw_listen_port_in_use(cm_core, listener->loc_port))
+			if (apbvt_del && !i40iw_port_in_use(cm_core, listener->loc_port, false))
 				i40iw_manage_apbvt(listener->iwdev,
 						   listener->loc_port,
 						   I40IW_MANAGE_APBVT_DEL);
@@ -2247,21 +2264,21 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
 	if (cm_node->listener) {
 		i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
 	} else {
-		if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) &&
-		    cm_node->apbvt_set) {
+		if (!i40iw_port_in_use(cm_core, cm_node->loc_port, true) && cm_node->apbvt_set) {
 			i40iw_manage_apbvt(cm_node->iwdev,
 					   cm_node->loc_port,
 					   I40IW_MANAGE_APBVT_DEL);
-			i40iw_get_addr_info(cm_node, &nfo);
-			if (cm_node->qhash_set) {
-				i40iw_manage_qhash(cm_node->iwdev,
-						   &nfo,
-						   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-						   I40IW_QHASH_MANAGE_TYPE_DELETE,
-						   NULL,
-						   false);
-				cm_node->qhash_set = 0;
-			}
+			cm_node->apbvt_set = 0;
+		}
+		i40iw_get_addr_info(cm_node, &nfo);
+		if (cm_node->qhash_set) {
+			i40iw_manage_qhash(cm_node->iwdev,
+					   &nfo,
+					   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+					   I40IW_QHASH_MANAGE_TYPE_DELETE,
+					   NULL,
+					   false);
+			cm_node->qhash_set = 0;
 		}
 	}
 
@@ -3738,10 +3755,8 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	struct sockaddr_in *raddr;
 	struct sockaddr_in6 *laddr6;
 	struct sockaddr_in6 *raddr6;
-	bool qhash_set = false;
-	int apbvt_set = 0;
-	int err = 0;
-	enum i40iw_status_code status;
+	int ret = 0;
+	unsigned long flags;
 
 	ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
 	if (!ibqp)
@@ -3790,32 +3805,6 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	cm_info.user_pri = rt_tos2priority(cm_id->tos);
 	i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n",
 		    __func__, cm_id->tos, cm_info.user_pri);
-	if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
-	    (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
-				     raddr6->sin6_addr.in6_u.u6_addr32,
-				     sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
-		status = i40iw_manage_qhash(iwdev,
-					    &cm_info,
-					    I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-					    I40IW_QHASH_MANAGE_TYPE_ADD,
-					    NULL,
-					    true);
-		if (status)
-			return -EINVAL;
-		qhash_set = true;
-	}
-	status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD);
-	if (status) {
-		i40iw_manage_qhash(iwdev,
-				   &cm_info,
-				   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-				   I40IW_QHASH_MANAGE_TYPE_DELETE,
-				   NULL,
-				   false);
-		return -EINVAL;
-	}
-
-	apbvt_set = 1;
 	cm_id->add_ref(cm_id);
 	cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
 				       conn_param->private_data_len,
@@ -3823,17 +3812,40 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 				       &cm_info);
 
 	if (IS_ERR(cm_node)) {
-		err = PTR_ERR(cm_node);
-		goto err_out;
+		ret = PTR_ERR(cm_node);
+		cm_id->rem_ref(cm_id);
+		return ret;
+	}
+
+	if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
+	    (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
+				     raddr6->sin6_addr.in6_u.u6_addr32,
+				     sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
+		if (i40iw_manage_qhash(iwdev, &cm_info, I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+				       I40IW_QHASH_MANAGE_TYPE_ADD, NULL, true)) {
+			ret = -EINVAL;
+			goto err;
+		}
+		cm_node->qhash_set = true;
 	}
 
+	spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags);
+	if (!test_and_set_bit(cm_info.loc_port, iwdev->cm_core.active_side_ports)) {
+		spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+		if (i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD)) {
+			ret =  -EINVAL;
+			goto err;
+		}
+	} else {
+		spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+	}
+
+	cm_node->apbvt_set = true;
 	i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
 	if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
 	    !cm_node->ord_size)
 		cm_node->ord_size = 1;
 
-	cm_node->apbvt_set = apbvt_set;
-	cm_node->qhash_set = qhash_set;
 	iwqp->cm_node = cm_node;
 	cm_node->iwqp = iwqp;
 	iwqp->cm_id = cm_id;
@@ -3841,11 +3853,9 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
 	if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
 		cm_node->state = I40IW_CM_STATE_SYN_SENT;
-		err = i40iw_send_syn(cm_node, 0);
-		if (err) {
-			i40iw_rem_ref_cm_node(cm_node);
-			goto err_out;
-		}
+		ret = i40iw_send_syn(cm_node, 0);
+		if (ret)
+			goto err;
 	}
 
 	i40iw_debug(cm_node->dev,
@@ -3854,9 +3864,10 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		    cm_node->rem_port,
 		    cm_node,
 		    cm_node->cm_id);
+
 	return 0;
 
-err_out:
+err:
 	if (cm_info.ipv4)
 		i40iw_debug(&iwdev->sc_dev,
 			    I40IW_DEBUG_CM,
@@ -3868,22 +3879,10 @@ err_out:
 			    "Api - connect() FAILED: dest addr=%pI6",
 			    cm_info.rem_addr);
 
-	if (qhash_set)
-		i40iw_manage_qhash(iwdev,
-				   &cm_info,
-				   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-				   I40IW_QHASH_MANAGE_TYPE_DELETE,
-				   NULL,
-				   false);
-
-	if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
-						   cm_info.loc_port))
-		i40iw_manage_apbvt(iwdev,
-				   cm_info.loc_port,
-				   I40IW_MANAGE_APBVT_DEL);
+	i40iw_rem_ref_cm_node(cm_node);
 	cm_id->rem_ref(cm_id);
 	iwdev->cm_core.stats_connect_errs++;
-	return err;
+	return ret;
 }
 
 /**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index 8626e7f1fdd3..45abef76295b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -71,6 +71,7 @@
 #define	I40IW_HW_IRD_SETTING_32	32
 #define	I40IW_HW_IRD_SETTING_64	64
 
+#define MAX_PORTS		65536
 #define I40IW_VLAN_PRIO_SHIFT   13
 
 enum ietf_mpa_flags {
@@ -413,6 +414,8 @@ struct i40iw_cm_core {
 	spinlock_t ht_lock; /* manage hash table */
 	spinlock_t listen_list_lock; /* listen list */
 
+	unsigned long active_side_ports[BITS_TO_LONGS(MAX_PORTS)];
+
 	u64	stats_nodes_created;
 	u64	stats_nodes_destroyed;
 	u64	stats_listen_created;
-- 
cgit 


From 432654df90f2a7d8ac8438905208074604ed3e00 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Mon, 11 Sep 2017 21:41:43 +0200
Subject: parisc: Fix too large frame size warnings

The parisc architecture has larger stack frames than most other
architectures on 32-bit kernels.

Increase the maximum allowed stack frame to 1280 bytes for parisc to
avoid warnings in the do_sys_poll() and pat_memconfig() functions.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 lib/Kconfig.debug | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b19c491cbc4e..2689b7c50c52 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -219,7 +219,8 @@ config FRAME_WARN
 	range 0 8192
 	default 0 if KASAN
 	default 2048 if GCC_PLUGIN_LATENT_ENTROPY
-	default 1024 if !64BIT
+	default 1280 if (!64BIT && PARISC)
+	default 1024 if (!64BIT && !PARISC)
 	default 2048 if 64BIT
 	help
 	  Tell gcc to warn at build time for stack frames larger than this.
-- 
cgit 


From e77900abfd8be4e207412d8b7752dbb9838e2571 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 17 Sep 2017 21:05:02 +0200
Subject: parisc: Stop unwinding at start of stack

Check stack pointer if we are reaching the stack end and stop unwinding
if we do. This fixes early backtraces and avoids showing unrealistic
call stacks.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/unwind.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 48dc7d4d20bb..caab39dfa95d 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/kallsyms.h>
 #include <linux/sort.h>
+#include <linux/sched.h>
 
 #include <linux/uaccess.h>
 #include <asm/assembly.h>
@@ -279,6 +280,17 @@ static void unwind_frame_regs(struct unwind_frame_info *info)
 
 			info->prev_sp = sp - 64;
 			info->prev_ip = 0;
+
+			/* The stack is at the end inside the thread_union
+			 * struct. If we reach data, we have reached the
+			 * beginning of the stack and should stop unwinding. */
+			if (info->prev_sp >= (unsigned long) task_thread_info(info->t) &&
+			    info->prev_sp < ((unsigned long) task_thread_info(info->t)
+						+ THREAD_SZ_ALGN)) {
+				info->prev_sp = 0;
+				break;
+			}
+
 			if (get_user(tmp, (unsigned long *)(info->prev_sp - RP_OFFSET))) 
 				break;
 			info->prev_ip = tmp;
-- 
cgit 


From 08b8a99b2c5ea8da4d3dd55056881d12baea1e04 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 17 Sep 2017 21:17:10 +0200
Subject: parisc: Move start_parisc() into init section

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/setup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index dee6f9d6a153..a31e91c1782b 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -38,6 +38,7 @@
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
+#include <linux/start_kernel.h>
 
 #include <asm/processor.h>
 #include <asm/sections.h>
@@ -398,9 +399,8 @@ static int __init parisc_init(void)
 }
 arch_initcall(parisc_init);
 
-void start_parisc(void)
+void __init start_parisc(void)
 {
-	extern void start_kernel(void);
 	extern void early_trap_init(void);
 
 	int ret, cpunum;
-- 
cgit 


From 77089c5274fe2f72db5a2cd956d0d308aed08e68 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 17 Sep 2017 21:15:09 +0200
Subject: parisc: Add wrapper for pdc_instr() firmware function

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/pdc.h |  1 +
 arch/parisc/kernel/firmware.c | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
index 26b4455baa83..510341f62d97 100644
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -280,6 +280,7 @@ void setup_pdc(void);		/* in inventory.c */
 /* wrapper-functions from pdc.c */
 
 int pdc_add_valid(unsigned long address);
+int pdc_instr(unsigned int *instr);
 int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsigned long len);
 int pdc_chassis_disp(unsigned long disp);
 int pdc_chassis_warn(unsigned long *warn);
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index ab80e5c6f651..6d471c00c71a 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -232,6 +232,26 @@ int pdc_add_valid(unsigned long address)
 }
 EXPORT_SYMBOL(pdc_add_valid);
 
+/**
+ * pdc_instr - Get instruction that invokes PDCE_CHECK in HPMC handler.
+ * @instr: Pointer to variable which will get instruction opcode.
+ *
+ * The return value is PDC_OK (0) in case call succeeded.
+ */
+int __init pdc_instr(unsigned int *instr)
+{
+	int retval;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pdc_lock, flags);
+	retval = mem_pdc_call(PDC_INSTR, 0UL, __pa(pdc_result));
+	convert_to_wide(pdc_result);
+	*instr = pdc_result[0];
+	spin_unlock_irqrestore(&pdc_lock, flags);
+
+	return retval;
+}
+
 /**
  * pdc_chassis_info - Return chassis information.
  * @result: The return buffer.
-- 
cgit 


From 8d771b143fe2e3941fc8a32926d21410004578c0 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 17 Sep 2017 21:28:11 +0200
Subject: parisc: Add PDCE_CHECK instruction to HPMC handler

According to the programming note at page 1-31 of the PA 1.1 Firmware
Architecture document, one should use the PDC_INSTR firmware function to
get the instruction that invokes a PDCE_CHECK in the HPMC handler.  This
patch follows this note and sets the instruction which has been a nop up
until now.
Testing on a C3000 and C8000 showed that this firmware call isn't
implemented on those machines, so maybe it's only needed on older ones.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/traps.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 991654c88eec..230333157fe3 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -817,7 +817,7 @@ void __init initialize_ivt(const void *iva)
 	u32 check = 0;
 	u32 *ivap;
 	u32 *hpmcp;
-	u32 length;
+	u32 length, instr;
 
 	if (strcmp((const char *)iva, "cows can fly"))
 		panic("IVT invalid");
@@ -827,6 +827,14 @@ void __init initialize_ivt(const void *iva)
 	for (i = 0; i < 8; i++)
 	    *ivap++ = 0;
 
+	/*
+	 * Use PDC_INSTR firmware function to get instruction that invokes
+	 * PDCE_CHECK in HPMC handler.  See programming note at page 1-31 of
+	 * the PA 1.1 Firmware Architecture document.
+	 */
+	if (pdc_instr(&instr) == PDC_OK)
+		ivap[0] = instr;
+
 	/* Compute Checksum for HPMC handler */
 	length = os_hpmc_size;
 	ivap[7] = length;
-- 
cgit 


From ea6976483fb0ced259fbaa9e4f68a2cdcee7e312 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Mon, 18 Sep 2017 17:55:24 +0200
Subject: parisc: Check if initrd was loaded into broken RAM

While scanning the PDT for reported broken memory modules, warn if the
initrd was coincidentally loaded into bad memory.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/pdt.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c
index 05730a83895c..00aed082969b 100644
--- a/arch/parisc/kernel/pdt.c
+++ b/arch/parisc/kernel/pdt.c
@@ -15,6 +15,7 @@
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
 #include <linux/kthread.h>
+#include <linux/initrd.h>
 
 #include <asm/pdc.h>
 #include <asm/pdcpat.h>
@@ -216,8 +217,16 @@ void __init pdc_pdt_init(void)
 	}
 
 	for (i = 0; i < pdt_status.pdt_entries; i++) {
+		unsigned long addr;
+
 		report_mem_err(pdt_entry[i]);
 
+		addr = pdt_entry[i] & PDT_ADDR_PHYS_MASK;
+		if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) &&
+			addr >= initrd_start && addr < initrd_end)
+			pr_crit("CRITICAL: initrd possibly broken "
+				"due to bad memory!\n");
+
 		/* mark memory page bad */
 		memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
 	}
-- 
cgit 


From a7e6601f70a53957b1d01c321319f0237bba5202 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 21 Sep 2017 21:22:27 +0200
Subject: parisc: Move init_per_cpu() into init section

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/smp.h  | 1 +
 arch/parisc/kernel/processor.c | 2 +-
 arch/parisc/kernel/setup.c     | 2 +-
 arch/parisc/kernel/smp.c       | 3 +--
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h
index a5dc9066c6d8..ad9c9c3b4136 100644
--- a/arch/parisc/include/asm/smp.h
+++ b/arch/parisc/include/asm/smp.h
@@ -1,6 +1,7 @@
 #ifndef __ASM_SMP_H
 #define __ASM_SMP_H
 
+extern int init_per_cpu(int cpuid);
 
 #if defined(CONFIG_SMP)
 
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index a778bd3c107c..e120d63c1b28 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -317,7 +317,7 @@ void __init collect_boot_cpu_data(void)
  *
  * o Enable CPU profiling hooks.
  */
-int init_per_cpu(int cpunum)
+int __init init_per_cpu(int cpunum)
 {
 	int ret;
 	struct pdc_coproc_cfg coproc_cfg;
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index a31e91c1782b..f7d0c3b33d70 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -49,6 +49,7 @@
 #include <asm/io.h>
 #include <asm/setup.h>
 #include <asm/unwind.h>
+#include <asm/smp.h>
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 
@@ -116,7 +117,6 @@ void __init dma_ops_init(void)
 }
 #endif
 
-extern int init_per_cpu(int cpuid);
 extern void collect_boot_cpu_data(void);
 
 void __init setup_arch(char **cmdline_p)
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 63365106ea19..30c28ab14540 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -255,12 +255,11 @@ void arch_send_call_function_single_ipi(int cpu)
 static void __init
 smp_cpu_init(int cpunum)
 {
-	extern int init_per_cpu(int);  /* arch/parisc/kernel/processor.c */
 	extern void init_IRQ(void);    /* arch/parisc/kernel/irq.c */
 	extern void start_cpu_itimer(void); /* arch/parisc/kernel/time.c */
 
 	/* Set modes and Enable floating point coprocessor */
-	(void) init_per_cpu(cpunum);
+	init_per_cpu(cpunum);
 
 	disable_sr_hashing();
 
-- 
cgit 


From 606f95e4255845155f62504a9e1f12665b1853c8 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 21 Sep 2017 21:52:08 +0200
Subject: parisc: Add HWPOISON page fault handler code

Commit 24587380f61d ("parisc: Add MADV_HWPOISON and MADV_SOFT_OFFLINE") added
the necessary constants to handle hardware-poisoning. Those were needed to
support the page deallocation feature from firmware.

But I completely missed to add the relevant fault handler code. This now
showed up when I ran the madvise07 testcase from the Linux Test Project,
which failed with a kernel BUG at arch/parisc/mm/fault.c:320.

With this patch the parisc kernel now behaves like other platforms and
gives the same kernel syslog warnings when poisoning pages.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/mm/fault.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 5b101f6a5607..e247edbca68e 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/extable.h>
 #include <linux/uaccess.h>
+#include <linux/hugetlb.h>
 
 #include <asm/traps.h>
 
@@ -261,7 +262,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	unsigned long acc_type;
-	int fault;
+	int fault = 0;
 	unsigned int flags;
 
 	if (faulthandler_disabled())
@@ -315,7 +316,8 @@ good_area:
 			goto out_of_memory;
 		else if (fault & VM_FAULT_SIGSEGV)
 			goto bad_area;
-		else if (fault & VM_FAULT_SIGBUS)
+		else if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+				  VM_FAULT_HWPOISON_LARGE))
 			goto bad_area;
 		BUG();
 	}
@@ -352,8 +354,7 @@ bad_area:
 
 	if (user_mode(regs)) {
 		struct siginfo si;
-
-		show_signal_msg(regs, code, address, tsk, vma);
+		unsigned int lsb = 0;
 
 		switch (code) {
 		case 15:	/* Data TLB miss fault/Data page fault */
@@ -386,6 +387,30 @@ bad_area:
 			si.si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR;
 			break;
 		}
+
+#ifdef CONFIG_MEMORY_FAILURE
+		if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+			printk(KERN_ERR
+	"MCE: Killing %s:%d due to hardware memory corruption fault at %08lx\n",
+			tsk->comm, tsk->pid, address);
+			si.si_signo = SIGBUS;
+			si.si_code = BUS_MCEERR_AR;
+		}
+#endif
+
+		/*
+		 * Either small page or large page may be poisoned.
+		 * In other words, VM_FAULT_HWPOISON_LARGE and
+		 * VM_FAULT_HWPOISON are mutually exclusive.
+		 */
+		if (fault & VM_FAULT_HWPOISON_LARGE)
+			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+		else if (fault & VM_FAULT_HWPOISON)
+			lsb = PAGE_SHIFT;
+		else
+			show_signal_msg(regs, code, address, tsk, vma);
+		si.si_addr_lsb = lsb;
+
 		si.si_errno = 0;
 		si.si_addr = (void __user *) address;
 		force_sig_info(si.si_signo, &si, current);
-- 
cgit 


From f9b941baa4b78dafcbc4790c0ed2fbbe2aafed06 Mon Sep 17 00:00:00 2001
From: Devesh Sharma <devesh.sharma@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:28 +0530
Subject: bnxt_re: Fix update of qplib_qp.mtu when modified

The MTU value in the qplib_qp.mtu should be
consistent with whatever mtu was set during
INIT to RTR.The Next PSN and number of packets
are calculated based on this member in the qplib_qp structure.

Signed-off-by: Narender Reddy <narender.reddy@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 03caf48af8e2..11bbf3e748b0 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1436,11 +1436,14 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 		qp->qplib_qp.modify_flags |=
 				CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
 		qp->qplib_qp.path_mtu = __from_ib_mtu(qp_attr->path_mtu);
+		qp->qplib_qp.mtu = ib_mtu_enum_to_int(qp_attr->path_mtu);
 	} else if (qp_attr->qp_state == IB_QPS_RTR) {
 		qp->qplib_qp.modify_flags |=
 			CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
 		qp->qplib_qp.path_mtu =
 			__from_ib_mtu(iboe_get_mtu(rdev->netdev->mtu));
+		qp->qplib_qp.mtu =
+			ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
 	}
 
 	if (qp_attr_mask & IB_QP_TIMEOUT) {
-- 
cgit 


From 2b6376305dcb2cb79d0599cf3b9ce98aa860cb4f Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:29 +0530
Subject: bnxt_re: Stop issuing further cmds to FW once a cmd times out

Once a cmd to FW times out(after 20s) it is reasonable to
assume the FW or atleast the control path is dead.
No point issuing further cmds to the FW as each subsequent cmd
with another 20s timeout will cascade resulting in unnecessary
traces and/or NMI Lockups.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 4 ++++
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 391bb7006e8f..2bdb1562bd21 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -107,6 +107,9 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
 		return -EINVAL;
 	}
 
+	if (test_bit(FIRMWARE_TIMED_OUT, &rcfw->flags))
+		return -ETIMEDOUT;
+
 	/* Cmdq are in 16-byte units, each request can consume 1 or more
 	 * cmdqe
 	 */
@@ -226,6 +229,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 		/* timed out */
 		dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
 			cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
+		set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags);
 		return rc;
 	}
 
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 0ed312f17c8d..85b16da287f9 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -162,8 +162,9 @@ struct bnxt_qplib_rcfw {
 	unsigned long		*cmdq_bitmap;
 	u32			bmap_size;
 	unsigned long		flags;
-#define FIRMWARE_INITIALIZED_FLAG	1
+#define FIRMWARE_INITIALIZED_FLAG	BIT(0)
 #define FIRMWARE_FIRST_FLAG		BIT(31)
+#define FIRMWARE_TIMED_OUT		BIT(3)
 	wait_queue_head_t	waitq;
 	int			(*aeq_handler)(struct bnxt_qplib_rcfw *,
 					       struct creq_func_event *);
-- 
cgit 


From 55311d055175cada8249d39436371afe790df699 Mon Sep 17 00:00:00 2001
From: Devesh Sharma <devesh.sharma@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:30 +0530
Subject: bnxt_re: Fix compare and swap atomic operands

Driver must assign the user supplied compare/swap values in
the wqe to successfully complete the atomic compare and
swap operation.

Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 11bbf3e748b0..0dbdbe1616ab 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1916,6 +1916,7 @@ static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr,
 	switch (wr->opcode) {
 	case IB_WR_ATOMIC_CMP_AND_SWP:
 		wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP;
+		wqe->atomic.cmp_data = atomic_wr(wr)->compare_add;
 		wqe->atomic.swap_data = atomic_wr(wr)->swap;
 		break;
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
-- 
cgit 


From 027c892924eac22f5ca8db4100bccde423be797d Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:31 +0530
Subject: bnxt_re: Free up devices in module_exit path

Clean up all devices added to the bnxt_re_dev_list in the
module_exit entry point.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/main.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 82d1cbc27aee..00a3b743c156 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1375,6 +1375,22 @@ err_netdev:
 
 static void __exit bnxt_re_mod_exit(void)
 {
+	struct bnxt_re_dev *rdev;
+	LIST_HEAD(to_be_deleted);
+
+	mutex_lock(&bnxt_re_dev_lock);
+	/* Free all adapter allocated resources */
+	if (!list_empty(&bnxt_re_dev_list))
+		list_splice_init(&bnxt_re_dev_list, &to_be_deleted);
+	mutex_unlock(&bnxt_re_dev_lock);
+
+	list_for_each_entry(rdev, &to_be_deleted, list) {
+		dev_info(rdev_to_dev(rdev), "Unregistering Device");
+		bnxt_re_dev_stop(rdev);
+		bnxt_re_ib_unreg(rdev, true);
+		bnxt_re_remove_one(rdev);
+		bnxt_re_dev_unreg(rdev);
+	}
 	unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
 	if (bnxt_re_wq)
 		destroy_workqueue(bnxt_re_wq);
-- 
cgit 


From d5917307bb1caa9cb0a915951c57f4cdbacca443 Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:32 +0530
Subject: bnxt_re: Fix race between the netdev register and unregister events

Upon receipt of the NETDEV_REGISTER event from the netdev notifier chain,
the IB stack registration is spawned off to a workqueue since that also
requires an rtnl lock.
There could be 2 kinds of races between the NETDEV_REGISTER and the
NETDEV_UNREGISTER event handling.
a)The NETDEV_UNREGISTER event is received in rapid succession after
the NETDEV_REGISTER event even before the work queue got a chance to run.
b)The NETDEV_UNREGISTER event is received while the workqueue that handles
registration with the IB stack is still in progress.

Handle both the races with a bit flag that is set just before the work item
is queued and cleared in the workqueue after the event is handled just
before the workqueue item is freed.

While adding the new flag, it was noted that the flags are all used in
*_bit() operations which expect a bit number and not a literal constant
with a bit set.  So change the numbers to be bit numbers.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/bnxt_re.h | 12 +++++++-----
 drivers/infiniband/hw/bnxt_re/main.c    |  8 ++++++++
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index b3ad37fec578..a25f9d240880 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -93,11 +93,13 @@ struct bnxt_re_dev {
 	struct ib_device		ibdev;
 	struct list_head		list;
 	unsigned long			flags;
-#define BNXT_RE_FLAG_NETDEV_REGISTERED	0
-#define BNXT_RE_FLAG_IBDEV_REGISTERED	1
-#define BNXT_RE_FLAG_GOT_MSIX		2
-#define BNXT_RE_FLAG_RCFW_CHANNEL_EN	8
-#define BNXT_RE_FLAG_QOS_WORK_REG	16
+#define BNXT_RE_FLAG_NETDEV_REGISTERED		0
+#define BNXT_RE_FLAG_IBDEV_REGISTERED		1
+#define BNXT_RE_FLAG_GOT_MSIX			2
+#define BNXT_RE_FLAG_HAVE_L2_REF		3
+#define BNXT_RE_FLAG_RCFW_CHANNEL_EN		4
+#define BNXT_RE_FLAG_QOS_WORK_REG		5
+#define BNXT_RE_FLAG_TASK_IN_PROG		6
 	struct net_device		*netdev;
 	unsigned int			version, major, minor;
 	struct bnxt_en_dev		*en_dev;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 00a3b743c156..29c3d7e254af 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1259,6 +1259,8 @@ static void bnxt_re_task(struct work_struct *work)
 	default:
 		break;
 	}
+	smp_mb__before_atomic();
+	clear_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
 	kfree(re_work);
 }
 
@@ -1317,6 +1319,11 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
 		break;
 
 	case NETDEV_UNREGISTER:
+		/* netdev notifier will call NETDEV_UNREGISTER again later since
+		 * we are still holding the reference to the netdev
+		 */
+		if (test_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags))
+			goto exit;
 		bnxt_re_ib_unreg(rdev, false);
 		bnxt_re_remove_one(rdev);
 		bnxt_re_dev_unreg(rdev);
@@ -1335,6 +1342,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
 			re_work->vlan_dev = (real_dev == netdev ?
 					     NULL : netdev);
 			INIT_WORK(&re_work->work, bnxt_re_task);
+			set_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
 			queue_work(bnxt_re_wq, &re_work->work);
 		}
 	}
-- 
cgit 


From 74828b128115033ff25d4140d732a05a36eaeaf0 Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:33 +0530
Subject: bnxt_re: Remove RTNL lock dependency in bnxt_re_query_port

When there is a NETDEV_UNREGISTER event, bnxt_re driver calls
ib_unregister_device() (RTNL lock held).
ib_unregister_device attempts to flush a worker queue scheduled by
ib_core and that queue might have a pending ib_query_port().
ib_query_port in turn calls bnxt_re_query_port(), which while querying the
link speed using ib_get_eth_speed(), tries to acquire the rtnl_lock() which
was already held by NETDEV_UNREGISTER.
Fixing the issue by removing the link speed query from bnxt_re_query_port()
Now the speed is queried post a successful ib_register_device or whenever
there is a NETDEV_CHANGE event.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/bnxt_re.h  |  2 ++
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 11 +++--------
 drivers/infiniband/hw/bnxt_re/main.c     |  4 ++++
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index a25f9d240880..ecbac91b2e14 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -110,6 +110,8 @@ struct bnxt_re_dev {
 
 	struct delayed_work		worker;
 	u8				cur_prio_map;
+	u8				active_speed;
+	u8				active_width;
 
 	/* FP Notification Queue (CQ & SRQ) */
 	struct tasklet_struct		nq_task;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 0dbdbe1616ab..7430ef07a0e1 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -259,14 +259,9 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
 	port_attr->sm_sl = 0;
 	port_attr->subnet_timeout = 0;
 	port_attr->init_type_reply = 0;
-	/* call the underlying netdev's ethtool hooks to query speed settings
-	 * for which we acquire rtnl_lock _only_ if it's registered with
-	 * IB stack to avoid race in the NETDEV_UNREG path
-	 */
-	if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
-		if (ib_get_eth_speed(ibdev, port_num, &port_attr->active_speed,
-				     &port_attr->active_width))
-			return -EINVAL;
+	port_attr->active_speed = rdev->active_speed;
+	port_attr->active_width = rdev->active_width;
+
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 29c3d7e254af..e7450ea92aa9 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1161,6 +1161,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 		}
 	}
 	set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
+	ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
+			 &rdev->active_width);
 	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
 	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
 
@@ -1255,6 +1257,8 @@ static void bnxt_re_task(struct work_struct *work)
 		else if (netif_carrier_ok(rdev->netdev))
 			bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
 					       IB_EVENT_PORT_ACTIVE);
+		ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
+				 &rdev->active_width);
 		break;
 	default:
 		break;
-- 
cgit 


From 1993519be8bc86342687c61d8d11c3ade62b3b84 Mon Sep 17 00:00:00 2001
From: Selvin Xavier <selvin.xavier@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:34 +0530
Subject: bnxt_re: Fix memory leak in FRMR path

This patch fixes a memory leak issue when alloc_mr is used.
mr->pages and mr->npages are used only in alloc_mr path. mr->pages
is allocated when alloc_mr is called or in the case of FRMR, while
creating the MR. mr->npages is updated only when the MR created
is used i.e. after invoking map_mr_sg verb, before data transfer.
In the dereg_mr path, if mr->npages is 0, driver ends up not freeing
the memory created.
Removing the npages check from the dereg_mr path for kernel consumers.

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 7430ef07a0e1..d7be4e4227ce 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -3066,7 +3066,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
 		return rc;
 	}
 
-	if (mr->npages && mr->pages) {
+	if (mr->pages) {
 		rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
 							&mr->qplib_frpl);
 		kfree(mr->pages);
-- 
cgit 


From 89aaca54ba60e91f02c1c168fbef5d71f71a6d43 Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:35 +0530
Subject: bnxt_re: Don't issue cmd to delete GID for QP1 GID entry before the
 QP is destroyed

FW needs the 0th GID Entry in the Table to be preserved before
it's corresponding QP1 is deleted, else it will fail the cmd.
Check for the same and return to prevent error msg being logged for
cmd failure.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index d7be4e4227ce..0d89621d9fe8 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -314,6 +314,7 @@ int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
 	struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
 	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
 	struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+	struct bnxt_qplib_gid *gid_to_del;
 
 	/* Delete the entry from the hardware */
 	ctx = *context;
@@ -323,11 +324,25 @@ int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
 	if (sgid_tbl && sgid_tbl->active) {
 		if (ctx->idx >= sgid_tbl->max)
 			return -EINVAL;
+		gid_to_del = &sgid_tbl->tbl[ctx->idx];
+		/* DEL_GID is called in WQ context(netdevice_event_work_handler)
+		 * or via the ib_unregister_device path. In the former case QP1
+		 * may not be destroyed yet, in which case just return as FW
+		 * needs that entry to be present and will fail it's deletion.
+		 * We could get invoked again after QP1 is destroyed OR get an
+		 * ADD_GID call with a different GID value for the same index
+		 * where we issue MODIFY_GID cmd to update the GID entry -- TBD
+		 */
+		if (ctx->idx == 0 &&
+		    rdma_link_local_addr((struct in6_addr *)gid_to_del) &&
+		    ctx->refcnt == 1 && rdev->qp1_sqp) {
+			dev_dbg(rdev_to_dev(rdev),
+				"Trying to delete GID0 while QP1 is alive\n");
+			return -EFAULT;
+		}
 		ctx->refcnt--;
 		if (!ctx->refcnt) {
-			rc = bnxt_qplib_del_sgid(sgid_tbl,
-						 &sgid_tbl->tbl[ctx->idx],
-						 true);
+			rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del, true);
 			if (rc) {
 				dev_err(rdev_to_dev(rdev),
 					"Failed to remove GID: %#x", rc);
@@ -811,6 +826,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
 
 		kfree(rdev->sqp_ah);
 		kfree(rdev->qp1_sqp);
+		rdev->qp1_sqp = NULL;
+		rdev->sqp_ah = NULL;
 	}
 
 	if (!IS_ERR_OR_NULL(qp->rumem))
-- 
cgit 


From 19fe43a54fb67b6cc8857e65c78e1dc8aa2e97a3 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 6 Jul 2017 10:56:21 +0200
Subject: apparmor: Fix shadowed local variable in unpack_trans_table()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

with W=2:

    security/apparmor/policy_unpack.c: In function ‘unpack_trans_table’:
    security/apparmor/policy_unpack.c:469: warning: declaration of ‘pos’ shadows a previous local
    security/apparmor/policy_unpack.c:451: warning: shadowed declaration is here

Rename the old "pos" to "saved_pos" to fix this.

Fixes: 5379a3312024a8be ("apparmor: support v7 transition format compatible with label_parse")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index c600f4dd1783..2d5a1a007b06 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -448,7 +448,7 @@ fail:
  */
 static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile)
 {
-	void *pos = e->pos;
+	void *saved_pos = e->pos;
 
 	/* exec table is optional */
 	if (unpack_nameX(e, AA_STRUCT, "xtable")) {
@@ -511,7 +511,7 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile)
 
 fail:
 	aa_free_domain_entries(&profile->file.trans);
-	e->pos = pos;
+	e->pos = saved_pos;
 	return 0;
 }
 
-- 
cgit 


From 86aea56f14929ff1c05eca1776e9068e907429d5 Mon Sep 17 00:00:00 2001
From: Christos Gkekas <chris.gekas@gmail.com>
Date: Sat, 8 Jul 2017 20:50:21 +0100
Subject: apparmor: Fix logical error in verify_header()

verify_header() is currently checking whether interface version is less
than 5 *and* greater than 7, which always evaluates to false. Instead it
should check whether it is less than 5 *or* greater than 7.

Signed-off-by: Christos Gkekas <chris.gekas@gmail.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 2d5a1a007b06..bda0dce3b582 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -832,7 +832,7 @@ static int verify_header(struct aa_ext *e, int required, const char **ns)
 	 * if not specified use previous version
 	 * Mask off everything that is not kernel abi version
 	 */
-	if (VERSION_LT(e->version, v5) && VERSION_GT(e->version, v7)) {
+	if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v7)) {
 		audit_iface(NULL, NULL, NULL, "unsupported interface version",
 			    e, error);
 		return error;
-- 
cgit 


From 5d314a81eca29b01939930c1c596dfa44937e970 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 13 Jul 2017 10:39:20 +0300
Subject: apparmor: Fix an error code in aafs_create()

We accidentally forgot to set the error code on this path.  It means we
return NULL instead of an error pointer.  I looked through a bunch of
callers and I don't think it really causes a big issue, but the
documentation says we're supposed to return error pointers here.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 853c2ec8e0c9..2caeb748070c 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -248,8 +248,10 @@ static struct dentry *aafs_create(const char *name, umode_t mode,
 
 	inode_lock(dir);
 	dentry = lookup_one_len(name, parent, strlen(name));
-	if (IS_ERR(dentry))
+	if (IS_ERR(dentry)) {
+		error = PTR_ERR(dentry);
 		goto fail_lock;
+	}
 
 	if (d_really_is_positive(dentry)) {
 		error = -EEXIST;
-- 
cgit 


From c5561700c9cb951ec3a33a0914c840423b09d7c9 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 31 Jul 2017 23:44:37 -0700
Subject: apparmor: Redundant condition: prev_ns. in [label.c:1498]

Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/label.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index e052eaba1cf6..e324f4df3e34 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -1495,7 +1495,7 @@ static int aa_profile_snxprint(char *str, size_t size, struct aa_ns *view,
 		view = profiles_ns(profile);
 
 	if (view != profile->ns &&
-	    (!prev_ns || (prev_ns && *prev_ns != profile->ns))) {
+	    (!prev_ns || (*prev_ns != profile->ns))) {
 		if (prev_ns)
 			*prev_ns = profile->ns;
 		ns_name = aa_ns_name(view, profile->ns,
-- 
cgit 


From cd1dbf76b23d5ab2cba5e657fe20b1e236a408cc Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 18 Jul 2017 22:56:22 -0700
Subject: apparmor: add the ability to mediate signals

Add signal mediation where the signal can be mediated based on the
signal, direction, or the label or the peer/target. The signal perms
are verified on a cross check to ensure policy consistency in the case
of incremental policy load/replacement.

The optimization of skipping the cross check when policy is guaranteed
to be consistent (single compile unit) remains to be done.

policy rules have the form of
  SIGNAL_RULE = [ QUALIFIERS ] 'signal' [ SIGNAL ACCESS PERMISSIONS ]
                [ SIGNAL SET ] [ SIGNAL PEER ]

  SIGNAL ACCESS PERMISSIONS = SIGNAL ACCESS | SIGNAL ACCESS LIST

  SIGNAL ACCESS LIST = '(' Comma or space separated list of SIGNAL
                           ACCESS ')'

  SIGNAL ACCESS = ( 'r' | 'w' | 'rw' | 'read' | 'write' | 'send' |
                    'receive' )

  SIGNAL SET = 'set' '=' '(' SIGNAL LIST ')'

  SIGNAL LIST = Comma or space separated list of SIGNALS

  SIGNALS = ( 'hup' | 'int' | 'quit' | 'ill' | 'trap' | 'abrt' |
              'bus' | 'fpe' | 'kill' | 'usr1' | 'segv' | 'usr2' |
	      'pipe' | 'alrm' | 'term' | 'stkflt' | 'chld' | 'cont' |
	      'stop' | 'stp' | 'ttin' | 'ttou' | 'urg' | 'xcpu' |
	      'xfsz' | 'vtalrm' | 'prof' | 'winch' | 'io' | 'pwr' |
	      'sys' | 'emt' | 'exists' | 'rtmin+0' ... 'rtmin+32'
            )

  SIGNAL PEER = 'peer' '=' AARE

eg.
  signal,                                 # allow all signals
  signal send set=(hup, kill) peer=foo,

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/apparmorfs.c        |  7 +++
 security/apparmor/include/apparmor.h  |  1 +
 security/apparmor/include/audit.h     |  2 +
 security/apparmor/include/ipc.h       |  6 +++
 security/apparmor/include/sig_names.h | 95 +++++++++++++++++++++++++++++++++
 security/apparmor/ipc.c               | 99 +++++++++++++++++++++++++++++++++++
 security/apparmor/lsm.c               | 21 ++++++++
 7 files changed, 231 insertions(+)
 create mode 100644 security/apparmor/include/sig_names.h

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 2caeb748070c..a5f9e1aa51f7 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -32,6 +32,7 @@
 #include "include/audit.h"
 #include "include/context.h"
 #include "include/crypto.h"
+#include "include/ipc.h"
 #include "include/policy_ns.h"
 #include "include/label.h"
 #include "include/policy.h"
@@ -2129,6 +2130,11 @@ static struct aa_sfs_entry aa_sfs_entry_ptrace[] = {
 	{ }
 };
 
+static struct aa_sfs_entry aa_sfs_entry_signal[] = {
+	AA_SFS_FILE_STRING("mask", AA_SFS_SIG_MASK),
+	{ }
+};
+
 static struct aa_sfs_entry aa_sfs_entry_domain[] = {
 	AA_SFS_FILE_BOOLEAN("change_hat",	1),
 	AA_SFS_FILE_BOOLEAN("change_hatv",	1),
@@ -2179,6 +2185,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = {
 	AA_SFS_DIR("rlimit",			aa_sfs_entry_rlimit),
 	AA_SFS_DIR("caps",			aa_sfs_entry_caps),
 	AA_SFS_DIR("ptrace",			aa_sfs_entry_ptrace),
+	AA_SFS_DIR("signal",			aa_sfs_entry_signal),
 	AA_SFS_DIR("query",			aa_sfs_entry_query),
 	{ }
 };
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index aaf893f4e4f5..962a20a75e01 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -28,6 +28,7 @@
 #define AA_CLASS_RLIMITS	5
 #define AA_CLASS_DOMAIN		6
 #define AA_CLASS_PTRACE		9
+#define AA_CLASS_SIGNAL		10
 #define AA_CLASS_LABEL		16
 
 #define AA_CLASS_LAST		AA_CLASS_LABEL
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index c68839a44351..d9a156ae11b9 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -86,6 +86,7 @@ enum audit_type {
 #define OP_SHUTDOWN "socket_shutdown"
 
 #define OP_PTRACE "ptrace"
+#define OP_SIGNAL "signal"
 
 #define OP_EXEC "exec"
 
@@ -126,6 +127,7 @@ struct apparmor_audit_data {
 			long pos;
 			const char *ns;
 		} iface;
+		int signal;
 		struct {
 			int rlim;
 			unsigned long max;
diff --git a/security/apparmor/include/ipc.h b/security/apparmor/include/ipc.h
index 656fdb81c8a0..5ffc218d1e74 100644
--- a/security/apparmor/include/ipc.h
+++ b/security/apparmor/include/ipc.h
@@ -27,8 +27,14 @@ struct aa_profile;
 
 #define AA_PTRACE_PERM_MASK (AA_PTRACE_READ | AA_PTRACE_TRACE | \
 			     AA_MAY_BE_READ | AA_MAY_BE_TRACED)
+#define AA_SIGNAL_PERM_MASK (MAY_READ | MAY_WRITE)
+
+#define AA_SFS_SIG_MASK "hup int quit ill trap abrt bus fpe kill usr1 " \
+	"segv usr2 pipe alrm term stkflt chld cont stop stp ttin ttou urg " \
+	"xcpu xfsz vtalrm prof winch io pwr sys emt lost"
 
 int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
 		  u32 request);
+int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig);
 
 #endif /* __AA_IPC_H */
diff --git a/security/apparmor/include/sig_names.h b/security/apparmor/include/sig_names.h
new file mode 100644
index 000000000000..0d4395f231ca
--- /dev/null
+++ b/security/apparmor/include/sig_names.h
@@ -0,0 +1,95 @@
+#include <linux/signal.h>
+
+#define SIGUNKNOWN 0
+#define MAXMAPPED_SIG 35
+/* provide a mapping of arch signal to internal signal # for mediation
+ * those that are always an alias SIGCLD for SIGCLHD and SIGPOLL for SIGIO
+ * map to the same entry those that may/or may not get a separate entry
+ */
+static const int sig_map[MAXMAPPED_SIG] = {
+	[0] = MAXMAPPED_SIG,	/* existence test */
+	[SIGHUP] = 1,
+	[SIGINT] = 2,
+	[SIGQUIT] = 3,
+	[SIGILL] = 4,
+	[SIGTRAP] = 5,		/* -, 5, - */
+	[SIGABRT] = 6,		/*  SIGIOT: -, 6, - */
+	[SIGBUS] = 7,		/* 10, 7, 10 */
+	[SIGFPE] = 8,
+	[SIGKILL] = 9,
+	[SIGUSR1] = 10,		/* 30, 10, 16 */
+	[SIGSEGV] = 11,
+	[SIGUSR2] = 12,		/* 31, 12, 17 */
+	[SIGPIPE] = 13,
+	[SIGALRM] = 14,
+	[SIGTERM] = 15,
+	[SIGSTKFLT] = 16,	/* -, 16, - */
+	[SIGCHLD] = 17,		/* 20, 17, 18.  SIGCHLD -, -, 18 */
+	[SIGCONT] = 18,		/* 19, 18, 25 */
+	[SIGSTOP] = 19,		/* 17, 19, 23 */
+	[SIGTSTP] = 20,		/* 18, 20, 24 */
+	[SIGTTIN] = 21,		/* 21, 21, 26 */
+	[SIGTTOU] = 22,		/* 22, 22, 27 */
+	[SIGURG] = 23,		/* 16, 23, 21 */
+	[SIGXCPU] = 24,		/* 24, 24, 30 */
+	[SIGXFSZ] = 25,		/* 25, 25, 31 */
+	[SIGVTALRM] = 26,	/* 26, 26, 28 */
+	[SIGPROF] = 27,		/* 27, 27, 29 */
+	[SIGWINCH] = 28,	/* 28, 28, 20 */
+	[SIGIO] = 29,		/* SIGPOLL: 23, 29, 22 */
+	[SIGPWR] = 30,		/* 29, 30, 19.  SIGINFO 29, -, - */
+#ifdef SIGSYS
+	[SIGSYS] = 31,		/* 12, 31, 12. often SIG LOST/UNUSED */
+#endif
+#ifdef SIGEMT
+	[SIGEMT] = 32,		/* 7, - , 7 */
+#endif
+#if defined(SIGLOST) && SIGPWR != SIGLOST		/* sparc */
+	[SIGLOST] = 33,		/* unused on Linux */
+#endif
+#if defined(SIGLOST) && defined(SIGSYS) && SIGLOST != SIGSYS
+	[SIGUNUSED] = 34,	/* -, 31, - */
+#endif
+};
+
+/* this table is ordered post sig_map[sig] mapping */
+static const char *const sig_names[MAXMAPPED_SIG + 1] = {
+	"unknown",
+	"hup",
+	"int",
+	"quit",
+	"ill",
+	"trap",
+	"abrt",
+	"bus",
+	"fpe",
+	"kill",
+	"usr1",
+	"segv",
+	"usr2",
+	"pipe",
+	"alrm",
+	"term",
+	"stkflt",
+	"chld",
+	"cont",
+	"stop",
+	"stp",
+	"ttin",
+	"ttou",
+	"urg",
+	"xcpu",
+	"xfsz",
+	"vtalrm",
+	"prof",
+	"winch",
+	"io",
+	"pwr",
+	"sys",
+	"emt",
+	"lost",
+	"unused",
+
+	"exists",	/* always last existence test mapped to MAXMAPPED_SIG */
+};
+
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index 11e66b5bbc42..66fb9ede9447 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -20,6 +20,7 @@
 #include "include/context.h"
 #include "include/policy.h"
 #include "include/ipc.h"
+#include "include/sig_names.h"
 
 /**
  * audit_ptrace_mask - convert mask to permission string
@@ -121,3 +122,101 @@ int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
 }
 
 
+static inline int map_signal_num(int sig)
+{
+	if (sig > SIGRTMAX)
+		return SIGUNKNOWN;
+	else if (sig >= SIGRTMIN)
+		return sig - SIGRTMIN + 128;	/* rt sigs mapped to 128 */
+	else if (sig <= MAXMAPPED_SIG)
+		return sig_map[sig];
+	return SIGUNKNOWN;
+}
+
+/**
+ * audit_file_mask - convert mask to permission string
+ * @buffer: buffer to write string to (NOT NULL)
+ * @mask: permission mask to convert
+ */
+static void audit_signal_mask(struct audit_buffer *ab, u32 mask)
+{
+	if (mask & MAY_READ)
+		audit_log_string(ab, "receive");
+	if (mask & MAY_WRITE)
+		audit_log_string(ab, "send");
+}
+
+/**
+ * audit_cb - call back for signal specific audit fields
+ * @ab: audit_buffer  (NOT NULL)
+ * @va: audit struct to audit values of  (NOT NULL)
+ */
+static void audit_signal_cb(struct audit_buffer *ab, void *va)
+{
+	struct common_audit_data *sa = va;
+
+	if (aad(sa)->request & AA_SIGNAL_PERM_MASK) {
+		audit_log_format(ab, " requested_mask=");
+		audit_signal_mask(ab, aad(sa)->request);
+		if (aad(sa)->denied & AA_SIGNAL_PERM_MASK) {
+			audit_log_format(ab, " denied_mask=");
+			audit_signal_mask(ab, aad(sa)->denied);
+		}
+	}
+	if (aad(sa)->signal <= MAXMAPPED_SIG)
+		audit_log_format(ab, " signal=%s", sig_names[aad(sa)->signal]);
+	else
+		audit_log_format(ab, " signal=rtmin+%d",
+				 aad(sa)->signal - 128);
+	audit_log_format(ab, " peer=");
+	aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer,
+			FLAGS_NONE, GFP_ATOMIC);
+}
+
+/* TODO: update to handle compound name&name2, conditionals */
+static void profile_match_signal(struct aa_profile *profile, const char *label,
+				 int signal, struct aa_perms *perms)
+{
+	unsigned int state;
+
+	/* TODO: secondary cache check <profile, profile, perm> */
+	state = aa_dfa_next(profile->policy.dfa,
+			    profile->policy.start[AA_CLASS_SIGNAL],
+			    signal);
+	state = aa_dfa_match(profile->policy.dfa, state, label);
+	aa_compute_perms(profile->policy.dfa, state, perms);
+}
+
+static int profile_signal_perm(struct aa_profile *profile,
+			       struct aa_profile *peer, u32 request,
+			       struct common_audit_data *sa)
+{
+	struct aa_perms perms;
+
+	if (profile_unconfined(profile) ||
+	    !PROFILE_MEDIATES(profile, AA_CLASS_SIGNAL))
+		return 0;
+
+	aad(sa)->peer = &peer->label;
+	profile_match_signal(profile, peer->base.hname, aad(sa)->signal,
+			     &perms);
+	aa_apply_modes_to_perms(profile, &perms);
+	return aa_check_perms(profile, &perms, request, sa, audit_signal_cb);
+}
+
+static int aa_signal_cross_perm(struct aa_profile *sender,
+				struct aa_profile *target,
+				struct common_audit_data *sa)
+{
+	return xcheck(profile_signal_perm(sender, target, MAY_WRITE, sa),
+		      profile_signal_perm(target, sender, MAY_READ, sa));
+}
+
+int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig)
+{
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_SIGNAL);
+
+	aad(&sa)->signal = map_signal_num(sig);
+	return xcheck_labels_profiles(sender, target, aa_signal_cross_perm,
+				      &sa);
+}
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 867bcd154c7e..af22f3dfbcce 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -656,6 +656,26 @@ static int apparmor_task_setrlimit(struct task_struct *task,
 	return error;
 }
 
+static int apparmor_task_kill(struct task_struct *target, struct siginfo *info,
+			      int sig, u32 secid)
+{
+	struct aa_label *cl, *tl;
+	int error;
+
+	if (secid)
+		/* TODO: after secid to label mapping is done.
+		 *  Dealing with USB IO specific behavior
+		 */
+		return 0;
+	cl = __begin_current_label_crit_section();
+	tl = aa_get_task_label(target);
+	error = aa_may_signal(cl, tl, sig);
+	aa_put_label(tl);
+	__end_current_label_crit_section(cl);
+
+	return error;
+}
+
 static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
 	LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme),
@@ -697,6 +717,7 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(bprm_secureexec, apparmor_bprm_secureexec),
 
 	LSM_HOOK_INIT(task_setrlimit, apparmor_task_setrlimit),
+	LSM_HOOK_INIT(task_kill, apparmor_task_kill),
 };
 
 /*
-- 
cgit 


From 2ea3ffb7782a84da33a8382f13ebd016da50079b Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 18 Jul 2017 23:04:47 -0700
Subject: apparmor: add mount mediation

Add basic mount mediation. That allows controlling based on basic
mount parameters. It does not include special mount parameters for
apparmor, super block labeling, or any triggers for apparmor namespace
parameter modifications on pivot root.

default userspace policy rules have the form of
  MOUNT RULE = ( MOUNT | REMOUNT | UMOUNT )

  MOUNT = [ QUALIFIERS ] 'mount' [ MOUNT CONDITIONS ] [ SOURCE FILEGLOB ]
          [ '->' MOUNTPOINT FILEGLOB ]

  REMOUNT = [ QUALIFIERS ] 'remount' [ MOUNT CONDITIONS ]
            MOUNTPOINT FILEGLOB

  UMOUNT = [ QUALIFIERS ] 'umount' [ MOUNT CONDITIONS ] MOUNTPOINT FILEGLOB

  MOUNT CONDITIONS = [ ( 'fstype' | 'vfstype' ) ( '=' | 'in' )
                       MOUNT FSTYPE EXPRESSION ]
		       [ 'options' ( '=' | 'in' ) MOUNT FLAGS EXPRESSION ]

  MOUNT FSTYPE EXPRESSION = ( MOUNT FSTYPE LIST | MOUNT EXPRESSION )

  MOUNT FSTYPE LIST = Comma separated list of valid filesystem and
                      virtual filesystem types (eg ext4, debugfs, etc)

  MOUNT FLAGS EXPRESSION = ( MOUNT FLAGS LIST | MOUNT EXPRESSION )

  MOUNT FLAGS LIST = Comma separated list of MOUNT FLAGS.

  MOUNT FLAGS = ( 'ro' | 'rw' | 'nosuid' | 'suid' | 'nodev' | 'dev' |
                  'noexec' | 'exec' | 'sync' | 'async' | 'remount' |
		  'mand' | 'nomand' | 'dirsync' | 'noatime' | 'atime' |
		  'nodiratime' | 'diratime' | 'bind' | 'rbind' | 'move' |
		  'verbose' | 'silent' | 'loud' | 'acl' | 'noacl' |
		  'unbindable' | 'runbindable' | 'private' | 'rprivate' |
		  'slave' | 'rslave' | 'shared' | 'rshared' |
		  'relatime' | 'norelatime' | 'iversion' | 'noiversion' |
		  'strictatime' | 'nouser' | 'user' )

  MOUNT EXPRESSION = ( ALPHANUMERIC | AARE ) ...

  PIVOT ROOT RULE = [ QUALIFIERS ] pivot_root [ oldroot=OLD PUT FILEGLOB ]
                    [ NEW ROOT FILEGLOB ]

  SOURCE FILEGLOB = FILEGLOB

  MOUNTPOINT FILEGLOB = FILEGLOB

eg.
  mount,
  mount /dev/foo,
  mount options=ro /dev/foo -> /mnt/,
  mount options in (ro,atime) /dev/foo -> /mnt/,
  mount options=ro options=atime,

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/Makefile           |   2 +-
 security/apparmor/apparmorfs.c       |   8 +-
 security/apparmor/domain.c           |   4 +-
 security/apparmor/include/apparmor.h |   1 +
 security/apparmor/include/audit.h    |  11 +
 security/apparmor/include/domain.h   |   5 +
 security/apparmor/include/mount.h    |  54 +++
 security/apparmor/lsm.c              |  64 ++++
 security/apparmor/mount.c            | 696 +++++++++++++++++++++++++++++++++++
 9 files changed, 841 insertions(+), 4 deletions(-)
 create mode 100644 security/apparmor/include/mount.h
 create mode 100644 security/apparmor/mount.c

diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile
index a16b195274de..81a34426d024 100644
--- a/security/apparmor/Makefile
+++ b/security/apparmor/Makefile
@@ -4,7 +4,7 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o
 
 apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \
               path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \
-              resource.o secid.o file.o policy_ns.o label.o
+              resource.o secid.o file.o policy_ns.o label.o mount.o
 apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o
 
 clean-files := capability_names.h rlim_names.h
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index a5f9e1aa51f7..8fa6c898c44b 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -2159,9 +2159,14 @@ static struct aa_sfs_entry aa_sfs_entry_policy[] = {
 	{ }
 };
 
+static struct aa_sfs_entry aa_sfs_entry_mount[] = {
+	AA_SFS_FILE_STRING("mask", "mount umount pivot_root"),
+	{ }
+};
+
 static struct aa_sfs_entry aa_sfs_entry_ns[] = {
 	AA_SFS_FILE_BOOLEAN("profile",		1),
-	AA_SFS_FILE_BOOLEAN("pivot_root",	1),
+	AA_SFS_FILE_BOOLEAN("pivot_root",	0),
 	{ }
 };
 
@@ -2180,6 +2185,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = {
 	AA_SFS_DIR("policy",			aa_sfs_entry_policy),
 	AA_SFS_DIR("domain",			aa_sfs_entry_domain),
 	AA_SFS_DIR("file",			aa_sfs_entry_file),
+	AA_SFS_DIR("mount",			aa_sfs_entry_mount),
 	AA_SFS_DIR("namespaces",		aa_sfs_entry_ns),
 	AA_SFS_FILE_U64("capability",		VFS_CAP_FLAGS_MASK),
 	AA_SFS_DIR("rlimit",			aa_sfs_entry_rlimit),
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index d0594446ae3f..ffc8c75a6785 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -374,8 +374,8 @@ static const char *next_name(int xtype, const char *name)
  *
  * Returns: refcounted label, or NULL on failure (MAYBE NULL)
  */
-static struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
-				       const char **name)
+struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
+				const char **name)
 {
 	struct aa_label *label = NULL;
 	u32 xtype = xindex & AA_X_TYPE_MASK;
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 962a20a75e01..829082c35faa 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -27,6 +27,7 @@
 #define AA_CLASS_NET		4
 #define AA_CLASS_RLIMITS	5
 #define AA_CLASS_DOMAIN		6
+#define AA_CLASS_MOUNT		7
 #define AA_CLASS_PTRACE		9
 #define AA_CLASS_SIGNAL		10
 #define AA_CLASS_LABEL		16
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index d9a156ae11b9..c3fe1c5ef3bc 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -71,6 +71,10 @@ enum audit_type {
 #define OP_FMPROT "file_mprotect"
 #define OP_INHERIT "file_inherit"
 
+#define OP_PIVOTROOT "pivotroot"
+#define OP_MOUNT "mount"
+#define OP_UMOUNT "umount"
+
 #define OP_CREATE "create"
 #define OP_POST_CREATE "post_create"
 #define OP_BIND "bind"
@@ -132,6 +136,13 @@ struct apparmor_audit_data {
 			int rlim;
 			unsigned long max;
 		} rlim;
+		struct {
+			const char *src_name;
+			const char *type;
+			const char *trans;
+			const char *data;
+			unsigned long flags;
+		} mnt;
 	};
 };
 
diff --git a/security/apparmor/include/domain.h b/security/apparmor/include/domain.h
index bab5810b6e9a..db27403346c5 100644
--- a/security/apparmor/include/domain.h
+++ b/security/apparmor/include/domain.h
@@ -15,6 +15,8 @@
 #include <linux/binfmts.h>
 #include <linux/types.h>
 
+#include "label.h"
+
 #ifndef __AA_DOMAIN_H
 #define __AA_DOMAIN_H
 
@@ -29,6 +31,9 @@ struct aa_domain {
 #define AA_CHANGE_ONEXEC  4
 #define AA_CHANGE_STACK 8
 
+struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
+				const char **name);
+
 int apparmor_bprm_set_creds(struct linux_binprm *bprm);
 int apparmor_bprm_secureexec(struct linux_binprm *bprm);
 
diff --git a/security/apparmor/include/mount.h b/security/apparmor/include/mount.h
new file mode 100644
index 000000000000..25d6067fa6ef
--- /dev/null
+++ b/security/apparmor/include/mount.h
@@ -0,0 +1,54 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor file mediation function definitions.
+ *
+ * Copyright 2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#ifndef __AA_MOUNT_H
+#define __AA_MOUNT_H
+
+#include <linux/fs.h>
+#include <linux/path.h>
+
+#include "domain.h"
+#include "policy.h"
+
+/* mount perms */
+#define AA_MAY_PIVOTROOT	0x01
+#define AA_MAY_MOUNT		0x02
+#define AA_MAY_UMOUNT		0x04
+#define AA_AUDIT_DATA		0x40
+#define AA_MNT_CONT_MATCH	0x40
+
+#define AA_MS_IGNORE_MASK (MS_KERNMOUNT | MS_NOSEC | MS_ACTIVE | MS_BORN)
+
+int aa_remount(struct aa_label *label, const struct path *path,
+	       unsigned long flags, void *data);
+
+int aa_bind_mount(struct aa_label *label, const struct path *path,
+		  const char *old_name, unsigned long flags);
+
+
+int aa_mount_change_type(struct aa_label *label, const struct path *path,
+			 unsigned long flags);
+
+int aa_move_mount(struct aa_label *label, const struct path *path,
+		  const char *old_name);
+
+int aa_new_mount(struct aa_label *label, const char *dev_name,
+		 const struct path *path, const char *type, unsigned long flags,
+		 void *data);
+
+int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags);
+
+int aa_pivotroot(struct aa_label *label, const struct path *old_path,
+		 const struct path *new_path);
+
+#endif /* __AA_MOUNT_H */
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index af22f3dfbcce..4ad0b3a45142 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -38,6 +38,7 @@
 #include "include/policy.h"
 #include "include/policy_ns.h"
 #include "include/procattr.h"
+#include "include/mount.h"
 
 /* Flag indicating whether initialization completed */
 int apparmor_initialized;
@@ -511,6 +512,65 @@ static int apparmor_file_mprotect(struct vm_area_struct *vma,
 			   !(vma->vm_flags & VM_SHARED) ? MAP_PRIVATE : 0);
 }
 
+static int apparmor_sb_mount(const char *dev_name, const struct path *path,
+			     const char *type, unsigned long flags, void *data)
+{
+	struct aa_label *label;
+	int error = 0;
+
+	/* Discard magic */
+	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
+		flags &= ~MS_MGC_MSK;
+
+	flags &= ~AA_MS_IGNORE_MASK;
+
+	label = __begin_current_label_crit_section();
+	if (!unconfined(label)) {
+		if (flags & MS_REMOUNT)
+			error = aa_remount(label, path, flags, data);
+		else if (flags & MS_BIND)
+			error = aa_bind_mount(label, path, dev_name, flags);
+		else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE |
+				  MS_UNBINDABLE))
+			error = aa_mount_change_type(label, path, flags);
+		else if (flags & MS_MOVE)
+			error = aa_move_mount(label, path, dev_name);
+		else
+			error = aa_new_mount(label, dev_name, path, type,
+					     flags, data);
+	}
+	__end_current_label_crit_section(label);
+
+	return error;
+}
+
+static int apparmor_sb_umount(struct vfsmount *mnt, int flags)
+{
+	struct aa_label *label;
+	int error = 0;
+
+	label = __begin_current_label_crit_section();
+	if (!unconfined(label))
+		error = aa_umount(label, mnt, flags);
+	__end_current_label_crit_section(label);
+
+	return error;
+}
+
+static int apparmor_sb_pivotroot(const struct path *old_path,
+				 const struct path *new_path)
+{
+	struct aa_label *label;
+	int error = 0;
+
+	label = aa_get_current_label();
+	if (!unconfined(label))
+		error = aa_pivotroot(label, old_path, new_path);
+	aa_put_label(label);
+
+	return error;
+}
+
 static int apparmor_getprocattr(struct task_struct *task, char *name,
 				char **value)
 {
@@ -682,6 +742,10 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(capget, apparmor_capget),
 	LSM_HOOK_INIT(capable, apparmor_capable),
 
+	LSM_HOOK_INIT(sb_mount, apparmor_sb_mount),
+	LSM_HOOK_INIT(sb_umount, apparmor_sb_umount),
+	LSM_HOOK_INIT(sb_pivotroot, apparmor_sb_pivotroot),
+
 	LSM_HOOK_INIT(path_link, apparmor_path_link),
 	LSM_HOOK_INIT(path_unlink, apparmor_path_unlink),
 	LSM_HOOK_INIT(path_symlink, apparmor_path_symlink),
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
new file mode 100644
index 000000000000..82a64b58041d
--- /dev/null
+++ b/security/apparmor/mount.c
@@ -0,0 +1,696 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor mediation of files
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+
+#include "include/apparmor.h"
+#include "include/audit.h"
+#include "include/context.h"
+#include "include/domain.h"
+#include "include/file.h"
+#include "include/match.h"
+#include "include/mount.h"
+#include "include/path.h"
+#include "include/policy.h"
+
+
+static void audit_mnt_flags(struct audit_buffer *ab, unsigned long flags)
+{
+	if (flags & MS_RDONLY)
+		audit_log_format(ab, "ro");
+	else
+		audit_log_format(ab, "rw");
+	if (flags & MS_NOSUID)
+		audit_log_format(ab, ", nosuid");
+	if (flags & MS_NODEV)
+		audit_log_format(ab, ", nodev");
+	if (flags & MS_NOEXEC)
+		audit_log_format(ab, ", noexec");
+	if (flags & MS_SYNCHRONOUS)
+		audit_log_format(ab, ", sync");
+	if (flags & MS_REMOUNT)
+		audit_log_format(ab, ", remount");
+	if (flags & MS_MANDLOCK)
+		audit_log_format(ab, ", mand");
+	if (flags & MS_DIRSYNC)
+		audit_log_format(ab, ", dirsync");
+	if (flags & MS_NOATIME)
+		audit_log_format(ab, ", noatime");
+	if (flags & MS_NODIRATIME)
+		audit_log_format(ab, ", nodiratime");
+	if (flags & MS_BIND)
+		audit_log_format(ab, flags & MS_REC ? ", rbind" : ", bind");
+	if (flags & MS_MOVE)
+		audit_log_format(ab, ", move");
+	if (flags & MS_SILENT)
+		audit_log_format(ab, ", silent");
+	if (flags & MS_POSIXACL)
+		audit_log_format(ab, ", acl");
+	if (flags & MS_UNBINDABLE)
+		audit_log_format(ab, flags & MS_REC ? ", runbindable" :
+				 ", unbindable");
+	if (flags & MS_PRIVATE)
+		audit_log_format(ab, flags & MS_REC ? ", rprivate" :
+				 ", private");
+	if (flags & MS_SLAVE)
+		audit_log_format(ab, flags & MS_REC ? ", rslave" :
+				 ", slave");
+	if (flags & MS_SHARED)
+		audit_log_format(ab, flags & MS_REC ? ", rshared" :
+				 ", shared");
+	if (flags & MS_RELATIME)
+		audit_log_format(ab, ", relatime");
+	if (flags & MS_I_VERSION)
+		audit_log_format(ab, ", iversion");
+	if (flags & MS_STRICTATIME)
+		audit_log_format(ab, ", strictatime");
+	if (flags & MS_NOUSER)
+		audit_log_format(ab, ", nouser");
+}
+
+/**
+ * audit_cb - call back for mount specific audit fields
+ * @ab: audit_buffer  (NOT NULL)
+ * @va: audit struct to audit values of  (NOT NULL)
+ */
+static void audit_cb(struct audit_buffer *ab, void *va)
+{
+	struct common_audit_data *sa = va;
+
+	if (aad(sa)->mnt.type) {
+		audit_log_format(ab, " fstype=");
+		audit_log_untrustedstring(ab, aad(sa)->mnt.type);
+	}
+	if (aad(sa)->mnt.src_name) {
+		audit_log_format(ab, " srcname=");
+		audit_log_untrustedstring(ab, aad(sa)->mnt.src_name);
+	}
+	if (aad(sa)->mnt.trans) {
+		audit_log_format(ab, " trans=");
+		audit_log_untrustedstring(ab, aad(sa)->mnt.trans);
+	}
+	if (aad(sa)->mnt.flags) {
+		audit_log_format(ab, " flags=\"");
+		audit_mnt_flags(ab, aad(sa)->mnt.flags);
+		audit_log_format(ab, "\"");
+	}
+	if (aad(sa)->mnt.data) {
+		audit_log_format(ab, " options=");
+		audit_log_untrustedstring(ab, aad(sa)->mnt.data);
+	}
+}
+
+/**
+ * audit_mount - handle the auditing of mount operations
+ * @profile: the profile being enforced  (NOT NULL)
+ * @op: operation being mediated (NOT NULL)
+ * @name: name of object being mediated (MAYBE NULL)
+ * @src_name: src_name of object being mediated (MAYBE_NULL)
+ * @type: type of filesystem (MAYBE_NULL)
+ * @trans: name of trans (MAYBE NULL)
+ * @flags: filesystem idependent mount flags
+ * @data: filesystem mount flags
+ * @request: permissions requested
+ * @perms: the permissions computed for the request (NOT NULL)
+ * @info: extra information message (MAYBE NULL)
+ * @error: 0 if operation allowed else failure error code
+ *
+ * Returns: %0 or error on failure
+ */
+static int audit_mount(struct aa_profile *profile, const char *op,
+		       const char *name, const char *src_name,
+		       const char *type, const char *trans,
+		       unsigned long flags, const void *data, u32 request,
+		       struct aa_perms *perms, const char *info, int error)
+{
+	int audit_type = AUDIT_APPARMOR_AUTO;
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, op);
+
+	if (likely(!error)) {
+		u32 mask = perms->audit;
+
+		if (unlikely(AUDIT_MODE(profile) == AUDIT_ALL))
+			mask = 0xffff;
+
+		/* mask off perms that are not being force audited */
+		request &= mask;
+
+		if (likely(!request))
+			return 0;
+		audit_type = AUDIT_APPARMOR_AUDIT;
+	} else {
+		/* only report permissions that were denied */
+		request = request & ~perms->allow;
+
+		if (request & perms->kill)
+			audit_type = AUDIT_APPARMOR_KILL;
+
+		/* quiet known rejects, assumes quiet and kill do not overlap */
+		if ((request & perms->quiet) &&
+		    AUDIT_MODE(profile) != AUDIT_NOQUIET &&
+		    AUDIT_MODE(profile) != AUDIT_ALL)
+			request &= ~perms->quiet;
+
+		if (!request)
+			return error;
+	}
+
+	aad(&sa)->name = name;
+	aad(&sa)->mnt.src_name = src_name;
+	aad(&sa)->mnt.type = type;
+	aad(&sa)->mnt.trans = trans;
+	aad(&sa)->mnt.flags = flags;
+	if (data && (perms->audit & AA_AUDIT_DATA))
+		aad(&sa)->mnt.data = data;
+	aad(&sa)->info = info;
+	aad(&sa)->error = error;
+
+	return aa_audit(audit_type, profile, &sa, audit_cb);
+}
+
+/**
+ * match_mnt_flags - Do an ordered match on mount flags
+ * @dfa: dfa to match against
+ * @state: state to start in
+ * @flags: mount flags to match against
+ *
+ * Mount flags are encoded as an ordered match. This is done instead of
+ * checking against a simple bitmask, to allow for logical operations
+ * on the flags.
+ *
+ * Returns: next state after flags match
+ */
+static unsigned int match_mnt_flags(struct aa_dfa *dfa, unsigned int state,
+				    unsigned long flags)
+{
+	unsigned int i;
+
+	for (i = 0; i <= 31 ; ++i) {
+		if ((1 << i) & flags)
+			state = aa_dfa_next(dfa, state, i + 1);
+	}
+
+	return state;
+}
+
+/**
+ * compute_mnt_perms - compute mount permission associated with @state
+ * @dfa: dfa to match against (NOT NULL)
+ * @state: state match finished in
+ *
+ * Returns: mount permissions
+ */
+static struct aa_perms compute_mnt_perms(struct aa_dfa *dfa,
+					   unsigned int state)
+{
+	struct aa_perms perms;
+
+	perms.kill = 0;
+	perms.allow = dfa_user_allow(dfa, state);
+	perms.audit = dfa_user_audit(dfa, state);
+	perms.quiet = dfa_user_quiet(dfa, state);
+	perms.xindex = dfa_user_xindex(dfa, state);
+
+	return perms;
+}
+
+static const char * const mnt_info_table[] = {
+	"match succeeded",
+	"failed mntpnt match",
+	"failed srcname match",
+	"failed type match",
+	"failed flags match",
+	"failed data match"
+};
+
+/*
+ * Returns 0 on success else element that match failed in, this is the
+ * index into the mnt_info_table above
+ */
+static int do_match_mnt(struct aa_dfa *dfa, unsigned int start,
+			const char *mntpnt, const char *devname,
+			const char *type, unsigned long flags,
+			void *data, bool binary, struct aa_perms *perms)
+{
+	unsigned int state;
+
+	AA_BUG(!dfa);
+	AA_BUG(!perms);
+
+	state = aa_dfa_match(dfa, start, mntpnt);
+	state = aa_dfa_null_transition(dfa, state);
+	if (!state)
+		return 1;
+
+	if (devname)
+		state = aa_dfa_match(dfa, state, devname);
+	state = aa_dfa_null_transition(dfa, state);
+	if (!state)
+		return 2;
+
+	if (type)
+		state = aa_dfa_match(dfa, state, type);
+	state = aa_dfa_null_transition(dfa, state);
+	if (!state)
+		return 3;
+
+	state = match_mnt_flags(dfa, state, flags);
+	if (!state)
+		return 4;
+	*perms = compute_mnt_perms(dfa, state);
+	if (perms->allow & AA_MAY_MOUNT)
+		return 0;
+
+	/* only match data if not binary and the DFA flags data is expected */
+	if (data && !binary && (perms->allow & AA_MNT_CONT_MATCH)) {
+		state = aa_dfa_null_transition(dfa, state);
+		if (!state)
+			return 4;
+
+		state = aa_dfa_match(dfa, state, data);
+		if (!state)
+			return 5;
+		*perms = compute_mnt_perms(dfa, state);
+		if (perms->allow & AA_MAY_MOUNT)
+			return 0;
+	}
+
+	/* failed at end of flags match */
+	return 4;
+}
+
+
+static int path_flags(struct aa_profile *profile, const struct path *path)
+{
+	AA_BUG(!profile);
+	AA_BUG(!path);
+
+	return profile->path_flags |
+		(S_ISDIR(path->dentry->d_inode->i_mode) ? PATH_IS_DIR : 0);
+}
+
+/**
+ * match_mnt_path_str - handle path matching for mount
+ * @profile: the confining profile
+ * @mntpath: for the mntpnt (NOT NULL)
+ * @buffer: buffer to be used to lookup mntpath
+ * @devnme: string for the devname/src_name (MAY BE NULL OR ERRPTR)
+ * @type: string for the dev type (MAYBE NULL)
+ * @flags: mount flags to match
+ * @data: fs mount data (MAYBE NULL)
+ * @binary: whether @data is binary
+ * @devinfo: error str if (IS_ERR(@devname))
+ *
+ * Returns: 0 on success else error
+ */
+static int match_mnt_path_str(struct aa_profile *profile,
+			      const struct path *mntpath, char *buffer,
+			      const char *devname, const char *type,
+			      unsigned long flags, void *data, bool binary,
+			      const char *devinfo)
+{
+	struct aa_perms perms = { };
+	const char *mntpnt = NULL, *info = NULL;
+	int pos, error;
+
+	AA_BUG(!profile);
+	AA_BUG(!mntpath);
+	AA_BUG(!buffer);
+
+	error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer,
+			     &mntpnt, &info, profile->disconnected);
+	if (error)
+		goto audit;
+	if (IS_ERR(devname)) {
+		error = PTR_ERR(devname);
+		devname = NULL;
+		info = devinfo;
+		goto audit;
+	}
+
+	error = -EACCES;
+	pos = do_match_mnt(profile->policy.dfa,
+			   profile->policy.start[AA_CLASS_MOUNT],
+			   mntpnt, devname, type, flags, data, binary, &perms);
+	if (pos) {
+		info = mnt_info_table[pos];
+		goto audit;
+	}
+	error = 0;
+
+audit:
+	return audit_mount(profile, OP_MOUNT, mntpnt, devname, type, NULL,
+			   flags, data, AA_MAY_MOUNT, &perms, info, error);
+}
+
+/**
+ * match_mnt - handle path matching for mount
+ * @profile: the confining profile
+ * @mntpath: for the mntpnt (NOT NULL)
+ * @buffer: buffer to be used to lookup mntpath
+ * @devpath: path devname/src_name (MAYBE NULL)
+ * @devbuffer: buffer to be used to lookup devname/src_name
+ * @type: string for the dev type (MAYBE NULL)
+ * @flags: mount flags to match
+ * @data: fs mount data (MAYBE NULL)
+ * @binary: whether @data is binary
+ *
+ * Returns: 0 on success else error
+ */
+static int match_mnt(struct aa_profile *profile, const struct path *path,
+		     char *buffer, struct path *devpath, char *devbuffer,
+		     const char *type, unsigned long flags, void *data,
+		     bool binary)
+{
+	const char *devname = NULL, *info = NULL;
+	int error = -EACCES;
+
+	AA_BUG(!profile);
+	AA_BUG(devpath && !devbuffer);
+
+	if (devpath) {
+		error = aa_path_name(devpath, path_flags(profile, devpath),
+				     devbuffer, &devname, &info,
+				     profile->disconnected);
+		if (error)
+			devname = ERR_PTR(error);
+	}
+
+	return match_mnt_path_str(profile, path, buffer, devname, type, flags,
+				  data, binary, info);
+}
+
+int aa_remount(struct aa_label *label, const struct path *path,
+	       unsigned long flags, void *data)
+{
+	struct aa_profile *profile;
+	char *buffer = NULL;
+	bool binary;
+	int error;
+
+	AA_BUG(!label);
+	AA_BUG(!path);
+
+	binary = path->dentry->d_sb->s_type->fs_flags & FS_BINARY_MOUNTDATA;
+
+	get_buffers(buffer);
+	error = fn_for_each_confined(label, profile,
+			match_mnt(profile, path, buffer, NULL, NULL, NULL,
+				  flags, data, binary));
+	put_buffers(buffer);
+
+	return error;
+}
+
+int aa_bind_mount(struct aa_label *label, const struct path *path,
+		  const char *dev_name, unsigned long flags)
+{
+	struct aa_profile *profile;
+	char *buffer = NULL, *old_buffer = NULL;
+	struct path old_path;
+	int error;
+
+	AA_BUG(!label);
+	AA_BUG(!path);
+
+	if (!dev_name || !*dev_name)
+		return -EINVAL;
+
+	flags &= MS_REC | MS_BIND;
+
+	error = kern_path(dev_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
+	if (error)
+		return error;
+
+	get_buffers(buffer, old_buffer);
+	error = fn_for_each_confined(label, profile,
+			match_mnt(profile, path, buffer, &old_path, old_buffer,
+				  NULL, flags, NULL, false));
+	put_buffers(buffer, old_buffer);
+	path_put(&old_path);
+
+	return error;
+}
+
+int aa_mount_change_type(struct aa_label *label, const struct path *path,
+			 unsigned long flags)
+{
+	struct aa_profile *profile;
+	char *buffer = NULL;
+	int error;
+
+	AA_BUG(!label);
+	AA_BUG(!path);
+
+	/* These are the flags allowed by do_change_type() */
+	flags &= (MS_REC | MS_SILENT | MS_SHARED | MS_PRIVATE | MS_SLAVE |
+		  MS_UNBINDABLE);
+
+	get_buffers(buffer);
+	error = fn_for_each_confined(label, profile,
+			match_mnt(profile, path, buffer, NULL, NULL, NULL,
+				  flags, NULL, false));
+	put_buffers(buffer);
+
+	return error;
+}
+
+int aa_move_mount(struct aa_label *label, const struct path *path,
+		  const char *orig_name)
+{
+	struct aa_profile *profile;
+	char *buffer = NULL, *old_buffer = NULL;
+	struct path old_path;
+	int error;
+
+	AA_BUG(!label);
+	AA_BUG(!path);
+
+	if (!orig_name || !*orig_name)
+		return -EINVAL;
+
+	error = kern_path(orig_name, LOOKUP_FOLLOW, &old_path);
+	if (error)
+		return error;
+
+	get_buffers(buffer, old_buffer);
+	error = fn_for_each_confined(label, profile,
+			match_mnt(profile, path, buffer, &old_path, old_buffer,
+				  NULL, MS_MOVE, NULL, false));
+	put_buffers(buffer, old_buffer);
+	path_put(&old_path);
+
+	return error;
+}
+
+int aa_new_mount(struct aa_label *label, const char *dev_name,
+		 const struct path *path, const char *type, unsigned long flags,
+		 void *data)
+{
+	struct aa_profile *profile;
+	char *buffer = NULL, *dev_buffer = NULL;
+	bool binary = true;
+	int error;
+	int requires_dev = 0;
+	struct path tmp_path, *dev_path = NULL;
+
+	AA_BUG(!label);
+	AA_BUG(!path);
+
+	if (type) {
+		struct file_system_type *fstype;
+
+		fstype = get_fs_type(type);
+		if (!fstype)
+			return -ENODEV;
+		binary = fstype->fs_flags & FS_BINARY_MOUNTDATA;
+		requires_dev = fstype->fs_flags & FS_REQUIRES_DEV;
+		put_filesystem(fstype);
+
+		if (requires_dev) {
+			if (!dev_name || !*dev_name)
+				return -ENOENT;
+
+			error = kern_path(dev_name, LOOKUP_FOLLOW, &tmp_path);
+			if (error)
+				return error;
+			dev_path = &tmp_path;
+		}
+	}
+
+	get_buffers(buffer, dev_buffer);
+	if (dev_path) {
+		error = fn_for_each_confined(label, profile,
+			match_mnt(profile, path, buffer, dev_path, dev_buffer,
+				  type, flags, data, binary));
+	} else {
+		error = fn_for_each_confined(label, profile,
+			match_mnt_path_str(profile, path, buffer, dev_name,
+					   type, flags, data, binary, NULL));
+	}
+	put_buffers(buffer, dev_buffer);
+	if (dev_path)
+		path_put(dev_path);
+
+	return error;
+}
+
+static int profile_umount(struct aa_profile *profile, struct path *path,
+			  char *buffer)
+{
+	struct aa_perms perms = { };
+	const char *name = NULL, *info = NULL;
+	unsigned int state;
+	int error;
+
+	AA_BUG(!profile);
+	AA_BUG(!path);
+
+	error = aa_path_name(path, path_flags(profile, path), buffer, &name,
+			     &info, profile->disconnected);
+	if (error)
+		goto audit;
+
+	state = aa_dfa_match(profile->policy.dfa,
+			     profile->policy.start[AA_CLASS_MOUNT],
+			     name);
+	perms = compute_mnt_perms(profile->policy.dfa, state);
+	if (AA_MAY_UMOUNT & ~perms.allow)
+		error = -EACCES;
+
+audit:
+	return audit_mount(profile, OP_UMOUNT, name, NULL, NULL, NULL, 0, NULL,
+			   AA_MAY_UMOUNT, &perms, info, error);
+}
+
+int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags)
+{
+	struct aa_profile *profile;
+	char *buffer = NULL;
+	int error;
+	struct path path = { .mnt = mnt, .dentry = mnt->mnt_root };
+
+	AA_BUG(!label);
+	AA_BUG(!mnt);
+
+	get_buffers(buffer);
+	error = fn_for_each_confined(label, profile,
+			profile_umount(profile, &path, buffer));
+	put_buffers(buffer);
+
+	return error;
+}
+
+/* helper fn for transition on pivotroot
+ *
+ * Returns: label for transition or ERR_PTR. Does not return NULL
+ */
+static struct aa_label *build_pivotroot(struct aa_profile *profile,
+					const struct path *new_path,
+					char *new_buffer,
+					const struct path *old_path,
+					char *old_buffer)
+{
+	const char *old_name, *new_name = NULL, *info = NULL;
+	const char *trans_name = NULL;
+	struct aa_perms perms = { };
+	unsigned int state;
+	int error;
+
+	AA_BUG(!profile);
+	AA_BUG(!new_path);
+	AA_BUG(!old_path);
+
+	if (profile_unconfined(profile))
+		return aa_get_newest_label(&profile->label);
+
+	error = aa_path_name(old_path, path_flags(profile, old_path),
+			     old_buffer, &old_name, &info,
+			     profile->disconnected);
+	if (error)
+		goto audit;
+	error = aa_path_name(new_path, path_flags(profile, new_path),
+			     new_buffer, &new_name, &info,
+			     profile->disconnected);
+	if (error)
+		goto audit;
+
+	error = -EACCES;
+	state = aa_dfa_match(profile->policy.dfa,
+			     profile->policy.start[AA_CLASS_MOUNT],
+			     new_name);
+	state = aa_dfa_null_transition(profile->policy.dfa, state);
+	state = aa_dfa_match(profile->policy.dfa, state, old_name);
+	perms = compute_mnt_perms(profile->policy.dfa, state);
+
+	if (AA_MAY_PIVOTROOT & perms.allow)
+		error = 0;
+
+audit:
+	error = audit_mount(profile, OP_PIVOTROOT, new_name, old_name,
+			    NULL, trans_name, 0, NULL, AA_MAY_PIVOTROOT,
+			    &perms, info, error);
+	if (error)
+		return ERR_PTR(error);
+
+	return aa_get_newest_label(&profile->label);
+}
+
+int aa_pivotroot(struct aa_label *label, const struct path *old_path,
+		 const struct path *new_path)
+{
+	struct aa_profile *profile;
+	struct aa_label *target = NULL;
+	char *old_buffer = NULL, *new_buffer = NULL, *info = NULL;
+	int error;
+
+	AA_BUG(!label);
+	AA_BUG(!old_path);
+	AA_BUG(!new_path);
+
+	get_buffers(old_buffer, new_buffer);
+	target = fn_label_build(label, profile, GFP_ATOMIC,
+			build_pivotroot(profile, new_path, new_buffer,
+					old_path, old_buffer));
+	if (!target) {
+		info = "label build failed";
+		error = -ENOMEM;
+		goto fail;
+	} else if (!IS_ERR(target)) {
+		error = aa_replace_current_label(target);
+		if (error) {
+			/* TODO: audit target */
+			aa_put_label(target);
+			goto out;
+		}
+	} else
+		/* already audited error */
+		error = PTR_ERR(target);
+out:
+	put_buffers(old_buffer, new_buffer);
+
+	return error;
+
+fail:
+	/* TODO: add back in auditing of new_name and old_name */
+	error = fn_for_each(label, profile,
+			audit_mount(profile, OP_PIVOTROOT, NULL /*new_name */,
+				    NULL /* old_name */,
+				    NULL, NULL,
+				    0, NULL, AA_MAY_PIVOTROOT, &nullperms, info,
+				    error));
+	goto out;
+}
-- 
cgit 


From f872af75d325cc449b6621a0d30a4f2ba77dd092 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sun, 6 Aug 2017 05:36:40 -0700
Subject: apparmor: cleanup conditional check for label in label_print

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/label.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index e324f4df3e34..38be7a89cc31 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -1450,9 +1450,11 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp)
  * cached label name is present and visible
  * @label->hname only exists if label is namespace hierachical
  */
-static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label)
+static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label,
+				   int flags)
 {
-	if (label->hname && labels_ns(label) == ns)
+	if (label->hname && (!ns || labels_ns(label) == ns) &&
+	    !(flags & ~FLAG_SHOW_MODE))
 		return true;
 
 	return false;
@@ -1710,10 +1712,8 @@ void aa_label_xaudit(struct audit_buffer *ab, struct aa_ns *ns,
 	AA_BUG(!ab);
 	AA_BUG(!label);
 
-	if (!ns)
-		ns = labels_ns(label);
-
-	if (!use_label_hname(ns, label) || display_mode(ns, label, flags)) {
+	if (!use_label_hname(ns, label, flags) ||
+	    display_mode(ns, label, flags)) {
 		len  = aa_label_asxprint(&name, ns, label, flags, gfp);
 		if (len == -1) {
 			AA_DEBUG("label print error");
@@ -1738,10 +1738,7 @@ void aa_label_seq_xprint(struct seq_file *f, struct aa_ns *ns,
 	AA_BUG(!f);
 	AA_BUG(!label);
 
-	if (!ns)
-		ns = labels_ns(label);
-
-	if (!use_label_hname(ns, label)) {
+	if (!use_label_hname(ns, label, flags)) {
 		char *str;
 		int len;
 
@@ -1764,10 +1761,7 @@ void aa_label_xprintk(struct aa_ns *ns, struct aa_label *label, int flags,
 {
 	AA_BUG(!label);
 
-	if (!ns)
-		ns = labels_ns(label);
-
-	if (!use_label_hname(ns, label)) {
+	if (!use_label_hname(ns, label, flags)) {
 		char *str;
 		int len;
 
-- 
cgit 


From 26b7899510ae243e392960704ebdba52d05fbb13 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sun, 6 Aug 2017 05:39:08 -0700
Subject: apparmor: add support for absolute root view based labels

With apparmor policy virtualization based on policy namespace View's
we don't generally want/need absolute root based views, however there
are cases like debugging and some secid based conversions where
using a root based view is important.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/include/label.h |  1 +
 security/apparmor/label.c         | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h
index 9a283b722755..af22dcbbcb8a 100644
--- a/security/apparmor/include/label.h
+++ b/security/apparmor/include/label.h
@@ -310,6 +310,7 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp);
 #define FLAG_SHOW_MODE 1
 #define FLAG_VIEW_SUBNS 2
 #define FLAG_HIDDEN_UNCONFINED 4
+#define FLAG_ABS_ROOT 8
 int aa_label_snxprint(char *str, size_t size, struct aa_ns *view,
 		      struct aa_label *label, int flags);
 int aa_label_asxprint(char **strp, struct aa_ns *ns, struct aa_label *label,
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 38be7a89cc31..52b4ef14840d 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -1607,8 +1607,13 @@ int aa_label_snxprint(char *str, size_t size, struct aa_ns *ns,
 	AA_BUG(!str && size != 0);
 	AA_BUG(!label);
 
-	if (!ns)
+	if (flags & FLAG_ABS_ROOT) {
+		ns = root_ns;
+		len = snprintf(str, size, "=");
+		update_for_len(total, len, size, str);
+	} else if (!ns) {
 		ns = labels_ns(label);
+	}
 
 	label_for_each(i, label, profile) {
 		if (aa_ns_visible(ns, profile->ns, flags & FLAG_VIEW_SUBNS)) {
@@ -1868,6 +1873,9 @@ struct aa_label *aa_label_parse(struct aa_label *base, const char *str,
 		if (*str == '&')
 			str++;
 	}
+	if (*str == '=')
+		base = &root_ns->unconfined->label;
+
 	error = vec_setup(profile, vec, len, gfp);
 	if (error)
 		return ERR_PTR(error);
-- 
cgit 


From 2410aa96d6b4930ed25fd02c3d173f14b962e0f4 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 18 Jul 2017 23:37:18 -0700
Subject: apparmor: make policy_unpack able to audit different info messages

Switch unpack auditing to using the generic name field in the audit
struct and make it so we can start adding new info messages about
why an unpack failed.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/include/audit.h |  4 +--
 security/apparmor/policy_unpack.c | 52 ++++++++++++++++++++++++++++-----------
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index c3fe1c5ef3bc..620e81169659 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -127,9 +127,9 @@ struct apparmor_audit_data {
 			} fs;
 		};
 		struct {
-			const char *name;
-			long pos;
+			struct aa_profile *profile;
 			const char *ns;
+			long pos;
 		} iface;
 		int signal;
 		struct {
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index bda0dce3b582..4ede87c30f8b 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -85,9 +85,9 @@ static void audit_cb(struct audit_buffer *ab, void *va)
 		audit_log_format(ab, " ns=");
 		audit_log_untrustedstring(ab, aad(sa)->iface.ns);
 	}
-	if (aad(sa)->iface.name) {
+	if (aad(sa)->name) {
 		audit_log_format(ab, " name=");
-		audit_log_untrustedstring(ab, aad(sa)->iface.name);
+		audit_log_untrustedstring(ab, aad(sa)->name);
 	}
 	if (aad(sa)->iface.pos)
 		audit_log_format(ab, " offset=%ld", aad(sa)->iface.pos);
@@ -114,9 +114,9 @@ static int audit_iface(struct aa_profile *new, const char *ns_name,
 		aad(&sa)->iface.pos = e->pos - e->start;
 	aad(&sa)->iface.ns = ns_name;
 	if (new)
-		aad(&sa)->iface.name = new->base.hname;
+		aad(&sa)->name = new->base.hname;
 	else
-		aad(&sa)->iface.name = name;
+		aad(&sa)->name = name;
 	aad(&sa)->info = info;
 	aad(&sa)->error = error;
 
@@ -583,6 +583,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 {
 	struct aa_profile *profile = NULL;
 	const char *tmpname, *tmpns = NULL, *name = NULL;
+	const char *info = "failed to unpack profile";
 	size_t ns_len;
 	struct rhashtable_params params = { 0 };
 	char *key = NULL;
@@ -604,8 +605,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	tmpname = aa_splitn_fqname(name, strlen(name), &tmpns, &ns_len);
 	if (tmpns) {
 		*ns_name = kstrndup(tmpns, ns_len, GFP_KERNEL);
-		if (!*ns_name)
+		if (!*ns_name) {
+			info = "out of memory";
 			goto fail;
+		}
 		name = tmpname;
 	}
 
@@ -624,12 +627,15 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	if (IS_ERR(profile->xmatch)) {
 		error = PTR_ERR(profile->xmatch);
 		profile->xmatch = NULL;
+		info = "bad xmatch";
 		goto fail;
 	}
 	/* xmatch_len is not optional if xmatch is set */
 	if (profile->xmatch) {
-		if (!unpack_u32(e, &tmp, NULL))
+		if (!unpack_u32(e, &tmp, NULL)) {
+			info = "missing xmatch len";
 			goto fail;
+		}
 		profile->xmatch_len = tmp;
 	}
 
@@ -637,8 +643,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	(void) unpack_str(e, &profile->disconnected, "disconnected");
 
 	/* per profile debug flags (complain, audit) */
-	if (!unpack_nameX(e, AA_STRUCT, "flags"))
+	if (!unpack_nameX(e, AA_STRUCT, "flags")) {
+		info = "profile missing flags";
 		goto fail;
+	}
+	info = "failed to unpack profile flags";
 	if (!unpack_u32(e, &tmp, NULL))
 		goto fail;
 	if (tmp & PACKED_FLAG_HAT)
@@ -667,6 +676,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		/* set a default value if path_flags field is not present */
 		profile->path_flags = PATH_MEDIATE_DELETED;
 
+	info = "failed to unpack profile capabilities";
 	if (!unpack_u32(e, &(profile->caps.allow.cap[0]), NULL))
 		goto fail;
 	if (!unpack_u32(e, &(profile->caps.audit.cap[0]), NULL))
@@ -676,6 +686,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	if (!unpack_u32(e, &tmpcap.cap[0], NULL))
 		goto fail;
 
+	info = "failed to unpack upper profile capabilities";
 	if (unpack_nameX(e, AA_STRUCT, "caps64")) {
 		/* optional upper half of 64 bit caps */
 		if (!unpack_u32(e, &(profile->caps.allow.cap[1]), NULL))
@@ -690,6 +701,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 			goto fail;
 	}
 
+	info = "failed to unpack extended profile capabilities";
 	if (unpack_nameX(e, AA_STRUCT, "capsx")) {
 		/* optional extended caps mediation mask */
 		if (!unpack_u32(e, &(profile->caps.extended.cap[0]), NULL))
@@ -700,11 +712,14 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 			goto fail;
 	}
 
-	if (!unpack_rlimits(e, profile))
+	if (!unpack_rlimits(e, profile)) {
+		info = "failed to unpack profile rlimits";
 		goto fail;
+	}
 
 	if (unpack_nameX(e, AA_STRUCT, "policydb")) {
 		/* generic policy dfa - optional and may be NULL */
+		info = "failed to unpack policydb";
 		profile->policy.dfa = unpack_dfa(e);
 		if (IS_ERR(profile->policy.dfa)) {
 			error = PTR_ERR(profile->policy.dfa);
@@ -734,6 +749,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	if (IS_ERR(profile->file.dfa)) {
 		error = PTR_ERR(profile->file.dfa);
 		profile->file.dfa = NULL;
+		info = "failed to unpack profile file rules";
 		goto fail;
 	} else if (profile->file.dfa) {
 		if (!unpack_u32(e, &profile->file.start, "dfa_start"))
@@ -746,10 +762,13 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	} else
 		profile->file.dfa = aa_get_dfa(nulldfa);
 
-	if (!unpack_trans_table(e, profile))
+	if (!unpack_trans_table(e, profile)) {
+		info = "failed to unpack profile transition table";
 		goto fail;
+	}
 
 	if (unpack_nameX(e, AA_STRUCT, "data")) {
+		info = "out of memory";
 		profile->data = kzalloc(sizeof(*profile->data), GFP_KERNEL);
 		if (!profile->data)
 			goto fail;
@@ -761,8 +780,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		params.hashfn = strhash;
 		params.obj_cmpfn = datacmp;
 
-		if (rhashtable_init(profile->data, &params))
+		if (rhashtable_init(profile->data, &params)) {
+			info = "failed to init key, value hash table";
 			goto fail;
+		}
 
 		while (unpack_strdup(e, &key, NULL)) {
 			data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -784,12 +805,16 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 					       profile->data->p);
 		}
 
-		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
+		if (!unpack_nameX(e, AA_STRUCTEND, NULL)) {
+			info = "failed to unpack end of key, value data table";
 			goto fail;
+		}
 	}
 
-	if (!unpack_nameX(e, AA_STRUCTEND, NULL))
+	if (!unpack_nameX(e, AA_STRUCTEND, NULL)) {
+		info = "failed to unpack end of profile";
 		goto fail;
+	}
 
 	return profile;
 
@@ -798,8 +823,7 @@ fail:
 		name = NULL;
 	else if (!name)
 		name = "unknown";
-	audit_iface(profile, NULL, name, "failed to unpack profile", e,
-		    error);
+	audit_iface(profile, NULL, name, info, e, error);
 	aa_free_profile(profile);
 
 	return ERR_PTR(error);
-- 
cgit 


From cbf2d0e1a9e4876046a628e0e036a7545a3a4c40 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 18 Jul 2017 23:41:13 -0700
Subject: apparmor: add more debug asserts to apparmorfs

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/apparmorfs.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 8fa6c898c44b..7acea14c850b 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1446,6 +1446,10 @@ void __aafs_profile_migrate_dents(struct aa_profile *old,
 {
 	int i;
 
+	AA_BUG(!old);
+	AA_BUG(!new);
+	AA_BUG(!mutex_is_locked(&profiles_ns(old)->lock));
+
 	for (i = 0; i < AAFS_PROF_SIZEOF; i++) {
 		new->dents[i] = old->dents[i];
 		if (new->dents[i])
@@ -1509,6 +1513,9 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent)
 	struct dentry *dent = NULL, *dir;
 	int error;
 
+	AA_BUG(!profile);
+	AA_BUG(!mutex_is_locked(&profiles_ns(profile)->lock));
+
 	if (!parent) {
 		struct aa_profile *p;
 		p = aa_deref_parent(profile);
@@ -1734,6 +1741,7 @@ void __aafs_ns_rmdir(struct aa_ns *ns)
 
 	if (!ns)
 		return;
+	AA_BUG(!mutex_is_locked(&ns->lock));
 
 	list_for_each_entry(child, &ns->base.profiles, base.list)
 		__aafs_profile_rmdir(child);
@@ -1906,6 +1914,10 @@ static struct aa_ns *__next_ns(struct aa_ns *root, struct aa_ns *ns)
 {
 	struct aa_ns *parent, *next;
 
+	AA_BUG(!root);
+	AA_BUG(!ns);
+	AA_BUG(ns != root && !mutex_is_locked(&ns->parent->lock));
+
 	/* is next namespace a child */
 	if (!list_empty(&ns->sub_ns)) {
 		next = list_first_entry(&ns->sub_ns, typeof(*ns), base.list);
@@ -1940,6 +1952,9 @@ static struct aa_ns *__next_ns(struct aa_ns *root, struct aa_ns *ns)
 static struct aa_profile *__first_profile(struct aa_ns *root,
 					  struct aa_ns *ns)
 {
+	AA_BUG(!root);
+	AA_BUG(ns && !mutex_is_locked(&ns->lock));
+
 	for (; ns; ns = __next_ns(root, ns)) {
 		if (!list_empty(&ns->base.profiles))
 			return list_first_entry(&ns->base.profiles,
@@ -1962,6 +1977,8 @@ static struct aa_profile *__next_profile(struct aa_profile *p)
 	struct aa_profile *parent;
 	struct aa_ns *ns = p->ns;
 
+	AA_BUG(!mutex_is_locked(&profiles_ns(p)->lock));
+
 	/* is next profile a child */
 	if (!list_empty(&p->base.profiles))
 		return list_first_entry(&p->base.profiles, typeof(*p),
-- 
cgit 


From 651e28c5537abb39076d3949fb7618536f1d242e Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 18 Jul 2017 23:18:33 -0700
Subject: apparmor: add base infastructure for socket mediation

Provide a basic mediation of sockets. This is not a full net mediation
but just whether a spcific family of socket can be used by an
application, along with setting up some basic infrastructure for
network mediation to follow.

the user space rule hav the basic form of
  NETWORK RULE = [ QUALIFIERS ] 'network' [ DOMAIN ]
                 [ TYPE | PROTOCOL ]

  DOMAIN = ( 'inet' | 'ax25' | 'ipx' | 'appletalk' | 'netrom' |
             'bridge' | 'atmpvc' | 'x25' | 'inet6' | 'rose' |
	     'netbeui' | 'security' | 'key' | 'packet' | 'ash' |
	     'econet' | 'atmsvc' | 'sna' | 'irda' | 'pppox' |
	     'wanpipe' | 'bluetooth' | 'netlink' | 'unix' | 'rds' |
	     'llc' | 'can' | 'tipc' | 'iucv' | 'rxrpc' | 'isdn' |
	     'phonet' | 'ieee802154' | 'caif' | 'alg' | 'nfc' |
	     'vsock' | 'mpls' | 'ib' | 'kcm' ) ','

  TYPE = ( 'stream' | 'dgram' | 'seqpacket' |  'rdm' | 'raw' |
           'packet' )

  PROTOCOL = ( 'tcp' | 'udp' | 'icmp' )

eg.
  network,
  network inet,

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/.gitignore       |   1 +
 security/apparmor/Makefile         |  43 ++++-
 security/apparmor/apparmorfs.c     |   1 +
 security/apparmor/file.c           |  30 +++
 security/apparmor/include/audit.h  |  26 ++-
 security/apparmor/include/net.h    | 114 +++++++++++
 security/apparmor/include/perms.h  |   5 +-
 security/apparmor/include/policy.h |  13 ++
 security/apparmor/lib.c            |   5 +-
 security/apparmor/lsm.c            | 387 +++++++++++++++++++++++++++++++++++++
 security/apparmor/net.c            | 184 ++++++++++++++++++
 security/apparmor/policy_unpack.c  |  47 ++++-
 12 files changed, 840 insertions(+), 16 deletions(-)
 create mode 100644 security/apparmor/include/net.h
 create mode 100644 security/apparmor/net.c

diff --git a/security/apparmor/.gitignore b/security/apparmor/.gitignore
index 9cdec70d72b8..d5b291e94264 100644
--- a/security/apparmor/.gitignore
+++ b/security/apparmor/.gitignore
@@ -1,5 +1,6 @@
 #
 # Generated include files
 #
+net_names.h
 capability_names.h
 rlim_names.h
diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile
index 81a34426d024..dafdd387d42b 100644
--- a/security/apparmor/Makefile
+++ b/security/apparmor/Makefile
@@ -4,11 +4,44 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o
 
 apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \
               path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \
-              resource.o secid.o file.o policy_ns.o label.o mount.o
+              resource.o secid.o file.o policy_ns.o label.o mount.o net.o
 apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o
 
-clean-files := capability_names.h rlim_names.h
+clean-files := capability_names.h rlim_names.h net_names.h
 
+# Build a lower case string table of address family names
+# Transform lines from
+#    #define AF_LOCAL		1	/* POSIX name for AF_UNIX	*/
+#    #define AF_INET		2	/* Internet IP Protocol 	*/
+# to
+#    [1] = "local",
+#    [2] = "inet",
+#
+# and build the securityfs entries for the mapping.
+# Transforms lines from
+#    #define AF_INET		2	/* Internet IP Protocol 	*/
+# to
+#    #define AA_SFS_AF_MASK "local inet"
+quiet_cmd_make-af = GEN     $@
+cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\
+	sed $< >>$@ -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \
+	 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\
+	echo "};" >> $@ ;\
+	printf '%s' '\#define AA_SFS_AF_MASK "' >> $@ ;\
+	sed -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \
+	 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/\L\1/p'\
+	 $< | tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@
+
+# Build a lower case string table of sock type names
+# Transform lines from
+#    SOCK_STREAM	= 1,
+# to
+#    [1] = "stream",
+quiet_cmd_make-sock = GEN     $@
+cmd_make-sock = echo "static const char *sock_type_names[] = {" >> $@ ;\
+	sed $^ >>$@ -r -n \
+	-e 's/^\tSOCK_([A-Z0-9_]+)[\t]+=[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\
+	echo "};" >> $@
 
 # Build a lower case string table of capability names
 # Transforms lines from
@@ -61,6 +94,7 @@ cmd_make-rlim = echo "static const char *const rlim_names[RLIM_NLIMITS] = {" \
 	    tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@
 
 $(obj)/capability.o : $(obj)/capability_names.h
+$(obj)/net.o : $(obj)/net_names.h
 $(obj)/resource.o : $(obj)/rlim_names.h
 $(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \
 			    $(src)/Makefile
@@ -68,3 +102,8 @@ $(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \
 $(obj)/rlim_names.h : $(srctree)/include/uapi/asm-generic/resource.h \
 		      $(src)/Makefile
 	$(call cmd,make-rlim)
+$(obj)/net_names.h : $(srctree)/include/linux/socket.h \
+		     $(srctree)/include/linux/net.h \
+		     $(src)/Makefile
+	$(call cmd,make-af)
+	$(call cmd,make-sock)
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 7acea14c850b..125dad5c3fde 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -2202,6 +2202,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = {
 	AA_SFS_DIR("policy",			aa_sfs_entry_policy),
 	AA_SFS_DIR("domain",			aa_sfs_entry_domain),
 	AA_SFS_DIR("file",			aa_sfs_entry_file),
+	AA_SFS_DIR("network",			aa_sfs_entry_network),
 	AA_SFS_DIR("mount",			aa_sfs_entry_mount),
 	AA_SFS_DIR("namespaces",		aa_sfs_entry_ns),
 	AA_SFS_FILE_U64("capability",		VFS_CAP_FLAGS_MASK),
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index 3382518b87fa..db80221891c6 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -21,6 +21,7 @@
 #include "include/context.h"
 #include "include/file.h"
 #include "include/match.h"
+#include "include/net.h"
 #include "include/path.h"
 #include "include/policy.h"
 #include "include/label.h"
@@ -566,6 +567,32 @@ static int __file_path_perm(const char *op, struct aa_label *label,
 	return error;
 }
 
+static int __file_sock_perm(const char *op, struct aa_label *label,
+			    struct aa_label *flabel, struct file *file,
+			    u32 request, u32 denied)
+{
+	struct socket *sock = (struct socket *) file->private_data;
+	int error;
+
+	AA_BUG(!sock);
+
+	/* revalidation due to label out of date. No revocation at this time */
+	if (!denied && aa_label_is_subset(flabel, label))
+		return 0;
+
+	/* TODO: improve to skip profiles cached in flabel */
+	error = aa_sock_file_perm(label, op, request, sock);
+	if (denied) {
+		/* TODO: improve to skip profiles checked above */
+		/* check every profile in file label to is cached */
+		last_error(error, aa_sock_file_perm(flabel, op, request, sock));
+	}
+	if (!error)
+		update_file_ctx(file_ctx(file), label, request);
+
+	return error;
+}
+
 /**
  * aa_file_perm - do permission revalidation check & audit for @file
  * @op: operation being checked
@@ -610,6 +637,9 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file,
 		error = __file_path_perm(op, label, flabel, file, request,
 					 denied);
 
+	else if (S_ISSOCK(file_inode(file)->i_mode))
+		error = __file_sock_perm(op, label, flabel, file, request,
+					 denied);
 done:
 	rcu_read_unlock();
 
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index 620e81169659..ff4316e1068d 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -121,21 +121,29 @@ struct apparmor_audit_data {
 		/* these entries require a custom callback fn */
 		struct {
 			struct aa_label *peer;
-			struct {
-				const char *target;
-				kuid_t ouid;
-			} fs;
+			union {
+				struct {
+					kuid_t ouid;
+					const char *target;
+				} fs;
+				struct {
+					int type, protocol;
+					struct sock *peer_sk;
+					void *addr;
+					int addrlen;
+				} net;
+				int signal;
+				struct {
+					int rlim;
+					unsigned long max;
+				} rlim;
+			};
 		};
 		struct {
 			struct aa_profile *profile;
 			const char *ns;
 			long pos;
 		} iface;
-		int signal;
-		struct {
-			int rlim;
-			unsigned long max;
-		} rlim;
 		struct {
 			const char *src_name;
 			const char *type;
diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h
new file mode 100644
index 000000000000..140c8efcf364
--- /dev/null
+++ b/security/apparmor/include/net.h
@@ -0,0 +1,114 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor network mediation definitions.
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#ifndef __AA_NET_H
+#define __AA_NET_H
+
+#include <net/sock.h>
+#include <linux/path.h>
+
+#include "apparmorfs.h"
+#include "label.h"
+#include "perms.h"
+#include "policy.h"
+
+#define AA_MAY_SEND		AA_MAY_WRITE
+#define AA_MAY_RECEIVE		AA_MAY_READ
+
+#define AA_MAY_SHUTDOWN		AA_MAY_DELETE
+
+#define AA_MAY_CONNECT		AA_MAY_OPEN
+#define AA_MAY_ACCEPT		0x00100000
+
+#define AA_MAY_BIND		0x00200000
+#define AA_MAY_LISTEN		0x00400000
+
+#define AA_MAY_SETOPT		0x01000000
+#define AA_MAY_GETOPT		0x02000000
+
+#define NET_PERMS_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE |    \
+			AA_MAY_SHUTDOWN | AA_MAY_BIND | AA_MAY_LISTEN |	  \
+			AA_MAY_CONNECT | AA_MAY_ACCEPT | AA_MAY_SETATTR | \
+			AA_MAY_GETATTR | AA_MAY_SETOPT | AA_MAY_GETOPT)
+
+#define NET_FS_PERMS (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE |	\
+		      AA_MAY_SHUTDOWN | AA_MAY_CONNECT | AA_MAY_RENAME |\
+		      AA_MAY_SETATTR | AA_MAY_GETATTR | AA_MAY_CHMOD |	\
+		      AA_MAY_CHOWN | AA_MAY_CHGRP | AA_MAY_LOCK |	\
+		      AA_MAY_MPROT)
+
+#define NET_PEER_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CONNECT |	\
+		       AA_MAY_ACCEPT)
+struct aa_sk_ctx {
+	struct aa_label *label;
+	struct aa_label *peer;
+	struct path path;
+};
+
+#define SK_CTX(X) ((X)->sk_security)
+#define SOCK_ctx(X) SOCK_INODE(X)->i_security
+#define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P)				  \
+	struct lsm_network_audit NAME ## _net = { .sk = (SK),		  \
+						  .family = (F)};	  \
+	DEFINE_AUDIT_DATA(NAME,						  \
+			  ((SK) && (F) != AF_UNIX) ? LSM_AUDIT_DATA_NET : \
+						     LSM_AUDIT_DATA_NONE, \
+			  OP);						  \
+	NAME.u.net = &(NAME ## _net);					  \
+	aad(&NAME)->net.type = (T);					  \
+	aad(&NAME)->net.protocol = (P)
+
+#define DEFINE_AUDIT_SK(NAME, OP, SK)					\
+	DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type,	\
+			 (SK)->sk_protocol)
+
+/* struct aa_net - network confinement data
+ * @allow: basic network families permissions
+ * @audit: which network permissions to force audit
+ * @quiet: which network permissions to quiet rejects
+ */
+struct aa_net {
+	u16 allow[AF_MAX];
+	u16 audit[AF_MAX];
+	u16 quiet[AF_MAX];
+};
+
+
+extern struct aa_sfs_entry aa_sfs_entry_network[];
+
+void audit_net_cb(struct audit_buffer *ab, void *va);
+int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
+		       u32 request, u16 family, int type);
+int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family,
+	       int type, int protocol);
+static inline int aa_profile_af_sk_perm(struct aa_profile *profile,
+					struct common_audit_data *sa,
+					u32 request,
+					struct sock *sk)
+{
+	return aa_profile_af_perm(profile, sa, request, sk->sk_family,
+				  sk->sk_type);
+}
+int aa_sk_perm(const char *op, u32 request, struct sock *sk);
+
+int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
+		      struct socket *sock);
+
+
+static inline void aa_free_net_rules(struct aa_net *new)
+{
+	/* NOP */
+}
+
+#endif /* __AA_NET_H */
diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index 2b27bb79aec4..af04d5a7d73d 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -135,9 +135,10 @@ extern struct aa_perms allperms;
 
 
 void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask);
-void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask);
+void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names,
+			 u32 mask);
 void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs,
-			u32 chrsmask, const char **names, u32 namesmask);
+			u32 chrsmask, const char * const *names, u32 namesmask);
 void aa_apply_modes_to_perms(struct aa_profile *profile,
 			     struct aa_perms *perms);
 void aa_compute_perms(struct aa_dfa *dfa, unsigned int state,
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 17fe41a9cac3..4364088a0b9e 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -30,6 +30,7 @@
 #include "file.h"
 #include "lib.h"
 #include "label.h"
+#include "net.h"
 #include "perms.h"
 #include "resource.h"
 
@@ -111,6 +112,7 @@ struct aa_data {
  * @policy: general match rules governing policy
  * @file: The set of rules governing basic file access and domain transitions
  * @caps: capabilities for the profile
+ * @net: network controls for the profile
  * @rlimits: rlimits for the profile
  *
  * @dents: dentries for the profiles file entries in apparmorfs
@@ -148,6 +150,7 @@ struct aa_profile {
 	struct aa_policydb policy;
 	struct aa_file_rules file;
 	struct aa_caps caps;
+	struct aa_net net;
 	struct aa_rlimit rlimits;
 
 	struct aa_loaddata *rawdata;
@@ -220,6 +223,16 @@ static inline unsigned int PROFILE_MEDIATES_SAFE(struct aa_profile *profile,
 	return 0;
 }
 
+static inline unsigned int PROFILE_MEDIATES_AF(struct aa_profile *profile,
+					       u16 AF) {
+	unsigned int state = PROFILE_MEDIATES(profile, AA_CLASS_NET);
+	u16 be_af = cpu_to_be16(AF);
+
+	if (!state)
+		return 0;
+	return aa_dfa_match_len(profile->policy.dfa, state, (char *) &be_af, 2);
+}
+
 /**
  * aa_get_profile - increment refcount on profile @p
  * @p: profile  (MAYBE NULL)
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 08ca26bcca77..8818621b5d95 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -211,7 +211,8 @@ void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask)
 	*str = '\0';
 }
 
-void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask)
+void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names,
+			 u32 mask)
 {
 	const char *fmt = "%s";
 	unsigned int i, perm = 1;
@@ -229,7 +230,7 @@ void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask)
 }
 
 void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs,
-			u32 chrsmask, const char **names, u32 namesmask)
+			u32 chrsmask, const char * const *names, u32 namesmask)
 {
 	char str[33];
 
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 4ad0b3a45142..cc5ab23a2d84 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -33,6 +33,7 @@
 #include "include/context.h"
 #include "include/file.h"
 #include "include/ipc.h"
+#include "include/net.h"
 #include "include/path.h"
 #include "include/label.h"
 #include "include/policy.h"
@@ -736,6 +737,368 @@ static int apparmor_task_kill(struct task_struct *target, struct siginfo *info,
 	return error;
 }
 
+/**
+ * apparmor_sk_alloc_security - allocate and attach the sk_security field
+ */
+static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags)
+{
+	struct aa_sk_ctx *ctx;
+
+	ctx = kzalloc(sizeof(*ctx), flags);
+	if (!ctx)
+		return -ENOMEM;
+
+	SK_CTX(sk) = ctx;
+
+	return 0;
+}
+
+/**
+ * apparmor_sk_free_security - free the sk_security field
+ */
+static void apparmor_sk_free_security(struct sock *sk)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+	SK_CTX(sk) = NULL;
+	aa_put_label(ctx->label);
+	aa_put_label(ctx->peer);
+	path_put(&ctx->path);
+	kfree(ctx);
+}
+
+/**
+ * apparmor_clone_security - clone the sk_security field
+ */
+static void apparmor_sk_clone_security(const struct sock *sk,
+				       struct sock *newsk)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+	struct aa_sk_ctx *new = SK_CTX(newsk);
+
+	new->label = aa_get_label(ctx->label);
+	new->peer = aa_get_label(ctx->peer);
+	new->path = ctx->path;
+	path_get(&new->path);
+}
+
+static int aa_sock_create_perm(struct aa_label *label, int family, int type,
+			       int protocol)
+{
+	AA_BUG(!label);
+	AA_BUG(in_interrupt());
+
+	return aa_af_perm(label, OP_CREATE, AA_MAY_CREATE, family, type,
+			  protocol);
+}
+
+
+/**
+ * apparmor_socket_create - check perms before creating a new socket
+ */
+static int apparmor_socket_create(int family, int type, int protocol, int kern)
+{
+	struct aa_label *label;
+	int error = 0;
+
+	label = begin_current_label_crit_section();
+	if (!(kern || unconfined(label)))
+		error = aa_sock_create_perm(label, family, type, protocol);
+	end_current_label_crit_section(label);
+
+	return error;
+}
+
+/**
+ * apparmor_socket_post_create - setup the per-socket security struct
+ *
+ * Note:
+ * -   kernel sockets currently labeled unconfined but we may want to
+ *     move to a special kernel label
+ * -   socket may not have sk here if created with sock_create_lite or
+ *     sock_alloc. These should be accept cases which will be handled in
+ *     sock_graft.
+ */
+static int apparmor_socket_post_create(struct socket *sock, int family,
+				       int type, int protocol, int kern)
+{
+	struct aa_label *label;
+
+	if (kern) {
+		struct aa_ns *ns = aa_get_current_ns();
+
+		label = aa_get_label(ns_unconfined(ns));
+		aa_put_ns(ns);
+	} else
+		label = aa_get_current_label();
+
+	if (sock->sk) {
+		struct aa_sk_ctx *ctx = SK_CTX(sock->sk);
+
+		aa_put_label(ctx->label);
+		ctx->label = aa_get_label(label);
+	}
+	aa_put_label(label);
+
+	return 0;
+}
+
+/**
+ * apparmor_socket_bind - check perms before bind addr to socket
+ */
+static int apparmor_socket_bind(struct socket *sock,
+				struct sockaddr *address, int addrlen)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(!address);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk);
+}
+
+/**
+ * apparmor_socket_connect - check perms before connecting @sock to @address
+ */
+static int apparmor_socket_connect(struct socket *sock,
+				   struct sockaddr *address, int addrlen)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(!address);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk);
+}
+
+/**
+ * apparmor_socket_list - check perms before allowing listen
+ */
+static int apparmor_socket_listen(struct socket *sock, int backlog)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk);
+}
+
+/**
+ * apparmor_socket_accept - check perms before accepting a new connection.
+ *
+ * Note: while @newsock is created and has some information, the accept
+ *       has not been done.
+ */
+static int apparmor_socket_accept(struct socket *sock, struct socket *newsock)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(!newsock);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk);
+}
+
+static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock,
+			    struct msghdr *msg, int size)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(!msg);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(op, request, sock->sk);
+}
+
+/**
+ * apparmor_socket_sendmsg - check perms before sending msg to another socket
+ */
+static int apparmor_socket_sendmsg(struct socket *sock,
+				   struct msghdr *msg, int size)
+{
+	return aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size);
+}
+
+/**
+ * apparmor_socket_recvmsg - check perms before receiving a message
+ */
+static int apparmor_socket_recvmsg(struct socket *sock,
+				   struct msghdr *msg, int size, int flags)
+{
+	return aa_sock_msg_perm(OP_RECVMSG, AA_MAY_RECEIVE, sock, msg, size);
+}
+
+/* revaliation, get/set attr, shutdown */
+static int aa_sock_perm(const char *op, u32 request, struct socket *sock)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(op, request, sock->sk);
+}
+
+/**
+ * apparmor_socket_getsockname - check perms before getting the local address
+ */
+static int apparmor_socket_getsockname(struct socket *sock)
+{
+	return aa_sock_perm(OP_GETSOCKNAME, AA_MAY_GETATTR, sock);
+}
+
+/**
+ * apparmor_socket_getpeername - check perms before getting remote address
+ */
+static int apparmor_socket_getpeername(struct socket *sock)
+{
+	return aa_sock_perm(OP_GETPEERNAME, AA_MAY_GETATTR, sock);
+}
+
+/* revaliation, get/set attr, opt */
+static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock,
+			    int level, int optname)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(op, request, sock->sk);
+}
+
+/**
+ * apparmor_getsockopt - check perms before getting socket options
+ */
+static int apparmor_socket_getsockopt(struct socket *sock, int level,
+				      int optname)
+{
+	return aa_sock_opt_perm(OP_GETSOCKOPT, AA_MAY_GETOPT, sock,
+				level, optname);
+}
+
+/**
+ * apparmor_setsockopt - check perms before setting socket options
+ */
+static int apparmor_socket_setsockopt(struct socket *sock, int level,
+				      int optname)
+{
+	return aa_sock_opt_perm(OP_SETSOCKOPT, AA_MAY_SETOPT, sock,
+				level, optname);
+}
+
+/**
+ * apparmor_socket_shutdown - check perms before shutting down @sock conn
+ */
+static int apparmor_socket_shutdown(struct socket *sock, int how)
+{
+	return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock);
+}
+
+/**
+ * apparmor_socket_sock_recv_skb - check perms before associating skb to sk
+ *
+ * Note: can not sleep may be called with locks held
+ *
+ * dont want protocol specific in __skb_recv_datagram()
+ * to deny an incoming connection  socket_sock_rcv_skb()
+ */
+static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	return 0;
+}
+
+
+static struct aa_label *sk_peer_label(struct sock *sk)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+	if (ctx->peer)
+		return ctx->peer;
+
+	return ERR_PTR(-ENOPROTOOPT);
+}
+
+/**
+ * apparmor_socket_getpeersec_stream - get security context of peer
+ *
+ * Note: for tcp only valid if using ipsec or cipso on lan
+ */
+static int apparmor_socket_getpeersec_stream(struct socket *sock,
+					     char __user *optval,
+					     int __user *optlen,
+					     unsigned int len)
+{
+	char *name;
+	int slen, error = 0;
+	struct aa_label *label;
+	struct aa_label *peer;
+
+	label = begin_current_label_crit_section();
+	peer = sk_peer_label(sock->sk);
+	if (IS_ERR(peer)) {
+		error = PTR_ERR(peer);
+		goto done;
+	}
+	slen = aa_label_asxprint(&name, labels_ns(label), peer,
+				 FLAG_SHOW_MODE | FLAG_VIEW_SUBNS |
+				 FLAG_HIDDEN_UNCONFINED, GFP_KERNEL);
+	/* don't include terminating \0 in slen, it breaks some apps */
+	if (slen < 0) {
+		error = -ENOMEM;
+	} else {
+		if (slen > len) {
+			error = -ERANGE;
+		} else if (copy_to_user(optval, name, slen)) {
+			error = -EFAULT;
+			goto out;
+		}
+		if (put_user(slen, optlen))
+			error = -EFAULT;
+out:
+		kfree(name);
+
+	}
+
+done:
+	end_current_label_crit_section(label);
+
+	return error;
+}
+
+/**
+ * apparmor_socket_getpeersec_dgram - get security label of packet
+ * @sock: the peer socket
+ * @skb: packet data
+ * @secid: pointer to where to put the secid of the packet
+ *
+ * Sets the netlabel socket state on sk from parent
+ */
+static int apparmor_socket_getpeersec_dgram(struct socket *sock,
+					    struct sk_buff *skb, u32 *secid)
+
+{
+	/* TODO: requires secid support */
+	return -ENOPROTOOPT;
+}
+
+/**
+ * apparmor_sock_graft - Initialize newly created socket
+ * @sk: child sock
+ * @parent: parent socket
+ *
+ * Note: could set off of SOCK_CTX(parent) but need to track inode and we can
+ *       just set sk security information off of current creating process label
+ *       Labeling of sk for accept case - probably should be sock based
+ *       instead of task, because of the case where an implicitly labeled
+ *       socket is shared by different tasks.
+ */
+static void apparmor_sock_graft(struct sock *sk, struct socket *parent)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+	if (!ctx->label)
+		ctx->label = aa_get_current_label();
+}
+
 static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
 	LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme),
@@ -770,6 +1133,30 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(getprocattr, apparmor_getprocattr),
 	LSM_HOOK_INIT(setprocattr, apparmor_setprocattr),
 
+	LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security),
+	LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security),
+	LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security),
+
+	LSM_HOOK_INIT(socket_create, apparmor_socket_create),
+	LSM_HOOK_INIT(socket_post_create, apparmor_socket_post_create),
+	LSM_HOOK_INIT(socket_bind, apparmor_socket_bind),
+	LSM_HOOK_INIT(socket_connect, apparmor_socket_connect),
+	LSM_HOOK_INIT(socket_listen, apparmor_socket_listen),
+	LSM_HOOK_INIT(socket_accept, apparmor_socket_accept),
+	LSM_HOOK_INIT(socket_sendmsg, apparmor_socket_sendmsg),
+	LSM_HOOK_INIT(socket_recvmsg, apparmor_socket_recvmsg),
+	LSM_HOOK_INIT(socket_getsockname, apparmor_socket_getsockname),
+	LSM_HOOK_INIT(socket_getpeername, apparmor_socket_getpeername),
+	LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt),
+	LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt),
+	LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown),
+	LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb),
+	LSM_HOOK_INIT(socket_getpeersec_stream,
+		      apparmor_socket_getpeersec_stream),
+	LSM_HOOK_INIT(socket_getpeersec_dgram,
+		      apparmor_socket_getpeersec_dgram),
+	LSM_HOOK_INIT(sock_graft, apparmor_sock_graft),
+
 	LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank),
 	LSM_HOOK_INIT(cred_free, apparmor_cred_free),
 	LSM_HOOK_INIT(cred_prepare, apparmor_cred_prepare),
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
new file mode 100644
index 000000000000..33d54435f8d6
--- /dev/null
+++ b/security/apparmor/net.c
@@ -0,0 +1,184 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor network mediation
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include "include/apparmor.h"
+#include "include/audit.h"
+#include "include/context.h"
+#include "include/label.h"
+#include "include/net.h"
+#include "include/policy.h"
+
+#include "net_names.h"
+
+
+struct aa_sfs_entry aa_sfs_entry_network[] = {
+	AA_SFS_FILE_STRING("af_mask",	AA_SFS_AF_MASK),
+	{ }
+};
+
+static const char * const net_mask_names[] = {
+	"unknown",
+	"send",
+	"receive",
+	"unknown",
+
+	"create",
+	"shutdown",
+	"connect",
+	"unknown",
+
+	"setattr",
+	"getattr",
+	"setcred",
+	"getcred",
+
+	"chmod",
+	"chown",
+	"chgrp",
+	"lock",
+
+	"mmap",
+	"mprot",
+	"unknown",
+	"unknown",
+
+	"accept",
+	"bind",
+	"listen",
+	"unknown",
+
+	"setopt",
+	"getopt",
+	"unknown",
+	"unknown",
+
+	"unknown",
+	"unknown",
+	"unknown",
+	"unknown",
+};
+
+
+/* audit callback for net specific fields */
+void audit_net_cb(struct audit_buffer *ab, void *va)
+{
+	struct common_audit_data *sa = va;
+
+	audit_log_format(ab, " family=");
+	if (address_family_names[sa->u.net->family])
+		audit_log_string(ab, address_family_names[sa->u.net->family]);
+	else
+		audit_log_format(ab, "\"unknown(%d)\"", sa->u.net->family);
+	audit_log_format(ab, " sock_type=");
+	if (sock_type_names[aad(sa)->net.type])
+		audit_log_string(ab, sock_type_names[aad(sa)->net.type]);
+	else
+		audit_log_format(ab, "\"unknown(%d)\"", aad(sa)->net.type);
+	audit_log_format(ab, " protocol=%d", aad(sa)->net.protocol);
+
+	if (aad(sa)->request & NET_PERMS_MASK) {
+		audit_log_format(ab, " requested_mask=");
+		aa_audit_perm_mask(ab, aad(sa)->request, NULL, 0,
+				   net_mask_names, NET_PERMS_MASK);
+
+		if (aad(sa)->denied & NET_PERMS_MASK) {
+			audit_log_format(ab, " denied_mask=");
+			aa_audit_perm_mask(ab, aad(sa)->denied, NULL, 0,
+					   net_mask_names, NET_PERMS_MASK);
+		}
+	}
+	if (aad(sa)->peer) {
+		audit_log_format(ab, " peer=");
+		aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer,
+				FLAGS_NONE, GFP_ATOMIC);
+	}
+}
+
+
+/* Generic af perm */
+int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
+		       u32 request, u16 family, int type)
+{
+	struct aa_perms perms = { };
+
+	AA_BUG(family >= AF_MAX);
+	AA_BUG(type < 0 || type >= SOCK_MAX);
+
+	if (profile_unconfined(profile))
+		return 0;
+
+	perms.allow = (profile->net.allow[family] & (1 << type)) ?
+		ALL_PERMS_MASK : 0;
+	perms.audit = (profile->net.audit[family] & (1 << type)) ?
+		ALL_PERMS_MASK : 0;
+	perms.quiet = (profile->net.quiet[family] & (1 << type)) ?
+		ALL_PERMS_MASK : 0;
+	aa_apply_modes_to_perms(profile, &perms);
+
+	return aa_check_perms(profile, &perms, request, sa, audit_net_cb);
+}
+
+int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family,
+	       int type, int protocol)
+{
+	struct aa_profile *profile;
+	DEFINE_AUDIT_NET(sa, op, NULL, family, type, protocol);
+
+	return fn_for_each_confined(label, profile,
+			aa_profile_af_perm(profile, &sa, request, family,
+					   type));
+}
+
+static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request,
+			    struct sock *sk)
+{
+	struct aa_profile *profile;
+	DEFINE_AUDIT_SK(sa, op, sk);
+
+	AA_BUG(!label);
+	AA_BUG(!sk);
+
+	if (unconfined(label))
+		return 0;
+
+	return fn_for_each_confined(label, profile,
+			aa_profile_af_sk_perm(profile, &sa, request, sk));
+}
+
+int aa_sk_perm(const char *op, u32 request, struct sock *sk)
+{
+	struct aa_label *label;
+	int error;
+
+	AA_BUG(!sk);
+	AA_BUG(in_interrupt());
+
+	/* TODO: switch to begin_current_label ???? */
+	label = begin_current_label_crit_section();
+	error = aa_label_sk_perm(label, op, request, sk);
+	end_current_label_crit_section(label);
+
+	return error;
+}
+
+
+int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
+		      struct socket *sock)
+{
+	AA_BUG(!label);
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+
+	return aa_label_sk_perm(label, op, request, sock->sk);
+}
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 4ede87c30f8b..5a2aec358322 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -275,6 +275,19 @@ fail:
 	return 0;
 }
 
+static bool unpack_u16(struct aa_ext *e, u16 *data, const char *name)
+{
+	if (unpack_nameX(e, AA_U16, name)) {
+		if (!inbounds(e, sizeof(u16)))
+			return 0;
+		if (data)
+			*data = le16_to_cpu(get_unaligned((__le16 *) e->pos));
+		e->pos += sizeof(u16);
+		return 1;
+	}
+	return 0;
+}
+
 static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name)
 {
 	if (unpack_nameX(e, AA_U32, name)) {
@@ -584,7 +597,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	struct aa_profile *profile = NULL;
 	const char *tmpname, *tmpns = NULL, *name = NULL;
 	const char *info = "failed to unpack profile";
-	size_t ns_len;
+	size_t size = 0, ns_len;
 	struct rhashtable_params params = { 0 };
 	char *key = NULL;
 	struct aa_data *data;
@@ -717,6 +730,38 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		goto fail;
 	}
 
+	size = unpack_array(e, "net_allowed_af");
+	if (size) {
+
+		for (i = 0; i < size; i++) {
+			/* discard extraneous rules that this kernel will
+			 * never request
+			 */
+			if (i >= AF_MAX) {
+				u16 tmp;
+
+				if (!unpack_u16(e, &tmp, NULL) ||
+				    !unpack_u16(e, &tmp, NULL) ||
+				    !unpack_u16(e, &tmp, NULL))
+					goto fail;
+				continue;
+			}
+			if (!unpack_u16(e, &profile->net.allow[i], NULL))
+				goto fail;
+			if (!unpack_u16(e, &profile->net.audit[i], NULL))
+				goto fail;
+			if (!unpack_u16(e, &profile->net.quiet[i], NULL))
+				goto fail;
+		}
+		if (!unpack_nameX(e, AA_ARRAYEND, NULL))
+			goto fail;
+	}
+	if (VERSION_LT(e->version, v7)) {
+		/* pre v7 policy always allowed these */
+		profile->net.allow[AF_UNIX] = 0xffff;
+		profile->net.allow[AF_NETLINK] = 0xffff;
+	}
+
 	if (unpack_nameX(e, AA_STRUCT, "policydb")) {
 		/* generic policy dfa - optional and may be NULL */
 		info = "failed to unpack policydb";
-- 
cgit 


From d07881d2edb0ab783846730629ac2faeaafdf4f1 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 16 Aug 2017 08:59:57 -0700
Subject: apparmor: move new_null_profile to after profile lookup fns()

new_null_profile will need to use some of the profile lookup fns()
so move instead of doing forward fn declarations.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy.c | 158 ++++++++++++++++++++++-----------------------
 1 file changed, 79 insertions(+), 79 deletions(-)

diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 244ea4a4a8f0..a81a384a63b1 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -289,85 +289,6 @@ fail:
 	return NULL;
 }
 
-/**
- * aa_new_null_profile - create or find a null-X learning profile
- * @parent: profile that caused this profile to be created (NOT NULL)
- * @hat: true if the null- learning profile is a hat
- * @base: name to base the null profile off of
- * @gfp: type of allocation
- *
- * Find/Create a null- complain mode profile used in learning mode.  The
- * name of the profile is unique and follows the format of parent//null-XXX.
- * where XXX is based on the @name or if that fails or is not supplied
- * a unique number
- *
- * null profiles are added to the profile list but the list does not
- * hold a count on them so that they are automatically released when
- * not in use.
- *
- * Returns: new refcounted profile else NULL on failure
- */
-struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
-				       const char *base, gfp_t gfp)
-{
-	struct aa_profile *profile;
-	char *name;
-
-	AA_BUG(!parent);
-
-	if (base) {
-		name = kmalloc(strlen(parent->base.hname) + 8 + strlen(base),
-			       gfp);
-		if (name) {
-			sprintf(name, "%s//null-%s", parent->base.hname, base);
-			goto name;
-		}
-		/* fall through to try shorter uniq */
-	}
-
-	name = kmalloc(strlen(parent->base.hname) + 2 + 7 + 8, gfp);
-	if (!name)
-		return NULL;
-	sprintf(name, "%s//null-%x", parent->base.hname,
-		atomic_inc_return(&parent->ns->uniq_null));
-
-name:
-	/* lookup to see if this is a dup creation */
-	profile = aa_find_child(parent, basename(name));
-	if (profile)
-		goto out;
-
-	profile = aa_alloc_profile(name, NULL, gfp);
-	if (!profile)
-		goto fail;
-
-	profile->mode = APPARMOR_COMPLAIN;
-	profile->label.flags |= FLAG_NULL;
-	if (hat)
-		profile->label.flags |= FLAG_HAT;
-	profile->path_flags = parent->path_flags;
-
-	/* released on free_profile */
-	rcu_assign_pointer(profile->parent, aa_get_profile(parent));
-	profile->ns = aa_get_ns(parent->ns);
-	profile->file.dfa = aa_get_dfa(nulldfa);
-	profile->policy.dfa = aa_get_dfa(nulldfa);
-
-	mutex_lock(&profile->ns->lock);
-	__add_profile(&parent->base.profiles, profile);
-	mutex_unlock(&profile->ns->lock);
-
-	/* refcount released by caller */
-out:
-	kfree(name);
-
-	return profile;
-
-fail:
-	aa_free_profile(profile);
-	return NULL;
-}
-
 /* TODO: profile accounting - setup in remove */
 
 /**
@@ -558,6 +479,85 @@ struct aa_profile *aa_fqlookupn_profile(struct aa_label *base,
 	return profile;
 }
 
+/**
+ * aa_new_null_profile - create or find a null-X learning profile
+ * @parent: profile that caused this profile to be created (NOT NULL)
+ * @hat: true if the null- learning profile is a hat
+ * @base: name to base the null profile off of
+ * @gfp: type of allocation
+ *
+ * Find/Create a null- complain mode profile used in learning mode.  The
+ * name of the profile is unique and follows the format of parent//null-XXX.
+ * where XXX is based on the @name or if that fails or is not supplied
+ * a unique number
+ *
+ * null profiles are added to the profile list but the list does not
+ * hold a count on them so that they are automatically released when
+ * not in use.
+ *
+ * Returns: new refcounted profile else NULL on failure
+ */
+struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
+				       const char *base, gfp_t gfp)
+{
+	struct aa_profile *profile;
+	char *name;
+
+	AA_BUG(!parent);
+
+	if (base) {
+		name = kmalloc(strlen(parent->base.hname) + 8 + strlen(base),
+			       gfp);
+		if (name) {
+			sprintf(name, "%s//null-%s", parent->base.hname, base);
+			goto name;
+		}
+		/* fall through to try shorter uniq */
+	}
+
+	name = kmalloc(strlen(parent->base.hname) + 2 + 7 + 8, gfp);
+	if (!name)
+		return NULL;
+	sprintf(name, "%s//null-%x", parent->base.hname,
+		atomic_inc_return(&parent->ns->uniq_null));
+
+name:
+	/* lookup to see if this is a dup creation */
+	profile = aa_find_child(parent, basename(name));
+	if (profile)
+		goto out;
+
+	profile = aa_alloc_profile(name, NULL, gfp);
+	if (!profile)
+		goto fail;
+
+	profile->mode = APPARMOR_COMPLAIN;
+	profile->label.flags |= FLAG_NULL;
+	if (hat)
+		profile->label.flags |= FLAG_HAT;
+	profile->path_flags = parent->path_flags;
+
+	/* released on free_profile */
+	rcu_assign_pointer(profile->parent, aa_get_profile(parent));
+	profile->ns = aa_get_ns(parent->ns);
+	profile->file.dfa = aa_get_dfa(nulldfa);
+	profile->policy.dfa = aa_get_dfa(nulldfa);
+
+	mutex_lock(&profile->ns->lock);
+	__add_profile(&parent->base.profiles, profile);
+	mutex_unlock(&profile->ns->lock);
+
+	/* refcount released by caller */
+out:
+	kfree(name);
+
+	return profile;
+
+fail:
+	aa_free_profile(profile);
+	return NULL;
+}
+
 /**
  * replacement_allowed - test to see if replacement is allowed
  * @profile: profile to test if it can be replaced  (MAYBE NULL)
-- 
cgit 


From 290638a52a808d658bd04b746b3ca46886c157e0 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 16 Aug 2017 05:40:49 -0700
Subject: apparmor: fix race condition in null profile creation

There is a race when null- profile is being created between the
initial lookup/creation of the profile and lock/addition of the
profile. This could result in multiple version of a profile being
added to the list which need to be removed/replaced.

Since these are learning profile their is no affect on mediation.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index a81a384a63b1..4243b0c3f0e4 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -500,7 +500,8 @@ struct aa_profile *aa_fqlookupn_profile(struct aa_label *base,
 struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
 				       const char *base, gfp_t gfp)
 {
-	struct aa_profile *profile;
+	struct aa_profile *p, *profile;
+	const char *bname;
 	char *name;
 
 	AA_BUG(!parent);
@@ -523,7 +524,8 @@ struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
 
 name:
 	/* lookup to see if this is a dup creation */
-	profile = aa_find_child(parent, basename(name));
+	bname = basename(name);
+	profile = aa_find_child(parent, bname);
 	if (profile)
 		goto out;
 
@@ -544,7 +546,13 @@ name:
 	profile->policy.dfa = aa_get_dfa(nulldfa);
 
 	mutex_lock(&profile->ns->lock);
-	__add_profile(&parent->base.profiles, profile);
+	p = __find_child(&parent->base.profiles, bname);
+	if (p) {
+		aa_free_profile(profile);
+		profile = aa_get_profile(p);
+	} else {
+		__add_profile(&parent->base.profiles, profile);
+	}
 	mutex_unlock(&profile->ns->lock);
 
 	/* refcount released by caller */
-- 
cgit 


From 15372b97aa7593c6f5bc1afe69f42fd403c40685 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 16 Aug 2017 05:48:06 -0700
Subject: apparmor: ensure unconfined profiles have dfas initialized

Generally unconfined has early bailout tests and does not need the
dfas initialized, however if an early bailout test is ever missed
it will result in an oops.

Be defensive and initialize the unconfined profile to have null dfas
(no permission) so if an early bailout test is missed we fail
closed (no perms granted) instead of oopsing.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_ns.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c
index 351d3bab3a3d..62a3589c62ab 100644
--- a/security/apparmor/policy_ns.c
+++ b/security/apparmor/policy_ns.c
@@ -112,6 +112,8 @@ static struct aa_ns *alloc_ns(const char *prefix, const char *name)
 	ns->unconfined->label.flags |= FLAG_IX_ON_NAME_ERROR |
 		FLAG_IMMUTIBLE | FLAG_NS_COUNT | FLAG_UNCONFINED;
 	ns->unconfined->mode = APPARMOR_UNCONFINED;
+	ns->unconfined->file.dfa = aa_get_dfa(nulldfa);
+	ns->unconfined->policy.dfa = aa_get_dfa(nulldfa);
 
 	/* ns and ns->unconfined share ns->unconfined refcount */
 	ns->unconfined->ns = ns;
-- 
cgit 


From bc4d82fb946e7b471eab2a80e384227c4eb15652 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 16 Aug 2017 09:33:48 -0700
Subject: apparmor: fix incorrect type assignment when freeing proxies

sparse reports

poisoning the proxy->label before freeing the struct is resulting in
a sparse build warning.
../security/apparmor/label.c:52:30: warning: incorrect type in assignment (different address spaces)
../security/apparmor/label.c:52:30:    expected struct aa_label [noderef] <asn:4>*label
../security/apparmor/label.c:52:30:    got struct aa_label *<noident>

fix with RCU_INIT_POINTER as this is one of those cases where
rcu_assign_pointer() is not needed.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/label.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 52b4ef14840d..c5b99b954580 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -49,7 +49,7 @@ static void free_proxy(struct aa_proxy *proxy)
 		/* p->label will not updated any more as p is dead */
 		aa_put_label(rcu_dereference_protected(proxy->label, true));
 		memset(proxy, 0, sizeof(*proxy));
-		proxy->label = (struct aa_label *) PROXY_POISON;
+		RCU_INIT_POINTER(proxy->label, (struct aa_label *)PROXY_POISON);
 		kfree(proxy);
 	}
 }
-- 
cgit 


From b1545dba092ba543eab1f7b5ed757a4988e267c8 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 23 Aug 2017 12:10:39 -0700
Subject: apparmor: fix build failure on sparc caused by undeclared signals

  In file included from security/apparmor/ipc.c:23:0:
  security/apparmor/include/sig_names.h:26:3: error: 'SIGSTKFLT' undeclared here (not in a function)
    [SIGSTKFLT] = 16, /* -, 16, - */
     ^
  security/apparmor/include/sig_names.h:26:3: error: array index in initializer not of integer type
  security/apparmor/include/sig_names.h:26:3: note: (near initialization for 'sig_map')
  security/apparmor/include/sig_names.h:51:3: error: 'SIGUNUSED' undeclared here (not in a function)
    [SIGUNUSED] = 34, /* -, 31, - */
     ^
  security/apparmor/include/sig_names.h:51:3: error: array index in initializer not of integer type
  security/apparmor/include/sig_names.h:51:3: note: (near initialization for 'sig_map')

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Fixes: c6bf1adaecaa ("apparmor: add the ability to mediate signals")
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/sig_names.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/security/apparmor/include/sig_names.h b/security/apparmor/include/sig_names.h
index 0d4395f231ca..92e62fe95292 100644
--- a/security/apparmor/include/sig_names.h
+++ b/security/apparmor/include/sig_names.h
@@ -23,7 +23,9 @@ static const int sig_map[MAXMAPPED_SIG] = {
 	[SIGPIPE] = 13,
 	[SIGALRM] = 14,
 	[SIGTERM] = 15,
+#ifdef SIGSTKFLT
 	[SIGSTKFLT] = 16,	/* -, 16, - */
+#endif
 	[SIGCHLD] = 17,		/* 20, 17, 18.  SIGCHLD -, -, 18 */
 	[SIGCONT] = 18,		/* 19, 18, 25 */
 	[SIGSTOP] = 19,		/* 17, 19, 23 */
@@ -47,7 +49,8 @@ static const int sig_map[MAXMAPPED_SIG] = {
 #if defined(SIGLOST) && SIGPWR != SIGLOST		/* sparc */
 	[SIGLOST] = 33,		/* unused on Linux */
 #endif
-#if defined(SIGLOST) && defined(SIGSYS) && SIGLOST != SIGSYS
+#if defined(SIGUNUSED) && \
+    defined(SIGLOST) && defined(SIGSYS) && SIGLOST != SIGSYS
 	[SIGUNUSED] = 34,	/* -, 31, - */
 #endif
 };
-- 
cgit 


From bf81100f63db7ea243d17b9d5008ba3af2fdf6b2 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Thu, 31 Aug 2017 09:54:43 -0700
Subject: apparmor: fix apparmorfs DAC access permissions

The DAC access permissions for several apparmorfs files are wrong.

.access - needs to be writable by all tasks to perform queries
the others in the set only provide a read fn so should be read only.

With policy namespace virtualization all apparmor needs to control
the permission and visibility checks directly which means DAC
access has to be allowed for all user, group, and other.

BugLink: http://bugs.launchpad.net/bugs/1713103
Fixes: c97204baf840b ("apparmor: rename apparmor file fns and data to indicate use")
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 125dad5c3fde..518d5928661b 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -2215,12 +2215,12 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = {
 };
 
 static struct aa_sfs_entry aa_sfs_entry_apparmor[] = {
-	AA_SFS_FILE_FOPS(".access", 0640, &aa_sfs_access),
+	AA_SFS_FILE_FOPS(".access", 0666, &aa_sfs_access),
 	AA_SFS_FILE_FOPS(".stacked", 0444, &seq_ns_stacked_fops),
 	AA_SFS_FILE_FOPS(".ns_stacked", 0444, &seq_ns_nsstacked_fops),
-	AA_SFS_FILE_FOPS(".ns_level", 0666, &seq_ns_level_fops),
-	AA_SFS_FILE_FOPS(".ns_name", 0640, &seq_ns_name_fops),
-	AA_SFS_FILE_FOPS("profiles", 0440, &aa_sfs_profiles_fops),
+	AA_SFS_FILE_FOPS(".ns_level", 0444, &seq_ns_level_fops),
+	AA_SFS_FILE_FOPS(".ns_name", 0444, &seq_ns_name_fops),
+	AA_SFS_FILE_FOPS("profiles", 0444, &aa_sfs_profiles_fops),
 	AA_SFS_DIR("features", aa_sfs_entry_features),
 	{ }
 };
-- 
cgit 


From af21b01d1166248f282fc02d0f459c94de06615e Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 22 Sep 2017 22:24:02 +0200
Subject: parisc: Reintroduce option to gzip-compress the kernel

By adding the feature to build the kernel as self-extracting
executeable, the possibility to simply compress the kernel with gzip was
lost.

This patch now reintroduces this possibilty again and leaves it up to
the user to decide how the kernel should be built.

The palo bootloader is able to natively load both formats.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/Kconfig  | 12 ++++++++++++
 arch/parisc/Makefile |  5 +++++
 2 files changed, 17 insertions(+)

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index ba7b7ddc3844..a57dedbfc7b7 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -257,6 +257,18 @@ config PARISC_PAGE_SIZE_64KB
 
 endchoice
 
+config PARISC_SELF_EXTRACT
+	bool "Build kernel as self-extracting executable"
+	default y
+	help
+	  Say Y if you want to build the parisc kernel as a kind of
+	  self-extracting executable.
+
+	  If you say N here, the kernel will be compressed with gzip
+	  which can be loaded by the palo bootloader directly too.
+
+	  If you don't know what to do here, say Y.
+
 config SMP
 	bool "Symmetric multi-processing support"
 	---help---
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 58fae5d2449d..01946ebaff72 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -129,8 +129,13 @@ Image: vmlinux
 bzImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
+ifdef CONFIG_PARISC_SELF_EXTRACT
 vmlinuz: bzImage
 	$(OBJCOPY) $(boot)/bzImage $@
+else
+vmlinuz: vmlinux
+	@gzip -cf -9 $< > $@
+endif
 
 install:
 	$(CONFIG_SHELL) $(src)/arch/parisc/install.sh \
-- 
cgit 


From 8c031ba63f8f2a9efc471cb45b2ff18271556544 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 22 Sep 2017 21:57:11 +0200
Subject: parisc: Unbreak bootloader due to gcc-7 optimizations

gcc-7 optimizes the byte-wise accesses of get_unaligned_le32() into
word-wise accesses if the 32-bit integer output_len is declared as
external. This panics then the bootloader since we don't have the
unaligned access fault trap handler installed during boot time.

Avoid this optimization by declaring output_len as byte-aligned and thus
unbreak the bootloader code.

Additionally, compile the boot code optimized for size.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/boot/compressed/Makefile | 2 +-
 arch/parisc/boot/compressed/misc.c   | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile
index 5450a11c9d10..7d7e594bda36 100644
--- a/arch/parisc/boot/compressed/Makefile
+++ b/arch/parisc/boot/compressed/Makefile
@@ -15,7 +15,7 @@ targets += misc.o piggy.o sizes.h head.o real2.o firmware.o
 KBUILD_CFLAGS := -D__KERNEL__ -O2 -DBOOTLOADER
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks
-KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs
+KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os
 ifndef CONFIG_64BIT
 KBUILD_CFLAGS += -mfast-indirect-calls
 endif
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 13a4bf9ac4da..9345b44b86f0 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -24,7 +24,8 @@
 /* Symbols defined by linker scripts */
 extern char input_data[];
 extern int input_len;
-extern __le32 output_len;	/* at unaligned address, little-endian */
+/* output_len is inserted by the linker possibly at an unaligned address */
+extern __le32 output_len __aligned(1);
 extern char _text, _end;
 extern char _bss, _ebss;
 extern char _startcode_end;
-- 
cgit 


From c17c02040bf0d186cebd3e66ff349f955575bf38 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 22 Sep 2017 09:42:42 +0200
Subject: arch: remove unused *_segments() macros/functions

Some architectures define the no-op macros/functions copy_segments,
release_segments and forget_segments. These are used nowhere in the
tree, so removed them.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Vineet Gupta <vgupta@synopsys.com>   [for arch/arc]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arc/include/asm/processor.h        |  3 ---
 arch/c6x/include/asm/processor.h        |  3 ---
 arch/frv/include/asm/processor.h        |  4 ----
 arch/m32r/include/asm/processor.h       |  8 --------
 arch/metag/include/asm/processor.h      |  3 ---
 arch/mn10300/kernel/process.c           | 12 ------------
 arch/sh/include/asm/processor_32.h      |  4 ----
 arch/sh/include/asm/processor_64.h      |  4 ----
 arch/um/include/asm/processor-generic.h |  5 -----
 arch/xtensa/include/asm/processor.h     |  5 -----
 10 files changed, 51 deletions(-)

diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index d400a2161935..8ee41e988169 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -78,9 +78,6 @@ struct task_struct;
 
 #endif
 
-#define copy_segments(tsk, mm)      do { } while (0)
-#define release_segments(mm)        do { } while (0)
-
 #define KSTK_EIP(tsk)   (task_pt_regs(tsk)->ret)
 #define KSTK_ESP(tsk)   (task_pt_regs(tsk)->sp)
 
diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h
index 7c87b5be53b5..8f7cce829f8e 100644
--- a/arch/c6x/include/asm/processor.h
+++ b/arch/c6x/include/asm/processor.h
@@ -92,9 +92,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-#define copy_segments(tsk, mm)		do { } while (0)
-#define release_segments(mm)		do { } while (0)
-
 /*
  * saved kernel SP and DP of a blocked thread.
  */
diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h
index e4d08d74ed9f..021cce78b401 100644
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -92,10 +92,6 @@ static inline void release_thread(struct task_struct *dead_task)
 extern asmlinkage void save_user_regs(struct user_context *target);
 extern asmlinkage void *restore_user_regs(const struct user_context *target, ...);
 
-#define copy_segments(tsk, mm)		do { } while (0)
-#define release_segments(mm)		do { } while (0)
-#define forget_segments()		do { } while (0)
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	((tsk)->thread.frame0->pc)
diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h
index 657874eeeccc..c70fa9ac7169 100644
--- a/arch/m32r/include/asm/processor.h
+++ b/arch/m32r/include/asm/processor.h
@@ -118,14 +118,6 @@ struct mm_struct;
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/* Copy and release all segment info associated with a VM */
-extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
-extern void release_segments(struct mm_struct * mm);
-
-/* Copy and release all segment info associated with a VM */
-#define copy_segments(p, mm)  do { } while (0)
-#define release_segments(mm)  do { } while (0)
-
 unsigned long get_wchan(struct task_struct *p);
 #define KSTK_EIP(tsk)  ((tsk)->thread.lr)
 #define KSTK_ESP(tsk)  ((tsk)->thread.sp)
diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h
index ec6a49076980..8ae92d6abfd2 100644
--- a/arch/metag/include/asm/processor.h
+++ b/arch/metag/include/asm/processor.h
@@ -131,9 +131,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-#define copy_segments(tsk, mm)		do { } while (0)
-#define release_segments(mm)		do { } while (0)
-
 /*
  * Return saved PC of a blocked thread.
  */
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index 89e8027e07fb..7c475fd99c46 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -59,10 +59,6 @@ void arch_cpu_idle(void)
 }
 #endif
 
-void release_segments(struct mm_struct *mm)
-{
-}
-
 void machine_restart(char *cmd)
 {
 #ifdef CONFIG_KERNEL_DEBUGGER
@@ -112,14 +108,6 @@ void release_thread(struct task_struct *dead_task)
 {
 }
 
-/*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
- */
-void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
-{
-}
-
 /*
  * this gets called so that we can store lazy state into memory and copy the
  * current task into the new thread.
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index 18e0377f72bb..88ce1e22237b 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -136,10 +136,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_pc, unsigned lo
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/* Copy and release all segment info associated with a VM */
-#define copy_segments(p, mm)	do { } while(0)
-#define release_segments(mm)	do { } while(0)
-
 /*
  * FPU lazy state save handling.
  */
diff --git a/arch/sh/include/asm/processor_64.h b/arch/sh/include/asm/processor_64.h
index eedd4f625d07..777a16318aff 100644
--- a/arch/sh/include/asm/processor_64.h
+++ b/arch/sh/include/asm/processor_64.h
@@ -170,10 +170,6 @@ struct mm_struct;
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/* Copy and release all segment info associated with a VM */
-#define copy_segments(p, mm)	do { } while (0)
-#define release_segments(mm)	do { } while (0)
-#define forget_segments()	do { } while (0)
 /*
  * FPU lazy state save handling.
  */
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index f6d1a3f747a9..86942a492454 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -58,11 +58,6 @@ static inline void release_thread(struct task_struct *task)
 {
 }
 
-static inline void mm_copy_segments(struct mm_struct *from_mm,
-				    struct mm_struct *new_mm)
-{
-}
-
 #define init_stack	(init_thread_union.stack)
 
 /*
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 30ee8c608853..5b0027d4ecc0 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -208,11 +208,6 @@ struct mm_struct;
 /* Free all resources held by a thread. */
 #define release_thread(thread) do { } while(0)
 
-/* Copy and release all segment info associated with a VM */
-#define copy_segments(p, mm)	do { } while(0)
-#define release_segments(mm)	do { } while(0)
-#define forget_segments()	do { } while (0)
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->pc)
-- 
cgit 


From 6e70e26dc52be62c1f39f81b5f71fa5e643677aa Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Thu, 21 Sep 2017 21:32:29 -0500
Subject: SMB3: handle new statx fields

We weren't returning the creation time or the two easily supported
attributes (ENCRYPTED or COMPRESSED) for the getattr call to
allow statx to return these fields.

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>\
Acked-by: Jeff Layton <jlayton@poochiereds.net>
CC: Stable <stable@vger.kernel.org>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/inode.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a8693632235f..7c732cb44164 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -234,6 +234,8 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
 	fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
 	fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime);
 	fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
+	/* old POSIX extensions don't get create time */
+
 	fattr->cf_mode = le64_to_cpu(info->Permissions);
 
 	/*
@@ -2024,6 +2026,19 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
 	stat->blksize = CIFS_MAX_MSGSIZE;
 	stat->ino = CIFS_I(inode)->uniqueid;
 
+	/* old CIFS Unix Extensions doesn't return create time */
+	if (CIFS_I(inode)->createtime) {
+		stat->result_mask |= STATX_BTIME;
+		stat->btime =
+		      cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime));
+	}
+
+	stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED);
+	if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_COMPRESSED)
+		stat->attributes |= STATX_ATTR_COMPRESSED;
+	if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_ENCRYPTED)
+		stat->attributes |= STATX_ATTR_ENCRYPTED;
+
 	/*
 	 * If on a multiuser mount without unix extensions or cifsacl being
 	 * enabled, and the admin hasn't overridden them, set the ownership
-- 
cgit 


From 1013e760d10e614dc10b5624ce9fc41563ba2e65 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Fri, 22 Sep 2017 01:40:27 -0500
Subject: SMB3: Don't ignore O_SYNC/O_DSYNC and O_DIRECT flags

Signed-off-by: Steve French <smfrench@gmail.com>
CC: Stable <stable@vger.kernel.org>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/file.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 822311919163..92fdf9c35de2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -224,6 +224,13 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
 	if (backup_cred(cifs_sb))
 		create_options |= CREATE_OPEN_BACKUP_INTENT;
 
+	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
+	if (f_flags & O_SYNC)
+		create_options |= CREATE_WRITE_THROUGH;
+
+	if (f_flags & O_DIRECT)
+		create_options |= CREATE_NO_BUFFER;
+
 	oparms.tcon = tcon;
 	oparms.cifs_sb = cifs_sb;
 	oparms.desired_access = desired_access;
-- 
cgit 


From b9b95da92de9d498ece7e6a82e2d6dcfc76fd9d8 Mon Sep 17 00:00:00 2001
From: Stefan Schmidt <stefan@osg.samsung.com>
Date: Fri, 22 Sep 2017 14:28:46 +0200
Subject: MAINTAINERS: update git tree locations for ieee802154 subsystem

Patches for ieee802154 will go through my new trees towards netdev from
now on. The 6LoWPAN subsystem will stay as is (shared between ieee802154
and bluetooth) and go through the bluetooth tree as usual.

Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 955f034fd523..61ee134cf6a8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6642,8 +6642,8 @@ M:	Alexander Aring <alex.aring@gmail.com>
 M:	Stefan Schmidt <stefan@osg.samsung.com>
 L:	linux-wpan@vger.kernel.org
 W:	http://wpan.cakelab.org/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan-next.git
 S:	Maintained
 F:	net/ieee802154/
 F:	net/mac802154/
-- 
cgit 


From 581fe0ea61584d88072527ae9fb9dcb9d1f2783e Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 22 Sep 2017 19:42:37 -0400
Subject: net: orphan frags on stand-alone ptype in dev_queue_xmit_nit

Zerocopy skbs frags are copied when the skb is looped to a local sock.
Commit 1080e512d44d ("net: orphan frags on receive") introduced calls
to skb_orphan_frags to deliver_skb and __netif_receive_skb for this.

With msg_zerocopy, these skbs can also exist in the tx path and thus
loop from dev_queue_xmit_nit. This already calls deliver_skb in its
loop. But it does not orphan before a separate pt_prev->func().

Add the missing skb_orphan_frags_rx.

Changes
  v1->v2: handle skb_orphan_frags_rx failure

Fixes: 1f8b977ab32d ("sock: enable MSG_ZEROCOPY")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 9a2254f9802f..588b473194a8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1948,8 +1948,12 @@ again:
 		goto again;
 	}
 out_unlock:
-	if (pt_prev)
-		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
+	if (pt_prev) {
+		if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC))
+			pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
+		else
+			kfree_skb(skb2);
+	}
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
-- 
cgit 


From cbb2fb5c72f48d3029c144be0f0e61da1c7bccf7 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 22 Sep 2017 20:20:06 -0400
Subject: net: set tb->fast_sk_family

We need to set the tb->fast_sk_family properly so we can use the proper
comparison function for all subsequent reuseport bind requests.

Fixes: 637bc8bbe6c0 ("inet: reset tb->fastreuseport when adding a reuseport sk")
Reported-and-tested-by: Cole Robinson <crobinso@redhat.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b9c64b40a83a..f87f4805e244 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -328,6 +328,7 @@ success:
 			tb->fastuid = uid;
 			tb->fast_rcv_saddr = sk->sk_rcv_saddr;
 			tb->fast_ipv6_only = ipv6_only_sock(sk);
+			tb->fast_sk_family = sk->sk_family;
 #if IS_ENABLED(CONFIG_IPV6)
 			tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 #endif
@@ -354,6 +355,7 @@ success:
 				tb->fastuid = uid;
 				tb->fast_rcv_saddr = sk->sk_rcv_saddr;
 				tb->fast_ipv6_only = ipv6_only_sock(sk);
+				tb->fast_sk_family = sk->sk_family;
 #if IS_ENABLED(CONFIG_IPV6)
 				tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 #endif
-- 
cgit 


From 7a56673b58f2414679e926bba80309a037a4fd35 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 22 Sep 2017 20:20:07 -0400
Subject: net: use inet6_rcv_saddr to compare sockets

In ipv6_rcv_saddr_equal() we need to use inet6_rcv_saddr(sk) for the
ipv6 compare with the fast socket information to make sure we're doing
the proper comparisons.

Fixes: 637bc8bbe6c0 ("inet: reset tb->fastreuseport when adding a reuseport sk")
Reported-and-tested-by: Cole Robinson <crobinso@redhat.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f87f4805e244..a1bf30438bc5 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -266,7 +266,7 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
 #if IS_ENABLED(CONFIG_IPV6)
 	if (tb->fast_sk_family == AF_INET6)
 		return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
-					    &sk->sk_v6_rcv_saddr,
+					    inet6_rcv_saddr(sk),
 					    tb->fast_rcv_saddr,
 					    sk->sk_rcv_saddr,
 					    tb->fast_ipv6_only,
-- 
cgit 


From fbed24bcc69d3e48c5402c371f19f5c7688871e5 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 22 Sep 2017 20:20:08 -0400
Subject: inet: fix improper empty comparison

When doing my reuseport rework I screwed up and changed a

if (hlist_empty(&tb->owners))

to

if (!hlist_empty(&tb->owners))

This is obviously bad as all of the reuseport/reuse logic was reversed,
which caused weird problems like allowing an ipv4 bind conflict if we
opened an ipv4 only socket on a port followed by an ipv6 only socket on
the same port.

Fixes: b9470c27607b ("inet: kill smallest_size and smallest_port")
Reported-by: Cole Robinson <crobinso@redhat.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a1bf30438bc5..c039c937ba90 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -321,7 +321,7 @@ tb_found:
 			goto fail_unlock;
 	}
 success:
-	if (!hlist_empty(&tb->owners)) {
+	if (hlist_empty(&tb->owners)) {
 		tb->fastreuse = reuse;
 		if (sk->sk_reuseport) {
 			tb->fastreuseport = FASTREUSEPORT_ANY;
-- 
cgit 


From 0d0970eef3b03ef08b19da5bc3044410731cf38f Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 20 Sep 2017 16:24:32 -0500
Subject: objtool: Handle another GCC stack pointer adjustment bug

The kbuild bot reported the following warning with GCC 4.4 and a
randconfig:

  net/socket.o: warning: objtool: compat_sock_ioctl()+0x1083: stack state mismatch: cfa1=7+160 cfa2=-1+0

This is caused by another GCC non-optimization, where it backs up and
restores the stack pointer for no apparent reason:

    2f91:       48 89 e0                mov    %rsp,%rax
    2f94:       4c 89 e7                mov    %r12,%rdi
    2f97:       4c 89 f6                mov    %r14,%rsi
    2f9a:       ba 20 00 00 00          mov    $0x20,%edx
    2f9f:       48 89 c4                mov    %rax,%rsp

This issue would have been happily ignored before the following commit:

  dd88a0a0c861 ("objtool: Handle GCC stack pointer adjustment bug")

But now that objtool is paying attention to such stack pointer writes
to/from a register, it needs to understand them properly.  In this case
that means recognizing that the "mov %rsp, %rax" instruction is
potentially a backup of the stack pointer.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: dd88a0a0c861 ("objtool: Handle GCC stack pointer adjustment bug")
Link: http://lkml.kernel.org/r/8c7aa8e9a36fbbb6655d9d8e7cea58958c912da8.1505942196.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/arch/x86/decode.c |  6 +++---
 tools/objtool/check.c           | 43 +++++++++++++++++++++++++++--------------
 2 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 0e8c8ec4fd4e..0f22768c0d4d 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -208,14 +208,14 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 		break;
 
 	case 0x89:
-		if (rex == 0x48 && modrm == 0xe5) {
+		if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
 
-			/* mov %rsp, %rbp */
+			/* mov %rsp, reg */
 			*type = INSN_STACK;
 			op->src.type = OP_SRC_REG;
 			op->src.reg = CFI_SP;
 			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_BP;
+			op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
 			break;
 		}
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f744617c9946..a0c518ecf085 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1203,24 +1203,39 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
 		switch (op->src.type) {
 
 		case OP_SRC_REG:
-			if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP) {
+			if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
+			    cfa->base == CFI_SP &&
+			    regs[CFI_BP].base == CFI_CFA &&
+			    regs[CFI_BP].offset == -cfa->offset) {
+
+				/* mov %rsp, %rbp */
+				cfa->base = op->dest.reg;
+				state->bp_scratch = false;
+			}
 
-				if (cfa->base == CFI_SP &&
-				    regs[CFI_BP].base == CFI_CFA &&
-				    regs[CFI_BP].offset == -cfa->offset) {
+			else if (op->src.reg == CFI_SP &&
+				 op->dest.reg == CFI_BP && state->drap) {
 
-					/* mov %rsp, %rbp */
-					cfa->base = op->dest.reg;
-					state->bp_scratch = false;
-				}
+				/* drap: mov %rsp, %rbp */
+				regs[CFI_BP].base = CFI_BP;
+				regs[CFI_BP].offset = -state->stack_size;
+				state->bp_scratch = false;
+			}
 
-				else if (state->drap) {
+			else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
 
-					/* drap: mov %rsp, %rbp */
-					regs[CFI_BP].base = CFI_BP;
-					regs[CFI_BP].offset = -state->stack_size;
-					state->bp_scratch = false;
-				}
+				/*
+				 * mov %rsp, %reg
+				 *
+				 * This is needed for the rare case where GCC
+				 * does:
+				 *
+				 *   mov    %rsp, %rax
+				 *   ...
+				 *   mov    %rax, %rsp
+				 */
+				state->vals[op->dest.reg].base = CFI_CFA;
+				state->vals[op->dest.reg].offset = -state->stack_size;
 			}
 
 			else if (op->dest.reg == cfa->base) {
-- 
cgit 


From f5caf621ee357279e759c0911daf6d55c7d36f03 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 20 Sep 2017 16:24:33 -0500
Subject: x86/asm: Fix inline asm call constraints for Clang

For inline asm statements which have a CALL instruction, we list the
stack pointer as a constraint to convince GCC to ensure the frame
pointer is set up first:

  static inline void foo()
  {
	register void *__sp asm(_ASM_SP);
	asm("call bar" : "+r" (__sp))
  }

Unfortunately, that pattern causes Clang to corrupt the stack pointer.

The fix is easy: convert the stack pointer register variable to a global
variable.

It should be noted that the end result is different based on the GCC
version.  With GCC 6.4, this patch has exactly the same result as
before:

	defconfig	defconfig-nofp	distro		distro-nofp
 before	9820389		9491555		8816046		8516940
 after	9820389		9491555		8816046		8516940

With GCC 7.2, however, GCC's behavior has changed.  It now changes its
behavior based on the conversion of the register variable to a global.
That somehow convinces it to *always* set up the frame pointer before
inserting *any* inline asm.  (Therefore, listing the variable as an
output constraint is a no-op and is no longer necessary.)  It's a bit
overkill, but the performance impact should be negligible.  And in fact,
there's a nice improvement with frame pointers disabled:

	defconfig	defconfig-nofp	distro		distro-nofp
 before	9796316		9468236		9076191		8790305
 after	9796957		9464267		9076381		8785949

So in summary, while listing the stack pointer as an output constraint
is no longer necessary for newer versions of GCC, it's still needed for
older versions.

Suggested-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Reported-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/3db862e970c432ae823cf515c52b54fec8270e0e.1505942196.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/alternative.h               |  3 +--
 arch/x86/include/asm/asm.h                       | 11 +++++++++++
 arch/x86/include/asm/mshyperv.h                  | 10 ++++------
 arch/x86/include/asm/paravirt_types.h            | 14 +++++++-------
 arch/x86/include/asm/preempt.h                   | 15 +++++----------
 arch/x86/include/asm/processor.h                 |  6 ++----
 arch/x86/include/asm/rwsem.h                     |  4 ++--
 arch/x86/include/asm/uaccess.h                   |  4 ++--
 arch/x86/include/asm/xen/hypercall.h             |  5 ++---
 arch/x86/kvm/emulate.c                           |  3 +--
 arch/x86/kvm/vmx.c                               |  3 +--
 arch/x86/mm/fault.c                              |  3 +--
 tools/objtool/Documentation/stack-validation.txt |  6 +++---
 13 files changed, 42 insertions(+), 45 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 1b020381ab38..c096624137ae 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -218,10 +218,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
 #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2,   \
 			   output, input...)				      \
 {									      \
-	register void *__sp asm(_ASM_SP);				      \
 	asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
 		"call %P[new2]", feature2)				      \
-		: output, "+r" (__sp)					      \
+		: output, ASM_CALL_CONSTRAINT				      \
 		: [old] "i" (oldfunc), [new1] "i" (newfunc1),		      \
 		  [new2] "i" (newfunc2), ## input);			      \
 }
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 676ee5807d86..c1eadbaf1115 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -132,4 +132,15 @@
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
+#ifndef __ASSEMBLY__
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction.  Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function.  If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned int __asm_call_sp asm("esp");
+#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+#endif
+
 #endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 63cc96f064dc..738503e1f80c 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -179,7 +179,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 	u64 input_address = input ? virt_to_phys(input) : 0;
 	u64 output_address = output ? virt_to_phys(output) : 0;
 	u64 hv_status;
-	register void *__sp asm(_ASM_SP);
 
 #ifdef CONFIG_X86_64
 	if (!hv_hypercall_pg)
@@ -187,7 +186,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 
 	__asm__ __volatile__("mov %4, %%r8\n"
 			     "call *%5"
-			     : "=a" (hv_status), "+r" (__sp),
+			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 			       "+c" (control), "+d" (input_address)
 			     :  "r" (output_address), "m" (hv_hypercall_pg)
 			     : "cc", "memory", "r8", "r9", "r10", "r11");
@@ -202,7 +201,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 
 	__asm__ __volatile__("call *%7"
 			     : "=A" (hv_status),
-			       "+c" (input_address_lo), "+r" (__sp)
+			       "+c" (input_address_lo), ASM_CALL_CONSTRAINT
 			     : "A" (control),
 			       "b" (input_address_hi),
 			       "D"(output_address_hi), "S"(output_address_lo),
@@ -224,12 +223,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 {
 	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
-	register void *__sp asm(_ASM_SP);
 
 #ifdef CONFIG_X86_64
 	{
 		__asm__ __volatile__("call *%4"
-				     : "=a" (hv_status), "+r" (__sp),
+				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 				       "+c" (control), "+d" (input1)
 				     : "m" (hv_hypercall_pg)
 				     : "cc", "r8", "r9", "r10", "r11");
@@ -242,7 +240,7 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 		__asm__ __volatile__ ("call *%5"
 				      : "=A"(hv_status),
 					"+c"(input1_lo),
-					"+r"(__sp)
+					ASM_CALL_CONSTRAINT
 				      :	"A" (control),
 					"b" (input1_hi),
 					"m" (hv_hypercall_pg)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 42873edd9f9d..280d94c36dad 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -459,8 +459,8 @@ int paravirt_disable_iospace(void);
  */
 #ifdef CONFIG_X86_32
 #define PVOP_VCALL_ARGS							\
-	unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;	\
-	register void *__sp asm("esp")
+	unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
+
 #define PVOP_CALL_ARGS			PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x)		"a" ((unsigned long)(x))
@@ -480,8 +480,8 @@ int paravirt_disable_iospace(void);
 /* [re]ax isn't an arg, but the return val */
 #define PVOP_VCALL_ARGS						\
 	unsigned long __edi = __edi, __esi = __esi,		\
-		__edx = __edx, __ecx = __ecx, __eax = __eax;	\
-	register void *__sp asm("rsp")
+		__edx = __edx, __ecx = __ecx, __eax = __eax;
+
 #define PVOP_CALL_ARGS		PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x)		"D" ((unsigned long)(x))
@@ -532,7 +532,7 @@ int paravirt_disable_iospace(void);
 			asm volatile(pre				\
 				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
-				     : call_clbr, "+r" (__sp)		\
+				     : call_clbr, ASM_CALL_CONSTRAINT	\
 				     : paravirt_type(op),		\
 				       paravirt_clobber(clbr),		\
 				       ##__VA_ARGS__			\
@@ -542,7 +542,7 @@ int paravirt_disable_iospace(void);
 			asm volatile(pre				\
 				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
-				     : call_clbr, "+r" (__sp)		\
+				     : call_clbr, ASM_CALL_CONSTRAINT	\
 				     : paravirt_type(op),		\
 				       paravirt_clobber(clbr),		\
 				       ##__VA_ARGS__			\
@@ -569,7 +569,7 @@ int paravirt_disable_iospace(void);
 		asm volatile(pre					\
 			     paravirt_alt(PARAVIRT_CALL)		\
 			     post					\
-			     : call_clbr, "+r" (__sp)			\
+			     : call_clbr, ASM_CALL_CONSTRAINT		\
 			     : paravirt_type(op),			\
 			       paravirt_clobber(clbr),			\
 			       ##__VA_ARGS__				\
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index ec1f3c651150..4f44505dbf87 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -100,19 +100,14 @@ static __always_inline bool should_resched(int preempt_offset)
 
 #ifdef CONFIG_PREEMPT
   extern asmlinkage void ___preempt_schedule(void);
-# define __preempt_schedule()					\
-({								\
-	register void *__sp asm(_ASM_SP);			\
-	asm volatile ("call ___preempt_schedule" : "+r"(__sp));	\
-})
+# define __preempt_schedule() \
+	asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
 
   extern asmlinkage void preempt_schedule(void);
   extern asmlinkage void ___preempt_schedule_notrace(void);
-# define __preempt_schedule_notrace()					\
-({									\
-	register void *__sp asm(_ASM_SP);				\
-	asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp));	\
-})
+# define __preempt_schedule_notrace() \
+	asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
+
   extern asmlinkage void preempt_schedule_notrace(void);
 #endif
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3fa26a61eabc..b390ff76e58f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -677,8 +677,6 @@ static inline void sync_core(void)
 	 * Like all of Linux's memory ordering operations, this is a
 	 * compiler barrier as well.
 	 */
-	register void *__sp asm(_ASM_SP);
-
 #ifdef CONFIG_X86_32
 	asm volatile (
 		"pushfl\n\t"
@@ -686,7 +684,7 @@ static inline void sync_core(void)
 		"pushl $1f\n\t"
 		"iret\n\t"
 		"1:"
-		: "+r" (__sp) : : "memory");
+		: ASM_CALL_CONSTRAINT : : "memory");
 #else
 	unsigned int tmp;
 
@@ -703,7 +701,7 @@ static inline void sync_core(void)
 		"iretq\n\t"
 		UNWIND_HINT_RESTORE
 		"1:"
-		: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
+		: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
 #endif
 }
 
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index a34e0d4b957d..7116b7931c7b 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -103,7 +103,6 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
 ({							\
 	long tmp;					\
 	struct rw_semaphore* ret;			\
-	register void *__sp asm(_ASM_SP);		\
 							\
 	asm volatile("# beginning down_write\n\t"	\
 		     LOCK_PREFIX "  xadd      %1,(%4)\n\t"	\
@@ -114,7 +113,8 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
 		     "  call " slow_path "\n"		\
 		     "1:\n"				\
 		     "# ending down_write"		\
-		     : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
+		     : "+m" (sem->count), "=d" (tmp),	\
+		       "=a" (ret), ASM_CALL_CONSTRAINT	\
 		     : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
 		     : "memory", "cc");			\
 	ret;						\
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 184eb9894dae..78e8fcc87d4c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -166,11 +166,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
 ({									\
 	int __ret_gu;							\
 	register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX);		\
-	register void *__sp asm(_ASM_SP);				\
 	__chk_user_ptr(ptr);						\
 	might_fault();							\
 	asm volatile("call __get_user_%P4"				\
-		     : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp)	\
+		     : "=a" (__ret_gu), "=r" (__val_gu),		\
+			ASM_CALL_CONSTRAINT				\
 		     : "0" (ptr), "i" (sizeof(*(ptr))));		\
 	(x) = (__force __typeof__(*(ptr))) __val_gu;			\
 	__builtin_expect(__ret_gu, 0);					\
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9606688caa4b..128a1a0b1450 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -113,10 +113,9 @@ extern struct { char _entry[32]; } hypercall_page[];
 	register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
 	register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
 	register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
-	register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \
-	register void *__sp asm(_ASM_SP);
+	register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
 
-#define __HYPERCALL_0PARAM	"=r" (__res), "+r" (__sp)
+#define __HYPERCALL_0PARAM	"=r" (__res), ASM_CALL_CONSTRAINT
 #define __HYPERCALL_1PARAM	__HYPERCALL_0PARAM, "+r" (__arg1)
 #define __HYPERCALL_2PARAM	__HYPERCALL_1PARAM, "+r" (__arg2)
 #define __HYPERCALL_3PARAM	__HYPERCALL_2PARAM, "+r" (__arg3)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 16bf6655aa85..f23f13403f33 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5296,7 +5296,6 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 
 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 {
-	register void *__sp asm(_ASM_SP);
 	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
 
 	if (!(ctxt->d & ByteOp))
@@ -5304,7 +5303,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 
 	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
 	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
-	      [fastop]"+S"(fop), "+r"(__sp)
+	      [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
 	    : "c"(ctxt->src2.val));
 
 	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06c0c6d0541e..6ee237f509dc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9036,7 +9036,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 {
 	u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	register void *__sp asm(_ASM_SP);
 
 	if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
 			== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
@@ -9065,7 +9064,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_X86_64
 			[sp]"=&r"(tmp),
 #endif
-			"+r"(__sp)
+			ASM_CALL_CONSTRAINT
 			:
 			[entry]"r"(entry),
 			[ss]"i"(__KERNEL_DS),
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b836a7274e12..39567b5c33da 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -806,7 +806,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	if (is_vmalloc_addr((void *)address) &&
 	    (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
 	     address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
-		register void *__sp asm("rsp");
 		unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
 		/*
 		 * We're likely to be running with very little stack space
@@ -821,7 +820,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 		asm volatile ("movq %[stack], %%rsp\n\t"
 			      "call handle_stack_overflow\n\t"
 			      "1: jmp 1b"
-			      : "+r" (__sp)
+			      : ASM_CALL_CONSTRAINT
 			      : "D" ("kernel stack overflow (page fault)"),
 				"S" (regs), "d" (address),
 				[stack] "rm" (stack));
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 6a1af43862df..3995735a878f 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -194,10 +194,10 @@ they mean, and suggestions for how to fix them.
    If it's a GCC-compiled .c file, the error may be because the function
    uses an inline asm() statement which has a "call" instruction.  An
    asm() statement with a call instruction must declare the use of the
-   stack pointer in its output operand.  For example, on x86_64:
+   stack pointer in its output operand.  On x86_64, this means adding
+   the ASM_CALL_CONSTRAINT as an output constraint:
 
-     register void *__sp asm("rsp");
-     asm volatile("call func" : "+r" (__sp));
+     asm volatile("call func" : ASM_CALL_CONSTRAINT);
 
    Otherwise the stack frame may not get created before the call.
 
-- 
cgit 


From af2e658fc08a397b10352265e50b83f27e25d73e Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Fri, 22 Sep 2017 12:32:35 +0300
Subject: as3645a: Use ams,input-max-microamp as documented in DT bindings

DT bindings document the property "ams,input-max-microamp" that limits the
chip's maximum input current. The driver and the DTS however used
"peak-current-limit" property. Fix this by using the property documented
in DT binding documentation.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 arch/arm/boot/dts/omap3-n950-n9.dtsi | 2 +-
 drivers/leds/leds-as3645a.c          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi
index cb47ae79a5f9..b86fc83a5a65 100644
--- a/arch/arm/boot/dts/omap3-n950-n9.dtsi
+++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi
@@ -273,7 +273,7 @@
 			flash-timeout-us = <150000>;
 			flash-max-microamp = <320000>;
 			led-max-microamp = <60000>;
-			peak-current-limit = <1750000>;
+			ams,input-max-microamp = <1750000>;
 		};
 		indicator {
 			led-max-microamp = <10000>;
diff --git a/drivers/leds/leds-as3645a.c b/drivers/leds/leds-as3645a.c
index bbbbe0898233..e3f89c6130d2 100644
--- a/drivers/leds/leds-as3645a.c
+++ b/drivers/leds/leds-as3645a.c
@@ -534,7 +534,7 @@ static int as3645a_parse_node(struct as3645a *flash,
 	of_property_read_u32(flash->flash_node, "voltage-reference",
 			     &cfg->voltage_reference);
 
-	of_property_read_u32(flash->flash_node, "peak-current-limit",
+	of_property_read_u32(flash->flash_node, "ams,input-max-microamp",
 			     &cfg->peak);
 	cfg->peak = AS_PEAK_mA_TO_REG(cfg->peak);
 
-- 
cgit 


From 75f9f7279e874ff95d1abe4613abc0826c9a8dcc Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Fri, 22 Sep 2017 12:32:36 +0300
Subject: dt: bindings: as3645a: Use LED number to refer to LEDs

Use integers (reg property) to tell the number of the LED to the driver
instead of the node name. While both of these approaches are currently
used by the LED bindings, using integers will require less driver changes
for ACPI support. Additionally, it will make possible LED naming using
chip and LED node names, effectively making the label property most useful
for human-readable names only.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 .../devicetree/bindings/leds/ams,as3645a.txt       | 28 ++++++++++++++--------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/Documentation/devicetree/bindings/leds/ams,as3645a.txt b/Documentation/devicetree/bindings/leds/ams,as3645a.txt
index 12c5ef26ec73..fdc40e354a64 100644
--- a/Documentation/devicetree/bindings/leds/ams,as3645a.txt
+++ b/Documentation/devicetree/bindings/leds/ams,as3645a.txt
@@ -15,11 +15,14 @@ Required properties
 
 compatible	: Must be "ams,as3645a".
 reg		: The I2C address of the device. Typically 0x30.
+#address-cells	: 1
+#size-cells	: 0
 
 
-Required properties of the "flash" child node
-=============================================
+Required properties of the flash child node (0)
+===============================================
 
+reg: 0
 flash-timeout-us: Flash timeout in microseconds. The value must be in
 		  the range [100000, 850000] and divisible by 50000.
 flash-max-microamp: Maximum flash current in microamperes. Has to be
@@ -33,20 +36,21 @@ ams,input-max-microamp: Maximum flash controller input current. The
 			and divisible by 50000.
 
 
-Optional properties of the "flash" child node
-=============================================
+Optional properties of the flash child node
+===========================================
 
 label		: The label of the flash LED.
 
 
-Required properties of the "indicator" child node
-=================================================
+Required properties of the indicator child node (1)
+===================================================
 
+reg: 1
 led-max-microamp: Maximum indicator current. The allowed values are
 		  2500, 5000, 7500 and 10000.
 
-Optional properties of the "indicator" child node
-=================================================
+Optional properties of the indicator child node
+===============================================
 
 label		: The label of the indicator LED.
 
@@ -55,16 +59,20 @@ Example
 =======
 
 	as3645a@30 {
+		#address-cells: 1
+		#size-cells: 0
 		reg = <0x30>;
 		compatible = "ams,as3645a";
-		flash {
+		flash@0 {
+			reg = <0x0>;
 			flash-timeout-us = <150000>;
 			flash-max-microamp = <320000>;
 			led-max-microamp = <60000>;
 			ams,input-max-microamp = <1750000>;
 			label = "as3645a:flash";
 		};
-		indicator {
+		indicator@1 {
+			reg = <0x1>;
 			led-max-microamp = <10000>;
 			label = "as3645a:indicator";
 		};
-- 
cgit 


From e626c325277531db15314b80610d1f5a1c2637b2 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Fri, 22 Sep 2017 12:32:37 +0300
Subject: as3645a: Use integer numbers for parsing LEDs

Use integer numbers for LEDs, 0 is the flash and 1 is the indicator.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 arch/arm/boot/dts/omap3-n950-n9.dtsi |  8 ++++++--
 drivers/leds/leds-as3645a.c          | 26 ++++++++++++++++++++++++--
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi
index b86fc83a5a65..1b0bd72945f2 100644
--- a/arch/arm/boot/dts/omap3-n950-n9.dtsi
+++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi
@@ -267,15 +267,19 @@
 	clock-frequency = <400000>;
 
 	as3645a@30 {
+		#address-cells = <1>;
+		#size-cells = <0>;
 		reg = <0x30>;
 		compatible = "ams,as3645a";
-		flash {
+		flash@0 {
+			reg = <0x0>;
 			flash-timeout-us = <150000>;
 			flash-max-microamp = <320000>;
 			led-max-microamp = <60000>;
 			ams,input-max-microamp = <1750000>;
 		};
-		indicator {
+		indicator@1 {
+			reg = <0x1>;
 			led-max-microamp = <10000>;
 		};
 	};
diff --git a/drivers/leds/leds-as3645a.c b/drivers/leds/leds-as3645a.c
index e3f89c6130d2..605e0c64e974 100644
--- a/drivers/leds/leds-as3645a.c
+++ b/drivers/leds/leds-as3645a.c
@@ -112,6 +112,10 @@
 #define AS_PEAK_mA_TO_REG(a) \
 	((min_t(u32, AS_PEAK_mA_MAX, a) - 1250) / 250)
 
+/* LED numbers for Devicetree */
+#define AS_LED_FLASH				0
+#define AS_LED_INDICATOR			1
+
 enum as_mode {
 	AS_MODE_EXT_TORCH = 0 << AS_CONTROL_MODE_SETTING_SHIFT,
 	AS_MODE_INDICATOR = 1 << AS_CONTROL_MODE_SETTING_SHIFT,
@@ -491,10 +495,29 @@ static int as3645a_parse_node(struct as3645a *flash,
 			      struct device_node *node)
 {
 	struct as3645a_config *cfg = &flash->cfg;
+	struct device_node *child;
 	const char *name;
 	int rval;
 
-	flash->flash_node = of_get_child_by_name(node, "flash");
+	for_each_child_of_node(node, child) {
+		u32 id = 0;
+
+		of_property_read_u32(child, "reg", &id);
+
+		switch (id) {
+		case AS_LED_FLASH:
+			flash->flash_node = of_node_get(child);
+			break;
+		case AS_LED_INDICATOR:
+			flash->indicator_node = of_node_get(child);
+			break;
+		default:
+			dev_warn(&flash->client->dev,
+				 "unknown LED %u encountered, ignoring\n", id);
+			break;
+		}
+	}
+
 	if (!flash->flash_node) {
 		dev_err(&flash->client->dev, "can't find flash node\n");
 		return -ENODEV;
@@ -538,7 +561,6 @@ static int as3645a_parse_node(struct as3645a *flash,
 			     &cfg->peak);
 	cfg->peak = AS_PEAK_mA_TO_REG(cfg->peak);
 
-	flash->indicator_node = of_get_child_by_name(node, "indicator");
 	if (!flash->indicator_node) {
 		dev_warn(&flash->client->dev,
 			 "can't find indicator node\n");
-- 
cgit 


From 12c4b878e71fa8b65bc479b2460765c7d1d81a26 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Fri, 22 Sep 2017 12:32:38 +0300
Subject: as3645a: Unregister indicator LED on device unbind

The indicator LED was registered in probe but was not removed in driver
remove callback. Fix this.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 drivers/leds/leds-as3645a.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/leds/leds-as3645a.c b/drivers/leds/leds-as3645a.c
index 605e0c64e974..9a257f969300 100644
--- a/drivers/leds/leds-as3645a.c
+++ b/drivers/leds/leds-as3645a.c
@@ -743,6 +743,7 @@ static int as3645a_remove(struct i2c_client *client)
 	as3645a_set_control(flash, AS_MODE_EXT_TORCH, false);
 
 	v4l2_flash_release(flash->vf);
+	v4l2_flash_release(flash->vfind);
 
 	led_classdev_flash_unregister(&flash->fled);
 	led_classdev_unregister(&flash->iled_cdev);
-- 
cgit 


From 28585a832602747cbfa88ad8934013177a3aae38 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 22 Sep 2017 14:10:22 -0700
Subject: rcu: Allow for page faults in NMI handlers

A number of architecture invoke rcu_irq_enter() on exception entry in
order to allow RCU read-side critical sections in the exception handler
when the exception is from an idle or nohz_full CPU.  This works, at
least unless the exception happens in an NMI handler.  In that case,
rcu_nmi_enter() would already have exited the extended quiescent state,
which would mean that rcu_irq_enter() would (incorrectly) cause RCU
to think that it is again in an extended quiescent state.  This will
in turn result in lockdep splats in response to later RCU read-side
critical sections.

This commit therefore causes rcu_irq_enter() and rcu_irq_exit() to
take no action if there is an rcu_nmi_enter() in effect, thus avoiding
the unscheduled return to RCU quiescent state.  This in turn should
make the kernel safe for on-demand RCU voyeurism.

Link: http://lkml.kernel.org/r/20170922211022.GA18084@linux.vnet.ibm.com

Cc: stable@vger.kernel.org
Fixes: 0be964be0 ("module: Sanitize RCU usage and locking")
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/rcu/tree.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 51d4c3acf32d..63bee8e1b193 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -888,6 +888,11 @@ void rcu_irq_exit(void)
 
 	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
 	rdtp = this_cpu_ptr(&rcu_dynticks);
+
+	/* Page faults can happen in NMI handlers, so check... */
+	if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+		return;
+
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
 		     rdtp->dynticks_nesting < 1);
 	if (rdtp->dynticks_nesting <= 1) {
@@ -1020,6 +1025,11 @@ void rcu_irq_enter(void)
 
 	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!");
 	rdtp = this_cpu_ptr(&rcu_dynticks);
+
+	/* Page faults can happen in NMI handlers, so check... */
+	if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+		return;
+
 	oldval = rdtp->dynticks_nesting;
 	rdtp->dynticks_nesting++;
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-- 
cgit 


From 9aadde91b3c035413c806619beb3e3ef6e697953 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 22 Sep 2017 17:22:19 -0400
Subject: extable: Consolidate *kernel_text_address() functions

The functionality between kernel_text_address() and _kernel_text_address()
is the same except that _kernel_text_address() does a little more (that
function needs a rename, but that can be done another time). Instead of
having duplicate code in both, simply have _kernel_text_address() calls
kernel_text_address() instead.

This is marked for stable because there's an RCU bug that can happen if
one of these functions gets called while RCU is not watching. That fix
depends on this fix to keep from having to write the fix twice.

Cc: stable@vger.kernel.org
Fixes: 0be964be0 ("module: Sanitize RCU usage and locking")
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/extable.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/kernel/extable.c b/kernel/extable.c
index 38c2412401a1..a7024a494faf 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -102,15 +102,7 @@ int core_kernel_data(unsigned long addr)
 
 int __kernel_text_address(unsigned long addr)
 {
-	if (core_kernel_text(addr))
-		return 1;
-	if (is_module_text_address(addr))
-		return 1;
-	if (is_ftrace_trampoline(addr))
-		return 1;
-	if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
-		return 1;
-	if (is_bpf_text_address(addr))
+	if (kernel_text_address(addr))
 		return 1;
 	/*
 	 * There might be init symbols in saved stacktraces.
-- 
cgit 


From e8cac8b1d10589be45671a5ade0926a639b543b7 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 22 Sep 2017 17:36:32 -0400
Subject: extable: Enable RCU if it is not watching in kernel_text_address()

If kernel_text_address() is called when RCU is not watching, it can cause an
RCU bug because is_module_text_address(), the is_kprobe_*insn_slot()
and is_bpf_text_address() functions require the use of RCU.

Only enable RCU if it is not currently watching before it calls
is_module_text_address(). The use of rcu_nmi_enter() is used to enable RCU
because kernel_text_address() can happen pretty much anywhere (like an NMI),
and even from within an NMI. It is called via save_stack_trace() that can be
called by any WARN() or tracing function, which can happen while RCU is not
watching (for example, going to or coming from idle, or during CPU take down
or bring up).

Cc: stable@vger.kernel.org
Fixes: 0be964be0 ("module: Sanitize RCU usage and locking")
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/extable.c | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/kernel/extable.c b/kernel/extable.c
index a7024a494faf..9aa1cc41ecf7 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -119,17 +119,42 @@ int __kernel_text_address(unsigned long addr)
 
 int kernel_text_address(unsigned long addr)
 {
+	bool no_rcu;
+	int ret = 1;
+
 	if (core_kernel_text(addr))
 		return 1;
+
+	/*
+	 * If a stack dump happens while RCU is not watching, then
+	 * RCU needs to be notified that it requires to start
+	 * watching again. This can happen either by tracing that
+	 * triggers a stack trace, or a WARN() that happens during
+	 * coming back from idle, or cpu on or offlining.
+	 *
+	 * is_module_text_address() as well as the kprobe slots
+	 * and is_bpf_text_address() require RCU to be watching.
+	 */
+	no_rcu = !rcu_is_watching();
+
+	/* Treat this like an NMI as it can happen anywhere */
+	if (no_rcu)
+		rcu_nmi_enter();
+
 	if (is_module_text_address(addr))
-		return 1;
+		goto out;
 	if (is_ftrace_trampoline(addr))
-		return 1;
+		goto out;
 	if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
-		return 1;
+		goto out;
 	if (is_bpf_text_address(addr))
-		return 1;
-	return 0;
+		goto out;
+	ret = 0;
+out:
+	if (no_rcu)
+		rcu_nmi_exit();
+
+	return ret;
 }
 
 /*
-- 
cgit 


From 15516c89acce948debc4c598e03c3fee53045797 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 21 Sep 2017 13:00:21 -0400
Subject: tracing: Remove RCU work arounds from stack tracer

Currently the stack tracer calls rcu_irq_enter() to make sure RCU
is watching when it records a stack trace. But if the stack tracer
is triggered while tracing inside of a rcu_irq_enter(), calling
rcu_irq_enter() unconditionally can be problematic.

The reason for having rcu_irq_enter() in the first place has been
fixed from within the saving of the stack trace code, and there's no
reason for doing it in the stack tracer itself. Just remove it.

Cc: stable@vger.kernel.org
Fixes: 0be964be0 ("module: Sanitize RCU usage and locking")
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Suggested-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_stack.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index a4df67cbc711..49cb41412eec 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -96,23 +96,9 @@ check_stack(unsigned long ip, unsigned long *stack)
 	if (in_nmi())
 		return;
 
-	/*
-	 * There's a slight chance that we are tracing inside the
-	 * RCU infrastructure, and rcu_irq_enter() will not work
-	 * as expected.
-	 */
-	if (unlikely(rcu_irq_enter_disabled()))
-		return;
-
 	local_irq_save(flags);
 	arch_spin_lock(&stack_trace_max_lock);
 
-	/*
-	 * RCU may not be watching, make it see us.
-	 * The stack trace code uses rcu_sched.
-	 */
-	rcu_irq_enter();
-
 	/* In case another CPU set the tracer_frame on us */
 	if (unlikely(!frame_size))
 		this_size -= tracer_frame;
@@ -205,7 +191,6 @@ check_stack(unsigned long ip, unsigned long *stack)
 	}
 
  out:
-	rcu_irq_exit();
 	arch_spin_unlock(&stack_trace_max_lock);
 	local_irq_restore(flags);
 }
-- 
cgit 


From ab5348c9c23cd253f5902980d2d8fe067dc24c82 Mon Sep 17 00:00:00 2001
From: Stefan Berger <stefanb@linux.vnet.ibm.com>
Date: Wed, 26 Jul 2017 22:27:05 -0400
Subject: security: fix description of values returned by
 cap_inode_need_killpriv

cap_inode_need_killpriv returns 1 if security.capability exists and
has a value and inode_killpriv() is required, 0 otherwise. Fix the
description of the return value to reflect this.

Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/commoncap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/security/commoncap.c b/security/commoncap.c
index 7abebd782d5e..123426900d25 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -300,10 +300,10 @@ static inline void bprm_clear_caps(struct linux_binprm *bprm)
  *
  * Determine if an inode having a change applied that's marked ATTR_KILL_PRIV
  * affects the security markings on that inode, and if it is, should
- * inode_killpriv() be invoked or the change rejected?
+ * inode_killpriv() be invoked or the change rejected.
  *
- * Returns 0 if granted; +ve if granted, but inode_killpriv() is required; and
- * -ve to deny the change.
+ * Returns 1 if security.capability has a value, meaning inode_killpriv()
+ * is required, 0 otherwise, meaning inode_killpriv() is not required.
  */
 int cap_inode_need_killpriv(struct dentry *dentry)
 {
-- 
cgit 


From c2a9c4bf034ab7415ec272c4c86f1c0b796f80e6 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Thu, 17 Aug 2017 23:04:21 +0530
Subject: tpm: vtpm: constify vio_device_id

vio_device_id are not supposed to change at runtime. All functions
working with vio_device_id provided by <asm/vio.h> work with
const vio_device_id. So mark the non-const structs as const.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 drivers/char/tpm/tpm_ibmvtpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index f01d083eced2..d2ce46bd2471 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -32,7 +32,7 @@
 
 static const char tpm_ibmvtpm_driver_name[] = "tpm_ibmvtpm";
 
-static struct vio_device_id tpm_ibmvtpm_device_table[] = {
+static const struct vio_device_id tpm_ibmvtpm_device_table[] = {
 	{ "IBM,vtpm", "IBM,vtpm"},
 	{ "", "" }
 };
-- 
cgit 


From e1ec650f9ae7d1e66f77bf14c86e4e9dc629a4d5 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Thu, 6 Jul 2017 23:18:39 +0530
Subject: tpm: tpm_crb: constify acpi_device_id.

acpi_device_id are not supposed to change at runtime. All functions
working with acpi_device_id provided by <acpi/acpi_bus.h> work with
const acpi_device_id. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   4198	    608	      0	   4806	   12c6	drivers/char/tpm/tpm_crb.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   4262	    520	      0	   4782	   12ae	drivers/char/tpm/tpm_crb.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Tested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 drivers/char/tpm/tpm_crb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index a4ac63a21d8a..8f0a98dea327 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -665,7 +665,7 @@ static const struct dev_pm_ops crb_pm = {
 	SET_RUNTIME_PM_OPS(crb_pm_runtime_suspend, crb_pm_runtime_resume, NULL)
 };
 
-static struct acpi_device_id crb_device_ids[] = {
+static const struct acpi_device_id crb_device_ids[] = {
 	{"MSFT0101", 0},
 	{"", 0},
 };
-- 
cgit 


From 5d0e4d78149b9745ea34848e950e734f7db3b95f Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Tue, 27 Jun 2017 12:27:23 +0200
Subject: Documentation: tpm: add powered-while-suspended binding documentation

Add a new powered-while-suspended property to control the behavior of the
TPM suspend/resume.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Sonny Rao <sonnyrao@chromium.org>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt b/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt
index 8cb638b7e89c..85c8216fc335 100644
--- a/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt
+++ b/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt
@@ -8,6 +8,12 @@ Required properties:
                    the firmware event log
 - linux,sml-size : size of the memory allocated for the firmware event log
 
+Optional properties:
+
+- powered-while-suspended: present when the TPM is left powered on between
+                           suspend and resume (makes the suspend/resume
+                           callbacks do nothing).
+
 Example (for OpenPower Systems with Nuvoton TPM 2.0 on I2C)
 ----------------------------------------------------------
 
-- 
cgit 


From 9f3fc7bcddcb51234e23494531f93ab60475e1c3 Mon Sep 17 00:00:00 2001
From: Hamza Attak <hamza@hpe.com>
Date: Mon, 14 Aug 2017 19:09:16 +0100
Subject: tpm: replace msleep() with  usleep_range() in TPM 1.2/2.0 generic
 drivers

The patch simply replaces all msleep function calls with usleep_range calls
in the generic drivers.

Tested with an Infineon TPM 1.2, using the generic tpm-tis module, for a
thousand PCR extends, we see results going from 1m57s unpatched to 40s
with the new patch. We obtain similar results when using the original and
patched tpm_infineon driver, which is also part of the patch.
Similarly with a STM TPM 2.0, using the CRB driver, it takes about 20ms per
extend unpatched and around 7ms with the new patch.

Note that the PCR consistency is untouched with this patch, each TPM has
been tested with 10 million extends and the aggregated PCR value is
continuously verified to be correct.

As an extension of this work, this could potentially and easily be applied
to other vendor's drivers. Still, these changes are not included in the
proposed patch as they are untested.

Signed-off-by: Hamza Attak <hamza@hpe.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Tested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 drivers/char/tpm/tpm-interface.c | 10 +++++-----
 drivers/char/tpm/tpm.h           |  9 ++++++++-
 drivers/char/tpm/tpm2-cmd.c      |  2 +-
 drivers/char/tpm/tpm_infineon.c  |  6 +++---
 drivers/char/tpm/tpm_tis_core.c  |  8 ++++----
 5 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index fe597e6c55c4..1d6729be4cd6 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -455,7 +455,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space,
 			goto out;
 		}
 
-		msleep(TPM_TIMEOUT);	/* CHECK */
+		tpm_msleep(TPM_TIMEOUT);
 		rmb();
 	} while (time_before(jiffies, stop));
 
@@ -970,7 +970,7 @@ int tpm_do_selftest(struct tpm_chip *chip)
 			dev_info(
 			    &chip->dev, HW_ERR
 			    "TPM command timed out during continue self test");
-			msleep(delay_msec);
+			tpm_msleep(delay_msec);
 			continue;
 		}
 
@@ -985,7 +985,7 @@ int tpm_do_selftest(struct tpm_chip *chip)
 		}
 		if (rc != TPM_WARN_DOING_SELFTEST)
 			return rc;
-		msleep(delay_msec);
+		tpm_msleep(delay_msec);
 	} while (--loops > 0);
 
 	return rc;
@@ -1085,7 +1085,7 @@ again:
 		}
 	} else {
 		do {
-			msleep(TPM_TIMEOUT);
+			tpm_msleep(TPM_TIMEOUT);
 			status = chip->ops->status(chip);
 			if ((status & mask) == mask)
 				return 0;
@@ -1150,7 +1150,7 @@ int tpm_pm_suspend(struct device *dev)
 		 */
 		if (rc != TPM_WARN_RETRY)
 			break;
-		msleep(TPM_TIMEOUT_RETRY);
+		tpm_msleep(TPM_TIMEOUT_RETRY);
 	}
 
 	if (rc)
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 04fbff2edbf3..2d5466a72e40 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -50,7 +50,8 @@ enum tpm_const {
 
 enum tpm_timeout {
 	TPM_TIMEOUT = 5,	/* msecs */
-	TPM_TIMEOUT_RETRY = 100 /* msecs */
+	TPM_TIMEOUT_RETRY = 100, /* msecs */
+	TPM_TIMEOUT_RANGE_US = 300	/* usecs */
 };
 
 /* TPM addresses */
@@ -527,6 +528,12 @@ int tpm_pm_resume(struct device *dev);
 int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout,
 		      wait_queue_head_t *queue, bool check_cancel);
 
+static inline void tpm_msleep(unsigned int delay_msec)
+{
+	usleep_range(delay_msec * 1000,
+		     (delay_msec * 1000) + TPM_TIMEOUT_RANGE_US);
+};
+
 struct tpm_chip *tpm_chip_find_get(int chip_num);
 __must_check int tpm_try_get_ops(struct tpm_chip *chip);
 void tpm_put_ops(struct tpm_chip *chip);
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index f7f34b2aa981..e1a41b788f08 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -899,7 +899,7 @@ static int tpm2_do_selftest(struct tpm_chip *chip)
 		if (rc != TPM2_RC_TESTING)
 			break;
 
-		msleep(delay_msec);
+		tpm_msleep(delay_msec);
 	}
 
 	return rc;
diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c
index 3b1b9f9322d5..d8f10047fbba 100644
--- a/drivers/char/tpm/tpm_infineon.c
+++ b/drivers/char/tpm/tpm_infineon.c
@@ -191,7 +191,7 @@ static int wait(struct tpm_chip *chip, int wait_for_bit)
 		/* check the status-register if wait_for_bit is set */
 		if (status & 1 << wait_for_bit)
 			break;
-		msleep(TPM_MSLEEP_TIME);
+		tpm_msleep(TPM_MSLEEP_TIME);
 	}
 	if (i == TPM_MAX_TRIES) {	/* timeout occurs */
 		if (wait_for_bit == STAT_XFE)
@@ -226,7 +226,7 @@ static void tpm_wtx(struct tpm_chip *chip)
 	wait_and_send(chip, TPM_CTRL_WTX);
 	wait_and_send(chip, 0x00);
 	wait_and_send(chip, 0x00);
-	msleep(TPM_WTX_MSLEEP_TIME);
+	tpm_msleep(TPM_WTX_MSLEEP_TIME);
 }
 
 static void tpm_wtx_abort(struct tpm_chip *chip)
@@ -237,7 +237,7 @@ static void tpm_wtx_abort(struct tpm_chip *chip)
 	wait_and_send(chip, 0x00);
 	wait_and_send(chip, 0x00);
 	number_of_wtx = 0;
-	msleep(TPM_WTX_MSLEEP_TIME);
+	tpm_msleep(TPM_WTX_MSLEEP_TIME);
 }
 
 static int tpm_inf_recv(struct tpm_chip *chip, u8 * buf, size_t count)
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index b617b2eeb080..63bc6c3b949e 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -51,7 +51,7 @@ static int wait_startup(struct tpm_chip *chip, int l)
 
 		if (access & TPM_ACCESS_VALID)
 			return 0;
-		msleep(TPM_TIMEOUT);
+		tpm_msleep(TPM_TIMEOUT);
 	} while (time_before(jiffies, stop));
 	return -1;
 }
@@ -117,7 +117,7 @@ again:
 		do {
 			if (check_locality(chip, l))
 				return l;
-			msleep(TPM_TIMEOUT);
+			tpm_msleep(TPM_TIMEOUT);
 		} while (time_before(jiffies, stop));
 	}
 	return -1;
@@ -164,7 +164,7 @@ static int get_burstcount(struct tpm_chip *chip)
 		burstcnt = (value >> 8) & 0xFFFF;
 		if (burstcnt)
 			return burstcnt;
-		msleep(TPM_TIMEOUT);
+		tpm_msleep(TPM_TIMEOUT);
 	} while (time_before(jiffies, stop));
 	return -EBUSY;
 }
@@ -396,7 +396,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
 	priv->irq = irq;
 	chip->flags |= TPM_CHIP_FLAG_IRQ;
 	if (!priv->irq_tested)
-		msleep(1);
+		tpm_msleep(1);
 	if (!priv->irq_tested)
 		disable_interrupts(chip);
 	priv->irq_tested = true;
-- 
cgit 


From fb154e0e0a95249459df054241a9e8f4cca56062 Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Fri, 24 Feb 2017 20:35:16 +0100
Subject: tpm: ibmvtpm: simplify crq initialization and document crq format

The crq is passed in registers and is the same on BE and LE hosts.
However, current implementation allocates a structure on-stack to
represent the crq, initializes the members swapping them to BE, and
loads the structure swapping it from BE. This is pointless and causes
GCC warnings about ununitialized members. Get rid of the structure and
the warnings.

Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 drivers/char/tpm/tpm_ibmvtpm.c | 96 ++++++++++++++++++++++++++----------------
 1 file changed, 60 insertions(+), 36 deletions(-)

diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index d2ce46bd2471..25f6e2665385 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -39,19 +39,63 @@ static const struct vio_device_id tpm_ibmvtpm_device_table[] = {
 MODULE_DEVICE_TABLE(vio, tpm_ibmvtpm_device_table);
 
 /**
+ *
+ * ibmvtpm_send_crq_word - Send a CRQ request
+ * @vdev:	vio device struct
+ * @w1:		pre-constructed first word of tpm crq (second word is reserved)
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvtpm_send_crq_word(struct vio_dev *vdev, u64 w1)
+{
+	return plpar_hcall_norets(H_SEND_CRQ, vdev->unit_address, w1, 0);
+}
+
+/**
+ *
  * ibmvtpm_send_crq - Send a CRQ request
  *
  * @vdev:	vio device struct
- * @w1:		first word
- * @w2:		second word
+ * @valid:	Valid field
+ * @msg:	Type field
+ * @len:	Length field
+ * @data:	Data field
+ *
+ * The ibmvtpm crq is defined as follows:
+ *
+ * Byte  |   0   |   1   |   2   |   3   |   4   |   5   |   6   |   7
+ * -----------------------------------------------------------------------
+ * Word0 | Valid | Type  |     Length    |              Data
+ * -----------------------------------------------------------------------
+ * Word1 |                Reserved
+ * -----------------------------------------------------------------------
+ *
+ * Which matches the following structure (on bigendian host):
+ *
+ * struct ibmvtpm_crq {
+ *         u8 valid;
+ *         u8 msg;
+ *         __be16 len;
+ *         __be32 data;
+ *         __be64 reserved;
+ * } __attribute__((packed, aligned(8)));
+ *
+ * However, the value is passed in a register so just compute the numeric value
+ * to load into the register avoiding byteswap altogether. Endian only affects
+ * memory loads and stores - registers are internally represented the same.
  *
  * Return:
- *	0 -Sucess
+ *	0 (H_SUCCESS) - Success
  *	Non-zero - Failure
  */
-static int ibmvtpm_send_crq(struct vio_dev *vdev, u64 w1, u64 w2)
+static int ibmvtpm_send_crq(struct vio_dev *vdev,
+		u8 valid, u8 msg, u16 len, u32 data)
 {
-	return plpar_hcall_norets(H_SEND_CRQ, vdev->unit_address, w1, w2);
+	u64 w1 = ((u64)valid << 56) | ((u64)msg << 48) | ((u64)len << 32) |
+		(u64)data;
+	return ibmvtpm_send_crq_word(vdev, w1);
 }
 
 /**
@@ -109,8 +153,6 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
 {
 	struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev);
-	struct ibmvtpm_crq crq;
-	__be64 *word = (__be64 *)&crq;
 	int rc, sig;
 
 	if (!ibmvtpm->rtce_buf) {
@@ -137,10 +179,6 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
 	spin_lock(&ibmvtpm->rtce_lock);
 	ibmvtpm->res_len = 0;
 	memcpy((void *)ibmvtpm->rtce_buf, (void *)buf, count);
-	crq.valid = (u8)IBMVTPM_VALID_CMD;
-	crq.msg = (u8)VTPM_TPM_COMMAND;
-	crq.len = cpu_to_be16(count);
-	crq.data = cpu_to_be32(ibmvtpm->rtce_dma_handle);
 
 	/*
 	 * set the processing flag before the Hcall, since we may get the
@@ -148,8 +186,9 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
 	 */
 	ibmvtpm->tpm_processing_cmd = true;
 
-	rc = ibmvtpm_send_crq(ibmvtpm->vdev, be64_to_cpu(word[0]),
-			      be64_to_cpu(word[1]));
+	rc = ibmvtpm_send_crq(ibmvtpm->vdev,
+			IBMVTPM_VALID_CMD, VTPM_TPM_COMMAND,
+			count, ibmvtpm->rtce_dma_handle);
 	if (rc != H_SUCCESS) {
 		dev_err(ibmvtpm->dev, "tpm_ibmvtpm_send failed rc=%d\n", rc);
 		rc = 0;
@@ -182,15 +221,10 @@ static u8 tpm_ibmvtpm_status(struct tpm_chip *chip)
  */
 static int ibmvtpm_crq_get_rtce_size(struct ibmvtpm_dev *ibmvtpm)
 {
-	struct ibmvtpm_crq crq;
-	u64 *buf = (u64 *) &crq;
 	int rc;
 
-	crq.valid = (u8)IBMVTPM_VALID_CMD;
-	crq.msg = (u8)VTPM_GET_RTCE_BUFFER_SIZE;
-
-	rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]),
-			      cpu_to_be64(buf[1]));
+	rc = ibmvtpm_send_crq(ibmvtpm->vdev,
+			IBMVTPM_VALID_CMD, VTPM_GET_RTCE_BUFFER_SIZE, 0, 0);
 	if (rc != H_SUCCESS)
 		dev_err(ibmvtpm->dev,
 			"ibmvtpm_crq_get_rtce_size failed rc=%d\n", rc);
@@ -210,15 +244,10 @@ static int ibmvtpm_crq_get_rtce_size(struct ibmvtpm_dev *ibmvtpm)
  */
 static int ibmvtpm_crq_get_version(struct ibmvtpm_dev *ibmvtpm)
 {
-	struct ibmvtpm_crq crq;
-	u64 *buf = (u64 *) &crq;
 	int rc;
 
-	crq.valid = (u8)IBMVTPM_VALID_CMD;
-	crq.msg = (u8)VTPM_GET_VERSION;
-
-	rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]),
-			      cpu_to_be64(buf[1]));
+	rc = ibmvtpm_send_crq(ibmvtpm->vdev,
+			IBMVTPM_VALID_CMD, VTPM_GET_VERSION, 0, 0);
 	if (rc != H_SUCCESS)
 		dev_err(ibmvtpm->dev,
 			"ibmvtpm_crq_get_version failed rc=%d\n", rc);
@@ -238,7 +267,7 @@ static int ibmvtpm_crq_send_init_complete(struct ibmvtpm_dev *ibmvtpm)
 {
 	int rc;
 
-	rc = ibmvtpm_send_crq(ibmvtpm->vdev, INIT_CRQ_COMP_CMD, 0);
+	rc = ibmvtpm_send_crq_word(ibmvtpm->vdev, INIT_CRQ_COMP_CMD);
 	if (rc != H_SUCCESS)
 		dev_err(ibmvtpm->dev,
 			"ibmvtpm_crq_send_init_complete failed rc=%d\n", rc);
@@ -258,7 +287,7 @@ static int ibmvtpm_crq_send_init(struct ibmvtpm_dev *ibmvtpm)
 {
 	int rc;
 
-	rc = ibmvtpm_send_crq(ibmvtpm->vdev, INIT_CRQ_CMD, 0);
+	rc = ibmvtpm_send_crq_word(ibmvtpm->vdev, INIT_CRQ_CMD);
 	if (rc != H_SUCCESS)
 		dev_err(ibmvtpm->dev,
 			"ibmvtpm_crq_send_init failed rc=%d\n", rc);
@@ -340,15 +369,10 @@ static int tpm_ibmvtpm_suspend(struct device *dev)
 {
 	struct tpm_chip *chip = dev_get_drvdata(dev);
 	struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev);
-	struct ibmvtpm_crq crq;
-	u64 *buf = (u64 *) &crq;
 	int rc = 0;
 
-	crq.valid = (u8)IBMVTPM_VALID_CMD;
-	crq.msg = (u8)VTPM_PREPARE_TO_SUSPEND;
-
-	rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]),
-			      cpu_to_be64(buf[1]));
+	rc = ibmvtpm_send_crq(ibmvtpm->vdev,
+			IBMVTPM_VALID_CMD, VTPM_PREPARE_TO_SUSPEND, 0, 0);
 	if (rc != H_SUCCESS)
 		dev_err(ibmvtpm->dev,
 			"tpm_ibmvtpm_suspend failed rc=%d\n", rc);
-- 
cgit 


From 656f083116a4799d8c0194976b8a2d66bf306538 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:44 +0200
Subject: x86/fpu: Rename copyin_to_xsaves()/copyout_from_xsaves() to
 copy_user_to_xstate()/copy_xstate_to_user()

The 'copyin/copyout' nomenclature needlessly departs from what the modern FPU code
uses, which is:

 copy_fpregs_to_fpstate()
 copy_fpstate_to_sigframe()
 copy_fregs_to_user()
 copy_fxregs_to_kernel()
 copy_fxregs_to_user()
 copy_kernel_to_fpregs()
 copy_kernel_to_fregs()
 copy_kernel_to_fxregs()
 copy_kernel_to_xregs()
 copy_user_to_fregs()
 copy_user_to_fxregs()
 copy_user_to_xregs()
 copy_xregs_to_kernel()
 copy_xregs_to_user()

I.e. according to this pattern, the following rename should be done:

  copyin_to_xsaves()    -> copy_user_to_xstate()
  copyout_from_xsaves() -> copy_xstate_to_user()

or, if we want to be pedantic, denote that that the user-space format is ptrace:

  copyin_to_xsaves()    -> copy_user_ptrace_to_xstate()
  copyout_from_xsaves() -> copy_xstate_to_user_ptrace()

But I'd suggest the shorter, non-pedantic name.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-2-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h | 4 ++--
 arch/x86/kernel/fpu/regset.c      | 4 ++--
 arch/x86/kernel/fpu/signal.c      | 2 +-
 arch/x86/kernel/fpu/xstate.c      | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 1b2799e0699a..a1baa17e9748 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,8 +48,8 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
+int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 			void __user *ubuf, struct xregs_state *xsave);
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
+int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index b188b16841e3..165d0545c924 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -92,7 +92,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 	fpu__activate_fpstate_read(fpu);
 
 	if (using_compacted_format()) {
-		ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave);
+		ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave);
 	} else {
 		fpstate_sanitize_xstate(fpu);
 		/*
@@ -132,7 +132,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 	fpu__activate_fpstate_write(fpu);
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		ret = copyin_to_xsaves(kbuf, ubuf, xsave);
+		ret = copy_user_to_xstate(kbuf, ubuf, xsave);
 	else
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
 
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 83c23c230b4c..b1fe9a1fc4e0 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -324,7 +324,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		fpu__drop(fpu);
 
 		if (using_compacted_format()) {
-			err = copyin_to_xsaves(NULL, buf_fx,
+			err = copy_user_to_xstate(NULL, buf_fx,
 					       &fpu->state.xsave);
 		} else {
 			err = __copy_from_user(&fpu->state.xsave,
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c24ac1efb12d..e7bb41723eaa 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -951,7 +951,7 @@ static inline int xstate_copyout(unsigned int pos, unsigned int count,
  * zero. This is called from xstateregs_get() and there we check the CPU
  * has XSAVES.
  */
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
+int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 			void __user *ubuf, struct xregs_state *xsave)
 {
 	unsigned int offset, size;
@@ -1023,7 +1023,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.
  */
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
+int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave)
 {
 	unsigned int offset, size;
-- 
cgit 


From f0d4f30a7fd299587840a028655285a87f334904 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:45 +0200
Subject: x86/fpu: Split copy_xstate_to_user() into copy_xstate_to_kernel() &
 copy_xstate_to_user()

copy_xstate_to_user() is a weird API - in part due to a bad API inherited
from the regset APIs.

But don't propagate that bad API choice into the FPU code - so as a first
step split the API into kernel and user buffer handling routines.

(Also split the xstate_copyout() internal helper.)

The split API is a dumb duplication that should be obviously correct, the
real splitting will be done in the next patch.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-3-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |   4 +-
 arch/x86/kernel/fpu/regset.c      |   5 +-
 arch/x86/kernel/fpu/xstate.c      | 110 +++++++++++++++++++++++++++++++++++---
 3 files changed, 109 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index a1baa17e9748..92dc8ca14124 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,8 +48,8 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
-			void __user *ubuf, struct xregs_state *xsave);
+int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave);
+int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave);
 int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 165d0545c924..b6d12d66d04b 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -92,7 +92,10 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 	fpu__activate_fpstate_read(fpu);
 
 	if (using_compacted_format()) {
-		ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave);
+		if (kbuf)
+			ret = copy_xstate_to_kernel(pos, count, kbuf, ubuf, xsave);
+		else
+			ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave);
 	} else {
 		fpstate_sanitize_xstate(fpu);
 		/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index e7bb41723eaa..38561539cb99 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -924,10 +924,106 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  * This is similar to user_regset_copyout(), but will not add offset to
  * the source data pointer or increment pos, count, kbuf, and ubuf.
  */
-static inline int xstate_copyout(unsigned int pos, unsigned int count,
-				 void *kbuf, void __user *ubuf,
-				 const void *data, const int start_pos,
-				 const int end_pos)
+static inline int
+__copy_xstate_to_kernel(unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf,
+			const void *data, const int start_pos,
+			const int end_pos)
+{
+	if ((count == 0) || (pos < start_pos))
+		return 0;
+
+	if (end_pos < 0 || pos < end_pos) {
+		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+
+		if (kbuf) {
+			memcpy(kbuf + pos, data, copy);
+		} else {
+			if (__copy_to_user(ubuf + pos, data, copy))
+				return -EFAULT;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Convert from kernel XSAVES compacted format to standard format and copy
+ * to a kernel-space ptrace buffer.
+ *
+ * It supports partial copy but pos always starts from zero. This is called
+ * from xstateregs_get() and there we check the CPU has XSAVES.
+ */
+int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf,
+			void __user *ubuf, struct xregs_state *xsave)
+{
+	unsigned int offset, size;
+	int ret, i;
+	struct xstate_header header;
+
+	/*
+	 * Currently copy_regset_to_user() starts from pos 0:
+	 */
+	if (unlikely(pos != 0))
+		return -EFAULT;
+
+	/*
+	 * The destination is a ptrace buffer; we put in only user xstates:
+	 */
+	memset(&header, 0, sizeof(header));
+	header.xfeatures = xsave->header.xfeatures;
+	header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+
+	/*
+	 * Copy xregs_state->header:
+	 */
+	offset = offsetof(struct xregs_state, header);
+	size = sizeof(header);
+
+	ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, &header, 0, count);
+
+	if (ret)
+		return ret;
+
+	for (i = 0; i < XFEATURE_MAX; i++) {
+		/*
+		 * Copy only in-use xstates:
+		 */
+		if ((header.xfeatures >> i) & 1) {
+			void *src = __raw_xsave_addr(xsave, 1 << i);
+
+			offset = xstate_offsets[i];
+			size = xstate_sizes[i];
+
+			ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, src, 0, count);
+
+			if (ret)
+				return ret;
+
+			if (offset + size >= count)
+				break;
+		}
+
+	}
+
+	/*
+	 * Fill xsave->i387.sw_reserved value for ptrace frame:
+	 */
+	offset = offsetof(struct fxregs_state, sw_reserved);
+	size = sizeof(xstate_fx_sw_bytes);
+
+	ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
+
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static inline int
+__copy_xstate_to_user(unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf,
+		      const void *data, const int start_pos,
+		      const int end_pos)
 {
 	if ((count == 0) || (pos < start_pos))
 		return 0;
@@ -977,7 +1073,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count);
+	ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, &header, 0, count);
 
 	if (ret)
 		return ret;
@@ -992,7 +1088,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count);
+			ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, src, 0, count);
 
 			if (ret)
 				return ret;
@@ -1009,7 +1105,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
+	ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
 
 	if (ret)
 		return ret;
-- 
cgit 


From 4d981cf2d96f29cdfa7d4972c8b377fe7baa9c4c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:46 +0200
Subject: x86/fpu: Remove 'ubuf' parameter from the copy_xstate_to_kernel()
 APIs

The 'ubuf' parameter is unused in the _kernel() side of the API, remove it.

This simplifies the code and makes it easier to think about.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-4-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/kernel/fpu/regset.c      |  2 +-
 arch/x86/kernel/fpu/xstate.c      | 21 ++++++---------------
 3 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 92dc8ca14124..c762574a245f 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,7 +48,7 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave);
+int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave);
 int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave);
 int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave);
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index b6d12d66d04b..34e74adf9d5d 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -93,7 +93,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 
 	if (using_compacted_format()) {
 		if (kbuf)
-			ret = copy_xstate_to_kernel(pos, count, kbuf, ubuf, xsave);
+			ret = copy_xstate_to_kernel(pos, count, kbuf, xsave);
 		else
 			ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave);
 	} else {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 38561539cb99..71d3bda2b898 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -926,7 +926,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  */
 static inline int
 __copy_xstate_to_kernel(unsigned int pos, unsigned int count,
-			void *kbuf, void __user *ubuf,
+			void *kbuf,
 			const void *data, const int start_pos,
 			const int end_pos)
 {
@@ -936,12 +936,7 @@ __copy_xstate_to_kernel(unsigned int pos, unsigned int count,
 	if (end_pos < 0 || pos < end_pos) {
 		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
 
-		if (kbuf) {
-			memcpy(kbuf + pos, data, copy);
-		} else {
-			if (__copy_to_user(ubuf + pos, data, copy))
-				return -EFAULT;
-		}
+		memcpy(kbuf + pos, data, copy);
 	}
 	return 0;
 }
@@ -953,8 +948,7 @@ __copy_xstate_to_kernel(unsigned int pos, unsigned int count,
  * It supports partial copy but pos always starts from zero. This is called
  * from xstateregs_get() and there we check the CPU has XSAVES.
  */
-int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf,
-			void __user *ubuf, struct xregs_state *xsave)
+int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -979,8 +973,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, &header, 0, count);
-
+	ret = __copy_xstate_to_kernel(offset, size, kbuf, &header, 0, count);
 	if (ret)
 		return ret;
 
@@ -994,8 +987,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, src, 0, count);
-
+			ret = __copy_xstate_to_kernel(offset, size, kbuf, src, 0, count);
 			if (ret)
 				return ret;
 
@@ -1011,8 +1003,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
-
+	ret = __copy_xstate_to_kernel(offset, size, kbuf, xstate_fx_sw_bytes, 0, count);
 	if (ret)
 		return ret;
 
-- 
cgit 


From a69c158fb3e7a91220f55029bf222a4e678d16e9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:47 +0200
Subject: x86/fpu: Remove 'kbuf' parameter from the copy_xstate_to_user() APIs

The 'kbuf' parameter is unused in the _user() side of the API, remove it.

This simplifies the code and makes it easier to think about.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-5-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/kernel/fpu/regset.c      |  2 +-
 arch/x86/kernel/fpu/xstate.c      | 25 +++++++------------------
 3 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index c762574a245f..65bd68c30cd0 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -49,7 +49,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
 int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave);
-int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave);
+int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave);
 int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 34e74adf9d5d..fd6dbdd8fde6 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -95,7 +95,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 		if (kbuf)
 			ret = copy_xstate_to_kernel(pos, count, kbuf, xsave);
 		else
-			ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave);
+			ret = copy_xstate_to_user(pos, count, ubuf, xsave);
 	} else {
 		fpstate_sanitize_xstate(fpu);
 		/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 71d3bda2b898..2d8f3344875d 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1011,10 +1011,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru
 }
 
 static inline int
-__copy_xstate_to_user(unsigned int pos, unsigned int count,
-		      void *kbuf, void __user *ubuf,
-		      const void *data, const int start_pos,
-		      const int end_pos)
+__copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, const void *data, const int start_pos, const int end_pos)
 {
 	if ((count == 0) || (pos < start_pos))
 		return 0;
@@ -1022,12 +1019,8 @@ __copy_xstate_to_user(unsigned int pos, unsigned int count,
 	if (end_pos < 0 || pos < end_pos) {
 		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
 
-		if (kbuf) {
-			memcpy(kbuf + pos, data, copy);
-		} else {
-			if (__copy_to_user(ubuf + pos, data, copy))
-				return -EFAULT;
-		}
+		if (__copy_to_user(ubuf + pos, data, copy))
+			return -EFAULT;
 	}
 	return 0;
 }
@@ -1038,8 +1031,7 @@ __copy_xstate_to_user(unsigned int pos, unsigned int count,
  * zero. This is called from xstateregs_get() and there we check the CPU
  * has XSAVES.
  */
-int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
-			void __user *ubuf, struct xregs_state *xsave)
+int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -1064,8 +1056,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, &header, 0, count);
-
+	ret = __copy_xstate_to_user(offset, size, ubuf, &header, 0, count);
 	if (ret)
 		return ret;
 
@@ -1079,8 +1070,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, src, 0, count);
-
+			ret = __copy_xstate_to_user(offset, size, ubuf, src, 0, count);
 			if (ret)
 				return ret;
 
@@ -1096,8 +1086,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
-
+	ret = __copy_xstate_to_user(offset, size, ubuf, xstate_fx_sw_bytes, 0, count);
 	if (ret)
 		return ret;
 
-- 
cgit 


From d7eda6c99cc75f1c41d67abf988f37a10045a370 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:48 +0200
Subject: x86/fpu: Clean up parameter order in the copy_xstate_to_*() APIs

Parameter ordering is weird:

  int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave);
  int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave);

'pos' and 'count', which are attributes of the destination buffer, are listed before the destination
buffer itself ...

List them after the primary arguments instead.

This makes the code more similar to regular memcpy() variant APIs.

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-6-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  4 ++--
 arch/x86/kernel/fpu/regset.c      |  4 ++--
 arch/x86/kernel/fpu/xstate.c      | 25 ++++++++++++-------------
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 65bd68c30cd0..e4430b84939d 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,8 +48,8 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave);
-int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave);
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int pos, unsigned int count);
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int pos, unsigned int count);
 int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index fd6dbdd8fde6..ec1404194b65 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -93,9 +93,9 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 
 	if (using_compacted_format()) {
 		if (kbuf)
-			ret = copy_xstate_to_kernel(pos, count, kbuf, xsave);
+			ret = copy_xstate_to_kernel(kbuf, xsave, pos, count);
 		else
-			ret = copy_xstate_to_user(pos, count, ubuf, xsave);
+			ret = copy_xstate_to_user(ubuf, xsave, pos, count);
 	} else {
 		fpstate_sanitize_xstate(fpu);
 		/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 2d8f3344875d..0a299468510f 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -925,10 +925,9 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  * the source data pointer or increment pos, count, kbuf, and ubuf.
  */
 static inline int
-__copy_xstate_to_kernel(unsigned int pos, unsigned int count,
-			void *kbuf,
-			const void *data, const int start_pos,
-			const int end_pos)
+__copy_xstate_to_kernel(void *kbuf,
+			const void *data,
+			unsigned int pos, unsigned int count, const int start_pos, const int end_pos)
 {
 	if ((count == 0) || (pos < start_pos))
 		return 0;
@@ -948,7 +947,7 @@ __copy_xstate_to_kernel(unsigned int pos, unsigned int count,
  * It supports partial copy but pos always starts from zero. This is called
  * from xstateregs_get() and there we check the CPU has XSAVES.
  */
-int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave)
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int pos, unsigned int count)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -973,7 +972,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_kernel(offset, size, kbuf, &header, 0, count);
+	ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, 0, count);
 	if (ret)
 		return ret;
 
@@ -987,7 +986,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_kernel(offset, size, kbuf, src, 0, count);
+			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, 0, count);
 			if (ret)
 				return ret;
 
@@ -1003,7 +1002,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_kernel(offset, size, kbuf, xstate_fx_sw_bytes, 0, count);
+	ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, 0, count);
 	if (ret)
 		return ret;
 
@@ -1011,7 +1010,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru
 }
 
 static inline int
-__copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, const void *data, const int start_pos, const int end_pos)
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, const int start_pos, const int end_pos)
 {
 	if ((count == 0) || (pos < start_pos))
 		return 0;
@@ -1031,7 +1030,7 @@ __copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, c
  * zero. This is called from xstateregs_get() and there we check the CPU
  * has XSAVES.
  */
-int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave)
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int pos, unsigned int count)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -1056,7 +1055,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_user(offset, size, ubuf, &header, 0, count);
+	ret = __copy_xstate_to_user(ubuf, &header, offset, size, 0, count);
 	if (ret)
 		return ret;
 
@@ -1070,7 +1069,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_user(offset, size, ubuf, src, 0, count);
+			ret = __copy_xstate_to_user(ubuf, src, offset, size, 0, count);
 			if (ret)
 				return ret;
 
@@ -1086,7 +1085,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf,
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_user(offset, size, ubuf, xstate_fx_sw_bytes, 0, count);
+	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, 0, count);
 	if (ret)
 		return ret;
 
-- 
cgit 


From becb2bb72ff906cc0d2bac3ee9574f694364823b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:49 +0200
Subject: x86/fpu: Clean up the parameter definitions of copy_xstate_to_*()

Remove pointless 'const' of non-pointer input parameter.

Remove unnecessary parenthesis that shows uncertainty about arithmetic operator precedence.

Clarify copy_xstate_to_user() description.

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-7-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 0a299468510f..9647e7256179 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -927,13 +927,13 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 static inline int
 __copy_xstate_to_kernel(void *kbuf,
 			const void *data,
-			unsigned int pos, unsigned int count, const int start_pos, const int end_pos)
+			unsigned int pos, unsigned int count, int start_pos, int end_pos)
 {
 	if ((count == 0) || (pos < start_pos))
 		return 0;
 
 	if (end_pos < 0 || pos < end_pos) {
-		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+		unsigned int copy = end_pos < 0 ? count : min(count, end_pos - pos);
 
 		memcpy(kbuf + pos, data, copy);
 	}
@@ -1010,13 +1010,13 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 }
 
 static inline int
-__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, const int start_pos, const int end_pos)
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, int start_pos, int end_pos)
 {
 	if ((count == 0) || (pos < start_pos))
 		return 0;
 
 	if (end_pos < 0 || pos < end_pos) {
-		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+		unsigned int copy = end_pos < 0 ? count : min(count, end_pos - pos);
 
 		if (__copy_to_user(ubuf + pos, data, copy))
 			return -EFAULT;
@@ -1026,7 +1026,7 @@ __copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, uns
 
 /*
  * Convert from kernel XSAVES compacted format to standard format and copy
- * to a ptrace buffer. It supports partial copy but pos always starts from
+ * to a user-space buffer. It supports partial copy but pos always starts from
  * zero. This is called from xstateregs_get() and there we check the CPU
  * has XSAVES.
  */
-- 
cgit 


From 8a5b731889cbf004b406d988dc591c8a7aac773e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:50 +0200
Subject: x86/fpu: Remove the 'start_pos' parameter from the
 __copy_xstate_to_*() functions

'start_pos' is always 0, so remove it and remove the pointless check of 'pos < 0'
which can not ever be true as 'pos' is unsigned ...

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-8-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 9647e7256179..1f50fdaf4c5a 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -927,9 +927,9 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 static inline int
 __copy_xstate_to_kernel(void *kbuf,
 			const void *data,
-			unsigned int pos, unsigned int count, int start_pos, int end_pos)
+			unsigned int pos, unsigned int count, int end_pos)
 {
-	if ((count == 0) || (pos < start_pos))
+	if (!count)
 		return 0;
 
 	if (end_pos < 0 || pos < end_pos) {
@@ -972,7 +972,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, 0, count);
+	ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, count);
 	if (ret)
 		return ret;
 
@@ -986,7 +986,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, 0, count);
+			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, count);
 			if (ret)
 				return ret;
 
@@ -1002,7 +1002,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, 0, count);
+	ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, count);
 	if (ret)
 		return ret;
 
@@ -1010,9 +1010,9 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 }
 
 static inline int
-__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, int start_pos, int end_pos)
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, int end_pos)
 {
-	if ((count == 0) || (pos < start_pos))
+	if (!count)
 		return 0;
 
 	if (end_pos < 0 || pos < end_pos) {
@@ -1055,7 +1055,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_user(ubuf, &header, offset, size, 0, count);
+	ret = __copy_xstate_to_user(ubuf, &header, offset, size, count);
 	if (ret)
 		return ret;
 
@@ -1069,7 +1069,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_user(ubuf, src, offset, size, 0, count);
+			ret = __copy_xstate_to_user(ubuf, src, offset, size, count);
 			if (ret)
 				return ret;
 
@@ -1085,7 +1085,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, 0, count);
+	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, count);
 	if (ret)
 		return ret;
 
-- 
cgit 


From 56583c9a1400fe1935edd55b24b4fbbc779b59cb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:51 +0200
Subject: x86/fpu: Clarify parameter names in the copy_xstate_to_*() methods

Right now there's a confusing mixture of 'offset' and 'size' parameters:

 - __copy_xstate_to_*() input parameter 'end_pos' not not really an offset,
   but the full size of the copy to be performed.

 - input parameter 'count' to copy_xstate_to_*() shadows that of
   __copy_xstate_to_*()'s 'count' parameter name - but the roles
   are different: the first one is the total number of bytes to
   be copied, while the second one is a partial copy size.

To unconfuse all this, use a consistent set of parameter names:

 - 'size' is the partial copy size within a single xstate component
 - 'size_total' is the total copy requested
 - 'offset_start' is the requested starting offset.
 - 'offset' is the offset within an xstate component.

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-9-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  4 ++--
 arch/x86/kernel/fpu/xstate.c      | 44 +++++++++++++++++++--------------------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index e4430b84939d..fed6617a1079 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,8 +48,8 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int pos, unsigned int count);
-int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int pos, unsigned int count);
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 1f50fdaf4c5a..c13083579655 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -927,15 +927,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 static inline int
 __copy_xstate_to_kernel(void *kbuf,
 			const void *data,
-			unsigned int pos, unsigned int count, int end_pos)
+			unsigned int offset, unsigned int size, int size_total)
 {
-	if (!count)
+	if (!size)
 		return 0;
 
-	if (end_pos < 0 || pos < end_pos) {
-		unsigned int copy = end_pos < 0 ? count : min(count, end_pos - pos);
+	if (size_total < 0 || offset < size_total) {
+		unsigned int copy = size_total < 0 ? size : min(size, size_total - offset);
 
-		memcpy(kbuf + pos, data, copy);
+		memcpy(kbuf + offset, data, copy);
 	}
 	return 0;
 }
@@ -947,7 +947,7 @@ __copy_xstate_to_kernel(void *kbuf,
  * It supports partial copy but pos always starts from zero. This is called
  * from xstateregs_get() and there we check the CPU has XSAVES.
  */
-int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int pos, unsigned int count)
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -956,7 +956,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 	/*
 	 * Currently copy_regset_to_user() starts from pos 0:
 	 */
-	if (unlikely(pos != 0))
+	if (unlikely(offset_start != 0))
 		return -EFAULT;
 
 	/*
@@ -972,7 +972,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, count);
+	ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
 	if (ret)
 		return ret;
 
@@ -986,11 +986,11 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, count);
+			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
 			if (ret)
 				return ret;
 
-			if (offset + size >= count)
+			if (offset + size >= size_total)
 				break;
 		}
 
@@ -1002,7 +1002,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, count);
+	ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
 	if (ret)
 		return ret;
 
@@ -1010,15 +1010,15 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 }
 
 static inline int
-__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, int end_pos)
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, int size_total)
 {
-	if (!count)
+	if (!size)
 		return 0;
 
-	if (end_pos < 0 || pos < end_pos) {
-		unsigned int copy = end_pos < 0 ? count : min(count, end_pos - pos);
+	if (size_total < 0 || offset < size_total) {
+		unsigned int copy = size_total < 0 ? size : min(size, size_total - offset);
 
-		if (__copy_to_user(ubuf + pos, data, copy))
+		if (__copy_to_user(ubuf + offset, data, copy))
 			return -EFAULT;
 	}
 	return 0;
@@ -1030,7 +1030,7 @@ __copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, uns
  * zero. This is called from xstateregs_get() and there we check the CPU
  * has XSAVES.
  */
-int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int pos, unsigned int count)
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -1039,7 +1039,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 	/*
 	 * Currently copy_regset_to_user() starts from pos 0:
 	 */
-	if (unlikely(pos != 0))
+	if (unlikely(offset_start != 0))
 		return -EFAULT;
 
 	/*
@@ -1055,7 +1055,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_user(ubuf, &header, offset, size, count);
+	ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
 	if (ret)
 		return ret;
 
@@ -1069,11 +1069,11 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_user(ubuf, src, offset, size, count);
+			ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
 			if (ret)
 				return ret;
 
-			if (offset + size >= count)
+			if (offset + size >= size_total)
 				break;
 		}
 
@@ -1085,7 +1085,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, count);
+	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
 	if (ret)
 		return ret;
 
-- 
cgit 


From 6ff15f8db7eaf29ef5ead6afbec9b25485fe8703 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:52 +0200
Subject: x86/fpu: Change 'size_total' parameter to unsigned and standardize
 the size checks in copy_xstate_to_*()

'size_total' is derived from an unsigned input parameter - and then converted
to 'int' and checked for negative ranges:

	if (size_total < 0 || offset < size_total) {

This conversion and the checks are unnecessary obfuscation, reject overly
large requested copy sizes outright and simplify the underlying code.

Reported-by: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-10-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c13083579655..b18c5457065a 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -925,15 +925,11 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  * the source data pointer or increment pos, count, kbuf, and ubuf.
  */
 static inline int
-__copy_xstate_to_kernel(void *kbuf,
-			const void *data,
-			unsigned int offset, unsigned int size, int size_total)
+__copy_xstate_to_kernel(void *kbuf, const void *data,
+			unsigned int offset, unsigned int size, unsigned int size_total)
 {
-	if (!size)
-		return 0;
-
-	if (size_total < 0 || offset < size_total) {
-		unsigned int copy = size_total < 0 ? size : min(size, size_total - offset);
+	if (offset < size_total) {
+		unsigned int copy = min(size, size_total - offset);
 
 		memcpy(kbuf + offset, data, copy);
 	}
@@ -986,12 +982,13 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
+			/* The next component has to fit fully into the output buffer: */
+			if (offset + size > size_total)
+				break;
+
 			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
 			if (ret)
 				return ret;
-
-			if (offset + size >= size_total)
-				break;
 		}
 
 	}
@@ -1010,13 +1007,13 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 }
 
 static inline int
-__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, int size_total)
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
 {
 	if (!size)
 		return 0;
 
-	if (size_total < 0 || offset < size_total) {
-		unsigned int copy = size_total < 0 ? size : min(size, size_total - offset);
+	if (offset < size_total) {
+		unsigned int copy = min(size, size_total - offset);
 
 		if (__copy_to_user(ubuf + offset, data, copy))
 			return -EFAULT;
@@ -1069,12 +1066,13 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
+			/* The next component has to fit fully into the output buffer: */
+			if (offset + size > size_total)
+				break;
+
 			ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
 			if (ret)
 				return ret;
-
-			if (offset + size >= size_total)
-				break;
 		}
 
 	}
-- 
cgit 


From 8c0817f4a3188ac5485ce14f96f12a175800b881 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:53 +0200
Subject: x86/fpu: Simplify __copy_xstate_to_kernel() return values

__copy_xstate_to_kernel() can only return 0 (because kernel copies cannot fail),
simplify the code throughout.

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-11-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b18c5457065a..00c3b41c3cf1 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -924,7 +924,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  * This is similar to user_regset_copyout(), but will not add offset to
  * the source data pointer or increment pos, count, kbuf, and ubuf.
  */
-static inline int
+static inline void
 __copy_xstate_to_kernel(void *kbuf, const void *data,
 			unsigned int offset, unsigned int size, unsigned int size_total)
 {
@@ -933,7 +933,6 @@ __copy_xstate_to_kernel(void *kbuf, const void *data,
 
 		memcpy(kbuf + offset, data, copy);
 	}
-	return 0;
 }
 
 /*
@@ -946,8 +945,8 @@ __copy_xstate_to_kernel(void *kbuf, const void *data,
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
 {
 	unsigned int offset, size;
-	int ret, i;
 	struct xstate_header header;
+	int i;
 
 	/*
 	 * Currently copy_regset_to_user() starts from pos 0:
@@ -968,9 +967,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
-	if (ret)
-		return ret;
+	__copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
 
 	for (i = 0; i < XFEATURE_MAX; i++) {
 		/*
@@ -986,9 +983,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 			if (offset + size > size_total)
 				break;
 
-			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
-			if (ret)
-				return ret;
+			__copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
 		}
 
 	}
@@ -999,9 +994,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
-	if (ret)
-		return ret;
+	__copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
 
 	return 0;
 }
-- 
cgit 


From 79fecc2b7506f29fb91becc65e8788e5ae7eba9f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:54 +0200
Subject: x86/fpu: Split copy_user_to_xstate() into copy_kernel_to_xstate() &
 copy_user_to_xstate()

Similar to:

  x86/fpu: Split copy_xstate_to_user() into copy_xstate_to_kernel() & copy_xstate_to_user()

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-12-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  4 +--
 arch/x86/kernel/fpu/regset.c      | 10 ++++--
 arch/x86/kernel/fpu/xstate.c      | 66 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index fed6617a1079..79af79dbcab6 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -50,6 +50,6 @@ const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
-		     struct xregs_state *xsave);
+int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
+int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index ec1404194b65..cb45dd81d617 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -134,10 +134,14 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	fpu__activate_fpstate_write(fpu);
 
-	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		ret = copy_user_to_xstate(kbuf, ubuf, xsave);
-	else
+	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+		if (kbuf)
+			ret = copy_kernel_to_xstate(kbuf, ubuf, xsave);
+		else
+			ret = copy_user_to_xstate(kbuf, ubuf, xsave);
+	} else {
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+	}
 
 	/*
 	 * In case of failure, mark all states as init:
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 00c3b41c3cf1..1ad25d1b8056 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1084,7 +1084,71 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 }
 
 /*
- * Convert from a ptrace standard-format buffer to kernel XSAVES format
+ * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
+ * and copy to the target thread. This is called from xstateregs_set() and
+ * there we check the CPU has XSAVES and a whole standard-sized buffer
+ * exists.
+ */
+int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf,
+		     struct xregs_state *xsave)
+{
+	unsigned int offset, size;
+	int i;
+	u64 xfeatures;
+	u64 allowed_features;
+
+	offset = offsetof(struct xregs_state, header);
+	size = sizeof(xfeatures);
+
+	if (kbuf) {
+		memcpy(&xfeatures, kbuf + offset, size);
+	} else {
+		if (__copy_from_user(&xfeatures, ubuf + offset, size))
+			return -EFAULT;
+	}
+
+	/*
+	 * Reject if the user sets any disabled or supervisor features:
+	 */
+	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
+
+	if (xfeatures & ~allowed_features)
+		return -EINVAL;
+
+	for (i = 0; i < XFEATURE_MAX; i++) {
+		u64 mask = ((u64)1 << i);
+
+		if (xfeatures & mask) {
+			void *dst = __raw_xsave_addr(xsave, 1 << i);
+
+			offset = xstate_offsets[i];
+			size = xstate_sizes[i];
+
+			if (kbuf) {
+				memcpy(dst, kbuf + offset, size);
+			} else {
+				if (__copy_from_user(dst, ubuf + offset, size))
+					return -EFAULT;
+			}
+		}
+	}
+
+	/*
+	 * The state that came in from userspace was user-state only.
+	 * Mask all the user states out of 'xfeatures':
+	 */
+	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+
+	/*
+	 * Add back in the features that came in from userspace:
+	 */
+	xsave->header.xfeatures |= xfeatures;
+
+	return 0;
+}
+
+/*
+ * Convert from a ptrace standard-format user-space buffer to kernel XSAVES format
  * and copy to the target thread. This is called from xstateregs_set() and
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.
-- 
cgit 


From 59dffa4edba1f15b2bfdbe608aca1efe664c674c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:55 +0200
Subject: x86/fpu: Remove 'ubuf' parameter from the copy_kernel_to_xstate() API

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-13-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/kernel/fpu/regset.c      |  2 +-
 arch/x86/kernel/fpu/xstate.c      | 17 +++--------------
 3 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 79af79dbcab6..f10889bc0c88 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -50,6 +50,6 @@ const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
+int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave);
 int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index cb45dd81d617..785302c75f38 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -136,7 +136,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
 		if (kbuf)
-			ret = copy_kernel_to_xstate(kbuf, ubuf, xsave);
+			ret = copy_kernel_to_xstate(kbuf, xsave);
 		else
 			ret = copy_user_to_xstate(kbuf, ubuf, xsave);
 	} else {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 1ad25d1b8056..71cc8d367fdd 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1089,8 +1089,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.
  */
-int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf,
-		     struct xregs_state *xsave)
+int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave)
 {
 	unsigned int offset, size;
 	int i;
@@ -1100,12 +1099,7 @@ int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(xfeatures);
 
-	if (kbuf) {
-		memcpy(&xfeatures, kbuf + offset, size);
-	} else {
-		if (__copy_from_user(&xfeatures, ubuf + offset, size))
-			return -EFAULT;
-	}
+	memcpy(&xfeatures, kbuf + offset, size);
 
 	/*
 	 * Reject if the user sets any disabled or supervisor features:
@@ -1124,12 +1118,7 @@ int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			if (kbuf) {
-				memcpy(dst, kbuf + offset, size);
-			} else {
-				if (__copy_from_user(dst, ubuf + offset, size))
-					return -EFAULT;
-			}
+			memcpy(dst, kbuf + offset, size);
 		}
 	}
 
-- 
cgit 


From 7b9094c688f807c110a2dab6f6edc5876bfa7b0b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:56 +0200
Subject: x86/fpu: Remove 'kbuf' parameter from the copy_user_to_xstate() API

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-14-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/kernel/fpu/regset.c      |  2 +-
 arch/x86/kernel/fpu/signal.c      | 11 ++++-------
 arch/x86/kernel/fpu/xstate.c      | 19 +++++--------------
 4 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index f10889bc0c88..4ceb90740d80 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -51,5 +51,5 @@ int using_compacted_format(void);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave);
-int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
+int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 785302c75f38..caf723f31737 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -138,7 +138,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 		if (kbuf)
 			ret = copy_kernel_to_xstate(kbuf, xsave);
 		else
-			ret = copy_user_to_xstate(kbuf, ubuf, xsave);
+			ret = copy_user_to_xstate(ubuf, xsave);
 	} else {
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
 	}
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index b1fe9a1fc4e0..2c685b492fd6 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -323,13 +323,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		fpu__drop(fpu);
 
-		if (using_compacted_format()) {
-			err = copy_user_to_xstate(NULL, buf_fx,
-					       &fpu->state.xsave);
-		} else {
-			err = __copy_from_user(&fpu->state.xsave,
-					       buf_fx, state_size);
-		}
+		if (using_compacted_format())
+			err = copy_user_to_xstate(buf_fx, &fpu->state.xsave);
+		else
+			err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
 
 		if (err || __copy_from_user(&env, buf, sizeof(env))) {
 			fpstate_init(&fpu->state);
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 71cc8d367fdd..b1f3e4dae2e3 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1142,8 +1142,7 @@ int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave)
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.
  */
-int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
-		     struct xregs_state *xsave)
+int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave)
 {
 	unsigned int offset, size;
 	int i;
@@ -1153,12 +1152,8 @@ int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(xfeatures);
 
-	if (kbuf) {
-		memcpy(&xfeatures, kbuf + offset, size);
-	} else {
-		if (__copy_from_user(&xfeatures, ubuf + offset, size))
-			return -EFAULT;
-	}
+	if (__copy_from_user(&xfeatures, ubuf + offset, size))
+		return -EFAULT;
 
 	/*
 	 * Reject if the user sets any disabled or supervisor features:
@@ -1177,12 +1172,8 @@ int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			if (kbuf) {
-				memcpy(dst, kbuf + offset, size);
-			} else {
-				if (__copy_from_user(dst, ubuf + offset, size))
-					return -EFAULT;
-			}
+			if (__copy_from_user(dst, ubuf + offset, size))
+				return -EFAULT;
 		}
 	}
 
-- 
cgit 


From 6d7f7da5533a3f841eeb1d9657257c9367924274 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:57 +0200
Subject: x86/fpu: Flip the parameter order in copy_*_to_xstate()

Make it more consistent with regular memcpy() semantics, where the destination
argument comes first.

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-15-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h | 4 ++--
 arch/x86/kernel/fpu/regset.c      | 4 ++--
 arch/x86/kernel/fpu/signal.c      | 2 +-
 arch/x86/kernel/fpu/xstate.c      | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 4ceb90740d80..579ac2358e63 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -50,6 +50,6 @@ const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave);
-int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave);
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index caf723f31737..19a7385a912c 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -136,9 +136,9 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
 		if (kbuf)
-			ret = copy_kernel_to_xstate(kbuf, xsave);
+			ret = copy_kernel_to_xstate(xsave, kbuf);
 		else
-			ret = copy_user_to_xstate(ubuf, xsave);
+			ret = copy_user_to_xstate(xsave, ubuf);
 	} else {
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
 	}
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 2c685b492fd6..2d682dac35d4 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -324,7 +324,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		fpu__drop(fpu);
 
 		if (using_compacted_format())
-			err = copy_user_to_xstate(buf_fx, &fpu->state.xsave);
+			err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
 		else
 			err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
 
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b1f3e4dae2e3..0ef35040d0ad 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1089,7 +1089,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.
  */
-int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave)
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 {
 	unsigned int offset, size;
 	int i;
@@ -1142,7 +1142,7 @@ int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave)
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.
  */
-int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave)
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 {
 	unsigned int offset, size;
 	int i;
-- 
cgit 


From b3a163081c28d1a4d1ad76259a9d93b34a82f1da Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:58 +0200
Subject: x86/fpu: Simplify fpu->fpregs_active use

The fpregs_active() inline function is pretty pointless - in almost
all the callsites it can be replaced with a direct fpu->fpregs_active
access.

Do so and eliminate the extra layer of obfuscation.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-16-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h | 17 +----------------
 arch/x86/kernel/fpu/core.c          |  2 +-
 arch/x86/kernel/fpu/signal.c        |  9 +++++----
 arch/x86/mm/pkeys.c                 |  2 +-
 4 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 554cdb205d17..b223c57dd5dc 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -542,21 +542,6 @@ static inline void fpregs_activate(struct fpu *fpu)
 	trace_x86_fpu_regs_activated(fpu);
 }
 
-/*
- * The question "does this thread have fpu access?"
- * is slightly racy, since preemption could come in
- * and revoke it immediately after the test.
- *
- * However, even in that very unlikely scenario,
- * we can just assume we have FPU access - typically
- * to save the FP state - we'll just take a #NM
- * fault and get the FPU access back.
- */
-static inline int fpregs_active(void)
-{
-	return current->thread.fpu.fpregs_active;
-}
-
 /*
  * FPU state switching for scheduling.
  *
@@ -617,7 +602,7 @@ static inline void user_fpu_begin(void)
 	struct fpu *fpu = &current->thread.fpu;
 
 	preempt_disable();
-	if (!fpregs_active())
+	if (!fpu->fpregs_active)
 		fpregs_activate(fpu);
 	preempt_enable();
 }
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e1114f070c2d..bad57248e5a0 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -367,7 +367,7 @@ void fpu__current_fpstate_write_end(void)
 	 * registers may still be out of date.  Update them with
 	 * an XRSTOR if they are active.
 	 */
-	if (fpregs_active())
+	if (fpu->fpregs_active)
 		copy_kernel_to_fpregs(&fpu->state);
 
 	/*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 2d682dac35d4..684025654d0c 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -155,7 +155,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
  */
 int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 {
-	struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+	struct fpu *fpu = &current->thread.fpu;
+	struct xregs_state *xsave = &fpu->state.xsave;
 	struct task_struct *tsk = current;
 	int ia32_fxstate = (buf != buf_fx);
 
@@ -170,13 +171,13 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpregs_active() || using_compacted_format()) {
+	if (fpu->fpregs_active || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
 			return -1;
 		/* Update the thread's fxstate to save the fsave header. */
 		if (ia32_fxstate)
-			copy_fxregs_to_kernel(&tsk->thread.fpu);
+			copy_fxregs_to_kernel(fpu);
 	} else {
 		/*
 		 * It is a *bug* if kernel uses compacted-format for xsave
@@ -189,7 +190,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			return -1;
 		}
 
-		fpstate_sanitize_xstate(&tsk->thread.fpu);
+		fpstate_sanitize_xstate(fpu);
 		if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
 			return -1;
 	}
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 2dab69a706ec..e2c23472233e 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -45,7 +45,7 @@ int __execute_only_pkey(struct mm_struct *mm)
 	 */
 	preempt_disable();
 	if (!need_to_set_mm_pkey &&
-	    fpregs_active() &&
+	    current->thread.fpu.fpregs_active &&
 	    !__pkru_allows_read(read_pkru(), execute_only_pkey)) {
 		preempt_enable();
 		return execute_only_pkey;
-- 
cgit 


From a10b6a16cdad88170f546d008c77453cddf918e6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:59 +0200
Subject: x86/fpu: Make the fpu state change in fpu__clear() scheduler-atomic

Do this temporarily only, to make it easier to change the FPU state machine,
in particular this change couples the fpu->fpregs_active and fpu->fpstate_active
states: they are only set/cleared together (as far as the scheduler sees them).

This will be removed by later patches.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-17-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index bad57248e5a0..b7dc3833d41a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -462,9 +462,11 @@ void fpu__clear(struct fpu *fpu)
 	 * Make sure fpstate is cleared and initialized.
 	 */
 	if (static_cpu_has(X86_FEATURE_FPU)) {
+		preempt_disable();
 		fpu__activate_curr(fpu);
 		user_fpu_begin();
 		copy_init_fpstate_to_fpregs();
+		preempt_enable();
 	}
 }
 
-- 
cgit 


From b6aa85558d7e7b18fc3470d2bc1731d2205dd275 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:00 +0200
Subject: x86/fpu: Split the state handling in fpu__drop()

Prepare fpu__drop() to use fpu->fpregs_active.

There are two distinct usecases for fpu__drop() in this context:
exit_thread() when called for 'current' in exit(), and when called
for another task in fork().

This patch does not change behavior, it only adds a couple of
debug checks and structures the code to make the ->fpregs_active
change more obviously correct.

All the complications will be removed later on.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-18-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/core.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index b7dc3833d41a..815dfba7781a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -414,12 +414,19 @@ void fpu__drop(struct fpu *fpu)
 {
 	preempt_disable();
 
-	if (fpu->fpregs_active) {
-		/* Ignore delayed exceptions from user space */
-		asm volatile("1: fwait\n"
-			     "2:\n"
-			     _ASM_EXTABLE(1b, 2b));
-		fpregs_deactivate(fpu);
+	if (fpu == &current->thread.fpu) {
+		WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
+
+		if (fpu->fpregs_active) {
+			/* Ignore delayed exceptions from user space */
+			asm volatile("1: fwait\n"
+				     "2:\n"
+				     _ASM_EXTABLE(1b, 2b));
+			if (fpu->fpregs_active)
+				fpregs_deactivate(fpu);
+		}
+	} else {
+		WARN_ON_FPU(fpu->fpregs_active);
 	}
 
 	fpu->fpstate_active = 0;
-- 
cgit 


From f1c8cd0176078c7bcafdc89cac447cab672a0b5e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:01 +0200
Subject: x86/fpu: Change fpu->fpregs_active users to fpu->fpstate_active

We want to simplify the FPU state machine by eliminating fpu->fpregs_active,
and we can do that because the two state flags (::fpregs_active and
::fpstate_active) are set essentially together.

The old lazy FPU switching code used to make a distinction - but there's
no lazy switching code anymore, we always switch in an 'eager' fashion.

Do this by first changing all substantial uses of fpu->fpregs_active
to fpu->fpstate_active and adding a few debug checks to double check
our assumption is correct.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-19-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h |  4 +++-
 arch/x86/kernel/fpu/core.c          | 16 ++++++++++------
 arch/x86/kernel/fpu/signal.c        |  4 +++-
 arch/x86/mm/pkeys.c                 |  3 +--
 4 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index b223c57dd5dc..7fa676f93ac1 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -556,7 +556,9 @@ static inline void fpregs_activate(struct fpu *fpu)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	if (old_fpu->fpregs_active) {
+	WARN_ON_FPU(old_fpu->fpregs_active != old_fpu->fpstate_active);
+
+	if (old_fpu->fpstate_active) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
 		else
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 815dfba7781a..eab244622402 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -100,7 +100,7 @@ void __kernel_fpu_begin(void)
 
 	kernel_fpu_disable();
 
-	if (fpu->fpregs_active) {
+	if (fpu->fpstate_active) {
 		/*
 		 * Ignore return value -- we don't care if reg state
 		 * is clobbered.
@@ -116,7 +116,7 @@ void __kernel_fpu_end(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	if (fpu->fpregs_active)
+	if (fpu->fpstate_active)
 		copy_kernel_to_fpregs(&fpu->state);
 
 	kernel_fpu_enable();
@@ -147,8 +147,10 @@ void fpu__save(struct fpu *fpu)
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
 	preempt_disable();
+	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
+
 	trace_x86_fpu_before_save(fpu);
-	if (fpu->fpregs_active) {
+	if (fpu->fpstate_active) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
 			copy_kernel_to_fpregs(&fpu->state);
 		}
@@ -262,11 +264,12 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr);
  */
 void fpu__activate_fpstate_read(struct fpu *fpu)
 {
+	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
 	/*
 	 * If fpregs are active (in the current CPU), then
 	 * copy them to the fpstate:
 	 */
-	if (fpu->fpregs_active) {
+	if (fpu->fpstate_active) {
 		fpu__save(fpu);
 	} else {
 		if (!fpu->fpstate_active) {
@@ -362,12 +365,13 @@ void fpu__current_fpstate_write_end(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
+	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
 	/*
 	 * 'fpu' now has an updated copy of the state, but the
 	 * registers may still be out of date.  Update them with
 	 * an XRSTOR if they are active.
 	 */
-	if (fpu->fpregs_active)
+	if (fpu->fpstate_active)
 		copy_kernel_to_fpregs(&fpu->state);
 
 	/*
@@ -417,7 +421,7 @@ void fpu__drop(struct fpu *fpu)
 	if (fpu == &current->thread.fpu) {
 		WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
 
-		if (fpu->fpregs_active) {
+		if (fpu->fpstate_active) {
 			/* Ignore delayed exceptions from user space */
 			asm volatile("1: fwait\n"
 				     "2:\n"
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 684025654d0c..a88083ba7f8b 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -171,7 +171,9 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpu->fpregs_active || using_compacted_format()) {
+	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
+
+	if (fpu->fpstate_active || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
 			return -1;
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index e2c23472233e..4d24269c071f 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -18,7 +18,6 @@
 
 #include <asm/cpufeature.h>             /* boot_cpu_has, ...            */
 #include <asm/mmu_context.h>            /* vma_pkey()                   */
-#include <asm/fpu/internal.h>           /* fpregs_active()              */
 
 int __execute_only_pkey(struct mm_struct *mm)
 {
@@ -45,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm)
 	 */
 	preempt_disable();
 	if (!need_to_set_mm_pkey &&
-	    current->thread.fpu.fpregs_active &&
+	    current->thread.fpu.fpstate_active &&
 	    !__pkru_allows_read(read_pkru(), execute_only_pkey)) {
 		preempt_enable();
 		return execute_only_pkey;
-- 
cgit 


From 6cf4edbe0526db311a28734609da888fdfcb3604 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:02 +0200
Subject: x86/fpu: Decouple fpregs_activate()/fpregs_deactivate() from
 fpu->fpregs_active

The fpregs_activate()/fpregs_deactivate() are currently called in such a pattern:

	if (!fpu->fpregs_active)
		fpregs_activate(fpu);

	...

	if (fpu->fpregs_active)
		fpregs_deactivate(fpu);

But note that it's actually safe to call them without checking the flag first.

This further decouples the fpu->fpregs_active flag from actual FPU logic.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-20-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h | 7 +------
 arch/x86/kernel/fpu/core.c          | 3 +--
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 7fa676f93ac1..42a601673c09 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -526,8 +526,6 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
  */
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
-	WARN_ON_FPU(!fpu->fpregs_active);
-
 	fpu->fpregs_active = 0;
 	this_cpu_write(fpu_fpregs_owner_ctx, NULL);
 	trace_x86_fpu_regs_deactivated(fpu);
@@ -535,8 +533,6 @@ static inline void fpregs_deactivate(struct fpu *fpu)
 
 static inline void fpregs_activate(struct fpu *fpu)
 {
-	WARN_ON_FPU(fpu->fpregs_active);
-
 	fpu->fpregs_active = 1;
 	this_cpu_write(fpu_fpregs_owner_ctx, fpu);
 	trace_x86_fpu_regs_activated(fpu);
@@ -604,8 +600,7 @@ static inline void user_fpu_begin(void)
 	struct fpu *fpu = &current->thread.fpu;
 
 	preempt_disable();
-	if (!fpu->fpregs_active)
-		fpregs_activate(fpu);
+	fpregs_activate(fpu);
 	preempt_enable();
 }
 
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index eab244622402..01a47e9edfb4 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -426,8 +426,7 @@ void fpu__drop(struct fpu *fpu)
 			asm volatile("1: fwait\n"
 				     "2:\n"
 				     _ASM_EXTABLE(1b, 2b));
-			if (fpu->fpregs_active)
-				fpregs_deactivate(fpu);
+			fpregs_deactivate(fpu);
 		}
 	} else {
 		WARN_ON_FPU(fpu->fpregs_active);
-- 
cgit 


From 99dc26bda233ee722bbd370bddf20beece3ffb93 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:03 +0200
Subject: x86/fpu: Remove struct fpu::fpregs_active

The previous changes paved the way for the removal of the
fpu::fpregs_active state flag - we now only have the
fpu::fpstate_active and fpu::last_cpu fields left.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-21-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h |  5 -----
 arch/x86/include/asm/fpu/types.h    | 23 -----------------------
 arch/x86/include/asm/trace/fpu.h    |  5 +----
 arch/x86/kernel/fpu/core.c          |  9 ---------
 arch/x86/kernel/fpu/signal.c        |  2 --
 5 files changed, 1 insertion(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 42a601673c09..629e7abcd6c9 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -526,14 +526,12 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
  */
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
-	fpu->fpregs_active = 0;
 	this_cpu_write(fpu_fpregs_owner_ctx, NULL);
 	trace_x86_fpu_regs_deactivated(fpu);
 }
 
 static inline void fpregs_activate(struct fpu *fpu)
 {
-	fpu->fpregs_active = 1;
 	this_cpu_write(fpu_fpregs_owner_ctx, fpu);
 	trace_x86_fpu_regs_activated(fpu);
 }
@@ -552,8 +550,6 @@ static inline void fpregs_activate(struct fpu *fpu)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	WARN_ON_FPU(old_fpu->fpregs_active != old_fpu->fpstate_active);
-
 	if (old_fpu->fpstate_active) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
@@ -561,7 +557,6 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 			old_fpu->last_cpu = cpu;
 
 		/* But leave fpu_fpregs_owner_ctx! */
-		old_fpu->fpregs_active = 0;
 		trace_x86_fpu_regs_deactivated(old_fpu);
 	} else
 		old_fpu->last_cpu = -1;
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3c80f5b9c09d..0c314a397cf5 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -298,29 +298,6 @@ struct fpu {
 	 */
 	unsigned char			fpstate_active;
 
-	/*
-	 * @fpregs_active:
-	 *
-	 * This flag determines whether a given context is actively
-	 * loaded into the FPU's registers and that those registers
-	 * represent the task's current FPU state.
-	 *
-	 * Note the interaction with fpstate_active:
-	 *
-	 *   # task does not use the FPU:
-	 *   fpstate_active == 0
-	 *
-	 *   # task uses the FPU and regs are active:
-	 *   fpstate_active == 1 && fpregs_active == 1
-	 *
-	 *   # the regs are inactive but still match fpstate:
-	 *   fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
-	 *
-	 * The third state is what we use for the lazy restore optimization
-	 * on lazy-switching CPUs.
-	 */
-	unsigned char			fpregs_active;
-
 	/*
 	 * @state:
 	 *
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 342e59789fcd..da565aae9fd2 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -12,7 +12,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
 	TP_STRUCT__entry(
 		__field(struct fpu *, fpu)
-		__field(bool, fpregs_active)
 		__field(bool, fpstate_active)
 		__field(u64, xfeatures)
 		__field(u64, xcomp_bv)
@@ -20,16 +19,14 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
 	TP_fast_assign(
 		__entry->fpu		= fpu;
-		__entry->fpregs_active	= fpu->fpregs_active;
 		__entry->fpstate_active	= fpu->fpstate_active;
 		if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 			__entry->xfeatures = fpu->state.xsave.header.xfeatures;
 			__entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
 		}
 	),
-	TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
+	TP_printk("x86/fpu: %p fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
 			__entry->fpu,
-			__entry->fpregs_active,
 			__entry->fpstate_active,
 			__entry->xfeatures,
 			__entry->xcomp_bv
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 01a47e9edfb4..93103a909c47 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -147,8 +147,6 @@ void fpu__save(struct fpu *fpu)
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
 	preempt_disable();
-	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
-
 	trace_x86_fpu_before_save(fpu);
 	if (fpu->fpstate_active) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
@@ -191,7 +189,6 @@ EXPORT_SYMBOL_GPL(fpstate_init);
 
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
-	dst_fpu->fpregs_active = 0;
 	dst_fpu->last_cpu = -1;
 
 	if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
@@ -264,7 +261,6 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr);
  */
 void fpu__activate_fpstate_read(struct fpu *fpu)
 {
-	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
 	/*
 	 * If fpregs are active (in the current CPU), then
 	 * copy them to the fpstate:
@@ -365,7 +361,6 @@ void fpu__current_fpstate_write_end(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
 	/*
 	 * 'fpu' now has an updated copy of the state, but the
 	 * registers may still be out of date.  Update them with
@@ -419,8 +414,6 @@ void fpu__drop(struct fpu *fpu)
 	preempt_disable();
 
 	if (fpu == &current->thread.fpu) {
-		WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
-
 		if (fpu->fpstate_active) {
 			/* Ignore delayed exceptions from user space */
 			asm volatile("1: fwait\n"
@@ -428,8 +421,6 @@ void fpu__drop(struct fpu *fpu)
 				     _ASM_EXTABLE(1b, 2b));
 			fpregs_deactivate(fpu);
 		}
-	} else {
-		WARN_ON_FPU(fpu->fpregs_active);
 	}
 
 	fpu->fpstate_active = 0;
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index a88083ba7f8b..629106e51a29 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -171,8 +171,6 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
-
 	if (fpu->fpstate_active || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
-- 
cgit 


From 0852b374173bb57f870d78e6c6839c77b339be5f Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Sat, 23 Sep 2017 15:00:04 +0200
Subject: x86/fpu: Add FPU state copying quirk to handle XRSTOR failure on
 Intel Skylake CPUs

On Skylake CPUs I noticed that XRSTOR is unable to deal with states
created by copyout_from_xsaves() if the xstate has only SSE/YMM state, and
no FP state. That is, xfeatures had XFEATURE_MASK_SSE set, but not
XFEATURE_MASK_FP.

The reason is that part of the SSE/YMM state lives in the MXCSR and
MXCSR_FLAGS fields of the FP state.

Ensure that whenever we copy SSE or YMM state around, the MXCSR and
MXCSR_FLAGS fields are also copied around.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170210085445.0f1cc708@annuminas.surriel.com
Link: http://lkml.kernel.org/r/20170923130016.21448-22-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/types.h |  3 +++
 arch/x86/kernel/fpu/xstate.c     | 42 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 0c314a397cf5..71db45ca8870 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -68,6 +68,9 @@ struct fxregs_state {
 /* Default value for fxregs_state.mxcsr: */
 #define MXCSR_DEFAULT		0x1f80
 
+/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */
+#define MXCSR_AND_FLAGS_SIZE sizeof(u64)
+
 /*
  * Software based FPU emulation state. This is arbitrary really,
  * it matches the x87 format to make it easier to understand:
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 0ef35040d0ad..41c52256bdce 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -920,6 +920,23 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 }
 #endif /* ! CONFIG_ARCH_HAS_PKEYS */
 
+/*
+ * Weird legacy quirk: SSE and YMM states store information in the
+ * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP
+ * area is marked as unused in the xfeatures header, we need to copy
+ * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use.
+ */
+static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
+{
+	if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
+		return 0;
+
+	if (xfeatures & XFEATURE_MASK_FP)
+		return 0;
+
+	return 1;
+}
+
 /*
  * This is similar to user_regset_copyout(), but will not add offset to
  * the source data pointer or increment pos, count, kbuf, and ubuf.
@@ -988,6 +1005,12 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 
 	}
 
+	if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		__copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
+	}
+
 	/*
 	 * Fill xsave->i387.sw_reserved value for ptrace frame:
 	 */
@@ -1070,6 +1093,12 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 
 	}
 
+	if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		__copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
+	}
+
 	/*
 	 * Fill xsave->i387.sw_reserved value for ptrace frame:
 	 */
@@ -1122,6 +1151,12 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 		}
 	}
 
+	if (xfeatures_mxcsr_quirk(xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
+	}
+
 	/*
 	 * The state that came in from userspace was user-state only.
 	 * Mask all the user states out of 'xfeatures':
@@ -1177,6 +1212,13 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 		}
 	}
 
+	if (xfeatures_mxcsr_quirk(xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
+			return -EFAULT;
+	}
+
 	/*
 	 * The state that came in from userspace was user-state only.
 	 * Mask all the user states out of 'xfeatures':
-- 
cgit 


From 4f8cef59bad29344aca0e2e6b0ad18dadd078fd0 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Sat, 23 Sep 2017 15:00:05 +0200
Subject: x86/fpu: Fix boolreturn.cocci warnings

arch/x86/kernel/fpu/xstate.c:931:9-10: WARNING: return of 0/1 in function 'xfeatures_mxcsr_quirk' with return type bool

 Return statements in functions returning bool should use true/false instead of 1/0.

Generated by: scripts/coccinelle/misc/boolreturn.cocci

Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kbuild-all@01.org
Cc: tipbuild@zytor.com
Link: http://lkml.kernel.org/r/20170306004553.GA25764@lkp-wsm-ep1
Link: http://lkml.kernel.org/r/20170923130016.21448-23-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 41c52256bdce..fda1109cc355 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -929,12 +929,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
 {
 	if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
-		return 0;
+		return false;
 
 	if (xfeatures & XFEATURE_MASK_FP)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
 /*
-- 
cgit 


From 03eaec81ac09814817e9f0307d572ffe8365f980 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Sat, 23 Sep 2017 15:00:06 +0200
Subject: x86/fpu: Turn WARN_ON() in context switch into WARN_ON_FPU()

copy_xregs_to_kernel checks if the alternatives have been already
patched.

This WARN_ON() is always executed in every context switch.

All the other checks in fpu internal.h are WARN_ON_FPU(), but
this one is plain WARN_ON(). I assume it was forgotten to switch it.

So switch it to WARN_ON_FPU() too to avoid some unnecessary code
in the context switch, and a potentially expensive cache line miss for the
global variable.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170329062605.4970-1-andi@firstfloor.org
Link: http://lkml.kernel.org/r/20170923130016.21448-24-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 629e7abcd6c9..2dca7c65319c 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -350,7 +350,7 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
 	u32 hmask = mask >> 32;
 	int err;
 
-	WARN_ON(!alternatives_patched);
+	WARN_ON_FPU(!alternatives_patched);
 
 	XSTATE_XSAVE(xstate, lmask, hmask, err);
 
-- 
cgit 


From e19b205be43d11bff638cad4487008c48d21c103 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 24 Sep 2017 16:38:56 -0700
Subject: Linux 4.14-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 64cbc66cebca..d1119941261c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
-- 
cgit 


From 5f3d862a736398e7068fa67142133f1713fdee8c Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Mon, 18 Sep 2017 09:41:45 +0200
Subject: qxl: fix framebuffer unpinning

qxl_plane_cleanup_fb() unpins the just activated framebuffer
instead of the old one.  Oops.  Fix it.

Cc: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
Fixes: 1277eed5fecb8830c8cc414ad70c1ef640464bc0
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170918074145.2257-1-kraxel@redhat.com
---
 drivers/gpu/drm/qxl/qxl_display.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index e1dd05423e86..afbf50d0c08f 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -702,14 +702,15 @@ static void qxl_plane_cleanup_fb(struct drm_plane *plane,
 	struct drm_gem_object *obj;
 	struct qxl_bo *user_bo;
 
-	if (!plane->state->fb) {
-		/* we never executed prepare_fb, so there's nothing to
+	if (!old_state->fb) {
+		/*
+		 * we never executed prepare_fb, so there's nothing to
 		 * unpin.
 		 */
 		return;
 	}
 
-	obj = to_qxl_framebuffer(plane->state->fb)->obj;
+	obj = to_qxl_framebuffer(old_state->fb)->obj;
 	user_bo = gem_to_qxl_bo(obj);
 	qxl_bo_unpin(user_bo);
 }
-- 
cgit 


From 814fb7bb7db5433757d76f4c4502c96fc53b0b5e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sat, 23 Sep 2017 15:00:07 +0200
Subject: x86/fpu: Don't let userspace set bogus xcomp_bv

On x86, userspace can use the ptrace() or rt_sigreturn() system calls to
set a task's extended state (xstate) or "FPU" registers.  ptrace() can
set them for another task using the PTRACE_SETREGSET request with
NT_X86_XSTATE, while rt_sigreturn() can set them for the current task.
In either case, registers can be set to any value, but the kernel
assumes that the XSAVE area itself remains valid in the sense that the
CPU can restore it.

However, in the case where the kernel is using the uncompacted xstate
format (which it does whenever the XSAVES instruction is unavailable),
it was possible for userspace to set the xcomp_bv field in the
xstate_header to an arbitrary value.  However, all bits in that field
are reserved in the uncompacted case, so when switching to a task with
nonzero xcomp_bv, the XRSTOR instruction failed with a #GP fault.  This
caused the WARN_ON_FPU(err) in copy_kernel_to_xregs() to be hit.  In
addition, since the error is otherwise ignored, the FPU registers from
the task previously executing on the CPU were leaked.

Fix the bug by checking that the user-supplied value of xcomp_bv is 0 in
the uncompacted case, and returning an error otherwise.

The reason for validating xcomp_bv rather than simply overwriting it
with 0 is that we want userspace to see an error if it (incorrectly)
provides an XSAVE area in compacted format rather than in uncompacted
format.

Note that as before, in case of error we clear the task's FPU state.
This is perhaps non-ideal, especially for PTRACE_SETREGSET; it might be
better to return an error before changing anything.  But it seems the
"clear on error" behavior is fine for now, and it's a little tricky to
do otherwise because it would mean we couldn't simply copy the full
userspace state into kernel memory in one __copy_from_user().

This bug was found by syzkaller, which hit the above-mentioned
WARN_ON_FPU():

    WARNING: CPU: 1 PID: 0 at ./arch/x86/include/asm/fpu/internal.h:373 __switch_to+0x5b5/0x5d0
    CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.13.0 #453
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
    task: ffff9ba2bc8e42c0 task.stack: ffffa78cc036c000
    RIP: 0010:__switch_to+0x5b5/0x5d0
    RSP: 0000:ffffa78cc08bbb88 EFLAGS: 00010082
    RAX: 00000000fffffffe RBX: ffff9ba2b8bf2180 RCX: 00000000c0000100
    RDX: 00000000ffffffff RSI: 000000005cb10700 RDI: ffff9ba2b8bf36c0
    RBP: ffffa78cc08bbbd0 R08: 00000000929fdf46 R09: 0000000000000001
    R10: 0000000000000000 R11: 0000000000000000 R12: ffff9ba2bc8e42c0
    R13: 0000000000000000 R14: ffff9ba2b8bf3680 R15: ffff9ba2bf5d7b40
    FS:  00007f7e5cb10700(0000) GS:ffff9ba2bf400000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 00000000004005cc CR3: 0000000079fd5000 CR4: 00000000001406e0
    Call Trace:
    Code: 84 00 00 00 00 00 e9 11 fd ff ff 0f ff 66 0f 1f 84 00 00 00 00 00 e9 e7 fa ff ff 0f ff 66 0f 1f 84 00 00 00 00 00 e9 c2 fa ff ff <0f> ff 66 0f 1f 84 00 00 00 00 00 e9 d4 fc ff ff 66 66 2e 0f 1f

Here is a C reproducer.  The expected behavior is that the program spin
forever with no output.  However, on a buggy kernel running on a
processor with the "xsave" feature but without the "xsaves" feature
(e.g. Sandy Bridge through Broadwell for Intel), within a second or two
the program reports that the xmm registers were corrupted, i.e. were not
restored correctly.  With CONFIG_X86_DEBUG_FPU=y it also hits the above
kernel warning.

    #define _GNU_SOURCE
    #include <stdbool.h>
    #include <inttypes.h>
    #include <linux/elf.h>
    #include <stdio.h>
    #include <sys/ptrace.h>
    #include <sys/uio.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
        int pid = fork();
        uint64_t xstate[512];
        struct iovec iov = { .iov_base = xstate, .iov_len = sizeof(xstate) };

        if (pid == 0) {
            bool tracee = true;
            for (int i = 0; i < sysconf(_SC_NPROCESSORS_ONLN) && tracee; i++)
                tracee = (fork() != 0);
            uint32_t xmm0[4] = { [0 ... 3] = tracee ? 0x00000000 : 0xDEADBEEF };
            asm volatile("   movdqu %0, %%xmm0\n"
                         "   mov %0, %%rbx\n"
                         "1: movdqu %%xmm0, %0\n"
                         "   mov %0, %%rax\n"
                         "   cmp %%rax, %%rbx\n"
                         "   je 1b\n"
                         : "+m" (xmm0) : : "rax", "rbx", "xmm0");
            printf("BUG: xmm registers corrupted!  tracee=%d, xmm0=%08X%08X%08X%08X\n",
                   tracee, xmm0[0], xmm0[1], xmm0[2], xmm0[3]);
        } else {
            usleep(100000);
            ptrace(PTRACE_ATTACH, pid, 0, 0);
            wait(NULL);
            ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov);
            xstate[65] = -1;
            ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov);
            ptrace(PTRACE_CONT, pid, 0, 0);
            wait(NULL);
        }
        return 1;
    }

Note: the program only tests for the bug using the ptrace() system call.
The bug can also be reproduced using the rt_sigreturn() system call, but
only when called from a 32-bit program, since for 64-bit programs the
kernel restores the FPU state from the signal frame by doing XRSTOR
directly from userspace memory (with proper error checking).

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: <stable@vger.kernel.org> [v3.17+]
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Fixes: 0b29643a5843 ("x86/xsaves: Change compacted format xsave area header")
Link: http://lkml.kernel.org/r/20170922174156.16780-2-ebiggers3@gmail.com
Link: http://lkml.kernel.org/r/20170923130016.21448-25-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/regset.c | 4 ++++
 arch/x86/kernel/fpu/signal.c | 9 +++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 19a7385a912c..c764f7405322 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -141,6 +141,10 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 			ret = copy_user_to_xstate(xsave, ubuf);
 	} else {
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+
+		/* xcomp_bv must be 0 when using uncompacted format */
+		if (!ret && xsave->header.xcomp_bv)
+			ret = -EINVAL;
 	}
 
 	/*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 629106e51a29..da68ea1c3a44 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -324,11 +324,16 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		fpu__drop(fpu);
 
-		if (using_compacted_format())
+		if (using_compacted_format()) {
 			err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
-		else
+		} else {
 			err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
 
+			/* xcomp_bv must be 0 when using uncompacted format */
+			if (!err && state_size > offsetof(struct xregs_state, header) && fpu->state.xsave.header.xcomp_bv)
+				err = -EINVAL;
+		}
+
 		if (err || __copy_from_user(&env, buf, sizeof(env))) {
 			fpstate_init(&fpu->state);
 			trace_x86_fpu_init_state(fpu);
-- 
cgit 


From d5c8028b4788f62b31fb79a331b3ad3e041fa366 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sat, 23 Sep 2017 15:00:09 +0200
Subject: x86/fpu: Reinitialize FPU registers if restoring FPU state fails

Userspace can change the FPU state of a task using the ptrace() or
rt_sigreturn() system calls.  Because reserved bits in the FPU state can
cause the XRSTOR instruction to fail, the kernel has to carefully
validate that no reserved bits or other invalid values are being set.

Unfortunately, there have been bugs in this validation code.  For
example, we were not checking that the 'xcomp_bv' field in the
xstate_header was 0.  As-is, such bugs are exploitable to read the FPU
registers of other processes on the system.  To do so, an attacker can
create a task, assign to it an invalid FPU state, then spin in a loop
and monitor the values of the FPU registers.  Because the task's FPU
registers are not being restored, sometimes the FPU registers will have
the values from another process.

This is likely to continue to be a problem in the future because the
validation done by the CPU instructions like XRSTOR is not immediately
visible to kernel developers.  Nor will invalid FPU states ever be
encountered during ordinary use --- they will only be seen during
fuzzing or exploits.  There can even be reserved bits outside the
xstate_header which are easy to forget about.  For example, the MXCSR
register contains reserved bits, which were not validated by the
KVM_SET_XSAVE ioctl until commit a575813bfe4b ("KVM: x86: Fix load
damaged SSEx MXCSR register").

Therefore, mitigate this class of vulnerability by restoring the FPU
registers from init_fpstate if restoring from the task's state fails.

We actually used to do this, but it was (perhaps unwisely) removed by
commit 9ccc27a5d297 ("x86/fpu: Remove error return values from
copy_kernel_to_*regs() functions").  This new patch is also a bit
different.  First, it only clears the registers, not also the bad
in-memory state; this is simpler and makes it easier to make the
mitigation cover all callers of __copy_kernel_to_fpregs().  Second, it
does the register clearing in an exception handler so that no extra
instructions are added to context switches.  In fact, we *remove*
instructions, since previously we were always zeroing the register
containing 'err' even if CONFIG_X86_DEBUG_FPU was disabled.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170922174156.16780-4-ebiggers3@gmail.com
Link: http://lkml.kernel.org/r/20170923130016.21448-27-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h | 51 +++++++++++--------------------------
 arch/x86/mm/extable.c               | 24 +++++++++++++++++
 2 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 2dca7c65319c..cf290d424e48 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -120,20 +120,11 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
 	err;								\
 })
 
-#define check_insn(insn, output, input...)				\
-({									\
-	int err;							\
+#define kernel_insn(insn, output, input...)				\
 	asm volatile("1:" #insn "\n\t"					\
 		     "2:\n"						\
-		     ".section .fixup,\"ax\"\n"				\
-		     "3:  movl $-1,%[err]\n"				\
-		     "    jmp  2b\n"					\
-		     ".previous\n"					\
-		     _ASM_EXTABLE(1b, 3b)				\
-		     : [err] "=r" (err), output				\
-		     : "0"(0), input);					\
-	err;								\
-})
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore)	\
+		     : output : input)
 
 static inline int copy_fregs_to_user(struct fregs_state __user *fx)
 {
@@ -153,20 +144,16 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
 
 static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
 {
-	int err;
-
 	if (IS_ENABLED(CONFIG_X86_32)) {
-		err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+		kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 	} else {
 		if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
-			err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+			kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
 		} else {
 			/* See comment in copy_fxregs_to_kernel() below. */
-			err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
+			kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
 		}
 	}
-	/* Copying from a kernel buffer to FPU registers should never fail: */
-	WARN_ON_FPU(err);
 }
 
 static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
@@ -183,9 +170,7 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
 
 static inline void copy_kernel_to_fregs(struct fregs_state *fx)
 {
-	int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-
-	WARN_ON_FPU(err);
+	kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
 static inline int copy_user_to_fregs(struct fregs_state __user *fx)
@@ -281,18 +266,13 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
  * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
  * XSAVE area format.
  */
-#define XSTATE_XRESTORE(st, lmask, hmask, err)				\
+#define XSTATE_XRESTORE(st, lmask, hmask)				\
 	asm volatile(ALTERNATIVE(XRSTOR,				\
 				 XRSTORS, X86_FEATURE_XSAVES)		\
 		     "\n"						\
-		     "xor %[err], %[err]\n"				\
 		     "3:\n"						\
-		     ".pushsection .fixup,\"ax\"\n"			\
-		     "4: movl $-2, %[err]\n"				\
-		     "jmp 3b\n"						\
-		     ".popsection\n"					\
-		     _ASM_EXTABLE(661b, 4b)				\
-		     : [err] "=r" (err)					\
+		     _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
+		     :							\
 		     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)	\
 		     : "memory")
 
@@ -336,7 +316,10 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
 	else
 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
 
-	/* We should never fault when copying from a kernel buffer: */
+	/*
+	 * We should never fault when copying from a kernel buffer, and the FPU
+	 * state we set at boot time should be valid.
+	 */
 	WARN_ON_FPU(err);
 }
 
@@ -365,12 +348,8 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
 {
 	u32 lmask = mask;
 	u32 hmask = mask >> 32;
-	int err;
-
-	XSTATE_XRESTORE(xstate, lmask, hmask, err);
 
-	/* We should never fault when copying from a kernel buffer: */
-	WARN_ON_FPU(err);
+	XSTATE_XRESTORE(xstate, lmask, hmask);
 }
 
 /*
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index c076f710de4c..c3521e2be396 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -2,6 +2,7 @@
 #include <linux/uaccess.h>
 #include <linux/sched/debug.h>
 
+#include <asm/fpu/internal.h>
 #include <asm/traps.h>
 #include <asm/kdebug.h>
 
@@ -78,6 +79,29 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL_GPL(ex_handler_refcount);
 
+/*
+ * Handler for when we fail to restore a task's FPU state.  We should never get
+ * here because the FPU state of a task using the FPU (task->thread.fpu.state)
+ * should always be valid.  However, past bugs have allowed userspace to set
+ * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
+ * These caused XRSTOR to fail when switching to the task, leaking the FPU
+ * registers of the task previously executing on the CPU.  Mitigate this class
+ * of vulnerability by restoring from the initial state (essentially, zeroing
+ * out all the FPU registers) if we can't restore from the task's FPU state.
+ */
+bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+			  struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+
+	WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
+		  (void *)instruction_pointer(regs));
+
+	__copy_kernel_to_fpregs(&init_fpstate, -1);
+	return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fprestore);
+
 bool ex_handler_ext(const struct exception_table_entry *fixup,
 		   struct pt_regs *regs, int trapnr)
 {
-- 
cgit 


From 10430364ebb562311ba6a6efa74e0c2298007912 Mon Sep 17 00:00:00 2001
From: Bhumika Goyal <bhumirks@gmail.com>
Date: Tue, 29 Aug 2017 23:47:11 +0530
Subject: x86/numachip: Add const and __initconst to numachip2_clockevent

Make this const as it is only used during a copy operation and add
__initconst as this usage is during the initialization phase.

Signed-off-by: Bhumika Goyal <bhumirks@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: julia.lawall@lip6.fr
Cc: daniel.lezcano@linaro.org
Link: http://lkml.kernel.org/r/1504030631-10812-1-git-send-email-bhumirks@gmail.com
---
 drivers/clocksource/numachip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/numachip.c b/drivers/clocksource/numachip.c
index 6a20dc8b253f..9a7d7f0f23fe 100644
--- a/drivers/clocksource/numachip.c
+++ b/drivers/clocksource/numachip.c
@@ -43,7 +43,7 @@ static int numachip2_set_next_event(unsigned long delta, struct clock_event_devi
 	return 0;
 }
 
-static struct clock_event_device numachip2_clockevent = {
+static const struct clock_event_device numachip2_clockevent __initconst = {
 	.name            = "numachip2",
 	.rating          = 400,
 	.set_next_event  = numachip2_set_next_event,
-- 
cgit 


From a3c4fb7c9c2ebfd50b8c60f6c069932bb319bc37 Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Date: Mon, 4 Sep 2017 10:32:15 +0200
Subject: x86/mm: Fix fault error path using unsafe vma pointer

commit 7b2d0dbac489 ("x86/mm/pkeys: Pass VMA down in to fault signal
generation code") passes down a vma pointer to the error path, but that is
done once the mmap_sem is released when calling mm_fault_error() from
__do_page_fault().

This is dangerous as the vma structure is no more safe to be used once the
mmap_sem has been released. As only the protection key value is required in
the error processing, we could just pass down this value.

Fix it by passing a pointer to a protection key value down to the fault
signal generation code. The use of a pointer allows to keep the check
generating a warning message in fill_sig_info_pkey() when the vma was not
known. If the pointer is valid, the protection value can be accessed by
deferencing the pointer.

[ tglx: Made *pkey u32 as that's the type which is passed in siginfo ]

Fixes: 7b2d0dbac489 ("x86/mm/pkeys: Pass VMA down in to fault signal generation code")
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1504513935-12742-1-git-send-email-ldufour@linux.vnet.ibm.com
---
 arch/x86/mm/fault.c | 47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 39567b5c33da..e2baeaa053a5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -192,8 +192,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
  * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
  *	     faulted on a pte with its pkey=4.
  */
-static void fill_sig_info_pkey(int si_code, siginfo_t *info,
-		struct vm_area_struct *vma)
+static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
 {
 	/* This is effectively an #ifdef */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
@@ -209,7 +208,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
 	 * valid VMA, so we should never reach this without a
 	 * valid VMA.
 	 */
-	if (!vma) {
+	if (!pkey) {
 		WARN_ONCE(1, "PKU fault with no VMA passed in");
 		info->si_pkey = 0;
 		return;
@@ -219,13 +218,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
 	 * absolutely guranteed to be 100% accurate because of
 	 * the race explained above.
 	 */
-	info->si_pkey = vma_pkey(vma);
+	info->si_pkey = *pkey;
 }
 
 static void
 force_sig_info_fault(int si_signo, int si_code, unsigned long address,
-		     struct task_struct *tsk, struct vm_area_struct *vma,
-		     int fault)
+		     struct task_struct *tsk, u32 *pkey, int fault)
 {
 	unsigned lsb = 0;
 	siginfo_t info;
@@ -240,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
 		lsb = PAGE_SHIFT;
 	info.si_addr_lsb = lsb;
 
-	fill_sig_info_pkey(si_code, &info, vma);
+	fill_sig_info_pkey(si_code, &info, pkey);
 
 	force_sig_info(si_signo, &info, tsk);
 }
@@ -762,8 +760,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	struct task_struct *tsk = current;
 	unsigned long flags;
 	int sig;
-	/* No context means no VMA to pass down */
-	struct vm_area_struct *vma = NULL;
 
 	/* Are we prepared to handle this kernel fault? */
 	if (fixup_exception(regs, X86_TRAP_PF)) {
@@ -788,7 +784,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 
 			/* XXX: hwpoison faults will set the wrong code. */
 			force_sig_info_fault(signal, si_code, address,
-					     tsk, vma, 0);
+					     tsk, NULL, 0);
 		}
 
 		/*
@@ -896,8 +892,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 
 static void
 __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-		       unsigned long address, struct vm_area_struct *vma,
-		       int si_code)
+		       unsigned long address, u32 *pkey, int si_code)
 {
 	struct task_struct *tsk = current;
 
@@ -945,7 +940,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 		tsk->thread.error_code	= error_code;
 		tsk->thread.trap_nr	= X86_TRAP_PF;
 
-		force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0);
+		force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0);
 
 		return;
 	}
@@ -958,9 +953,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 
 static noinline void
 bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-		     unsigned long address, struct vm_area_struct *vma)
+		     unsigned long address, u32 *pkey)
 {
-	__bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR);
+	__bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR);
 }
 
 static void
@@ -968,6 +963,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
 	   unsigned long address,  struct vm_area_struct *vma, int si_code)
 {
 	struct mm_struct *mm = current->mm;
+	u32 pkey;
+
+	if (vma)
+		pkey = vma_pkey(vma);
 
 	/*
 	 * Something tried to access memory that isn't in our memory map..
@@ -975,7 +974,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
 	 */
 	up_read(&mm->mmap_sem);
 
-	__bad_area_nosemaphore(regs, error_code, address, vma, si_code);
+	__bad_area_nosemaphore(regs, error_code, address,
+			       (vma) ? &pkey : NULL, si_code);
 }
 
 static noinline void
@@ -1018,7 +1018,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
 
 static void
 do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
-	  struct vm_area_struct *vma, unsigned int fault)
+	  u32 *pkey, unsigned int fault)
 {
 	struct task_struct *tsk = current;
 	int code = BUS_ADRERR;
@@ -1045,13 +1045,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 		code = BUS_MCEERR_AR;
 	}
 #endif
-	force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
+	force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
 }
 
 static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
-	       unsigned long address, struct vm_area_struct *vma,
-	       unsigned int fault)
+	       unsigned long address, u32 *pkey, unsigned int fault)
 {
 	if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
 		no_context(regs, error_code, address, 0, 0);
@@ -1075,9 +1074,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	} else {
 		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
 			     VM_FAULT_HWPOISON_LARGE))
-			do_sigbus(regs, error_code, address, vma, fault);
+			do_sigbus(regs, error_code, address, pkey, fault);
 		else if (fault & VM_FAULT_SIGSEGV)
-			bad_area_nosemaphore(regs, error_code, address, vma);
+			bad_area_nosemaphore(regs, error_code, address, pkey);
 		else
 			BUG();
 	}
@@ -1267,6 +1266,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	struct mm_struct *mm;
 	int fault, major = 0;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	u32 pkey;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -1467,9 +1467,10 @@ good_area:
 		return;
 	}
 
+	pkey = vma_pkey(vma);
 	up_read(&mm->mmap_sem);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
-		mm_fault_error(regs, error_code, address, vma, fault);
+		mm_fault_error(regs, error_code, address, &pkey, fault);
 		return;
 	}
 
-- 
cgit 


From 7d7099433d9eaaa5a989a55f1fa354c16a3ad297 Mon Sep 17 00:00:00 2001
From: Sean Fu <fxinrong@gmail.com>
Date: Mon, 11 Sep 2017 08:33:21 +0800
Subject: x86/sysfs: Fix off-by-one error in loop termination

An off-by-one error in loop terminantion conditions in
create_setup_data_nodes() will lead to memory leak when
create_setup_data_node() failed.

Signed-off-by: Sean Fu <fxinrong@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1505090001-1157-1-git-send-email-fxinrong@gmail.com
---
 arch/x86/kernel/ksysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4b0592ca9e47..8c1cc08f514f 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject *parent)
 	return 0;
 
 out_clean_nodes:
-	for (j = i - 1; j > 0; j--)
+	for (j = i - 1; j >= 0; j--)
 		cleanup_setup_data_node(*(kobjp + j));
 	kfree(kobjp);
 out_setup_data_kobj:
-- 
cgit 


From 5ac751d9e6b187c4a0000879d6598eb2292db949 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Tue, 12 Sep 2017 19:40:00 +0300
Subject: x86: Don't cast away the __user in __get_user_asm_u64()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don't cast away the __user in __get_user_asm_u64() on x86-32.
Prevents sparse getting upset.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20170912164000.13745-1-ville.syrjala@linux.intel.com
---
 arch/x86/include/asm/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 78e8fcc87d4c..4b892917edeb 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -337,7 +337,7 @@ do {									\
 		     _ASM_EXTABLE(1b, 4b)				\
 		     _ASM_EXTABLE(2b, 4b)				\
 		     : "=r" (retval), "=&A"(x)				\
-		     : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1),	\
+		     : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1),	\
 		       "i" (errret), "0" (retval));			\
 })
 
-- 
cgit 


From b09c146f8f63c0e03adba74df76bf9c2be466fec Mon Sep 17 00:00:00 2001
From: Kan Liang <Kan.liang@intel.com>
Date: Fri, 8 Sep 2017 17:34:47 -0400
Subject: perf/x86/intel/cstate: Add missing CPU IDs

Skylake server uses the same C-state residency events as Sandy Bridge.

Denverton and Gemini lake use the same C-state residency events as
Apollo Lake.

Signed-off-by: Kan Liang <Kan.liang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: ak@linux.intel.com
Cc: peterz@infradead.org
Cc: piotr.luc@intel.com
Cc: harry.pan@intel.com
Cc: srinivas.pandruvada@linux.intel.com
Link: http://lkml.kernel.org/r/20170908213449.6224-1-kan.liang@intel.com
---
 arch/x86/events/intel/cstate.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 4cf100ff2a37..72db0664a53d 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -552,6 +552,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE,  snb_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
 
 	X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE,  snb_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
@@ -560,6 +561,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
 
 	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
+
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
-- 
cgit 


From 1aaccc40a1864053da26605b0297be16dd52641e Mon Sep 17 00:00:00 2001
From: Kan Liang <Kan.liang@intel.com>
Date: Fri, 8 Sep 2017 17:34:48 -0400
Subject: perf/x86/msr: Add missing CPU IDs

Goldmont, Glodmont plus and Xeon Phi have MSR_SMI_COUNT as well.

Signed-off-by: Kan Liang <Kan.liang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: ak@linux.intel.com
Cc: peterz@infradead.org
Cc: piotr.luc@intel.com
Cc: harry.pan@intel.com
Cc: srinivas.pandruvada@linux.intel.com
Link: http://lkml.kernel.org/r/20170908213449.6224-2-kan.liang@intel.com
---
 arch/x86/events/msr.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 4bb3ec69e8ea..06723671ae4e 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -63,6 +63,14 @@ static bool test_intel(int idx)
 	case INTEL_FAM6_ATOM_SILVERMONT1:
 	case INTEL_FAM6_ATOM_SILVERMONT2:
 	case INTEL_FAM6_ATOM_AIRMONT:
+
+	case INTEL_FAM6_ATOM_GOLDMONT:
+	case INTEL_FAM6_ATOM_DENVERTON:
+
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+
+	case INTEL_FAM6_XEON_PHI_KNL:
+	case INTEL_FAM6_XEON_PHI_KNM:
 		if (idx == PERF_MSR_SMI)
 			return true;
 		break;
-- 
cgit 


From 450a97893559354b927c935f39ee11126f01f520 Mon Sep 17 00:00:00 2001
From: Kan Liang <Kan.liang@intel.com>
Date: Fri, 8 Sep 2017 17:34:49 -0400
Subject: perf/x86/intel/rapl: Add missing CPU IDs

DENVERTON and GEMINI_LAKE support same RAPL counters as Apollo Lake.

Signed-off-by: Kan Liang <Kan.liang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: ak@linux.intel.com
Cc: peterz@infradead.org
Cc: piotr.luc@intel.com
Cc: harry.pan@intel.com
Cc: srinivas.pandruvada@linux.intel.com
Link: http://lkml.kernel.org/r/20170908213449.6224-3-kan.liang@intel.com
---
 arch/x86/events/intel/rapl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 8e2457cb6b4a..005908ee9333 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -775,6 +775,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
+
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
 	{},
 };
 
-- 
cgit 


From 29b46dfb136cdbeece542b3f01115237e43f2855 Mon Sep 17 00:00:00 2001
From: Kan Liang <Kan.liang@intel.com>
Date: Mon, 11 Sep 2017 10:10:15 -0700
Subject: perf/x86/intel/uncore: Correct num_boxes for IIO and IRP

There are 6 IIO/IRP boxes for CBDMA, PCIe0-2, MCP 0 and MCP 1
separately. Correct the num_boxes.

Signed-off-by: Kan Liang <Kan.liang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: ak@linux.intel.com
Cc: peterz@infradead.org
Cc: eranian@google.com
Cc: acme@kernel.org
Link: http://lkml.kernel.org/r/1505149816-12580-1-git-send-email-kan.liang@intel.com
---
 arch/x86/events/intel/uncore_snbep.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index db1fe377e6dd..a7196818416a 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3462,7 +3462,7 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
 static struct intel_uncore_type skx_uncore_iio = {
 	.name			= "iio",
 	.num_counters		= 4,
-	.num_boxes		= 5,
+	.num_boxes		= 6,
 	.perf_ctr_bits		= 48,
 	.event_ctl		= SKX_IIO0_MSR_PMON_CTL0,
 	.perf_ctr		= SKX_IIO0_MSR_PMON_CTR0,
@@ -3492,7 +3492,7 @@ static const struct attribute_group skx_uncore_format_group = {
 static struct intel_uncore_type skx_uncore_irp = {
 	.name			= "irp",
 	.num_counters		= 2,
-	.num_boxes		= 5,
+	.num_boxes		= 6,
 	.perf_ctr_bits		= 48,
 	.event_ctl		= SKX_IRP0_MSR_PMON_CTL0,
 	.perf_ctr		= SKX_IRP0_MSR_PMON_CTR0,
-- 
cgit 


From 0add53713b1c07a1c71e27a20e21eb7c180b4e7b Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@xilinx.com>
Date: Tue, 19 Sep 2017 16:40:21 +0200
Subject: microblaze: Add missing kvm_para.h to Kbuild

Running make allmodconfig;make is throwing compilation error:
  CC      kernel/watchdog.o
In file included from ./include/linux/kvm_para.h:4:0,
                 from kernel/watchdog.c:29:
./include/uapi/linux/kvm_para.h:32:26: fatal error: asm/kvm_para.h: No
such file or directory
 #include <asm/kvm_para.h>
                          ^
compilation terminated.
make[1]: *** [kernel/watchdog.o] Error 1
make: *** [kernel/watchdog.o] Error 2

Reported-by: Michal Hocko <mhocko@kernel.org>
Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Fixes: 83f0124ad81e87b ("microblaze: remove asm-generic wrapper headers")
Reviewed-by: Tobias Klauser <tklauser@distanz.ch>
Tested-by: Michal Hocko <mhocko@suse.com>
---
 arch/microblaze/include/uapi/asm/Kbuild | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index e77a596f3f1e..06609ca36115 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -7,6 +7,7 @@ generic-y += fcntl.h
 generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
+generic-y += kvm_para.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += param.h
-- 
cgit 


From 64c99853baca40e2f06038c4a926009edd14c7c3 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Thu, 21 Sep 2017 00:29:36 +0200
Subject: microblaze: Cocci spatch "vma_pages"

Use vma_pages function on vma object instead of explicit computation.
Found by coccinelle spatch "api/vma_pages.cocci"

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 arch/microblaze/kernel/dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index e45ada8fb006..94700c5270a9 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -165,7 +165,7 @@ int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 			     unsigned long attrs)
 {
 #ifdef CONFIG_MMU
-	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long user_count = vma_pages(vma);
 	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long off = vma->vm_pgoff;
 	unsigned long pfn;
-- 
cgit 


From 428dbf156cc5a8f9994d1f1a5c79373d15476f3c Mon Sep 17 00:00:00 2001
From: Babu Moger <babu.moger@oracle.com>
Date: Mon, 18 Sep 2017 10:53:29 -0600
Subject: arch: change default endian for microblaze

Fix the default for microblaze. Michal Simek mentioned default for
microblaze should be CPU_LITTLE_ENDIAN.

Fixes : commit 206d3642d8ee ("arch/microblaze: add choice for endianness
	and update Makefile")

Signed-off-by: Babu Moger <babu.moger@oracle.com>
Cc: Michal Simek <monstr@monstr.eu>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 arch/microblaze/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 9d26abdf0dc1..4f798aa671dd 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -39,7 +39,7 @@ config MICROBLAZE
 # Endianness selection
 choice
 	prompt "Endianness selection"
-	default CPU_BIG_ENDIAN
+	default CPU_LITTLE_ENDIAN
 	help
 	  microblaze architectures can be configured for either little or
 	  big endian formats. Be sure to select the appropriate mode.
-- 
cgit 


From 89975bd335f37b96ffd3cc24b9effb1fa25e7788 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 20 Sep 2017 16:41:34 -0300
Subject: perf tools: Get all of tools/{arch,include}/ in the MANIFEST

Now that I'm switching the container builds from using a local volume
pointing to the kernel repository with the perf sources, instead getting
a detached tarball to be able to use a container cluster, some places
broke because I forgot to put some of the required files in
tools/perf/MANIFEST, namely some bitsperlong.h files.

So, to fix it do the same as for tools/build/ and pack the whole
tools/arch/ directory.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-wmenpjfjsobwdnfde30qqncj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/MANIFEST | 87 ++---------------------------------------------------
 1 file changed, 2 insertions(+), 85 deletions(-)

diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 62072822dc85..627b7cada144 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,34 +1,8 @@
 tools/perf
-tools/arch/alpha/include/asm/barrier.h
-tools/arch/arm/include/asm/barrier.h
-tools/arch/arm64/include/asm/barrier.h
-tools/arch/ia64/include/asm/barrier.h
-tools/arch/mips/include/asm/barrier.h
-tools/arch/powerpc/include/asm/barrier.h
-tools/arch/s390/include/asm/barrier.h
-tools/arch/sh/include/asm/barrier.h
-tools/arch/sparc/include/asm/barrier.h
-tools/arch/sparc/include/asm/barrier_32.h
-tools/arch/sparc/include/asm/barrier_64.h
-tools/arch/tile/include/asm/barrier.h
-tools/arch/x86/include/asm/barrier.h
-tools/arch/x86/include/asm/cmpxchg.h
-tools/arch/x86/include/asm/cpufeatures.h
-tools/arch/x86/include/asm/disabled-features.h
-tools/arch/x86/include/asm/required-features.h
-tools/arch/x86/include/uapi/asm/svm.h
-tools/arch/x86/include/uapi/asm/vmx.h
-tools/arch/x86/include/uapi/asm/kvm.h
-tools/arch/x86/include/uapi/asm/kvm_perf.h
-tools/arch/x86/lib/memcpy_64.S
-tools/arch/x86/lib/memset_64.S
-tools/arch/s390/include/uapi/asm/kvm_perf.h
-tools/arch/s390/include/uapi/asm/sie.h
-tools/arch/xtensa/include/asm/barrier.h
+tools/arch
 tools/scripts
 tools/build
-tools/arch/x86/include/asm/atomic.h
-tools/arch/x86/include/asm/rmwcc.h
+tools/include
 tools/lib/traceevent
 tools/lib/api
 tools/lib/bpf
@@ -42,60 +16,3 @@ tools/lib/find_bit.c
 tools/lib/bitmap.c
 tools/lib/str_error_r.c
 tools/lib/vsprintf.c
-tools/include/asm/alternative-asm.h
-tools/include/asm/atomic.h
-tools/include/asm/barrier.h
-tools/include/asm/bug.h
-tools/include/asm-generic/atomic-gcc.h
-tools/include/asm-generic/barrier.h
-tools/include/asm-generic/bitops/arch_hweight.h
-tools/include/asm-generic/bitops/atomic.h
-tools/include/asm-generic/bitops/const_hweight.h
-tools/include/asm-generic/bitops/__ffs.h
-tools/include/asm-generic/bitops/__ffz.h
-tools/include/asm-generic/bitops/__fls.h
-tools/include/asm-generic/bitops/find.h
-tools/include/asm-generic/bitops/fls64.h
-tools/include/asm-generic/bitops/fls.h
-tools/include/asm-generic/bitops/hweight.h
-tools/include/asm-generic/bitops.h
-tools/include/linux/atomic.h
-tools/include/linux/bitops.h
-tools/include/linux/compiler.h
-tools/include/linux/compiler-gcc.h
-tools/include/linux/coresight-pmu.h
-tools/include/linux/bug.h
-tools/include/linux/filter.h
-tools/include/linux/hash.h
-tools/include/linux/kernel.h
-tools/include/linux/list.h
-tools/include/linux/log2.h
-tools/include/uapi/asm-generic/fcntl.h
-tools/include/uapi/asm-generic/ioctls.h
-tools/include/uapi/asm-generic/mman-common.h
-tools/include/uapi/asm-generic/mman.h
-tools/include/uapi/drm/drm.h
-tools/include/uapi/drm/i915_drm.h
-tools/include/uapi/linux/bpf.h
-tools/include/uapi/linux/bpf_common.h
-tools/include/uapi/linux/fcntl.h
-tools/include/uapi/linux/hw_breakpoint.h
-tools/include/uapi/linux/kvm.h
-tools/include/uapi/linux/mman.h
-tools/include/uapi/linux/perf_event.h
-tools/include/uapi/linux/sched.h
-tools/include/uapi/linux/stat.h
-tools/include/uapi/linux/vhost.h
-tools/include/uapi/sound/asound.h
-tools/include/linux/poison.h
-tools/include/linux/rbtree.h
-tools/include/linux/rbtree_augmented.h
-tools/include/linux/refcount.h
-tools/include/linux/string.h
-tools/include/linux/stringify.h
-tools/include/linux/types.h
-tools/include/linux/err.h
-tools/include/linux/bitmap.h
-tools/include/linux/time64.h
-tools/arch/*/include/uapi/asm/mman.h
-tools/arch/*/include/uapi/asm/perf_regs.h
-- 
cgit 


From 549a3976523c69a0245c0a310210c824a0b26e35 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 13 Sep 2017 09:38:23 +0200
Subject: tools include: Sync kernel ABI headers with tooling headers

Time for a sync with ABI/uapi headers with the upcoming v4.14 kernel.

None of the ABI changes require any source code level changes to our
existing in-kernel tooling code:

  - tools/arch/s390/include/uapi/asm/kvm.h:

      New KVM_S390_VM_TOD_EXT ABI, not used by in-kernel tooling.

  - tools/arch/x86/include/asm/cpufeatures.h:
    tools/arch/x86/include/asm/disabled-features.h:

      New PCID, SME and VGIF x86 CPU feature bits defined.

  - tools/include/asm-generic/hugetlb_encode.h:
    tools/include/uapi/asm-generic/mman-common.h:
    tools/include/uapi/linux/mman.h:

      Two new madvise() flags, plus a hugetlb system call mmap flags
      restructuring/extension changes.

  - tools/include/uapi/drm/drm.h:
    tools/include/uapi/drm/i915_drm.h:

      New drm_syncobj_create flags definitions, new drm_syncobj_wait
      and drm_syncobj_array ABIs. DRM_I915_PERF_* calls and a new
      I915_PARAM_HAS_EXEC_FENCE_ARRAY ABI for the Intel driver.

  - tools/include/uapi/linux/bpf.h:

      New bpf_sock fields (::mark and ::priority), new XDP_REDIRECT
      action, new kvm_ppc_smmu_info fields (::data_keys, instr_keys)

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Taeung Song <treeze.taeung@gmail.com>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Yao Jin <yao.jin@linux.intel.com>
Link: http://lkml.kernel.org/r/20170913073823.lxmi4c7ejqlfabjx@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/s390/include/uapi/asm/kvm.h         |  6 +++
 tools/arch/x86/include/asm/cpufeatures.h       |  2 +
 tools/arch/x86/include/asm/disabled-features.h |  4 +-
 tools/include/asm-generic/hugetlb_encode.h     | 34 +++++++++++++++++
 tools/include/uapi/asm-generic/mman-common.h   | 14 ++-----
 tools/include/uapi/drm/drm.h                   | 22 +++++++++++
 tools/include/uapi/drm/i915_drm.h              | 51 +++++++++++++++++++++++++-
 tools/include/uapi/linux/bpf.h                 | 32 ++++++++++------
 tools/include/uapi/linux/kvm.h                 |  3 +-
 tools/include/uapi/linux/mman.h                | 24 +++++++++++-
 10 files changed, 164 insertions(+), 28 deletions(-)
 create mode 100644 tools/include/asm-generic/hugetlb_encode.h

diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 69d09c39bbcd..cd7359e23d86 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req {
 /* kvm attributes for KVM_S390_VM_TOD */
 #define KVM_S390_VM_TOD_LOW		0
 #define KVM_S390_VM_TOD_HIGH		1
+#define KVM_S390_VM_TOD_EXT		2
+
+struct kvm_s390_vm_tod_clock {
+	__u8  epoch_idx;
+	__u64 tod;
+};
 
 /* kvm attributes for KVM_S390_VM_CPU_MODEL */
 /* processor related attributes are r/w */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 8ea315a11fe0..2519c6c801c9 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -196,6 +196,7 @@
 
 #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME		( 7*32+10) /* AMD Secure Memory Encryption */
 
 #define X86_FEATURE_INTEL_PPIN	( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
@@ -287,6 +288,7 @@
 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
 #define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF	(15*32+16) /* Virtual GIF */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
 #define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 5dff775af7cd..c10c9128f54e 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -21,11 +21,13 @@
 # define DISABLE_K6_MTRR	(1<<(X86_FEATURE_K6_MTRR & 31))
 # define DISABLE_CYRIX_ARR	(1<<(X86_FEATURE_CYRIX_ARR & 31))
 # define DISABLE_CENTAUR_MCR	(1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID		0
 #else
 # define DISABLE_VME		0
 # define DISABLE_K6_MTRR	0
 # define DISABLE_CYRIX_ARR	0
 # define DISABLE_CENTAUR_MCR	0
+# define DISABLE_PCID		(1<<(X86_FEATURE_PCID & 31))
 #endif /* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -49,7 +51,7 @@
 #define DISABLED_MASK1	0
 #define DISABLED_MASK2	0
 #define DISABLED_MASK3	(DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4	0
+#define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK5	0
 #define DISABLED_MASK6	0
 #define DISABLED_MASK7	0
diff --git a/tools/include/asm-generic/hugetlb_encode.h b/tools/include/asm-generic/hugetlb_encode.h
new file mode 100644
index 000000000000..e4732d3c2998
--- /dev/null
+++ b/tools/include/asm-generic/hugetlb_encode.h
@@ -0,0 +1,34 @@
+#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_
+#define _ASM_GENERIC_HUGETLB_ENCODE_H_
+
+/*
+ * Several system calls take a flag to request "hugetlb" huge pages.
+ * Without further specification, these system calls will use the
+ * system's default huge page size.  If a system supports multiple
+ * huge page sizes, the desired huge page size can be specified in
+ * bits [26:31] of the flag arguments.  The value in these 6 bits
+ * will encode the log2 of the huge page size.
+ *
+ * The following definitions are associated with this huge page size
+ * encoding in flag arguments.  System call specific header files
+ * that use this encoding should include this file.  They can then
+ * provide definitions based on these with their own specific prefix.
+ * for example:
+ * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+ */
+
+#define HUGETLB_FLAG_ENCODE_SHIFT	26
+#define HUGETLB_FLAG_ENCODE_MASK	0x3f
+
+#define HUGETLB_FLAG_ENCODE_64KB	(16 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512KB	(19 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1MB		(20 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2MB		(21 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_8MB		(23 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16MB	(24 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_256MB	(28 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1GB		(30 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2GB		(31 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16GB	(34 << HUGETLB_FLAG_ENCODE_SHIFT)
+
+#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index 8c27db0c5c08..203268f9231e 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -58,20 +58,12 @@
 					   overrides the coredump filter bits */
 #define MADV_DODUMP	17		/* Clear the MADV_DONTDUMP flag */
 
+#define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT	26
-#define MAP_HUGE_MASK	0x3f
-
 #define PKEY_DISABLE_ACCESS	0x1
 #define PKEY_DISABLE_WRITE	0x2
 #define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 101593ab10ac..97677cd6964d 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -700,6 +700,7 @@ struct drm_prime_handle {
 
 struct drm_syncobj_create {
 	__u32 handle;
+#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
 	__u32 flags;
 };
 
@@ -718,6 +719,24 @@ struct drm_syncobj_handle {
 	__u32 pad;
 };
 
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+struct drm_syncobj_wait {
+	__u64 handles;
+	/* absolute timeout */
+	__s64 timeout_nsec;
+	__u32 count_handles;
+	__u32 flags;
+	__u32 first_signaled; /* only valid when not waiting all */
+	__u32 pad;
+};
+
+struct drm_syncobj_array {
+	__u64 handles;
+	__u32 count_handles;
+	__u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
@@ -840,6 +859,9 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_DESTROY	DRM_IOWR(0xC0, struct drm_syncobj_destroy)
 #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD	DRM_IOWR(0xC1, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE	DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT		DRM_IOWR(0xC3, struct drm_syncobj_wait)
+#define DRM_IOCTL_SYNCOBJ_RESET		DRM_IOWR(0xC4, struct drm_syncobj_array)
+#define DRM_IOCTL_SYNCOBJ_SIGNAL	DRM_IOWR(0xC5, struct drm_syncobj_array)
 
 /**
  * Device specific ioctls should only be in their respective headers
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 7ccbd6a2bbe0..6598fb76d2c2 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -260,6 +260,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_CONTEXT_GETPARAM	0x34
 #define DRM_I915_GEM_CONTEXT_SETPARAM	0x35
 #define DRM_I915_PERF_OPEN		0x36
+#define DRM_I915_PERF_ADD_CONFIG	0x37
+#define DRM_I915_PERF_REMOVE_CONFIG	0x38
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -315,6 +317,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
 #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
 #define DRM_IOCTL_I915_PERF_OPEN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
+#define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
+#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -431,6 +435,11 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_BATCH_FIRST	 48
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * drm_i915_gem_exec_fence structures.  See I915_EXEC_FENCE_ARRAY.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -812,6 +821,17 @@ struct drm_i915_gem_exec_object2 {
 	__u64 rsvd2;
 };
 
+struct drm_i915_gem_exec_fence {
+	/**
+	 * User's handle for a drm_syncobj to wait on or signal.
+	 */
+	__u32 handle;
+
+#define I915_EXEC_FENCE_WAIT            (1<<0)
+#define I915_EXEC_FENCE_SIGNAL          (1<<1)
+	__u32 flags;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
@@ -826,7 +846,11 @@ struct drm_i915_gem_execbuffer2 {
 	__u32 DR1;
 	__u32 DR4;
 	__u32 num_cliprects;
-	/** This is a struct drm_clip_rect *cliprects */
+	/**
+	 * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
+	 * is not set.  If I915_EXEC_FENCE_ARRAY is set, then this is a
+	 * struct drm_i915_gem_exec_fence *fences.
+	 */
 	__u64 cliprects_ptr;
 #define I915_EXEC_RING_MASK              (7<<0)
 #define I915_EXEC_DEFAULT                (0<<0)
@@ -927,7 +951,14 @@ struct drm_i915_gem_execbuffer2 {
  * element).
  */
 #define I915_EXEC_BATCH_FIRST		(1<<18)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1))
+
+/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr
+ * define an array of i915_gem_exec_fence structures which specify a set of
+ * dma fences to wait upon or signal.
+ */
+#define I915_EXEC_FENCE_ARRAY   (1<<19)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
@@ -1467,6 +1498,22 @@ enum drm_i915_perf_record_type {
 	DRM_I915_PERF_RECORD_MAX /* non-ABI */
 };
 
+/**
+ * Structure to upload perf dynamic configuration into the kernel.
+ */
+struct drm_i915_perf_oa_config {
+	/** String formatted like "%08x-%04x-%04x-%04x-%012x" */
+	char uuid[36];
+
+	__u32 n_mux_regs;
+	__u32 n_boolean_regs;
+	__u32 n_flex_regs;
+
+	__u64 __user mux_regs_ptr;
+	__u64 __user boolean_regs_ptr;
+	__u64 __user flex_regs_ptr;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 461811e57140..43ab5c402f98 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -143,12 +143,6 @@ enum bpf_attach_type {
 
 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 
-enum bpf_sockmap_flags {
-	BPF_SOCKMAP_UNSPEC,
-	BPF_SOCKMAP_STRPARSER,
-	__MAX_BPF_SOCKMAP_FLAG
-};
-
 /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
  * to the given target_fd cgroup the descendent cgroup will be able to
  * override effective bpf program that was inherited from this cgroup
@@ -368,9 +362,20 @@ union bpf_attr {
  * int bpf_redirect(ifindex, flags)
  *     redirect to another netdev
  *     @ifindex: ifindex of the net device
- *     @flags: bit 0 - if set, redirect to ingress instead of egress
- *             other bits - reserved
- *     Return: TC_ACT_REDIRECT
+ *     @flags:
+ *	  cls_bpf:
+ *          bit 0 - if set, redirect to ingress instead of egress
+ *          other bits - reserved
+ *	  xdp_bpf:
+ *	    all bits - reserved
+ *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
+ *	       xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
+ * int bpf_redirect_map(map, key, flags)
+ *     redirect to endpoint in map
+ *     @map: pointer to dev map
+ *     @key: index in map to lookup
+ *     @flags: --
+ *     Return: XDP_REDIRECT on success or XDP_ABORT on error
  *
  * u32 bpf_get_route_realm(skb)
  *     retrieve a dst's tclassid
@@ -632,7 +637,7 @@ union bpf_attr {
 	FN(skb_adjust_room),		\
 	FN(redirect_map),		\
 	FN(sk_redirect_map),		\
-	FN(sock_map_update),
+	FN(sock_map_update),		\
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -753,20 +758,23 @@ struct bpf_sock {
 	__u32 family;
 	__u32 type;
 	__u32 protocol;
+	__u32 mark;
+	__u32 priority;
 };
 
 #define XDP_PACKET_HEADROOM 256
 
 /* User return codes for XDP prog type.
  * A valid XDP program must return one of these defined values. All other
- * return codes are reserved for future use. Unknown return codes will result
- * in packet drop.
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
  */
 enum xdp_action {
 	XDP_ABORTED = 0,
 	XDP_DROP,
 	XDP_PASS,
 	XDP_TX,
+	XDP_REDIRECT,
 };
 
 /* user accessible metadata for XDP packet hook
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 6cd63c18708a..838887587411 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size {
 struct kvm_ppc_smmu_info {
 	__u64 flags;
 	__u32 slb_size;
-	__u32 pad;
+	__u16 data_keys;	/* # storage keys supported for data */
+	__u16 instr_keys;	/* # storage keys supported for instructions */
 	struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
 
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index 81d8edf11789..a937480d7cd3 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -1,7 +1,8 @@
 #ifndef _UAPI_LINUX_MMAN_H
 #define _UAPI_LINUX_MMAN_H
 
-#include <uapi/asm/mman.h>
+#include <asm/mman.h>
+#include <asm-generic/hugetlb_encode.h>
 
 #define MREMAP_MAYMOVE	1
 #define MREMAP_FIXED	2
@@ -10,4 +11,25 @@
 #define OVERCOMMIT_ALWAYS		1
 #define OVERCOMMIT_NEVER		2
 
+/*
+ * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
+ * size other than the default is desired.  See hugetlb_encode.h.
+ * All known huge page size encodings are provided here.  It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system.  See mmap(2) man page for details.
+ */
+#define MAP_HUGE_SHIFT	HUGETLB_FLAG_ENCODE_SHIFT
+#define MAP_HUGE_MASK	HUGETLB_FLAG_ENCODE_MASK
+
+#define MAP_HUGE_64KB	HUGETLB_FLAG_ENCODE_64KB
+#define MAP_HUGE_512KB	HUGETLB_FLAG_ENCODE_512KB
+#define MAP_HUGE_1MB	HUGETLB_FLAG_ENCODE_1MB
+#define MAP_HUGE_2MB	HUGETLB_FLAG_ENCODE_2MB
+#define MAP_HUGE_8MB	HUGETLB_FLAG_ENCODE_8MB
+#define MAP_HUGE_16MB	HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_256MB	HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_1GB	HUGETLB_FLAG_ENCODE_1GB
+#define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
+#define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
+
 #endif /* _UAPI_LINUX_MMAN_H */
-- 
cgit 


From f1e52f14a69386ac460a8d700df0647a631cf595 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 22 Sep 2017 15:41:44 -0300
Subject: perf evsel: Fix attr.exclude_kernel setting for default cycles:p

Yet another fix for probing the max attr.precise_ip setting: it is not
enough settting attr.exclude_kernel for !root users, as they _can_
profile the kernel if the kernel.perf_event_paranoid sysctl is set to
-1, so check that as well.

Testing it:

As non root:

  $ sysctl kernel.perf_event_paranoid
  kernel.perf_event_paranoid = 2
  $ perf record sleep 1
  $ perf evlist -v
  cycles:uppp: ..., exclude_kernel: 1, ... precise_ip: 3, ...

Now as non-root, but with kernel.perf_event_paranoid set set to the
most permissive value, -1:

  $ sysctl kernel.perf_event_paranoid
  kernel.perf_event_paranoid = -1
  $ perf record sleep 1
  $ perf evlist -v
  cycles:ppp: ..., exclude_kernel: 0, ... precise_ip: 3, ...
  $

I.e. non-root, default kernel.perf_event_paranoid: :uppp modifier = not allowed to sample the kernel,
     non-root, most permissible kernel.perf_event_paranoid: :ppp = allowed to sample the kernel.

In both cases, use the highest available precision: attr.precise_ip = 3.

Reported-and-Tested-by: Ingo Molnar <mingo@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: d37a36979077 ("perf evsel: Fix attr.exclude_kernel setting for default cycles:p")
Link: http://lkml.kernel.org/n/tip-nj2qkf75xsd6pw6hhjzfqqdx@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4bb89373eb52..0dccdb89572c 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -271,12 +271,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
 	return evsel;
 }
 
+static bool perf_event_can_profile_kernel(void)
+{
+	return geteuid() == 0 || perf_event_paranoid() == -1;
+}
+
 struct perf_evsel *perf_evsel__new_cycles(bool precise)
 {
 	struct perf_event_attr attr = {
 		.type	= PERF_TYPE_HARDWARE,
 		.config	= PERF_COUNT_HW_CPU_CYCLES,
-		.exclude_kernel	= geteuid() != 0,
+		.exclude_kernel	= !perf_event_can_profile_kernel(),
 	};
 	struct perf_evsel *evsel;
 
-- 
cgit 


From 44d8143340a99b167c74365e844516b73523c087 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 21 Sep 2017 13:57:40 -0700
Subject: KEYS: fix cred refcount leak in request_key_auth_new()

In request_key_auth_new(), if key_alloc() or key_instantiate_and_link()
were to fail, we would leak a reference to the 'struct cred'.  Currently
this can only happen if key_alloc() fails to allocate memory.  But it
still should be fixed, as it is a more severe bug waiting to happen.

Fix it by cleaning things up to use a helper function which frees a
'struct request_key_auth' correctly.

Fixes: d84f4f992cbd ("CRED: Inaugurate COW credentials")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 security/keys/request_key_auth.c | 68 ++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 37 deletions(-)

diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index afe9d22ab361..69d6b3b35470 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -120,6 +120,18 @@ static void request_key_auth_revoke(struct key *key)
 	}
 }
 
+static void free_request_key_auth(struct request_key_auth *rka)
+{
+	if (!rka)
+		return;
+	key_put(rka->target_key);
+	key_put(rka->dest_keyring);
+	if (rka->cred)
+		put_cred(rka->cred);
+	kfree(rka->callout_info);
+	kfree(rka);
+}
+
 /*
  * Destroy an instantiation authorisation token key.
  */
@@ -129,15 +141,7 @@ static void request_key_auth_destroy(struct key *key)
 
 	kenter("{%d}", key->serial);
 
-	if (rka->cred) {
-		put_cred(rka->cred);
-		rka->cred = NULL;
-	}
-
-	key_put(rka->target_key);
-	key_put(rka->dest_keyring);
-	kfree(rka->callout_info);
-	kfree(rka);
+	free_request_key_auth(rka);
 }
 
 /*
@@ -151,22 +155,17 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 	const struct cred *cred = current->cred;
 	struct key *authkey = NULL;
 	char desc[20];
-	int ret;
+	int ret = -ENOMEM;
 
 	kenter("%d,", target->serial);
 
 	/* allocate a auth record */
-	rka = kmalloc(sizeof(*rka), GFP_KERNEL);
-	if (!rka) {
-		kleave(" = -ENOMEM");
-		return ERR_PTR(-ENOMEM);
-	}
+	rka = kzalloc(sizeof(*rka), GFP_KERNEL);
+	if (!rka)
+		goto error;
 	rka->callout_info = kmalloc(callout_len, GFP_KERNEL);
-	if (!rka->callout_info) {
-		kleave(" = -ENOMEM");
-		kfree(rka);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (!rka->callout_info)
+		goto error_free_rka;
 
 	/* see if the calling process is already servicing the key request of
 	 * another process */
@@ -176,8 +175,12 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 
 		/* if the auth key has been revoked, then the key we're
 		 * servicing is already instantiated */
-		if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags))
-			goto auth_key_revoked;
+		if (test_bit(KEY_FLAG_REVOKED,
+			     &cred->request_key_auth->flags)) {
+			up_read(&cred->request_key_auth->sem);
+			ret = -EKEYREVOKED;
+			goto error_free_rka;
+		}
 
 		irka = cred->request_key_auth->payload.data[0];
 		rka->cred = get_cred(irka->cred);
@@ -205,32 +208,23 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 			    KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA, NULL);
 	if (IS_ERR(authkey)) {
 		ret = PTR_ERR(authkey);
-		goto error_alloc;
+		goto error_free_rka;
 	}
 
 	/* construct the auth key */
 	ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL);
 	if (ret < 0)
-		goto error_inst;
+		goto error_put_authkey;
 
 	kleave(" = {%d,%d}", authkey->serial, refcount_read(&authkey->usage));
 	return authkey;
 
-auth_key_revoked:
-	up_read(&cred->request_key_auth->sem);
-	kfree(rka->callout_info);
-	kfree(rka);
-	kleave("= -EKEYREVOKED");
-	return ERR_PTR(-EKEYREVOKED);
-
-error_inst:
+error_put_authkey:
 	key_revoke(authkey);
 	key_put(authkey);
-error_alloc:
-	key_put(rka->target_key);
-	key_put(rka->dest_keyring);
-	kfree(rka->callout_info);
-	kfree(rka);
+error_free_rka:
+	free_request_key_auth(rka);
+error:
 	kleave("= %d", ret);
 	return ERR_PTR(ret);
 }
-- 
cgit 


From f7b48cf08fa63a68b59c2894806ee478216d7f91 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 21 Sep 2017 13:57:41 -0700
Subject: KEYS: don't revoke uninstantiated key in request_key_auth_new()

If key_instantiate_and_link() were to fail (which fortunately isn't
possible currently), the call to key_revoke(authkey) would crash with a
NULL pointer dereference in request_key_auth_revoke() because the key
has not yet been instantiated.

Fix this by removing the call to key_revoke().  key_put() is sufficient,
as it's not possible for an uninstantiated authkey to have been used for
anything yet.

Fixes: b5f545c880a2 ("[PATCH] keys: Permit running process to instantiate keys")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 security/keys/request_key_auth.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 69d6b3b35470..e356075ed2f8 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -220,7 +220,6 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 	return authkey;
 
 error_put_authkey:
-	key_revoke(authkey);
 	key_put(authkey);
 error_free_rka:
 	free_request_key_auth(rka);
-- 
cgit 


From 884bee0215fcc239b30c062c37ca29077005e064 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Sep 2017 11:36:12 -0700
Subject: KEYS: fix key refcount leak in keyctl_assume_authority()

In keyctl_assume_authority(), if keyctl_change_reqkey_auth() were to
fail, we would leak the reference to the 'authkey'.  Currently this can
only happen if prepare_creds() fails to allocate memory.  But it still
should be fixed, as it is a more severe bug waiting to happen.

This patch also moves the read of 'authkey->serial' to before the
reference to the authkey is dropped.  Doing the read after dropping the
reference is very fragile because it assumes we still hold another
reference to the key.  (Which we do, in current->cred->request_key_auth,
but there's no reason not to write it in the "obviously correct" way.)

Fixes: d84f4f992cbd ("CRED: Inaugurate COW credentials")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 security/keys/keyctl.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index ab0b337c84b4..562f7fe287a0 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1406,11 +1406,9 @@ long keyctl_assume_authority(key_serial_t id)
 	}
 
 	ret = keyctl_change_reqkey_auth(authkey);
-	if (ret < 0)
-		goto error;
+	if (ret == 0)
+		ret = authkey->serial;
 	key_put(authkey);
-
-	ret = authkey->serial;
 error:
 	return ret;
 }
-- 
cgit 


From 7fc0786d956d9e59b68d282be9b156179846ea3d Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Sep 2017 11:36:31 -0700
Subject: KEYS: fix key refcount leak in keyctl_read_key()

In keyctl_read_key(), if key_permission() were to return an error code
other than EACCES, we would leak a the reference to the key.  This can't
actually happen currently because key_permission() can only return an
error code other than EACCES if security_key_permission() does, only
SELinux and Smack implement that hook, and neither can return an error
code other than EACCES.  But it should still be fixed, as it is a bug
waiting to happen.

Fixes: 29db91906340 ("[PATCH] Keys: Add LSM hooks for key management [try #3]")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 security/keys/keyctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 562f7fe287a0..aa1d11a29136 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -771,7 +771,7 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen)
 	if (ret == 0)
 		goto can_read_key;
 	if (ret != -EACCES)
-		goto error;
+		goto error2;
 
 	/* we can't; see if it's searchable from this process's keyrings
 	 * - we automatically take account of the fact that it may be
-- 
cgit 


From e645016abc803dafc75e4b8f6e4118f088900ffb Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Sep 2017 11:36:45 -0700
Subject: KEYS: fix writing past end of user-supplied buffer in keyring_read()

Userspace can call keyctl_read() on a keyring to get the list of IDs of
keys in the keyring.  But if the user-supplied buffer is too small, the
kernel would write the full list anyway --- which will corrupt whatever
userspace memory happened to be past the end of the buffer.  Fix it by
only filling the space that is available.

Fixes: b2a4df200d57 ("KEYS: Expand the capacity of a keyring")
Cc: <stable@vger.kernel.org>	[v3.13+]
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 security/keys/keyring.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index de81793f9920..94f038967c17 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -423,7 +423,7 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m)
 }
 
 struct keyring_read_iterator_context {
-	size_t			qty;
+	size_t			buflen;
 	size_t			count;
 	key_serial_t __user	*buffer;
 };
@@ -435,9 +435,9 @@ static int keyring_read_iterator(const void *object, void *data)
 	int ret;
 
 	kenter("{%s,%d},,{%zu/%zu}",
-	       key->type->name, key->serial, ctx->count, ctx->qty);
+	       key->type->name, key->serial, ctx->count, ctx->buflen);
 
-	if (ctx->count >= ctx->qty)
+	if (ctx->count >= ctx->buflen)
 		return 1;
 
 	ret = put_user(key->serial, ctx->buffer);
@@ -472,16 +472,12 @@ static long keyring_read(const struct key *keyring,
 		return 0;
 
 	/* Calculate how much data we could return */
-	ctx.qty = nr_keys * sizeof(key_serial_t);
-
 	if (!buffer || !buflen)
-		return ctx.qty;
-
-	if (buflen > ctx.qty)
-		ctx.qty = buflen;
+		return nr_keys * sizeof(key_serial_t);
 
 	/* Copy the IDs of the subscribed keys into the buffer */
 	ctx.buffer = (key_serial_t __user *)buffer;
+	ctx.buflen = buflen;
 	ctx.count = 0;
 	ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx);
 	if (ret < 0) {
-- 
cgit 


From 237bbd29f7a049d310d907f4b2716a7feef9abf3 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Sep 2017 11:37:03 -0700
Subject: KEYS: prevent creating a different user's keyrings

It was possible for an unprivileged user to create the user and user
session keyrings for another user.  For example:

    sudo -u '#3000' sh -c 'keyctl add keyring _uid.4000 "" @u
                           keyctl add keyring _uid_ses.4000 "" @u
                           sleep 15' &
    sleep 1
    sudo -u '#4000' keyctl describe @u
    sudo -u '#4000' keyctl describe @us

This is problematic because these "fake" keyrings won't have the right
permissions.  In particular, the user who created them first will own
them and will have full access to them via the possessor permissions,
which can be used to compromise the security of a user's keys:

    -4: alswrv-----v------------  3000     0 keyring: _uid.4000
    -5: alswrv-----v------------  3000     0 keyring: _uid_ses.4000

Fix it by marking user and user session keyrings with a flag
KEY_FLAG_UID_KEYRING.  Then, when searching for a user or user session
keyring by name, skip all keyrings that don't have the flag set.

Fixes: 69664cf16af4 ("keys: don't generate user and user session keyrings unless they're accessed")
Cc: <stable@vger.kernel.org>	[v2.6.26+]
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/key.h          |  2 ++
 security/keys/internal.h     |  2 +-
 security/keys/key.c          |  2 ++
 security/keys/keyring.c      | 23 ++++++++++++++---------
 security/keys/process_keys.c |  6 ++++--
 5 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/include/linux/key.h b/include/linux/key.h
index 044114185120..e315e16b6ff8 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -187,6 +187,7 @@ struct key {
 #define KEY_FLAG_BUILTIN	8	/* set if key is built in to the kernel */
 #define KEY_FLAG_ROOT_CAN_INVAL	9	/* set if key can be invalidated by root without permission */
 #define KEY_FLAG_KEEP		10	/* set if key should not be removed */
+#define KEY_FLAG_UID_KEYRING	11	/* set if key is a user or user session keyring */
 
 	/* the key type and key description string
 	 * - the desc is used to match a key against search criteria
@@ -243,6 +244,7 @@ extern struct key *key_alloc(struct key_type *type,
 #define KEY_ALLOC_NOT_IN_QUOTA		0x0002	/* not in quota */
 #define KEY_ALLOC_BUILT_IN		0x0004	/* Key is built into kernel */
 #define KEY_ALLOC_BYPASS_RESTRICTION	0x0008	/* Override the check on restricted keyrings */
+#define KEY_ALLOC_UID_KEYRING		0x0010	/* allocating a user or user session keyring */
 
 extern void key_revoke(struct key *key);
 extern void key_invalidate(struct key *key);
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 1c02c6547038..503adbae7b0d 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -141,7 +141,7 @@ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
 extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx);
 extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx);
 
-extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
+extern struct key *find_keyring_by_name(const char *name, bool uid_keyring);
 
 extern int install_user_keyrings(void);
 extern int install_thread_keyring_to_cred(struct cred *);
diff --git a/security/keys/key.c b/security/keys/key.c
index 83da68d98b40..e5c0896c3a8f 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -302,6 +302,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 		key->flags |= 1 << KEY_FLAG_IN_QUOTA;
 	if (flags & KEY_ALLOC_BUILT_IN)
 		key->flags |= 1 << KEY_FLAG_BUILTIN;
+	if (flags & KEY_ALLOC_UID_KEYRING)
+		key->flags |= 1 << KEY_FLAG_UID_KEYRING;
 
 #ifdef KEY_DEBUGGING
 	key->magic = KEY_DEBUG_MAGIC;
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 94f038967c17..4fa82a8a9c0e 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -1097,15 +1097,15 @@ found:
 /*
  * Find a keyring with the specified name.
  *
- * All named keyrings in the current user namespace are searched, provided they
- * grant Search permission directly to the caller (unless this check is
- * skipped).  Keyrings whose usage points have reached zero or who have been
- * revoked are skipped.
+ * Only keyrings that have nonzero refcount, are not revoked, and are owned by a
+ * user in the current user namespace are considered.  If @uid_keyring is %true,
+ * the keyring additionally must have been allocated as a user or user session
+ * keyring; otherwise, it must grant Search permission directly to the caller.
  *
  * Returns a pointer to the keyring with the keyring's refcount having being
  * incremented on success.  -ENOKEY is returned if a key could not be found.
  */
-struct key *find_keyring_by_name(const char *name, bool skip_perm_check)
+struct key *find_keyring_by_name(const char *name, bool uid_keyring)
 {
 	struct key *keyring;
 	int bucket;
@@ -1133,10 +1133,15 @@ struct key *find_keyring_by_name(const char *name, bool skip_perm_check)
 			if (strcmp(keyring->description, name) != 0)
 				continue;
 
-			if (!skip_perm_check &&
-			    key_permission(make_key_ref(keyring, 0),
-					   KEY_NEED_SEARCH) < 0)
-				continue;
+			if (uid_keyring) {
+				if (!test_bit(KEY_FLAG_UID_KEYRING,
+					      &keyring->flags))
+					continue;
+			} else {
+				if (key_permission(make_key_ref(keyring, 0),
+						   KEY_NEED_SEARCH) < 0)
+					continue;
+			}
 
 			/* we've got a match but we might end up racing with
 			 * key_cleanup() if the keyring is currently 'dead'
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 86bced9fdbdf..293d3598153b 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -77,7 +77,8 @@ int install_user_keyrings(void)
 		if (IS_ERR(uid_keyring)) {
 			uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID,
 						    cred, user_keyring_perm,
-						    KEY_ALLOC_IN_QUOTA,
+						    KEY_ALLOC_UID_KEYRING |
+							KEY_ALLOC_IN_QUOTA,
 						    NULL, NULL);
 			if (IS_ERR(uid_keyring)) {
 				ret = PTR_ERR(uid_keyring);
@@ -94,7 +95,8 @@ int install_user_keyrings(void)
 			session_keyring =
 				keyring_alloc(buf, user->uid, INVALID_GID,
 					      cred, user_keyring_perm,
-					      KEY_ALLOC_IN_QUOTA,
+					      KEY_ALLOC_UID_KEYRING |
+						  KEY_ALLOC_IN_QUOTA,
 					      NULL, NULL);
 			if (IS_ERR(session_keyring)) {
 				ret = PTR_ERR(session_keyring);
-- 
cgit 


From 37863c43b2c6464f252862bf2e9768264e961678 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Sep 2017 11:37:23 -0700
Subject: KEYS: prevent KEYCTL_READ on negative key

Because keyctl_read_key() looks up the key with no permissions
requested, it may find a negatively instantiated key.  If the key is
also possessed, we went ahead and called ->read() on the key.  But the
key payload will actually contain the ->reject_error rather than the
normal payload.  Thus, the kernel oopses trying to read the
user_key_payload from memory address (int)-ENOKEY = 0x00000000ffffff82.

Fortunately the payload data is stored inline, so it shouldn't be
possible to abuse this as an arbitrary memory read primitive...

Reproducer:
    keyctl new_session
    keyctl request2 user desc '' @s
    keyctl read $(keyctl show | awk '/user: desc/ {print $1}')

It causes a crash like the following:
     BUG: unable to handle kernel paging request at 00000000ffffff92
     IP: user_read+0x33/0xa0
     PGD 36a54067 P4D 36a54067 PUD 0
     Oops: 0000 [#1] SMP
     CPU: 0 PID: 211 Comm: keyctl Not tainted 4.14.0-rc1 #337
     Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-20170228_101828-anatol 04/01/2014
     task: ffff90aa3b74c3c0 task.stack: ffff9878c0478000
     RIP: 0010:user_read+0x33/0xa0
     RSP: 0018:ffff9878c047bee8 EFLAGS: 00010246
     RAX: 0000000000000001 RBX: ffff90aa3d7da340 RCX: 0000000000000017
     RDX: 0000000000000000 RSI: 00000000ffffff82 RDI: ffff90aa3d7da340
     RBP: ffff9878c047bf00 R08: 00000024f95da94f R09: 0000000000000000
     R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
     R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
     FS:  00007f58ece69740(0000) GS:ffff90aa3e200000(0000) knlGS:0000000000000000
     CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
     CR2: 00000000ffffff92 CR3: 0000000036adc001 CR4: 00000000003606f0
     Call Trace:
      keyctl_read_key+0xac/0xe0
      SyS_keyctl+0x99/0x120
      entry_SYSCALL_64_fastpath+0x1f/0xbe
     RIP: 0033:0x7f58ec787bb9
     RSP: 002b:00007ffc8d401678 EFLAGS: 00000206 ORIG_RAX: 00000000000000fa
     RAX: ffffffffffffffda RBX: 00007ffc8d402800 RCX: 00007f58ec787bb9
     RDX: 0000000000000000 RSI: 00000000174a63ac RDI: 000000000000000b
     RBP: 0000000000000004 R08: 00007ffc8d402809 R09: 0000000000000020
     R10: 0000000000000000 R11: 0000000000000206 R12: 00007ffc8d402800
     R13: 00007ffc8d4016e0 R14: 0000000000000000 R15: 0000000000000000
     Code: e5 41 55 49 89 f5 41 54 49 89 d4 53 48 89 fb e8 a4 b4 ad ff 85 c0 74 09 80 3d b9 4c 96 00 00 74 43 48 8b b3 20 01 00 00 4d 85 ed <0f> b7 5e 10 74 29 4d 85 e4 74 24 4c 39 e3 4c 89 e2 4c 89 ef 48
     RIP: user_read+0x33/0xa0 RSP: ffff9878c047bee8
     CR2: 00000000ffffff92

Fixes: 61ea0c0ba904 ("KEYS: Skip key state checks when checking for possession")
Cc: <stable@vger.kernel.org>	[v3.13+]
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 security/keys/keyctl.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index aa1d11a29136..365ff85d7e27 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -766,6 +766,11 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen)
 
 	key = key_ref_to_ptr(key_ref);
 
+	if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) {
+		ret = -ENOKEY;
+		goto error2;
+	}
+
 	/* see if we can read it directly */
 	ret = key_permission(key_ref, KEY_NEED_READ);
 	if (ret == 0)
-- 
cgit 


From 8f674565d405a8c0b36ee531849df87f43e72ed5 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Sep 2017 11:37:39 -0700
Subject: KEYS: reset parent each time before searching key_user_tree

In key_user_lookup(), if there is no key_user for the given uid, we drop
key_user_lock, allocate a new key_user, and search the tree again.  But
we failed to set 'parent' to NULL at the beginning of the second search.
If the tree were to be empty for the second search, the insertion would
be done with an invalid 'parent', scribbling over freed memory.

Fortunately this can't actually happen currently because the tree always
contains at least the root_key_user.  But it still should be fixed to
make the code more robust.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 security/keys/key.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/keys/key.c b/security/keys/key.c
index e5c0896c3a8f..eb914a838840 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -54,10 +54,10 @@ void __key_check(const struct key *key)
 struct key_user *key_user_lookup(kuid_t uid)
 {
 	struct key_user *candidate = NULL, *user;
-	struct rb_node *parent = NULL;
-	struct rb_node **p;
+	struct rb_node *parent, **p;
 
 try_again:
+	parent = NULL;
 	p = &key_user_tree.rb_node;
 	spin_lock(&key_user_lock);
 
-- 
cgit 


From 4aa68e07d845562561f5e73c04aa521376e95252 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Sep 2017 11:38:29 -0700
Subject: KEYS: restrict /proc/keys by credentials at open time

When checking for permission to view keys whilst reading from
/proc/keys, we should use the credentials with which the /proc/keys file
was opened.  This is because, in a classic type of exploit, it can be
possible to bypass checks for the *current* credentials by passing the
file descriptor to a suid program.

Following commit 34dbbcdbf633 ("Make file credentials available to the
seqfile interfaces") we can finally fix it.  So let's do it.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 security/keys/proc.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/security/keys/proc.c b/security/keys/proc.c
index bf08d02b6646..de834309d100 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -187,7 +187,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
 	struct keyring_search_context ctx = {
 		.index_key.type		= key->type,
 		.index_key.description	= key->description,
-		.cred			= current_cred(),
+		.cred			= m->file->f_cred,
 		.match_data.cmp		= lookup_user_key_possessed,
 		.match_data.raw_data	= key,
 		.match_data.lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
@@ -207,11 +207,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
 		}
 	}
 
-	/* check whether the current task is allowed to view the key (assuming
-	 * non-possession)
-	 * - the caller holds a spinlock, and thus the RCU read lock, making our
-	 *   access to __current_cred() safe
-	 */
+	/* check whether the current task is allowed to view the key */
 	rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW);
 	if (rc < 0)
 		return 0;
-- 
cgit 


From e007ce9c59bddd1e67b94bc29036d920f5c5428a Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 21 Sep 2017 13:57:42 -0700
Subject: KEYS: use kmemdup() in request_key_auth_new()

kmemdup() is preferred to kmalloc() followed by memcpy().

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 security/keys/request_key_auth.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index e356075ed2f8..6ebf1af8fce9 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -163,9 +163,10 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 	rka = kzalloc(sizeof(*rka), GFP_KERNEL);
 	if (!rka)
 		goto error;
-	rka->callout_info = kmalloc(callout_len, GFP_KERNEL);
+	rka->callout_info = kmemdup(callout_info, callout_len, GFP_KERNEL);
 	if (!rka->callout_info)
 		goto error_free_rka;
+	rka->callout_len = callout_len;
 
 	/* see if the calling process is already servicing the key request of
 	 * another process */
@@ -196,8 +197,6 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 
 	rka->target_key = key_get(target);
 	rka->dest_keyring = key_get(dest_keyring);
-	memcpy(rka->callout_info, callout_info, callout_len);
-	rka->callout_len = callout_len;
 
 	/* allocate the auth key */
 	sprintf(desc, "%x", target->serial);
-- 
cgit 


From c74aef2d06a9f59cece89093eecc552933cba72a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 22 Sep 2017 17:48:06 +0200
Subject: futex: Fix pi_state->owner serialization

There was a reported suspicion about a race between exit_pi_state_list()
and put_pi_state(). The same report mentioned the comment with
put_pi_state() said it should be called with hb->lock held, and it no
longer is in all places.

As it turns out, the pi_state->owner serialization is indeed broken. As per
the new rules:

  734009e96d19 ("futex: Change locking rules")

pi_state->owner should be serialized by pi_state->pi_mutex.wait_lock.
For the sites setting pi_state->owner we already hold wait_lock (where
required) but exit_pi_state_list() and put_pi_state() were not and
raced on clearing it.

Fixes: 734009e96d19 ("futex: Change locking rules")
Reported-by: Gratian Crisan <gratian.crisan@ni.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: dvhart@infradead.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20170922154806.jd3ffltfk24m4o4y@hirez.programming.kicks-ass.net
---
 kernel/futex.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 3d38eaf05492..0518a0bfc746 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -821,8 +821,6 @@ static void get_pi_state(struct futex_pi_state *pi_state)
 /*
  * Drops a reference to the pi_state object and frees or caches it
  * when the last reference is gone.
- *
- * Must be called with the hb lock held.
  */
 static void put_pi_state(struct futex_pi_state *pi_state)
 {
@@ -837,16 +835,22 @@ static void put_pi_state(struct futex_pi_state *pi_state)
 	 * and has cleaned up the pi_state already
 	 */
 	if (pi_state->owner) {
-		raw_spin_lock_irq(&pi_state->owner->pi_lock);
-		list_del_init(&pi_state->list);
-		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
+		struct task_struct *owner;
 
-		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
+		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+		owner = pi_state->owner;
+		if (owner) {
+			raw_spin_lock(&owner->pi_lock);
+			list_del_init(&pi_state->list);
+			raw_spin_unlock(&owner->pi_lock);
+		}
+		rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
+		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 	}
 
-	if (current->pi_state_cache)
+	if (current->pi_state_cache) {
 		kfree(pi_state);
-	else {
+	} else {
 		/*
 		 * pi_state->list is already empty.
 		 * clear pi_state->owner.
@@ -907,13 +911,14 @@ void exit_pi_state_list(struct task_struct *curr)
 		raw_spin_unlock_irq(&curr->pi_lock);
 
 		spin_lock(&hb->lock);
-
-		raw_spin_lock_irq(&curr->pi_lock);
+		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+		raw_spin_lock(&curr->pi_lock);
 		/*
 		 * We dropped the pi-lock, so re-check whether this
 		 * task still owns the PI-state:
 		 */
 		if (head->next != next) {
+			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 			spin_unlock(&hb->lock);
 			continue;
 		}
@@ -922,9 +927,10 @@ void exit_pi_state_list(struct task_struct *curr)
 		WARN_ON(list_empty(&pi_state->list));
 		list_del_init(&pi_state->list);
 		pi_state->owner = NULL;
-		raw_spin_unlock_irq(&curr->pi_lock);
+		raw_spin_unlock(&curr->pi_lock);
 
 		get_pi_state(pi_state);
+		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 		spin_unlock(&hb->lock);
 
 		rt_mutex_futex_unlock(&pi_state->pi_mutex);
@@ -1208,6 +1214,10 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
 
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
+	/*
+	 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
+	 * because there is no concurrency as the object is not published yet.
+	 */
 	pi_state->owner = p;
 	raw_spin_unlock_irq(&p->pi_lock);
 
@@ -2878,6 +2888,7 @@ retry:
 		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
 		spin_unlock(&hb->lock);
 
+		/* drops pi_state->pi_mutex.wait_lock */
 		ret = wake_futex_pi(uaddr, uval, pi_state);
 
 		put_pi_state(pi_state);
-- 
cgit 


From 2827a418ca1b23e432e62c9b3d0e7cf3255dfe88 Mon Sep 17 00:00:00 2001
From: Alexandru Moise <00moses.alexander00@gmail.com>
Date: Tue, 19 Sep 2017 22:04:12 +0200
Subject: genirq: Check __free_irq() return value for NULL

__free_irq() can return a NULL irqaction for example when trying to free
already-free IRQ, but the callsite unconditionally dereferences the
returned pointer.

Fix this by adding a check and return NULL.

Signed-off-by: Alexandru Moise <00moses.alexander00@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20170919200412.GA29985@gmail.com
---
 kernel/irq/manage.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 573dc52b0806..d00132b5c325 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1643,6 +1643,10 @@ const void *free_irq(unsigned int irq, void *dev_id)
 #endif
 
 	action = __free_irq(irq, dev_id);
+
+	if (!action)
+		return NULL;
+
 	devname = action->name;
 	kfree(action);
 	return devname;
-- 
cgit 


From 02a4843618fb35f847cf8c31cd3893873aa0edde Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 13 Sep 2017 09:17:57 -0400
Subject: brd: fix overflow in __brd_direct_access

The code in __brd_direct_access multiplies the pgoff variable by page size
and divides it by 512. It can cause overflow on 32-bit architectures. The
overflow happens if we create ramdisk larger than 4G and use it as a
sparse device.

This patch replaces multiplication and division with multiplication by the
number of sectors per page.

Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Fixes: 1647b9b959c7 ("brd: add dax_operations support")
Cc: stable@vger.kernel.org	# 4.12+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/brd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index bbd0d186cfc0..2d7178f7754e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -342,7 +342,7 @@ static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff,
 
 	if (!brd)
 		return -ENODEV;
-	page = brd_insert_page(brd, PFN_PHYS(pgoff) / 512);
+	page = brd_insert_page(brd, (sector_t)pgoff << PAGE_SECTORS_SHIFT);
 	if (!page)
 		return -ENOSPC;
 	*kaddr = page_address(page);
-- 
cgit 


From f507b54dccfd8000c517d740bc45f20c74532d18 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 7 Sep 2017 13:54:35 +0200
Subject: bsg-lib: don't free job in bsg_prepare_job

The job structure is allocated as part of the request, so we should not
free it in the error path of bsg_prepare_job.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bsg-lib.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index c82408c7cc3c..dbddff8174e5 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -154,7 +154,6 @@ static int bsg_prepare_job(struct device *dev, struct request *req)
 failjob_rls_rqst_payload:
 	kfree(job->request_payload.sg_list);
 failjob_rls_job:
-	kfree(job);
 	return -ENOMEM;
 }
 
-- 
cgit 


From 1dae69bedeeca0b57e441eae491fbd38049c0b47 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 5 May 2017 22:25:18 -0400
Subject: nbd: ignore non-nbd ioctl's

In testing we noticed that nbd would spew if you ran a fio job against
the raw device itself.  This is because fio calls a block device
specific ioctl, however the block layer will first pass this back to the
driver ioctl handler in case the driver wants to do something special.
Since the device was setup using netlink this caused us to spew every
time fio called this ioctl.  Since we don't have special handling, just
error out for any non-nbd specific ioctl's that come in.  This fixes the
spew.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/nbd.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 2aa87cbdede0..3684e21d543f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1194,6 +1194,12 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	/* The block layer will pass back some non-nbd ioctls in case we have
+	 * special handling for them, but we don't so just return an error.
+	 */
+	if (_IOC_TYPE(cmd) != 0xab)
+		return -EINVAL;
+
 	mutex_lock(&nbd->config_lock);
 
 	/* Don't allow ioctl operations on a nbd device that was created with
-- 
cgit 


From 5acb3cc2c2e9d3020a4fee43763c6463767f1572 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Wed, 20 Sep 2017 13:12:20 -0600
Subject: blktrace: Fix potential deadlock between delete & sysfs ops

The lockdep code had reported the following unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(s_active#228);
                               lock(&bdev->bd_mutex/1);
                               lock(s_active#228);
  lock(&bdev->bd_mutex);

 *** DEADLOCK ***

The deadlock may happen when one task (CPU1) is trying to delete a
partition in a block device and another task (CPU0) is accessing
tracing sysfs file (e.g. /sys/block/dm-1/trace/act_mask) in that
partition.

The s_active isn't an actual lock. It is a reference count (kn->count)
on the sysfs (kernfs) file. Removal of a sysfs file, however, require
a wait until all the references are gone. The reference count is
treated like a rwsem using lockdep instrumentation code.

The fact that a thread is in the sysfs callback method or in the
ioctl call means there is a reference to the opended sysfs or device
file. That should prevent the underlying block structure from being
removed.

Instead of using bd_mutex in the block_device structure, a new
blk_trace_mutex is now added to the request_queue structure to protect
access to the blk_trace structure.

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>

Fix typo in patch subject line, and prune a comment detailing how
the code used to work.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c        |  3 +++
 include/linux/blkdev.h  |  1 +
 kernel/trace/blktrace.c | 18 ++++++++++++------
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index aebe676225e6..048be4aa6024 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -854,6 +854,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+	mutex_init(&q->blk_trace_mutex);
+#endif
 	mutex_init(&q->sysfs_lock);
 	spin_lock_init(&q->__queue_lock);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 460294bb0fa5..02fa42d24b52 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -551,6 +551,7 @@ struct request_queue {
 	int			node;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	struct blk_trace	*blk_trace;
+	struct mutex		blk_trace_mutex;
 #endif
 	/*
 	 * for flush operations
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2a685b45b73b..45a3928544ce 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -648,6 +648,12 @@ int blk_trace_startstop(struct request_queue *q, int start)
 }
 EXPORT_SYMBOL_GPL(blk_trace_startstop);
 
+/*
+ * When reading or writing the blktrace sysfs files, the references to the
+ * opened sysfs or device files should prevent the underlying block device
+ * from being removed. So no further delete protection is really needed.
+ */
+
 /**
  * blk_trace_ioctl: - handle the ioctls associated with tracing
  * @bdev:	the block device
@@ -665,7 +671,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 	if (!q)
 		return -ENXIO;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&q->blk_trace_mutex);
 
 	switch (cmd) {
 	case BLKTRACESETUP:
@@ -691,7 +697,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 		break;
 	}
 
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&q->blk_trace_mutex);
 	return ret;
 }
 
@@ -1727,7 +1733,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
 	if (q == NULL)
 		goto out_bdput;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&q->blk_trace_mutex);
 
 	if (attr == &dev_attr_enable) {
 		ret = sprintf(buf, "%u\n", !!q->blk_trace);
@@ -1746,7 +1752,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
 		ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
 
 out_unlock_bdev:
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&q->blk_trace_mutex);
 out_bdput:
 	bdput(bdev);
 out:
@@ -1788,7 +1794,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	if (q == NULL)
 		goto out_bdput;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&q->blk_trace_mutex);
 
 	if (attr == &dev_attr_enable) {
 		if (value)
@@ -1814,7 +1820,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	}
 
 out_unlock_bdev:
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&q->blk_trace_mutex);
 out_bdput:
 	bdput(bdev);
 out:
-- 
cgit 


From e5313c141b49c1b1af43d1ca81398185d66ad1a6 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 20 Sep 2017 14:24:34 -0700
Subject: loop: remove union of use_aio and ref in struct loop_cmd

When the request is completed, lo_complete_rq() checks cmd->use_aio.
However, if this is in fact an aio request, cmd->use_aio will have
already been reused as cmd->ref by lo_rw_aio*. Fix it by not using a
union. On x86_64, there's a hole after the union anyways, so this
doesn't make struct loop_cmd any bigger.

Fixes: 92d773324b7e ("block/loop: fix use after free")
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index f68c1d50802f..1f3956702993 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -67,10 +67,8 @@ struct loop_device {
 struct loop_cmd {
 	struct kthread_work work;
 	struct request *rq;
-	union {
-		bool use_aio; /* use AIO interface to handle I/O */
-		atomic_t ref; /* only for aio */
-	};
+	bool use_aio; /* use AIO interface to handle I/O */
+	atomic_t ref; /* only for aio */
 	long ret;
 	struct kiocb iocb;
 	struct bio_vec *bvec;
-- 
cgit 


From 56b7103a06083b8ce1160f8289460ba2f584e182 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 16:27:26 -0700
Subject: nvme-fc: remove use of FC-specific error codes

The FC-NVME transport used the FC-specific error codes in cases where
it had to fabricate an error to go back up stack. Instead of using the
FC-specific values, now use a generic value (NVME_SC_INTERNAL).

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/fc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index d2e882c0f496..9100779b58c9 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1376,7 +1376,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
 		status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
 	else if (freq->status)
-		status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+		status = cpu_to_le16(NVME_SC_INTERNAL << 1);
 
 	/*
 	 * For the linux implementation, if we have an unsuccesful
@@ -1404,7 +1404,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 		 */
 		if (freq->transferred_length !=
 			be32_to_cpu(op->cmd_iu.data_len)) {
-			status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+			status = cpu_to_le16(NVME_SC_INTERNAL << 1);
 			goto done;
 		}
 		result.u64 = 0;
@@ -1421,7 +1421,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 					freq->transferred_length ||
 			     op->rsp_iu.status_code ||
 			     sqe->common.command_id != cqe->command_id)) {
-			status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+			status = cpu_to_le16(NVME_SC_INTERNAL << 1);
 			goto done;
 		}
 		result = cqe->result;
@@ -1429,7 +1429,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 		break;
 
 	default:
-		status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+		status = cpu_to_le16(NVME_SC_INTERNAL << 1);
 		goto done;
 	}
 
-- 
cgit 


From 29b3d26ecc8046838de88205b7c4b182ac27ff65 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 16:27:27 -0700
Subject: nvmet-fc: remove use of FC-specific error codes

The FC-NVME target transport used the FC-specific error codes in
return codes when the transport or lldd failed. Instead of using the
FC-specific values, now use a generic value (NVME_SC_INTERNAL).

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fc.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 421e43bf1dd7..088f07250d76 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1910,8 +1910,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
 			spin_lock_irqsave(&fod->flock, flags);
 			fod->writedataactive = false;
 			spin_unlock_irqrestore(&fod->flock, flags);
-			nvmet_req_complete(&fod->req,
-					NVME_SC_FC_TRANSPORT_ERROR);
+			nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
 		} else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ {
 			fcpreq->fcp_error = ret;
 			fcpreq->transferred_length = 0;
@@ -1929,8 +1928,7 @@ __nvmet_fc_fod_op_abort(struct nvmet_fc_fcp_iod *fod, bool abort)
 	/* if in the middle of an io and we need to tear down */
 	if (abort) {
 		if (fcpreq->op == NVMET_FCOP_WRITEDATA) {
-			nvmet_req_complete(&fod->req,
-					NVME_SC_FC_TRANSPORT_ERROR);
+			nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
 			return true;
 		}
 
@@ -1968,8 +1966,7 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod)
 			fod->abort = true;
 			spin_unlock(&fod->flock);
 
-			nvmet_req_complete(&fod->req,
-					NVME_SC_FC_TRANSPORT_ERROR);
+			nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
 			return;
 		}
 
-- 
cgit 


From fc9608e8b4dc3c2545fa0bc5d3eef158ca56ccf8 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 16:27:28 -0700
Subject: nvmet-fcloop: remove use of FC-specific error codes

The FC-NVME transport loopback test module used the FC-specific error
codes in cases where it emulated a transport abort case. Instead of
using the FC-specific values, now use a generic value (NVME_SC_INTERNAL).

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fcloop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 1cb9847ec261..1fd1afbb8b2a 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -576,7 +576,7 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 	tfcp_req->aborted = true;
 	spin_unlock(&tfcp_req->reqlock);
 
-	tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED;
+	tfcp_req->status = NVME_SC_INTERNAL;
 
 	/*
 	 * nothing more to do. If io wasn't active, the transport should
-- 
cgit 


From 8e009ce84683fa124b23ff5cb7fd87c48b331b88 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 16:27:29 -0700
Subject: lpfc: remove use of FC-specific error codes

The lpfc driver uses the FC-specific error when it needed to return an
error to the FC-NVME transport. Convert to use a generic value instead.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/lpfc/lpfc_nvme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 79ba3ce063a4..23bdb1ca106e 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -884,7 +884,7 @@ out_err:
 					 wcqe->total_data_placed);
 			nCmd->transferred_length = 0;
 			nCmd->rcv_rsplen = 0;
-			nCmd->status = NVME_SC_FC_TRANSPORT_ERROR;
+			nCmd->status = NVME_SC_INTERNAL;
 		}
 	}
 
-- 
cgit 


From 39a550d2d9eaee8b618084e6011441eac6a2a3b7 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 14 Sep 2017 11:30:15 -0700
Subject: qla2xxx: remove use of FC-specific error codes

The qla2xxx driver uses the FC-specific error when it needed to return an
error to the FC-NVME transport.  Convert to use a generic value instead.

Signed-off-by: James Smart <james.smart@broadcom.com>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/qla2xxx/qla_nvme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index 1f59e7a74c7b..6b33a1f24f56 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -180,7 +180,7 @@ static void qla_nvme_sp_done(void *ptr, int res)
 		goto rel;
 
 	if (unlikely(res == QLA_FUNCTION_FAILED))
-		fd->status = NVME_SC_FC_TRANSPORT_ERROR;
+		fd->status = NVME_SC_INTERNAL;
 	else
 		fd->status = 0;
 
-- 
cgit 


From c98cb3bd882119e7e1a7c8df2f1eacfcc701450b Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 16:27:25 -0700
Subject: nvme.h: remove FC transport-specific error values

The NVM express group recinded the reserved range for the transport.
Remove the FC-centric values that had been defined.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/nvme.h | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 87723c86f136..2440be32be1d 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1127,19 +1127,6 @@ enum {
 	NVME_SC_UNWRITTEN_BLOCK		= 0x287,
 
 	NVME_SC_DNR			= 0x4000,
-
-
-	/*
-	 * FC Transport-specific error status values for NVME commands
-	 *
-	 * Transport-specific status code values must be in the range 0xB0..0xBF
-	 */
-
-	/* Generic FC failure - catchall */
-	NVME_SC_FC_TRANSPORT_ERROR	= 0x00B0,
-
-	/* I/O failure due to FC ABTS'd */
-	NVME_SC_FC_TRANSPORT_ABORTED	= 0x00B1,
 };
 
 struct nvme_completion {
-- 
cgit 


From d85cf207499e6740ab9c490ff4f360af5c432d23 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 13:20:23 -0700
Subject: nvme: add transport SGL definitions

Add transport SGL defintions from NVMe TP 4008, required for
the final NVMe-FC standard.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/nvme.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2440be32be1d..9310ce77d8e1 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -471,12 +471,14 @@ enum nvme_opcode {
  *
  * @NVME_SGL_FMT_ADDRESS:     absolute address of the data block
  * @NVME_SGL_FMT_OFFSET:      relative offset of the in-capsule data block
+ * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA
  * @NVME_SGL_FMT_INVALIDATE:  RDMA transport specific remote invalidation
  *                            request subtype
  */
 enum {
 	NVME_SGL_FMT_ADDRESS		= 0x00,
 	NVME_SGL_FMT_OFFSET		= 0x01,
+	NVME_SGL_FMT_TRANSPORT_A	= 0x0A,
 	NVME_SGL_FMT_INVALIDATE		= 0x0f,
 };
 
@@ -490,12 +492,16 @@ enum {
  *
  * For struct nvme_keyed_sgl_desc:
  *   @NVME_KEY_SGL_FMT_DATA_DESC:	keyed data block descriptor
+ *
+ * Transport-specific SGL types:
+ *   @NVME_TRANSPORT_SGL_DATA_DESC:	Transport SGL data dlock descriptor
  */
 enum {
 	NVME_SGL_FMT_DATA_DESC		= 0x00,
 	NVME_SGL_FMT_SEG_DESC		= 0x02,
 	NVME_SGL_FMT_LAST_SEG_DESC	= 0x03,
 	NVME_KEY_SGL_FMT_DATA_DESC	= 0x04,
+	NVME_TRANSPORT_SGL_DATA_DESC	= 0x05,
 };
 
 struct nvme_sgl_desc {
-- 
cgit 


From d9d34c0b2327e85da0ad1476575264fe957fc6ef Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 13:20:24 -0700
Subject: nvme-fc: use transport-specific sgl format

Sync with NVM Express spec change and FC-NVME 1.18.

FC transport sets SGL type to Transport SGL Data Block Descriptor and
subtype to transport-specific value 0x0A.

Removed the warn-on's on the PRP fields. They are unneeded. They were
to check for values from the upper layer that weren't set right, and
for the most part were fine. But, with Async events, which reuse the
same structure and 2nd time issued the SGL overlay converted them to
the Transport SGL values - the warn-on's were errantly firing.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/fc.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 9100779b58c9..af075e998944 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1989,16 +1989,17 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	 * as well as those by FC-NVME spec.
 	 */
 	WARN_ON_ONCE(sqe->common.metadata);
-	WARN_ON_ONCE(sqe->common.dptr.prp1);
-	WARN_ON_ONCE(sqe->common.dptr.prp2);
 	sqe->common.flags |= NVME_CMD_SGL_METABUF;
 
 	/*
-	 * format SQE DPTR field per FC-NVME rules
-	 *    type=data block descr; subtype=offset;
-	 *    offset is currently 0.
+	 * format SQE DPTR field per FC-NVME rules:
+	 *    type=0x5     Transport SGL Data Block Descriptor
+	 *    subtype=0xA  Transport-specific value
+	 *    address=0
+	 *    length=length of the data series
 	 */
-	sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET;
+	sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
+					NVME_SGL_FMT_TRANSPORT_A;
 	sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
 	sqe->rw.dptr.sgl.addr = 0;
 
-- 
cgit 


From deb61742e060d4447712598bc11bb50f8b2e51dd Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 11 Sep 2017 16:16:53 -0700
Subject: nvmet-fc: fix failing max io queue connections

fc transport is treating NVMET_NR_QUEUES as maximum queue count, e.g.
admin queue plus NVMET_NR_QUEUES-1 io queues.  But NVMET_NR_QUEUES is
the number of io queues, so maximum queue count is really
NVMET_NR_QUEUES+1.

Fix the handling in the target fc transport

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 088f07250d76..c48c83d97e30 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -148,7 +148,7 @@ struct nvmet_fc_tgt_assoc {
 	u32				a_id;
 	struct nvmet_fc_tgtport		*tgtport;
 	struct list_head		a_list;
-	struct nvmet_fc_tgt_queue	*queues[NVMET_NR_QUEUES];
+	struct nvmet_fc_tgt_queue	*queues[NVMET_NR_QUEUES + 1];
 	struct kref			ref;
 };
 
@@ -608,7 +608,7 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 	unsigned long flags;
 	int ret;
 
-	if (qid >= NVMET_NR_QUEUES)
+	if (qid > NVMET_NR_QUEUES)
 		return NULL;
 
 	queue = kzalloc((sizeof(*queue) +
@@ -888,7 +888,7 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
 	int i;
 
 	spin_lock_irqsave(&tgtport->lock, flags);
-	for (i = NVMET_NR_QUEUES - 1; i >= 0; i--) {
+	for (i = NVMET_NR_QUEUES; i >= 0; i--) {
 		queue = assoc->queues[i];
 		if (queue) {
 			if (!nvmet_fc_tgt_q_get(queue))
-- 
cgit 


From 161b8be2bd6abad250d4b3f674bdd5480f15beeb Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 14 Sep 2017 13:54:39 -0400
Subject: nvme-pci: initialize queue memory before interrupts

A spurious interrupt before the nvme driver has initialized the completion
queue may inadvertently cause the driver to believe it has a completion
to process. This may result in a NULL dereference since the nvmeq's tags
are not set at this point.

The patch initializes the host's CQ memory so that a spurious interrupt
isn't mistaken for a real completion.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4a2121335f48..004018c5dccc 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1313,11 +1313,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	if (result < 0)
 		goto release_cq;
 
+	nvme_init_queue(nvmeq, qid);
 	result = queue_request_irq(nvmeq);
 	if (result < 0)
 		goto release_sq;
 
-	nvme_init_queue(nvmeq, qid);
 	return result;
 
  release_sq:
@@ -1464,6 +1464,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
 		return result;
 
 	nvmeq->cq_vector = 0;
+	nvme_init_queue(nvmeq, 0);
 	result = queue_request_irq(nvmeq);
 	if (result) {
 		nvmeq->cq_vector = -1;
@@ -2156,7 +2157,6 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
-	nvme_init_queue(dev->queues[0], 0);
 	result = nvme_alloc_admin_tags(dev);
 	if (result)
 		goto out;
-- 
cgit 


From d08774738446e77734777adcf5d1045237b4475a Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 15 Sep 2017 13:05:38 -0400
Subject: nvme-pci: Print invalid SGL only once

The WARN_ONCE macro returns true if the condition is true, not if the
warn was raised, so we're printing the scatter list every time it's
invalid. This is excessive and makes debugging harder, so this patch
prints it just once.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/pci.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 004018c5dccc..cb73bc8cad3b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -24,6 +24,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/once.h>
 #include <linux/pci.h>
 #include <linux/poison.h>
 #include <linux/t10-pi.h>
@@ -540,6 +541,20 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
 }
 #endif
 
+static void nvme_print_sgl(struct scatterlist *sgl, int nents)
+{
+	int i;
+	struct scatterlist *sg;
+
+	for_each_sg(sgl, sg, nents, i) {
+		dma_addr_t phys = sg_phys(sg);
+		pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
+			"dma_address:%pad dma_length:%d\n",
+			i, &phys, sg->offset, sg->length, &sg_dma_address(sg),
+			sg_dma_len(sg));
+	}
+}
+
 static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -622,19 +637,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 	return BLK_STS_OK;
 
  bad_sgl:
-	if (WARN_ONCE(1, "Invalid SGL for payload:%d nents:%d\n",
-				blk_rq_payload_bytes(req), iod->nents)) {
-		for_each_sg(iod->sg, sg, iod->nents, i) {
-			dma_addr_t phys = sg_phys(sg);
-			pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
-			       "dma_address:%pad dma_length:%d\n", i, &phys,
-					sg->offset, sg->length,
-					&sg_dma_address(sg),
-					sg_dma_len(sg));
-		}
-	}
+	WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents),
+			"Invalid SGL for payload:%d nents:%d\n",
+			blk_rq_payload_bytes(req), iod->nents);
 	return BLK_STS_IOERR;
-
 }
 
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
-- 
cgit 


From cd48282cc736377d5abf7c04de8c6ba864ba3794 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 14 Sep 2017 11:03:09 -0700
Subject: nvme: stop aer posting if controller state not live

If an nvme async_event command completes, in most cases, a new
async event is posted. However, if the controller enters a
resetting or reconnecting state, there is nothing to block the
scheduled work element from posting the async event again. Nor are
there calls from the transport to stop async events when an
association dies.

In the case of FC, where the association is torn down, the aer must
be aborted on the FC link and completes through the normal job
completion path. Thus the terminated async event ends up being
rescheduled even though the controller isn't in a valid state for
the aer, and the reposting gets the transport into a partially torn
down data structure.

It's possible to hit the scenario on rdma, although much less likely
due to an aer completing right as the association is terminated and
as the association teardown reclaims the blk requests via
nvme_cancel_request() so its immediate, not a link-related action
like on FC.

Fix by putting controller state checks in both the async event
completion routine where it schedules the async event and in the
async event work routine before it calls into the transport. It's
effectively a "stop_async_events()" behavior.  The transport, when
it creates a new association with the subsystem will transition
the state back to live and is already restarting the async event
posting.

Signed-off-by: James Smart <james.smart@broadcom.com>
[hch: remove taking a lock over reading the controller state]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index acc816b67582..d470f031e27f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2590,7 +2590,7 @@ static void nvme_async_event_work(struct work_struct *work)
 		container_of(work, struct nvme_ctrl, async_event_work);
 
 	spin_lock_irq(&ctrl->lock);
-	while (ctrl->event_limit > 0) {
+	while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) {
 		int aer_idx = --ctrl->event_limit;
 
 		spin_unlock_irq(&ctrl->lock);
@@ -2677,7 +2677,8 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		/*FALLTHRU*/
 	case NVME_SC_ABORT_REQ:
 		++ctrl->event_limit;
-		queue_work(nvme_wq, &ctrl->async_event_work);
+		if (ctrl->state == NVME_CTRL_LIVE)
+			schedule_work(&ctrl->async_event_work);
 		break;
 	default:
 		break;
-- 
cgit 


From 0951338d9677f546e230685d68631dfd3f81cca5 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 13:18:04 -0700
Subject: nvme: allow timed-out ios to retry

Currently the nvme_req_needs_retry() applies several checks to see if
a retry is allowed. On of those is whether the current time has exceeded
the start time of the io plus the timeout length. This check, if an io
times out, means there is never a retry allowed for the io. Which means
applications see the io failure.

Remove this check and allow the io to timeout, like it does on other
protocols, and retries to be made.

On the FC transport, a frame can be lost for an individual io, and there
may be no other errors that escalate for the connection/association.
The io will timeout, which causes the transport to escalate into creating
a new association, but the io that timed out, due to this retry logic, has
already failed back to the application and things are hosed.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d470f031e27f..5589f67d2cd8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -134,8 +134,6 @@ static inline bool nvme_req_needs_retry(struct request *req)
 		return false;
 	if (nvme_req(req)->status & NVME_SC_DNR)
 		return false;
-	if (jiffies - req->start_time >= req->timeout)
-		return false;
 	if (nvme_req(req)->retries >= nvme_max_retries)
 		return false;
 	return true;
-- 
cgit 


From 8edd11c9ad3a6205eea6de9d02eaf64c681a0658 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@linux.vnet.ibm.com>
Date: Thu, 14 Sep 2017 13:59:28 -0300
Subject: nvme-fabrics: Allow 0 as KATO value

Currently, driver code allows user to set 0 as KATO
(Keep Alive TimeOut), but this is not being respected.
This patch enforces the expected behavior.

Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/fabrics.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 47307752dc65..555c976cc2ee 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -565,6 +565,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	opts->queue_size = NVMF_DEF_QUEUE_SIZE;
 	opts->nr_io_queues = num_online_cpus();
 	opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
+	opts->kato = NVME_DEFAULT_KATO;
 
 	options = o = kstrdup(buf, GFP_KERNEL);
 	if (!options)
@@ -655,21 +656,22 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 				goto out;
 			}
 
-			if (opts->discovery_nqn) {
-				pr_err("Discovery controllers cannot accept keep_alive_tmo != 0\n");
-				ret = -EINVAL;
-				goto out;
-			}
-
 			if (token < 0) {
 				pr_err("Invalid keep_alive_tmo %d\n", token);
 				ret = -EINVAL;
 				goto out;
-			} else if (token == 0) {
+			} else if (token == 0 && !opts->discovery_nqn) {
 				/* Allowed for debug */
 				pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
 			}
 			opts->kato = token;
+
+			if (opts->discovery_nqn && opts->kato) {
+				pr_err("Discovery controllers cannot accept KATO != 0\n");
+				ret = -EINVAL;
+				goto out;
+			}
+
 			break;
 		case NVMF_OPT_CTRL_LOSS_TMO:
 			if (match_int(args, &token)) {
@@ -762,8 +764,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	uuid_copy(&opts->host->id, &hostid);
 
 out:
-	if (!opts->discovery_nqn && !opts->kato)
-		opts->kato = NVME_DEFAULT_KATO;
 	kfree(options);
 	return ret;
 }
-- 
cgit 


From bb1cc74790eb51f52d23c6e5fd9a3bb16030c3d8 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 18 Sep 2017 09:08:29 -0700
Subject: nvmet: implement valid sqhd values in completions

To support sqhd, for initiators that are following the spec and
paying attention to sqhd vs their sqtail values:

- add sqhd to struct nvmet_sq
- initialize sqhd to 0 in nvmet_sq_setup
- rather than propagate the 0's-based qsize value from the connect message
  which requires a +1 in every sqhd update, and as nothing else references
  it, convert to 1's-based value in nvmt_sq/cq_setup() calls.
- validate connect message sqsize being non-zero per spec.
- updated assign sqhd for every completion that goes back.

Also remove handling the NULL sq case in __nvmet_req_complete, as it can't
happen with the current code.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/core.c        | 8 ++++----
 drivers/nvme/target/fabrics-cmd.c | 9 +++++++--
 drivers/nvme/target/nvmet.h       | 1 +
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 7c23eaf8e563..c2a768a94235 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -390,10 +390,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 	if (status)
 		nvmet_set_status(req, status);
 
-	/* XXX: need to fill in something useful for sq_head */
-	req->rsp->sq_head = 0;
-	if (likely(req->sq)) /* may happen during early failure */
-		req->rsp->sq_id = cpu_to_le16(req->sq->qid);
+	req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size;
+	req->rsp->sq_head = cpu_to_le16(req->sq->sqhd);
+	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
 	req->rsp->command_id = req->cmd->common.command_id;
 
 	if (req->ns)
@@ -420,6 +419,7 @@ void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
 		u16 qid, u16 size)
 {
+	sq->sqhd = 0;
 	sq->qid = qid;
 	sq->size = size;
 
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 859a66725291..db3bf6b8bf9e 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -109,9 +109,14 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
 		pr_warn("queue already connected!\n");
 		return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
 	}
+	if (!sqsize) {
+		pr_warn("queue size zero!\n");
+		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+	}
 
-	nvmet_cq_setup(ctrl, req->cq, qid, sqsize);
-	nvmet_sq_setup(ctrl, req->sq, qid, sqsize);
+	/* note: convert queue size from 0's-based value to 1's-based value */
+	nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1);
+	nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1);
 	return 0;
 }
 
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 7d261ab894f4..7b8e20adf760 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -74,6 +74,7 @@ struct nvmet_sq {
 	struct percpu_ref	ref;
 	u16			qid;
 	u16			size;
+	u16			sqhd;
 	struct completion	free_done;
 	struct completion	confirm_done;
 };
-- 
cgit 


From 332391a9935da939319e473b4680e173df75afcf Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Thu, 21 Sep 2017 08:16:29 -0600
Subject: fs: Fix page cache inconsistency when mixing buffered and AIO DIO

Currently when mixing buffered reads and asynchronous direct writes it
is possible to end up with the situation where we have stale data in the
page cache while the new data is already written to disk. This is
permanent until the affected pages are flushed away. Despite the fact
that mixing buffered and direct IO is ill-advised it does pose a thread
for a data integrity, is unexpected and should be fixed.

Fix this by deferring completion of asynchronous direct writes to a
process context in the case that there are mapped pages to be found in
the inode. Later before the completion in dio_complete() invalidate
the pages in question. This ensures that after the completion the pages
in the written area are either unmapped, or populated with up-to-date
data. Also do the same for the iomap case which uses
iomap_dio_complete() instead.

This has a side effect of deferring the completion to a process context
for every AIO DIO that happens on inode that has pages mapped. However
since the consensus is that this is ill-advised practice the performance
implication should not be a problem.

This was based on proposal from Jeff Moyer, thanks!

Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/direct-io.c | 49 +++++++++++++++++++++++++++++++++++++++++++------
 fs/iomap.c     | 29 ++++++++++++++++-------------
 mm/filemap.c   | 10 ++++++++--
 3 files changed, 67 insertions(+), 21 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 5fa2211e49ae..62cf812ed0e5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -229,6 +229,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
 {
 	loff_t offset = dio->iocb->ki_pos;
 	ssize_t transferred = 0;
+	int err;
 
 	/*
 	 * AIO submission can race with bio completion to get here while
@@ -258,8 +259,22 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
 	if (ret == 0)
 		ret = transferred;
 
+	/*
+	 * Try again to invalidate clean pages which might have been cached by
+	 * non-direct readahead, or faulted in by get_user_pages() if the source
+	 * of the write was an mmap'ed region of the file we're writing.  Either
+	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
+	 * this invalidation fails, tough, the write still worked...
+	 */
+	if (ret > 0 && dio->op == REQ_OP_WRITE &&
+	    dio->inode->i_mapping->nrpages) {
+		err = invalidate_inode_pages2_range(dio->inode->i_mapping,
+					offset >> PAGE_SHIFT,
+					(offset + ret - 1) >> PAGE_SHIFT);
+		WARN_ON_ONCE(err);
+	}
+
 	if (dio->end_io) {
-		int err;
 
 		// XXX: ki_pos??
 		err = dio->end_io(dio->iocb, offset, ret, dio->private);
@@ -304,6 +319,7 @@ static void dio_bio_end_aio(struct bio *bio)
 	struct dio *dio = bio->bi_private;
 	unsigned long remaining;
 	unsigned long flags;
+	bool defer_completion = false;
 
 	/* cleanup the bio */
 	dio_bio_complete(dio, bio);
@@ -315,7 +331,19 @@ static void dio_bio_end_aio(struct bio *bio)
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
 	if (remaining == 0) {
-		if (dio->result && dio->defer_completion) {
+		/*
+		 * Defer completion when defer_completion is set or
+		 * when the inode has pages mapped and this is AIO write.
+		 * We need to invalidate those pages because there is a
+		 * chance they contain stale data in the case buffered IO
+		 * went in between AIO submission and completion into the
+		 * same region.
+		 */
+		if (dio->result)
+			defer_completion = dio->defer_completion ||
+					   (dio->op == REQ_OP_WRITE &&
+					    dio->inode->i_mapping->nrpages);
+		if (defer_completion) {
 			INIT_WORK(&dio->complete_work, dio_aio_complete_work);
 			queue_work(dio->inode->i_sb->s_dio_done_wq,
 				   &dio->complete_work);
@@ -1210,10 +1238,19 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
 	 * so that we can call ->fsync.
 	 */
-	if (dio->is_async && iov_iter_rw(iter) == WRITE &&
-	    ((iocb->ki_filp->f_flags & O_DSYNC) ||
-	     IS_SYNC(iocb->ki_filp->f_mapping->host))) {
-		retval = dio_set_defer_completion(dio);
+	if (dio->is_async && iov_iter_rw(iter) == WRITE) {
+		retval = 0;
+		if ((iocb->ki_filp->f_flags & O_DSYNC) ||
+		    IS_SYNC(iocb->ki_filp->f_mapping->host))
+			retval = dio_set_defer_completion(dio);
+		else if (!dio->inode->i_sb->s_dio_done_wq) {
+			/*
+			 * In case of AIO write racing with buffered read we
+			 * need to defer completion. We can't decide this now,
+			 * however the workqueue needs to be initialized here.
+			 */
+			retval = sb_init_dio_done_wq(dio->inode->i_sb);
+		}
 		if (retval) {
 			/*
 			 * We grab i_mutex only for reads so we don't have
diff --git a/fs/iomap.c b/fs/iomap.c
index 269b24a01f32..8194d30bdca0 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -713,8 +713,24 @@ struct iomap_dio {
 static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
 	struct kiocb *iocb = dio->iocb;
+	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
 
+	/*
+	 * Try again to invalidate clean pages which might have been cached by
+	 * non-direct readahead, or faulted in by get_user_pages() if the source
+	 * of the write was an mmap'ed region of the file we're writing.  Either
+	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
+	 * this invalidation fails, tough, the write still worked...
+	 */
+	if (!dio->error &&
+	    (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+		ret = invalidate_inode_pages2_range(inode->i_mapping,
+				iocb->ki_pos >> PAGE_SHIFT,
+				(iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
+		WARN_ON_ONCE(ret);
+	}
+
 	if (dio->end_io) {
 		ret = dio->end_io(iocb,
 				dio->error ? dio->error : dio->size,
@@ -1042,19 +1058,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 
 	ret = iomap_dio_complete(dio);
 
-	/*
-	 * Try again to invalidate clean pages which might have been cached by
-	 * non-direct readahead, or faulted in by get_user_pages() if the source
-	 * of the write was an mmap'ed region of the file we're writing.  Either
-	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
-	 * this invalidation fails, tough, the write still worked...
-	 */
-	if (iov_iter_rw(iter) == WRITE) {
-		int err = invalidate_inode_pages2_range(mapping,
-				start >> PAGE_SHIFT, end >> PAGE_SHIFT);
-		WARN_ON_ONCE(err);
-	}
-
 	return ret;
 
 out_free_dio:
diff --git a/mm/filemap.c b/mm/filemap.c
index 870971e20967..db250d0e0565 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2926,9 +2926,15 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	 * we're writing.  Either one is a pretty crazy thing to do,
 	 * so we don't support it 100%.  If this invalidation
 	 * fails, tough, the write still worked...
+	 *
+	 * Most of the time we do not need this since dio_complete() will do
+	 * the invalidation for us. However there are some file systems that
+	 * do not end up with dio_complete() being called, so let's not break
+	 * them by removing it completely
 	 */
-	invalidate_inode_pages2_range(mapping,
-				pos >> PAGE_SHIFT, end);
+	if (mapping->nrpages)
+		invalidate_inode_pages2_range(mapping,
+					pos >> PAGE_SHIFT, end);
 
 	if (written > 0) {
 		pos += written;
-- 
cgit 


From f5c156c4c29a3d87176dd6e5c099388e187ec29b Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 21 Sep 2017 12:17:16 -0700
Subject: block: fix a crash caused by wrong API

part_stat_show takes a part device not a disk, so we should use
part_to_disk.

Fixes: d62e26b3ffd2("block: pass in queue to inflight accounting")
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Omar Sandoval <osandov@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/partition-generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/partition-generic.c b/block/partition-generic.c
index 86e8fe1adcdb..88c555db4e5d 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -112,7 +112,7 @@ ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
-	struct request_queue *q = dev_to_disk(dev)->queue;
+	struct request_queue *q = part_to_disk(p)->queue;
 	unsigned int inflight[2];
 	int cpu;
 
-- 
cgit 


From 9789e7e93f2b892098d7684ac8131092aa617814 Mon Sep 17 00:00:00 2001
From: Mengting Zhang <zhangmengting@huawei.com>
Date: Sat, 23 Sep 2017 16:18:14 +0800
Subject: perf report: Fix debug messages with --call-graph option

With --call-graph option, perf report can display call chains using
type, min percent threshold, optional print limit and order. And the
default call-graph parameter is 'graph,0.5,caller,function,percent'.

Before this patch, 'perf report --call-graph' shows incorrect debug
messages as below:

  # perf report --call-graph
  Invalid callchain mode: 0.5
  Invalid callchain order: 0.5
  Invalid callchain sort key: 0.5
  Invalid callchain config key: 0.5
  Invalid callchain mode: caller
  Invalid callchain mode: function
  Invalid callchain order: function
  Invalid callchain mode: percent
  Invalid callchain order: percent
  Invalid callchain sort key: percent

That is because in function __parse_callchain_report_opt(),each field of
the call-graph parameter is passed to parse_callchain_{mode,order,
sort_key,value} in turn until it meets the matching value.

For example, the order field "caller" is passed to
parse_callchain_mode() firstly and obviously it doesn't match any mode
field. Therefore parse_callchain_mode() will shows the debug message
"Invalid callchain mode: caller", which could confuse users.

The patch fixes this issue by moving the warning out of the function
parse_callchain_{mode,order,sort_key,value}.

Signed-off-by: Mengting Zhang <zhangmengting@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Krister Johansen <kjlx@templeofstupid.com>
Cc: Li Bin <huawei.libin@huawei.com>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Yao Jin <yao.jin@linux.intel.com>
Link: http://lkml.kernel.org/r/1506154694-39691-1-git-send-email-zhangmengting@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/callchain.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 510b513e0f01..be09d77cade0 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -65,8 +65,6 @@ static int parse_callchain_mode(const char *value)
 		callchain_param.mode = CHAIN_FOLDED;
 		return 0;
 	}
-
-	pr_err("Invalid callchain mode: %s\n", value);
 	return -1;
 }
 
@@ -82,8 +80,6 @@ static int parse_callchain_order(const char *value)
 		callchain_param.order_set = true;
 		return 0;
 	}
-
-	pr_err("Invalid callchain order: %s\n", value);
 	return -1;
 }
 
@@ -105,8 +101,6 @@ static int parse_callchain_sort_key(const char *value)
 		callchain_param.branch_callstack = 1;
 		return 0;
 	}
-
-	pr_err("Invalid callchain sort key: %s\n", value);
 	return -1;
 }
 
@@ -124,8 +118,6 @@ static int parse_callchain_value(const char *value)
 		callchain_param.value = CCVAL_COUNT;
 		return 0;
 	}
-
-	pr_err("Invalid callchain config key: %s\n", value);
 	return -1;
 }
 
@@ -319,12 +311,27 @@ int perf_callchain_config(const char *var, const char *value)
 
 		return ret;
 	}
-	if (!strcmp(var, "print-type"))
-		return parse_callchain_mode(value);
-	if (!strcmp(var, "order"))
-		return parse_callchain_order(value);
-	if (!strcmp(var, "sort-key"))
-		return parse_callchain_sort_key(value);
+	if (!strcmp(var, "print-type")){
+		int ret;
+		ret = parse_callchain_mode(value);
+		if (ret == -1)
+			pr_err("Invalid callchain mode: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "order")){
+		int ret;
+		ret = parse_callchain_order(value);
+		if (ret == -1)
+			pr_err("Invalid callchain order: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "sort-key")){
+		int ret;
+		ret = parse_callchain_sort_key(value);
+		if (ret == -1)
+			pr_err("Invalid callchain sort key: %s\n", value);
+		return ret;
+	}
 	if (!strcmp(var, "threshold")) {
 		callchain_param.min_percent = strtod(value, &endptr);
 		if (value == endptr) {
-- 
cgit 


From 090657c9fb7094e4c1b05c1713d6c2a12ef43dea Mon Sep 17 00:00:00 2001
From: Akemi Yagi <amyagi@gmail.com>
Date: Fri, 22 Sep 2017 22:11:53 +0000
Subject: perf tools: Fix syscalltbl build failure

The build of kernel v4.14-rc1 for i686 fails on RHEL 6 with the error
in tools/perf:

  util/syscalltbl.c:157: error: expected ';', ',' or ')' before '__maybe_unused'
  mv: cannot stat `util/.syscalltbl.o.tmp': No such file or directory

Fix it by placing/moving:

  #include <linux/compiler.h>

  outside of #ifdef HAVE_SYSCALL_TABLE block.

Signed-off-by: Akemi Yagi <toracat@elrepo.org>
Cc: Alan Bartlett <ajb@elrepo.org>
Link: http://lkml.kernel.org/r/oq41r8$1v9$1@blaine.gmane.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/syscalltbl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 19e5db90394c..6eea7cff3d4e 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -15,9 +15,9 @@
 
 #include "syscalltbl.h"
 #include <stdlib.h>
+#include <linux/compiler.h>
 
 #ifdef HAVE_SYSCALL_TABLE
-#include <linux/compiler.h>
 #include <string.h>
 #include "string2.h"
 #include "util.h"
-- 
cgit 


From 78b1beb0998437107ed144b341fbe1252188916b Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 24 Sep 2017 21:46:29 +0300
Subject: IB/core: Fix typo in the name of the tag-matching cap struct

The tag matching functionality is implemented by mlx5 driver
by extending XRQ, however this internal kernel information was
exposed to user space applications with *xrq* name instead of *tm*.

This patch renames *xrq* to *tm* to handle that.

Fixes: 8d50505ada72 ("IB/uverbs: Expose XRQ capabilities")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 14 +++++++-------
 drivers/infiniband/hw/mlx5/main.c    | 10 +++++-----
 include/rdma/ib_verbs.h              |  4 ++--
 include/uapi/rdma/ib_user_verbs.h    |  2 +-
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 4ab30d832ac5..52a2cf2d83aa 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3869,15 +3869,15 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 	resp.raw_packet_caps = attr.raw_packet_caps;
 	resp.response_length += sizeof(resp.raw_packet_caps);
 
-	if (ucore->outlen < resp.response_length + sizeof(resp.xrq_caps))
+	if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps))
 		goto end;
 
-	resp.xrq_caps.max_rndv_hdr_size = attr.xrq_caps.max_rndv_hdr_size;
-	resp.xrq_caps.max_num_tags      = attr.xrq_caps.max_num_tags;
-	resp.xrq_caps.max_ops		= attr.xrq_caps.max_ops;
-	resp.xrq_caps.max_sge		= attr.xrq_caps.max_sge;
-	resp.xrq_caps.flags		= attr.xrq_caps.flags;
-	resp.response_length += sizeof(resp.xrq_caps);
+	resp.tm_caps.max_rndv_hdr_size	= attr.tm_caps.max_rndv_hdr_size;
+	resp.tm_caps.max_num_tags	= attr.tm_caps.max_num_tags;
+	resp.tm_caps.max_ops		= attr.tm_caps.max_ops;
+	resp.tm_caps.max_sge		= attr.tm_caps.max_sge;
+	resp.tm_caps.flags		= attr.tm_caps.flags;
+	resp.response_length += sizeof(resp.tm_caps);
 end:
 	err = ib_copy_to_udata(ucore, &resp, resp.response_length);
 	return err;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 05fb4bdff6a0..d6fbad8f34aa 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -778,13 +778,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	}
 
 	if (MLX5_CAP_GEN(mdev, tag_matching)) {
-		props->xrq_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
-		props->xrq_caps.max_num_tags =
+		props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
+		props->tm_caps.max_num_tags =
 			(1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
-		props->xrq_caps.flags = IB_TM_CAP_RC;
-		props->xrq_caps.max_ops =
+		props->tm_caps.flags = IB_TM_CAP_RC;
+		props->tm_caps.max_ops =
 			1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
-		props->xrq_caps.max_sge = MLX5_TM_MAX_SGE;
+		props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
 	}
 
 	if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index bdb1279a415b..bbb5f54db882 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -285,7 +285,7 @@ enum ib_tm_cap_flags {
 	IB_TM_CAP_RC		    = 1 << 0,
 };
 
-struct ib_xrq_caps {
+struct ib_tm_caps {
 	/* Max size of RNDV header */
 	u32 max_rndv_hdr_size;
 	/* Max number of entries in tag matching list */
@@ -358,7 +358,7 @@ struct ib_device_attr {
 	struct ib_rss_caps	rss_caps;
 	u32			max_wq_type_rq;
 	u32			raw_packet_caps; /* Use ib_raw_packet_caps enum */
-	struct ib_xrq_caps	xrq_caps;
+	struct ib_tm_caps	tm_caps;
 };
 
 enum ib_mtu {
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 9a0b6479fe0c..d4e0b53bfc75 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -261,7 +261,7 @@ struct ib_uverbs_ex_query_device_resp {
 	struct ib_uverbs_rss_caps rss_caps;
 	__u32  max_wq_type_rq;
 	__u32 raw_packet_caps;
-	struct ib_uverbs_tm_caps xrq_caps;
+	struct ib_uverbs_tm_caps tm_caps;
 };
 
 struct ib_uverbs_query_port {
-- 
cgit 


From 73827a605bbd7cebef4cfd1261e497246a82a0e7 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Sun, 24 Sep 2017 21:46:30 +0300
Subject: IB/core: Fix qp_sec use after free access

When security_ib_alloc_security fails, qp->qp_sec memory is freed.
However ib_destroy_qp still tries to access this memory which result
in kernel crash. So its initialized to NULL to avoid such access.

Fixes: d291f1a65232 ("IB/core: Enforce PKey security on QPs")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/security.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 70ad19c4c73e..88bdafb297f5 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -432,8 +432,10 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
 	atomic_set(&qp->qp_sec->error_list_count, 0);
 	init_completion(&qp->qp_sec->error_complete);
 	ret = security_ib_alloc_security(&qp->qp_sec->security);
-	if (ret)
+	if (ret) {
 		kfree(qp->qp_sec);
+		qp->qp_sec = NULL;
+	}
 
 	return ret;
 }
-- 
cgit 


From edd31551148c09608feee6b8756ad148d550ee3b Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Sun, 24 Sep 2017 21:46:31 +0300
Subject: IB: Correct MR length field to be 64-bit

The ib_mr->length represents the length of the MR in bytes as per
the IBTA spec 1.3 section 11.2.10.3 (REGISTER PHYSICAL MEMORY REGION).

Currently ib_mr->length field is defined as only 32-bits field.
This might result into truncation and failed WRs of consumers who
registers more than 4GB bytes memory regions and whose WRs accessing
such MRs.

This patch makes the length 64-bit to avoid such truncation.

Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Faisal Latif <faisal.latif@intel.com>
Fixes: 4c67e2bfc8b7 ("IB/core: Introduce new fast registration API")
Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/nes/nes_verbs.c     | 4 ++--
 drivers/infiniband/ulp/iser/iser_memory.c | 2 +-
 include/rdma/ib_verbs.h                   | 2 +-
 net/sunrpc/xprtrdma/frwr_ops.c            | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index f0dc5f4aa177..442b9bdc0f03 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -3232,7 +3232,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
 					    mr->ibmr.iova);
 			set_wqe_32bit_value(wqe->wqe_words,
 					    NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
-					    mr->ibmr.length);
+					    lower_32_bits(mr->ibmr.length));
 			set_wqe_32bit_value(wqe->wqe_words,
 					    NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
 			set_wqe_32bit_value(wqe->wqe_words,
@@ -3274,7 +3274,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
 					    mr->npages * 8);
 
 			nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, "
-				  "length: %d, rkey: %0x, pgl_paddr: %llx, "
+				  "length: %lld, rkey: %0x, pgl_paddr: %llx, "
 				  "page_list_len: %u, wqe_misc: %x\n",
 				  (unsigned long long) mr->ibmr.iova,
 				  mr->ibmr.length,
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 9c3e9ab53a41..322209d5ff58 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -154,7 +154,7 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
 {
 	int i;
 
-	iser_err("page vec npages %d data length %d\n",
+	iser_err("page vec npages %d data length %lld\n",
 		 page_vec->npages, page_vec->fake_mr.length);
 	for (i = 0; i < page_vec->npages; i++)
 		iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index bbb5f54db882..e8608b2dc844 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1739,7 +1739,7 @@ struct ib_mr {
 	u32		   lkey;
 	u32		   rkey;
 	u64		   iova;
-	u32		   length;
+	u64		   length;
 	unsigned int	   page_size;
 	bool		   need_inval;
 	union {
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 5a936a6a31a3..df062e086bdb 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -401,7 +401,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	if (unlikely(n != mw->mw_nents))
 		goto out_mapmr_err;
 
-	dprintk("RPC:       %s: Using frmr %p to map %u segments (%u bytes)\n",
+	dprintk("RPC:       %s: Using frmr %p to map %u segments (%llu bytes)\n",
 		__func__, frmr, mw->mw_nents, mr->length);
 
 	key = (u8)(mr->rkey & 0x000000FF);
-- 
cgit 


From 9c6f42e9254150d2772242d9f8bd8d0b7b7431ff Mon Sep 17 00:00:00 2001
From: Shalom Lagziel <shaloml@mellanox.com>
Date: Sun, 24 Sep 2017 21:46:32 +0300
Subject: IB/ipoib: Fix sysfs Pkey create<->remove possible deadlock

A possible ABBA lock can happen with RTNL and vlan_rwsem.
For example:

Flow A: Device Flush
	__ipoib_ib_dev_flush
	down_read(vlan_rwsem) 			// Lock A
	ipoib_flush_ah
	flush_workqueue(priv->wq) 		// Wait for completion
	A work on shared WQ (Mcast carrier)
		ipoib_mcast_carrier_on_task
		while (!rtnl_trylock())         // Wait for lock B

Flow B: Sysfs PKEY delete
	ipoib_vlan_delete
	lock(RTNL) 				// Lock B
	down_write(vlan_rwsem) 			// Wait for lock A

This can happen with PKEY creates as well. The solution is to release
the RTNL lock in sysfs functions in case it is not possible to lock
VLAN RW semaphore and reset the SYS call.

Fixes: 69956d83267e ("IB/ipoib: Sync between remove_one to sysfs calls that use rtnl_lock")
Signed-off-by: Shalom Lagziel <shaloml@mellanox.com>
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 9927cd6b7082..e01c58edca15 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -141,14 +141,17 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 		return restart_syscall();
 	}
 
-	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
-	if (!priv) {
+	if (!down_write_trylock(&ppriv->vlan_rwsem)) {
 		rtnl_unlock();
 		mutex_unlock(&ppriv->sysfs_mutex);
-		return -ENOMEM;
+		return restart_syscall();
 	}
 
-	down_write(&ppriv->vlan_rwsem);
+	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
+	if (!priv) {
+		result = -ENOMEM;
+		goto out;
+	}
 
 	/*
 	 * First ensure this isn't a duplicate. We check the parent device and
@@ -175,7 +178,7 @@ out:
 	rtnl_unlock();
 	mutex_unlock(&ppriv->sysfs_mutex);
 
-	if (result) {
+	if (result && priv) {
 		free_netdev(priv->dev);
 		kfree(priv);
 	}
@@ -204,7 +207,12 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 		return restart_syscall();
 	}
 
-	down_write(&ppriv->vlan_rwsem);
+	if (!down_write_trylock(&ppriv->vlan_rwsem)) {
+		rtnl_unlock();
+		mutex_unlock(&ppriv->sysfs_mutex);
+		return restart_syscall();
+	}
+
 	list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
 		if (priv->pkey == pkey &&
 		    priv->child_type == IPOIB_LEGACY_CHILD) {
-- 
cgit 


From 7c9d9662103ae1c11acc7bfc47d988466cff23cf Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Sun, 24 Sep 2017 21:46:33 +0300
Subject: IB/ipoib: Fix inconsistency with free_netdev and free_rdma_netdev

Call free_rdma_netdev instead of free_netdev each time we want to
release a netdevice. This call is also relevant for future freeing
of offloaded child interfaces.

This patch also adds a missing call for free netdevice when releasing
a parent interface that has child interfaces using ipoib_remove_one.

Fixes: cd565b4b51e5 ('IB/IPoIB: Support acceleration options callbacks')
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 15 +++++++++++----
 drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 10 ++++++++--
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index bac95b509a9b..dcc77014018d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2180,6 +2180,7 @@ static struct net_device *ipoib_add_port(const char *format,
 {
 	struct ipoib_dev_priv *priv;
 	struct ib_port_attr attr;
+	struct rdma_netdev *rn;
 	int result = -ENOMEM;
 
 	priv = ipoib_intf_alloc(hca, port, format);
@@ -2279,7 +2280,8 @@ register_failed:
 	ipoib_dev_cleanup(priv->dev);
 
 device_init_failed:
-	free_netdev(priv->dev);
+	rn = netdev_priv(priv->dev);
+	rn->free_rdma_netdev(priv->dev);
 	kfree(priv);
 
 alloc_mem_failed:
@@ -2328,7 +2330,7 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
 		return;
 
 	list_for_each_entry_safe(priv, tmp, dev_list, list) {
-		struct rdma_netdev *rn = netdev_priv(priv->dev);
+		struct rdma_netdev *parent_rn = netdev_priv(priv->dev);
 
 		ib_unregister_event_handler(&priv->event_handler);
 		flush_workqueue(ipoib_workqueue);
@@ -2350,10 +2352,15 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
 		unregister_netdev(priv->dev);
 		mutex_unlock(&priv->sysfs_mutex);
 
-		rn->free_rdma_netdev(priv->dev);
+		parent_rn->free_rdma_netdev(priv->dev);
+
+		list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
+			struct rdma_netdev *child_rn;
 
-		list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
+			child_rn = netdev_priv(cpriv->dev);
+			child_rn->free_rdma_netdev(cpriv->dev);
 			kfree(cpriv);
+		}
 
 		kfree(priv);
 	}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index e01c58edca15..55a9b71ed05a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -179,7 +179,10 @@ out:
 	mutex_unlock(&ppriv->sysfs_mutex);
 
 	if (result && priv) {
-		free_netdev(priv->dev);
+		struct rdma_netdev *rn;
+
+		rn = netdev_priv(priv->dev);
+		rn->free_rdma_netdev(priv->dev);
 		kfree(priv);
 	}
 
@@ -232,7 +235,10 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 	mutex_unlock(&ppriv->sysfs_mutex);
 
 	if (dev) {
-		free_netdev(dev);
+		struct rdma_netdev *rn;
+
+		rn = netdev_priv(dev);
+		rn->free_rdma_netdev(priv->dev);
 		kfree(priv);
 		return 0;
 	}
-- 
cgit 


From d67bc5d4e3e100d762c0f57ea67f28bc219698a6 Mon Sep 17 00:00:00 2001
From: Ilya Lesokhin <ilyal@mellanox.com>
Date: Sun, 24 Sep 2017 21:46:34 +0300
Subject: IB/mlx5: Simplify mlx5_ib_cont_pages

The patch simplifies mlx5_ib_cont_pages and fixes the following
issues in the original implementation:

First issues is related to alignment of the PFNs. After the check
base + p != PFN, the alignment of the PFN wasn't checked. So the PFN
sequence 0, 1, 1, 2 would result in a page_shift of 13 even though
the 3rd PFN is not 8KB aligned.

This wasn't actually a bug because it was supported by all the
existing mlx5 compatible device, but we don't want to require
this support in all future devices.

Another issue is because the inner loop didn't advance PFN so
the test "if (base + p != pfn)" always failed for SGE with
len > (1<<page_shift).

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Reviewed-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/mem.c | 47 +++++++++++++++-------------------------
 1 file changed, 17 insertions(+), 30 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 914f212e7ef6..f3dbd75a0a96 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -50,13 +50,9 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 {
 	unsigned long tmp;
 	unsigned long m;
-	int i, k;
-	u64 base = 0;
-	int p = 0;
-	int skip;
-	int mask;
-	u64 len;
-	u64 pfn;
+	u64 base = ~0, p = 0;
+	u64 len, pfn;
+	int i = 0;
 	struct scatterlist *sg;
 	int entry;
 	unsigned long page_shift = umem->page_shift;
@@ -76,33 +72,24 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 	m = find_first_bit(&tmp, BITS_PER_LONG);
 	if (max_page_shift)
 		m = min_t(unsigned long, max_page_shift - page_shift, m);
-	skip = 1 << m;
-	mask = skip - 1;
-	i = 0;
+
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
 		len = sg_dma_len(sg) >> page_shift;
 		pfn = sg_dma_address(sg) >> page_shift;
-		for (k = 0; k < len; k++) {
-			if (!(i & mask)) {
-				tmp = (unsigned long)pfn;
-				m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG));
-				skip = 1 << m;
-				mask = skip - 1;
-				base = pfn;
-				p = 0;
-			} else {
-				if (base + p != pfn) {
-					tmp = (unsigned long)p;
-					m = find_first_bit(&tmp, BITS_PER_LONG);
-					skip = 1 << m;
-					mask = skip - 1;
-					base = pfn;
-					p = 0;
-				}
-			}
-			p++;
-			i++;
+		if (base + p != pfn) {
+			/* If either the offset or the new
+			 * base are unaligned update m
+			 */
+			tmp = (unsigned long)(pfn | p);
+			if (!IS_ALIGNED(tmp, 1 << m))
+				m = find_first_bit(&tmp, BITS_PER_LONG);
+
+			base = pfn;
+			p = 0;
 		}
+
+		p += len;
+		i += len;
 	}
 
 	if (i) {
-- 
cgit 


From fbcd49838d9094ca45772356e7b33afe4b7c93e7 Mon Sep 17 00:00:00 2001
From: Ilya Lesokhin <ilyal@mellanox.com>
Date: Sun, 24 Sep 2017 21:46:35 +0300
Subject: IB/mlx5: Fix NULL deference on mlx5_ib_update_xlt failure

mlx5_ib_reg_user_mr called mlx5_ib_dereg_mr in case of MR population
failure. This resulted in a NULL dereference as ibmr->device wasn't
initialized yet.

We address this by adding an internal dereg_mr function that can handle
partially initialized MRs, and fixing clean_mr to work on partially
initialized MRs.

Fixes: ff740aefecb9 ("IB/mlx5: Decouple MR allocation and population flows")
Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 0e2789d9bb4d..37bbc543847a 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -47,7 +47,8 @@ enum {
 
 #define MLX5_UMR_ALIGN 2048
 
-static int clean_mr(struct mlx5_ib_mr *mr);
+static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
+static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 
@@ -1270,8 +1271,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 		err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
 					 update_xlt_flags);
+
 		if (err) {
-			mlx5_ib_dereg_mr(&mr->ibmr);
+			dereg_mr(dev, mr);
 			return ERR_PTR(err);
 		}
 	}
@@ -1356,7 +1358,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
 				  &npages, &page_shift, &ncont, &order);
 		if (err < 0) {
-			clean_mr(mr);
+			clean_mr(dev, mr);
 			return err;
 		}
 	}
@@ -1410,7 +1412,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		if (err) {
 			mlx5_ib_warn(dev, "Failed to rereg UMR\n");
 			ib_umem_release(mr->umem);
-			clean_mr(mr);
+			clean_mr(dev, mr);
 			return err;
 		}
 	}
@@ -1469,9 +1471,8 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
 	}
 }
 
-static int clean_mr(struct mlx5_ib_mr *mr)
+static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
 	int allocated_from_cache = mr->allocated_from_cache;
 	int err;
 
@@ -1507,10 +1508,8 @@ static int clean_mr(struct mlx5_ib_mr *mr)
 	return 0;
 }
 
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
-	struct mlx5_ib_mr *mr = to_mmr(ibmr);
 	int npages = mr->npages;
 	struct ib_umem *umem = mr->umem;
 
@@ -1539,7 +1538,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
 	}
 #endif
 
-	clean_mr(mr);
+	clean_mr(dev, mr);
 
 	if (umem) {
 		ib_umem_release(umem);
@@ -1549,6 +1548,14 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
 	return 0;
 }
 
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+
+	return dereg_mr(dev, mr);
+}
+
 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 			       enum ib_mr_type mr_type,
 			       u32 max_num_sg)
-- 
cgit 


From fe59493240169a2cc3f445ae5f2a2308fda06b63 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 11 Sep 2017 14:29:15 +0200
Subject: PCI: Add dummy pci_acs_enabled() for CONFIG_PCI=n build

If CONFIG_PCI=n and gcc (e.g. 4.1.2) decides not to inline
get_pci_function_alias_group(), the build fails with:

  drivers/iommu/iommu.o: In function `get_pci_function_alias_group':
  iommu.c:(.text+0xfdc): undefined reference to `pci_acs_enabled'

Due to the various dummies for PCI calls in the CONFIG_PCI=n case,
pci_acs_enabled() never called, but not all versions of gcc are smart
enough to realize that.

While explicitly marking get_pci_function_alias_group() inline would fix
the build, this would inflate the code for the CONFIG_PCI=y case, as
get_pci_function_alias_group() is a not-so-small function called from two
places.

Hence fix the issue by introducing a dummy for pci_acs_enabled() instead.

Fixes: 0ae349a0f33f ("iommu/qcom: Add qcom_iommu")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/pci.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index f68c58a93dd0..f4f8ee5a7362 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1685,6 +1685,8 @@ static inline int pci_get_new_domain_nr(void) { return -ENOSYS; }
 
 #define dev_is_pci(d) (false)
 #define dev_is_pf(d) (false)
+static inline bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags)
+{ return false; }
 #endif /* CONFIG_PCI */
 
 /* Include architecture-dependent settings and functions */
-- 
cgit 


From 9c3340ea7f5dabf88ca096a917cb0ab1f208ef2a Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 11 Sep 2017 19:11:07 -0600
Subject: selftests: futex: copy sub-dir test scripts for make O=dir run

For make O=dir run_tests to work, test scripts from sub-directories
need to be copied over to the object directory. Running tests from the
object directory is necessary to avoid making the source tree dirty.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Reviewed-by: Darren Hart (VMware) <dvhart@infradead.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/futex/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 7c647f619d63..9358cb210fd5 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -11,10 +11,13 @@ all:
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+		if [ -e $$DIR/$(TEST_PROGS) ]; then
+			rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/;
+		fi
 	done
 
 override define RUN_TESTS
-	$(OUTPUT)/run.sh
+	cd $(OUTPUT); ./run.sh
 endef
 
 override define INSTALL_RULE
-- 
cgit 


From 8230b905a6780c60372bf5df7bf5f23041ca3196 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Tue, 12 Sep 2017 08:52:13 -0600
Subject: selftests: mqueue: Use full path to run tests from Makefile

Use full path including $(OUTPUT) to run tests from Makefile for
normal case when objects reside in the source tree as well as when
objects are relocated with make O=dir. In both cases $(OUTPUT) will
be set correctly by lib.mk.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/mqueue/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
index 79a664aeb8d7..0f5e347b068d 100644
--- a/tools/testing/selftests/mqueue/Makefile
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -5,8 +5,8 @@ TEST_GEN_PROGS := mq_open_tests mq_perf_tests
 include ../lib.mk
 
 override define RUN_TESTS
-	@./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
-	@./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
+	$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
+	$(OUTPUT)//mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
 endef
 
 override define EMIT_TESTS
-- 
cgit 


From 1ede053632f6380d7e1dba4d781e5eb78621aa3a Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 18 Sep 2017 17:30:50 -0600
Subject: selftests: Makefile: fix for loops in targets to run silently

Fix for loops in targets to run silently to avoid cluttering the test
results.

Suppresses the following from targets: e.g run from breakpoints

for TARGET in breakpoints; do		\
	BUILD_TARGET=$BUILD/$TARGET;	\
	mkdir $BUILD_TARGET  -p;	\
	make OUTPUT=$BUILD_TARGET -C $TARGET;\
done;

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/Makefile | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index f4368db011ea..ff805643b5f7 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -66,32 +66,32 @@ endif
 
 export BUILD
 all:
-	for TARGET in $(TARGETS); do		\
+	@for TARGET in $(TARGETS); do		\
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
 	done;
 
 run_tests: all
-	for TARGET in $(TARGETS); do \
+	@for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
 	done;
 
 hotplug:
-	for TARGET in $(TARGETS_HOTPLUG); do \
+	@for TARGET in $(TARGETS_HOTPLUG); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
 	done;
 
 run_hotplug: hotplug
-	for TARGET in $(TARGETS_HOTPLUG); do \
+	@for TARGET in $(TARGETS_HOTPLUG); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
 	done;
 
 clean_hotplug:
-	for TARGET in $(TARGETS_HOTPLUG); do \
+	@for TARGET in $(TARGETS_HOTPLUG); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
 	done;
@@ -107,7 +107,7 @@ install:
 ifdef INSTALL_PATH
 	@# Ask all targets to install their files
 	mkdir -p $(INSTALL_PATH)
-	for TARGET in $(TARGETS); do \
+	@for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
 	done;
@@ -132,7 +132,7 @@ else
 endif
 
 clean:
-	for TARGET in $(TARGETS); do \
+	@for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
 	done;
-- 
cgit 


From 659dbfd8c47adeb03f401d1a1f17091bb63cc5a2 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 18 Sep 2017 18:46:23 -0600
Subject: selftests: futex: Makefile: fix for loops in targets to run silently

Fix for loops in targets to run silently to avoid cluttering the test
results.

Suppresses the following from targets:

for DIR in functional; do               \
        BUILD_TARGET=./tools/testing/selftests/futex/$DIR; \
        mkdir $BUILD_TARGET  -p;        \
        make OUTPUT=$BUILD_TARGET -C $DIR all;\
done

./tools/testing/selftests/futex/run.sh

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Reviewed-by: Darren Hart (VMware) <dvhart@infradead.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/futex/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 9358cb210fd5..f0c0369ccb79 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -7,7 +7,7 @@ TEST_PROGS := run.sh
 include ../lib.mk
 
 all:
-	for DIR in $(SUBDIRS); do		\
+	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
@@ -17,7 +17,7 @@ all:
 	done
 
 override define RUN_TESTS
-	cd $(OUTPUT); ./run.sh
+	@cd $(OUTPUT); ./run.sh
 endef
 
 override define INSTALL_RULE
@@ -36,7 +36,7 @@ override define EMIT_TESTS
 endef
 
 override define CLEAN
-	for DIR in $(SUBDIRS); do		\
+	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
-- 
cgit 


From 10859f3855db4c6f10dc7974ff4b3a292f3de8e0 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 7 Sep 2017 16:32:46 -0700
Subject: selftests/seccomp: Support glibc 2.26 siginfo_t.h

The 2.26 release of glibc changed how siginfo_t is defined, and the earlier
work-around to using the kernel definition are no longer needed. The old
way needs to stay around for a while, though.

Reported-by: Seth Forshee <seth.forshee@canonical.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Will Drewry <wad@chromium.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: linux-kselftest@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 4d6f92a9df6b..19cd272c234d 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -6,10 +6,18 @@
  */
 
 #include <sys/types.h>
-#include <asm/siginfo.h>
-#define __have_siginfo_t 1
-#define __have_sigval_t 1
-#define __have_sigevent_t 1
+
+/*
+ * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
+ * we need to use the kernel's siginfo.h file and trick glibc
+ * into accepting it.
+ */
+#if !__GLIBC_PREREQ(2, 26)
+# include <asm/siginfo.h>
+# define __have_siginfo_t 1
+# define __have_sigval_t 1
+# define __have_sigevent_t 1
+#endif
 
 #include <errno.h>
 #include <linux/filter.h>
@@ -676,7 +684,7 @@ TEST_F_SIGNAL(TRAP, ign, SIGSYS)
 	syscall(__NR_getpid);
 }
 
-static struct siginfo TRAP_info;
+static siginfo_t TRAP_info;
 static volatile int TRAP_nr;
 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
 {
-- 
cgit 


From 21aadfa2426d5d199ceb474d0159d079c7f17bfa Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@cn.fujitsu.com>
Date: Thu, 21 Sep 2017 17:13:27 +0800
Subject: selftests/memfd: correct run_tests.sh permission

to fix the following issue:
------------------
TAP version 13
selftests: run_tests.sh
========================================
selftests: Warning: file run_tests.sh is not executable, correct this.
not ok 1..1 selftests: run_tests.sh [FAIL]
------------------

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/memfd/run_tests.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 tools/testing/selftests/memfd/run_tests.sh

diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_tests.sh
old mode 100644
new mode 100755
-- 
cgit 


From 01db7fbf5487505b887fbd6a03c51f2adc952196 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 21 Sep 2017 13:46:01 -0600
Subject: selftests: timers: set-timer-lat: fix hang when std out/err are
 redirected

do_timer_oneshot() uses select() as a timer with FD_SETSIZE and readfs
is cleared with FD_ZERO without FD_SET.

When stdout and stderr are redirected, the test hangs in select forever.
Fix the problem calling select() with readfds empty and nfds zero. This
is sufficient for using select() for timer.

With this fix "./set-timer-lat > /dev/null 2>&1" no longer hangs.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Acked-by: Greg Hackmann <ghackmann@google.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/timers/set-timer-lat.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
index 9c92b7bd5641..ea1af5dbc7b6 100644
--- a/tools/testing/selftests/timers/set-timer-lat.c
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -228,7 +228,6 @@ int do_timer_oneshot(int clock_id, int flags)
 	timer_t tm1;
 	const int interval = 0;
 	struct timeval timeout;
-	fd_set fds;
 	int err;
 
 	err = setup_timer(clock_id, flags, interval, &tm1);
@@ -237,9 +236,8 @@ int do_timer_oneshot(int clock_id, int flags)
 
 	memset(&timeout, 0, sizeof(timeout));
 	timeout.tv_sec = 5;
-	FD_ZERO(&fds);
 	do {
-		err = select(FD_SETSIZE, &fds, NULL, NULL, &timeout);
+		err = select(0, NULL, NULL, NULL, &timeout);
 	} while (err == -1 && errno == EINTR);
 
 	timer_delete(tm1);
-- 
cgit 


From eefd95e1f3d47b90dc768e9ebc77d390c4f34809 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 21 Sep 2017 13:05:18 -0600
Subject: selftests: timers: set-timer-lat: Fix hang when testing unsupported
 alarms

When timer_create() fails on a bootime or realtime clock, setup_timer()
returns 0 as if timer has been set. Callers wait forever for the timer
to expire.

This hang is seen on a system that doesn't have support for:

CLOCK_REALTIME_ALARM   ABSTIME missing CAP_WAKE_ALARM? : [UNSUPPORTED]

Test hangs waiting for a timer that hasn't been set to expire. Fix
setup_timer() to return 1, add handling in callers to detect the
unsupported case and return 0 without waiting to not fail the test.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/timers/set-timer-lat.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
index ea1af5dbc7b6..50da45437daa 100644
--- a/tools/testing/selftests/timers/set-timer-lat.c
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -143,7 +143,8 @@ int setup_timer(int clock_id, int flags, int interval, timer_t *tm1)
 			printf("%-22s %s missing CAP_WAKE_ALARM?    : [UNSUPPORTED]\n",
 					clockstring(clock_id),
 					flags ? "ABSTIME":"RELTIME");
-			return 0;
+			/* Indicate timer isn't set, so caller doesn't wait */
+			return 1;
 		}
 		printf("%s - timer_create() failed\n", clockstring(clock_id));
 		return -1;
@@ -213,8 +214,9 @@ int do_timer(int clock_id, int flags)
 	int err;
 
 	err = setup_timer(clock_id, flags, interval, &tm1);
+	/* Unsupported case - return 0 to not fail the test */
 	if (err)
-		return err;
+		return err == 1 ? 0 : err;
 
 	while (alarmcount < 5)
 		sleep(1);
@@ -231,8 +233,9 @@ int do_timer_oneshot(int clock_id, int flags)
 	int err;
 
 	err = setup_timer(clock_id, flags, interval, &tm1);
+	/* Unsupported case - return 0 to not fail the test */
 	if (err)
-		return err;
+		return err == 1 ? 0 : err;
 
 	memset(&timeout, 0, sizeof(timeout));
 	timeout.tv_sec = 5;
-- 
cgit 


From 10201655b085df8e000822e496e5d4016a167a36 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Sep 2017 07:15:35 -0500
Subject: gfs2: Fix debugfs glocks dump

The switch to rhashtables (commit 88ffbf3e03) broke the debugfs glock
dump (/sys/kernel/debug/gfs2/<device>/glocks) for dumps bigger than a
single buffer: the right function for restarting an rhashtable iteration
from the beginning of the hash table is rhashtable_walk_enter;
rhashtable_walk_stop + rhashtable_walk_start will just resume from the
current position.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Cc: stable@vger.kernel.org # v4.3+
---
 fs/gfs2/glock.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 98e845b7841b..11066d8647d2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1945,13 +1945,9 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct gfs2_glock_iter *gi = seq->private;
 	loff_t n = *pos;
-	int ret;
-
-	if (gi->last_pos <= *pos)
-		n = (*pos - gi->last_pos);
 
-	ret = rhashtable_walk_start(&gi->hti);
-	if (ret)
+	rhashtable_walk_enter(&gl_hash_table, &gi->hti);
+	if (rhashtable_walk_start(&gi->hti) != 0)
 		return NULL;
 
 	do {
@@ -1959,6 +1955,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 	} while (gi->gl && n--);
 
 	gi->last_pos = *pos;
+
 	return gi->gl;
 }
 
@@ -1970,6 +1967,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
 	(*pos)++;
 	gi->last_pos = *pos;
 	gfs2_glock_iter_next(gi);
+
 	return gi->gl;
 }
 
@@ -1980,6 +1978,7 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
 
 	gi->gl = NULL;
 	rhashtable_walk_stop(&gi->hti);
+	rhashtable_walk_exit(&gi->hti);
 }
 
 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
@@ -2042,12 +2041,10 @@ static int __gfs2_glocks_open(struct inode *inode, struct file *file,
 		struct gfs2_glock_iter *gi = seq->private;
 
 		gi->sdp = inode->i_private;
-		gi->last_pos = 0;
 		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
 		if (seq->buf)
 			seq->size = GFS2_SEQ_GOODSIZE;
 		gi->gl = NULL;
-		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
 	}
 	return ret;
 }
@@ -2063,7 +2060,6 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file)
 	struct gfs2_glock_iter *gi = seq->private;
 
 	gi->gl = NULL;
-	rhashtable_walk_exit(&gi->hti);
 	return seq_release_private(inode, file);
 }
 
-- 
cgit 


From 8cbd96a6285e8eb65232b5afd3e8d9418453a61c Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 21 Sep 2017 08:13:49 -0700
Subject: nvme: fix sqhd reference when admin queue connect fails

Fix bug in sqhd patch.

It wasn't the sq that was at risk. In the case where the admin queue
connect command fails, the sq->size field is not set. Therefore, this
becomes a divide by zero error.

Add a quick check to bypass under this failure condition.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index c2a768a94235..1b208beeef50 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -390,7 +390,8 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 	if (status)
 		nvmet_set_status(req, status);
 
-	req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size;
+	if (req->sq->size)
+		req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size;
 	req->rsp->sq_head = cpu_to_le16(req->sq->sqhd);
 	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
 	req->rsp->command_id = req->cmd->common.command_id;
-- 
cgit 


From 1a40d97288c6ffea9b355139e88fa62f0e5439f7 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 21 Sep 2017 17:01:36 +0300
Subject: nvme-core: Use nvme_wq to queue async events and fw activation

async_event_work might race as it is executed from two different
workqueues at the moment.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5589f67d2cd8..bb2aad078637 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2676,7 +2676,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 	case NVME_SC_ABORT_REQ:
 		++ctrl->event_limit;
 		if (ctrl->state == NVME_CTRL_LIVE)
-			schedule_work(&ctrl->async_event_work);
+			queue_work(nvme_wq, &ctrl->async_event_work);
 		break;
 	default:
 		break;
@@ -2691,7 +2691,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		nvme_queue_scan(ctrl);
 		break;
 	case NVME_AER_NOTICE_FW_ACT_STARTING:
-		schedule_work(&ctrl->fw_act_work);
+		queue_work(nvme_wq, &ctrl->fw_act_work);
 		break;
 	default:
 		dev_warn(ctrl->device, "async event result %08x\n", result);
-- 
cgit 


From 0a960afd60d02808c7f7f36d4aa8a2e07045e1e9 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 21 Sep 2017 17:01:37 +0300
Subject: nvme-rdma: give up reconnect if state change fails

If we failed to transition to state LIVE after a successful reconnect,
then controller deletion already started. In this case there is no
point moving forward with reconnect.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/rdma.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 58983000964b..8441f6b3f617 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -942,7 +942,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-	WARN_ON_ONCE(!changed);
+	if (!changed) {
+		/* state change failure is ok if we're in DELETING state */
+		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
+		return;
+	}
+
 	ctrl->ctrl.nr_reconnects = 0;
 
 	nvme_start_ctrl(&ctrl->ctrl);
-- 
cgit 


From e4d753d7e51c0648b9ee33efeed55d45f362fc3d Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 21 Sep 2017 17:01:38 +0300
Subject: nvme-rdma: don't fully stop the controller in error recovery

By calling nvme_stop_ctrl on a already failed controller will wait for the
scan work to complete (only by identify timeout expiration which is 60
seconds). This is unnecessary when we already know that the controller has
failed.

Reported-by: Yi Zhang <yizhan@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 8441f6b3f617..92a03ff5fb4d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -967,7 +967,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	struct nvme_rdma_ctrl *ctrl = container_of(work,
 			struct nvme_rdma_ctrl, err_work);
 
-	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_stop_keep_alive(&ctrl->ctrl);
 
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
-- 
cgit 


From 3688feb582a1bc4e58ad50f5eccfdb90615de27b Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 19 Sep 2017 15:13:11 -0700
Subject: nvmet-fc: on port remove call put outside lock

Avoid calling the put routine, as it may traverse to free routines while
holding the target lock.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fc.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index c48c83d97e30..6850672ad2a2 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2530,13 +2530,17 @@ nvmet_fc_remove_port(struct nvmet_port *port)
 {
 	struct nvmet_fc_tgtport *tgtport = port->priv;
 	unsigned long flags;
+	bool matched = false;
 
 	spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
 	if (tgtport->port == port) {
-		nvmet_fc_tgtport_put(tgtport);
+		matched = true;
 		tgtport->port = NULL;
 	}
 	spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+
+	if (matched)
+		nvmet_fc_tgtport_put(tgtport);
 }
 
 static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
-- 
cgit 


From 0c319d3a144d4b8f1ea2047fd614d2149b68f889 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 19 Sep 2017 16:33:56 -0700
Subject: nvmet-fc: ensure target queue id within range.

When searching for queue id's ensure they are within the expected range.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 6850672ad2a2..58e010bdda3e 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -783,6 +783,9 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport,
 	u16 qid = nvmet_fc_getqueueid(connection_id);
 	unsigned long flags;
 
+	if (qid > NVMET_NR_QUEUES)
+		return NULL;
+
 	spin_lock_irqsave(&tgtport->lock, flags);
 	list_for_each_entry(assoc, &tgtport->assoc_list, a_list) {
 		if (association_id == assoc->association_id) {
-- 
cgit 


From 6b71f9e1e849f82abb4a8d54ce7f4b1c71f19ac4 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Wed, 20 Sep 2017 11:07:26 -0700
Subject: nvmet-fc: sync header templates with comments

Comments were incorrect:
- defer_rcv was in host port template. moved to target port template
- Added Mandatory statements for target port template items

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/nvme-fc-driver.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 9c5cb4480806..a726f96010d5 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -346,11 +346,6 @@ struct nvme_fc_remote_port {
  *       indicating an FC transport Aborted status.
  *       Entrypoint is Mandatory.
  *
- * @defer_rcv:  Called by the transport to signal the LLLD that it has
- *       begun processing of a previously received NVME CMD IU. The LLDD
- *       is now free to re-use the rcv buffer associated with the
- *       nvmefc_tgt_fcp_req.
- *
  * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
  *       supports for cpu affinitization.
  *       Value is Mandatory. Must be at least 1.
@@ -806,11 +801,19 @@ struct nvmet_fc_target_port {
  *       outstanding operation (if there was one) to complete, then will
  *       call the fcp_req_release() callback to return the command's
  *       exchange context back to the LLDD.
+ *       Entrypoint is Mandatory.
  *
  * @fcp_req_release:  Called by the transport to return a nvmefc_tgt_fcp_req
  *       to the LLDD after all operations on the fcp operation are complete.
  *       This may be due to the command completing or upon completion of
  *       abort cleanup.
+ *       Entrypoint is Mandatory.
+ *
+ * @defer_rcv:  Called by the transport to signal the LLLD that it has
+ *       begun processing of a previously received NVME CMD IU. The LLDD
+ *       is now free to re-use the rcv buffer associated with the
+ *       nvmefc_tgt_fcp_req.
+ *       Entrypoint is Optional.
  *
  * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
  *       supports for cpu affinitization.
-- 
cgit 


From fddc9923c6d41de9fe7b1f323a3cece53e046c88 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 19 Sep 2017 14:01:50 -0700
Subject: nvme-fcloop: fix port deletes and callbacks

Now that there are potentially long delays between when a remoteport or
targetport delete calls is made and when the callback occurs (dev_loss_tmo
timeout), no longer block in the delete routines and move the final nport
puts to the callbacks.

Moved the fcloop_nport_get/put/free routines to avoid forward declarations.

Ensure port_info structs used in registrations are nulled in case fields
are not set (ex: devloss_tmo values).

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fcloop.c | 102 ++++++++++++++++---------------------------
 1 file changed, 38 insertions(+), 64 deletions(-)

diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 1fd1afbb8b2a..7b75d9de55ab 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -224,8 +224,6 @@ struct fcloop_nport {
 	struct fcloop_lport *lport;
 	struct list_head nport_list;
 	struct kref ref;
-	struct completion rport_unreg_done;
-	struct completion tport_unreg_done;
 	u64 node_name;
 	u64 port_name;
 	u32 port_role;
@@ -630,6 +628,32 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
 	schedule_work(&inireq->iniwork);
 }
 
+static void
+fcloop_nport_free(struct kref *ref)
+{
+	struct fcloop_nport *nport =
+		container_of(ref, struct fcloop_nport, ref);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fcloop_lock, flags);
+	list_del(&nport->nport_list);
+	spin_unlock_irqrestore(&fcloop_lock, flags);
+
+	kfree(nport);
+}
+
+static void
+fcloop_nport_put(struct fcloop_nport *nport)
+{
+	kref_put(&nport->ref, fcloop_nport_free);
+}
+
+static int
+fcloop_nport_get(struct fcloop_nport *nport)
+{
+	return kref_get_unless_zero(&nport->ref);
+}
+
 static void
 fcloop_localport_delete(struct nvme_fc_local_port *localport)
 {
@@ -644,8 +668,7 @@ fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport)
 {
 	struct fcloop_rport *rport = remoteport->private;
 
-	/* release any threads waiting for the unreg to complete */
-	complete(&rport->nport->rport_unreg_done);
+	fcloop_nport_put(rport->nport);
 }
 
 static void
@@ -653,8 +676,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
 {
 	struct fcloop_tport *tport = targetport->private;
 
-	/* release any threads waiting for the unreg to complete */
-	complete(&tport->nport->tport_unreg_done);
+	fcloop_nport_put(tport->nport);
 }
 
 #define	FCLOOP_HW_QUEUES		4
@@ -722,6 +744,7 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr,
 		goto out_free_opts;
 	}
 
+	memset(&pinfo, 0, sizeof(pinfo));
 	pinfo.node_name = opts->wwnn;
 	pinfo.port_name = opts->wwpn;
 	pinfo.port_role = opts->roles;
@@ -804,32 +827,6 @@ fcloop_delete_local_port(struct device *dev, struct device_attribute *attr,
 	return ret ? ret : count;
 }
 
-static void
-fcloop_nport_free(struct kref *ref)
-{
-	struct fcloop_nport *nport =
-		container_of(ref, struct fcloop_nport, ref);
-	unsigned long flags;
-
-	spin_lock_irqsave(&fcloop_lock, flags);
-	list_del(&nport->nport_list);
-	spin_unlock_irqrestore(&fcloop_lock, flags);
-
-	kfree(nport);
-}
-
-static void
-fcloop_nport_put(struct fcloop_nport *nport)
-{
-	kref_put(&nport->ref, fcloop_nport_free);
-}
-
-static int
-fcloop_nport_get(struct fcloop_nport *nport)
-{
-	return kref_get_unless_zero(&nport->ref);
-}
-
 static struct fcloop_nport *
 fcloop_alloc_nport(const char *buf, size_t count, bool remoteport)
 {
@@ -938,6 +935,7 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr,
 	if (!nport)
 		return -EIO;
 
+	memset(&pinfo, 0, sizeof(pinfo));
 	pinfo.node_name = nport->node_name;
 	pinfo.port_name = nport->port_name;
 	pinfo.port_role = nport->port_role;
@@ -979,24 +977,12 @@ __unlink_remote_port(struct fcloop_nport *nport)
 }
 
 static int
-__wait_remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
+__remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
 {
-	int ret;
-
 	if (!rport)
 		return -EALREADY;
 
-	init_completion(&nport->rport_unreg_done);
-
-	ret = nvme_fc_unregister_remoteport(rport->remoteport);
-	if (ret)
-		return ret;
-
-	wait_for_completion(&nport->rport_unreg_done);
-
-	fcloop_nport_put(nport);
-
-	return ret;
+	return nvme_fc_unregister_remoteport(rport->remoteport);
 }
 
 static ssize_t
@@ -1029,7 +1015,7 @@ fcloop_delete_remote_port(struct device *dev, struct device_attribute *attr,
 	if (!nport)
 		return -ENOENT;
 
-	ret = __wait_remoteport_unreg(nport, rport);
+	ret = __remoteport_unreg(nport, rport);
 
 	return ret ? ret : count;
 }
@@ -1086,24 +1072,12 @@ __unlink_target_port(struct fcloop_nport *nport)
 }
 
 static int
-__wait_targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
+__targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
 {
-	int ret;
-
 	if (!tport)
 		return -EALREADY;
 
-	init_completion(&nport->tport_unreg_done);
-
-	ret = nvmet_fc_unregister_targetport(tport->targetport);
-	if (ret)
-		return ret;
-
-	wait_for_completion(&nport->tport_unreg_done);
-
-	fcloop_nport_put(nport);
-
-	return ret;
+	return nvmet_fc_unregister_targetport(tport->targetport);
 }
 
 static ssize_t
@@ -1136,7 +1110,7 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
 	if (!nport)
 		return -ENOENT;
 
-	ret = __wait_targetport_unreg(nport, tport);
+	ret = __targetport_unreg(nport, tport);
 
 	return ret ? ret : count;
 }
@@ -1223,11 +1197,11 @@ static void __exit fcloop_exit(void)
 
 		spin_unlock_irqrestore(&fcloop_lock, flags);
 
-		ret = __wait_targetport_unreg(nport, tport);
+		ret = __targetport_unreg(nport, tport);
 		if (ret)
 			pr_warn("%s: Failed deleting target port\n", __func__);
 
-		ret = __wait_remoteport_unreg(nport, rport);
+		ret = __remoteport_unreg(nport, rport);
 		if (ret)
 			pr_warn("%s: Failed deleting remote port\n", __func__);
 
-- 
cgit 


From a08588ea486a5590b50c36f437dc86350271b250 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 21 Sep 2017 23:24:39 -0700
Subject: irqchip/mips-gic: Fix shifts to extract register fields

The MIPS GIC driver is incorrectly using __fls to shift registers,
intending to shift to the least significant bit of a value based upon
its mask but instead shifting off all but the value's top bit. It should
actually be using __ffs to shift to the first, not last, bit of the
value.

Apparently the system I used when testing commit 3680746abd87
("irqchip: mips-gic: Convert remaining shared reg access to new
accessors") and commit b2b2e584ceab ("irqchip: mips-gic: Clean up mti,
reserved-cpu-vectors handling") managed to work correctly despite this
issue, but not all systems do...

Fixes: 3680746abd87 ("irqchip: mips-gic: Convert remaining shared reg access to new accessors")
Fixes: b2b2e584ceab ("irqchip: mips-gic: Clean up mti, reserved-cpu-vectors handling")
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Link: https://lkml.kernel.org/r/20170922062440.23701-2-paul.burton@imgtec.com
---
 drivers/irqchip/irq-mips-gic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 40159ac12ac8..0022b31ad2c5 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -645,7 +645,7 @@ static int __init gic_of_init(struct device_node *node,
 
 	/* Find the first available CPU vector. */
 	i = 0;
-	reserved = (C_SW0 | C_SW1) >> __fls(C_SW0);
+	reserved = (C_SW0 | C_SW1) >> __ffs(C_SW0);
 	while (!of_property_read_u32_index(node, "mti,reserved-cpu-vectors",
 					   i++, &cpu_vec))
 		reserved |= BIT(cpu_vec);
@@ -684,11 +684,11 @@ static int __init gic_of_init(struct device_node *node,
 
 	gicconfig = read_gic_config();
 	gic_shared_intrs = gicconfig & GIC_CONFIG_NUMINTERRUPTS;
-	gic_shared_intrs >>= __fls(GIC_CONFIG_NUMINTERRUPTS);
+	gic_shared_intrs >>= __ffs(GIC_CONFIG_NUMINTERRUPTS);
 	gic_shared_intrs = (gic_shared_intrs + 1) * 8;
 
 	gic_vpes = gicconfig & GIC_CONFIG_PVPS;
-	gic_vpes >>= __fls(GIC_CONFIG_PVPS);
+	gic_vpes >>= __ffs(GIC_CONFIG_PVPS);
 	gic_vpes = gic_vpes + 1;
 
 	if (cpu_has_veic) {
-- 
cgit 


From d9f82930a5b41f28fadb1e4838b877ae528456d3 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 21 Sep 2017 23:24:40 -0700
Subject: irqchip/mips-gic: Use effective affinity to unmask

Commit 7778c4b27cbe ("irqchip: mips-gic: Use pcpu_masks to avoid reading
GIC_SH_MASK*") adjusted the way we handle masking interrupts to set &
clear the interrupt's bit in each pcpu_mask. This allows us to avoid
needing to read the GIC mask registers and perform a bitwise and of
their values with the pending & pcpu_masks.

Unfortunately this didn't quite work for IPIs, which were mapped to a
particular CPU/VP during initialisation but never set the affinity or
effective_affinity fields of their struct irq_desc. This led to them
losing their affinity when gic_unmask_irq() was called for them, and
they'd all become affine to cpu0.

Fix this by:

 1) Setting the effective affinity of interrupts in
    gic_shared_irq_domain_map(), which is where we actually map an
    interrupt to a CPU/VP. This ensures that the effective affinity mask
    is always valid, not just after explicitly setting affinity.

 2) Using an interrupt's effective affinity when unmasking it, which
    prevents gic_unmask_irq() from unintentionally changing which
    pcpu_mask includes an interrupt.


Fixes: 7778c4b27cbe ("irqchip: mips-gic: Use pcpu_masks to avoid reading GIC_SH_MASK*")
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Link: https://lkml.kernel.org/r/20170922062440.23701-3-paul.burton@imgtec.com
---
 drivers/irqchip/irq-mips-gic.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 0022b31ad2c5..c90976d7e53c 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -175,14 +175,13 @@ static void gic_mask_irq(struct irq_data *d)
 
 static void gic_unmask_irq(struct irq_data *d)
 {
-	struct cpumask *affinity = irq_data_get_affinity_mask(d);
 	unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq);
 	unsigned int cpu;
 
 	write_gic_smask(intr);
 
 	gic_clear_pcpu_masks(intr);
-	cpu = cpumask_first_and(affinity, cpu_online_mask);
+	cpu = cpumask_first(irq_data_get_effective_affinity_mask(d));
 	set_bit(intr, per_cpu_ptr(pcpu_masks, cpu));
 }
 
@@ -420,13 +419,17 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
 				     irq_hw_number_t hw, unsigned int cpu)
 {
 	int intr = GIC_HWIRQ_TO_SHARED(hw);
+	struct irq_data *data;
 	unsigned long flags;
 
+	data = irq_get_irq_data(virq);
+
 	spin_lock_irqsave(&gic_lock, flags);
 	write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin);
 	write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu)));
 	gic_clear_pcpu_masks(intr);
 	set_bit(intr, per_cpu_ptr(pcpu_masks, cpu));
+	irq_data_update_effective_affinity(data, cpumask_of(cpu));
 	spin_unlock_irqrestore(&gic_lock, flags);
 
 	return 0;
-- 
cgit 


From 7755d83e48397e822aac751b1545f8bcf71d133e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 22 Sep 2017 21:20:41 +0900
Subject: irqdomain: Add __rcu annotations to radix tree accessors

Fix various address spaces warning of sparse.

kernel/irq/irqdomain.c:1463:14: warning: incorrect type in assignment (different address spaces)
kernel/irq/irqdomain.c:1463:14:    expected void **slot
kernel/irq/irqdomain.c:1463:14:    got void [noderef] <asn:4>**
kernel/irq/irqdomain.c:1465:66: warning: incorrect type in argument 2 (different address spaces)
kernel/irq/irqdomain.c:1465:66:    expected void [noderef] <asn:4>**slot
kernel/irq/irqdomain.c:1465:66:    got void **slot

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Link: https://lkml.kernel.org/r/1506082841-11530-1-git-send-email-yamada.masahiro@socionext.com
---
 kernel/irq/irqdomain.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index e84b7056bb08..ac4644e92b49 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -945,7 +945,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
 	struct irq_desc *desc;
 	struct irq_domain *domain;
 	struct radix_tree_iter iter;
-	void **slot;
+	void __rcu **slot;
 	int i;
 
 	seq_printf(m, " %-16s  %-6s  %-10s  %-10s  %s\n",
@@ -1453,7 +1453,7 @@ out_free_desc:
 /* The irq_data was moved, fix the revmap to refer to the new location */
 static void irq_domain_fix_revmap(struct irq_data *d)
 {
-	void **slot;
+	void __rcu **slot;
 
 	if (d->hwirq < d->domain->revmap_size)
 		return; /* Not using radix tree. */
-- 
cgit 


From c88f0e6b06f4092995688211a631bb436125d77b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 27 Aug 2017 20:25:26 +0800
Subject: scsi: scsi_transport_iscsi: fix the issue that iscsi_if_rx doesn't
 parse nlmsg properly

ChunYu found a kernel crash by syzkaller:

[  651.617875] kasan: CONFIG_KASAN_INLINE enabled
[  651.618217] kasan: GPF could be caused by NULL-ptr deref or user memory access
[  651.618731] general protection fault: 0000 [#1] SMP KASAN
[  651.621543] CPU: 1 PID: 9539 Comm: scsi Not tainted 4.11.0.cov #32
[  651.621938] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[  651.622309] task: ffff880117780000 task.stack: ffff8800a3188000
[  651.622762] RIP: 0010:skb_release_data+0x26c/0x590
[...]
[  651.627260] Call Trace:
[  651.629156]  skb_release_all+0x4f/0x60
[  651.629450]  consume_skb+0x1a5/0x600
[  651.630705]  netlink_unicast+0x505/0x720
[  651.632345]  netlink_sendmsg+0xab2/0xe70
[  651.633704]  sock_sendmsg+0xcf/0x110
[  651.633942]  ___sys_sendmsg+0x833/0x980
[  651.637117]  __sys_sendmsg+0xf3/0x240
[  651.638820]  SyS_sendmsg+0x32/0x50
[  651.639048]  entry_SYSCALL_64_fastpath+0x1f/0xc2

It's caused by skb_shared_info at the end of sk_buff was overwritten by
ISCSI_KEVENT_IF_ERROR when parsing nlmsg info from skb in iscsi_if_rx.

During the loop if skb->len == nlh->nlmsg_len and both are sizeof(*nlh),
ev = nlmsg_data(nlh) will acutally get skb_shinfo(SKB) instead and set a
new value to skb_shinfo(SKB)->nr_frags by ev->type.

This patch is to fix it by checking nlh->nlmsg_len properly there to
avoid over accessing sk_buff.

Reported-by: ChunYu Wang <chunwang@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Chris Leech <cleech@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_transport_iscsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 8934f19bce8e..0190aeff5f7f 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -3689,7 +3689,7 @@ iscsi_if_rx(struct sk_buff *skb)
 		uint32_t group;
 
 		nlh = nlmsg_hdr(skb);
-		if (nlh->nlmsg_len < sizeof(*nlh) ||
+		if (nlh->nlmsg_len < sizeof(*nlh) + sizeof(*ev) ||
 		    skb->len < nlh->nlmsg_len) {
 			break;
 		}
-- 
cgit 


From f3a0c7b3fa7cdf6783827c245c62772687f8b3ac Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Sat, 9 Sep 2017 20:37:03 +0800
Subject: MAINTAINERS: Add entry for MediaTek PMIC LED driver

Add myself as a maintainer to support existing SoCs and push forward
following MediaTek PMICs with LEDs to reuse the driver.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2281af4b41b6..22cb6cc91081 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8586,6 +8586,12 @@ M:	Sean Wang <sean.wang@mediatek.com>
 S:	Maintained
 F:	drivers/media/rc/mtk-cir.c
 
+MEDIATEK PMIC LED DRIVER
+M:	Sean Wang <sean.wang@mediatek.com>
+S:	Maintained
+F:	drivers/leds/leds-mt6323.c
+F:	Documentation/devicetree/bindings/leds/leds-mt6323.txt
+
 MEDIATEK ETHERNET DRIVER
 M:	Felix Fietkau <nbd@openwrt.org>
 M:	John Crispin <john@phrozen.org>
-- 
cgit 


From fac1c2040203363eab6c6e86ce883cb71390418f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 20 Sep 2017 19:00:15 +0200
Subject: smp/hotplug: Add state diagram

Add a state diagram to clarify when which states are ran where.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: bigeasy@linutronix.de
Cc: efault@gmx.de
Cc: rostedt@goodmis.org
Cc: max.byungchul.park@gmail.com
Link: https://lkml.kernel.org/r/20170920170546.661598270@infradead.org
---
 include/linux/cpuhotplug.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index f24bfb2b9a2d..477b2e6f60f7 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -3,6 +3,24 @@
 
 #include <linux/types.h>
 
+/*
+ * CPU-up			CPU-down
+ *
+ * BP		AP		BP		AP
+ *
+ * OFFLINE			OFFLINE
+ *   |				  ^
+ *   v				  |
+ * BRINGUP_CPU->AP_OFFLINE	BRINGUP_CPU  <- AP_IDLE_DEAD (idle thread/play_dead)
+ *		  |				AP_OFFLINE
+ *		  v (IRQ-off)	  ,---------------^
+ *		AP_ONLNE	  | (stop_machine)
+ *		  |		TEARDOWN_CPU <-	AP_ONLINE_IDLE
+ *		  |				  ^
+ *		  v				  |
+ *              AP_ACTIVE			AP_ACTIVE
+ */
+
 enum cpuhp_state {
 	CPUHP_OFFLINE,
 	CPUHP_CREATE_THREADS,
-- 
cgit 


From 96abb968549cdefd0964d1f7af0a79f4e6e7f897 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 20 Sep 2017 19:00:16 +0200
Subject: smp/hotplug: Allow external multi-instance rollback

Currently the rollback of multi-instance states is handled inside
cpuhp_invoke_callback(). The problem is that when we want to allow an
explicit state change for rollback, we need to return from the
function without doing the rollback.

Change cpuhp_invoke_callback() to optionally return the multi-instance
state, such that rollback can be done from a subsequent call.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: bigeasy@linutronix.de
Cc: efault@gmx.de
Cc: rostedt@goodmis.org
Cc: max.byungchul.park@gmail.com
Link: https://lkml.kernel.org/r/20170920170546.720361181@infradead.org
---
 kernel/cpu.c | 47 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index acf5308fad51..323b71050b54 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -123,13 +123,16 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
 /**
  * cpuhp_invoke_callback _ Invoke the callbacks for a given state
  * @cpu:	The cpu for which the callback should be invoked
- * @step:	The step in the state machine
+ * @state:	The state to do callbacks for
  * @bringup:	True if the bringup callback should be invoked
+ * @node:	For multi-instance, do a single entry callback for install/remove
+ * @lastp:	For multi-instance rollback, remember how far we got
  *
  * Called from cpu hotplug and from the state register machinery.
  */
 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
-				 bool bringup, struct hlist_node *node)
+				 bool bringup, struct hlist_node *node,
+				 struct hlist_node **lastp)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	struct cpuhp_step *step = cpuhp_get_step(state);
@@ -138,6 +141,7 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
 	int ret, cnt;
 
 	if (!step->multi_instance) {
+		WARN_ON_ONCE(lastp && *lastp);
 		cb = bringup ? step->startup.single : step->teardown.single;
 		if (!cb)
 			return 0;
@@ -152,6 +156,7 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
 
 	/* Single invocation for instance add/remove */
 	if (node) {
+		WARN_ON_ONCE(lastp && *lastp);
 		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
 		ret = cbm(cpu, node);
 		trace_cpuhp_exit(cpu, st->state, state, ret);
@@ -161,13 +166,23 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
 	/* State transition. Invoke on all instances */
 	cnt = 0;
 	hlist_for_each(node, &step->list) {
+		if (lastp && node == *lastp)
+			break;
+
 		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
 		ret = cbm(cpu, node);
 		trace_cpuhp_exit(cpu, st->state, state, ret);
-		if (ret)
-			goto err;
+		if (ret) {
+			if (!lastp)
+				goto err;
+
+			*lastp = node;
+			return ret;
+		}
 		cnt++;
 	}
+	if (lastp)
+		*lastp = NULL;
 	return 0;
 err:
 	/* Rollback the instances if one failed */
@@ -323,7 +338,7 @@ static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
 		struct cpuhp_step *step = cpuhp_get_step(st->state);
 
 		if (!step->skip_onerr)
-			cpuhp_invoke_callback(cpu, st->state, true, NULL);
+			cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
 	}
 }
 
@@ -334,7 +349,7 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 	int ret = 0;
 
 	for (; st->state > target; st->state--) {
-		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
+		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
 		if (ret) {
 			st->target = prev_state;
 			undo_cpu_down(cpu, st);
@@ -350,7 +365,7 @@ static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
 		struct cpuhp_step *step = cpuhp_get_step(st->state);
 
 		if (!step->skip_onerr)
-			cpuhp_invoke_callback(cpu, st->state, false, NULL);
+			cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
 	}
 }
 
@@ -362,7 +377,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 
 	while (st->state < target) {
 		st->state++;
-		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
+		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
 		if (ret) {
 			st->target = prev_state;
 			undo_cpu_up(cpu, st);
@@ -428,11 +443,13 @@ static void cpuhp_thread_fun(unsigned int cpu)
 		if (st->cb_state < CPUHP_AP_ONLINE) {
 			local_irq_disable();
 			ret = cpuhp_invoke_callback(cpu, st->cb_state,
-						    st->bringup, st->node);
+						    st->bringup, st->node,
+						    NULL);
 			local_irq_enable();
 		} else {
 			ret = cpuhp_invoke_callback(cpu, st->cb_state,
-						    st->bringup, st->node);
+						    st->bringup, st->node,
+						    NULL);
 		}
 	} else if (st->rollback) {
 		BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
@@ -472,7 +489,7 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
 	 * we invoke the thread function directly.
 	 */
 	if (!st->thread)
-		return cpuhp_invoke_callback(cpu, state, bringup, node);
+		return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 
 	st->cb_state = state;
 	st->single = true;
@@ -595,7 +612,7 @@ static int take_cpu_down(void *_param)
 	st->state--;
 	/* Invoke the former CPU_DYING callbacks */
 	for (; st->state > target; st->state--)
-		cpuhp_invoke_callback(cpu, st->state, false, NULL);
+		cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
 
 	/* Give up timekeeping duties */
 	tick_handover_do_timer();
@@ -776,7 +793,7 @@ void notify_cpu_starting(unsigned int cpu)
 	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
 	while (st->state < target) {
 		st->state++;
-		cpuhp_invoke_callback(cpu, st->state, true, NULL);
+		cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
 	}
 }
 
@@ -1307,9 +1324,9 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
 	if (cpuhp_is_ap_state(state))
 		ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
 	else
-		ret = cpuhp_invoke_callback(cpu, state, bringup, node);
+		ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 #else
-	ret = cpuhp_invoke_callback(cpu, state, bringup, node);
+	ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 #endif
 	BUG_ON(ret && !bringup);
 	return ret;
-- 
cgit 


From 4dddfb5faa6118564b0c54a163353d13882299d8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 20 Sep 2017 19:00:17 +0200
Subject: smp/hotplug: Rewrite AP state machine core

There is currently no explicit state change on rollback. That is,
st->bringup, st->rollback and st->target are not consistent when doing
the rollback.

Rework the AP state handling to be more coherent. This does mean we
have to do a second AP kick-and-wait for rollback, but since rollback
is the slow path of a slowpath, this really should not matter.

Take this opportunity to simplify the AP thread function to only run a
single callback per invocation. This unifies the three single/up/down
modes is supports. The looping it used to do for up/down are achieved
by retaining should_run and relying on the main smpboot_thread_fn()
loop.

(I have most of a patch that does the same for the BP state handling,
but that's not critical and gets a little complicated because
CPUHP_BRINGUP_CPU does the AP handoff from a callback, which gets
recursive @st usage, I still have de-fugly that.)

[ tglx: Move cpuhp_down_callbacks() et al. into the HOTPLUG_CPU section to
  	avoid gcc complaining about unused functions. Make the HOTPLUG_CPU
  	one piece instead of having two consecutive ifdef sections of the
  	same type. ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: bigeasy@linutronix.de
Cc: efault@gmx.de
Cc: rostedt@goodmis.org
Cc: max.byungchul.park@gmail.com
Link: https://lkml.kernel.org/r/20170920170546.769658088@infradead.org
---
 kernel/cpu.c | 321 ++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 206 insertions(+), 115 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 323b71050b54..1139063de5af 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -58,6 +58,7 @@ struct cpuhp_cpu_state {
 	bool			single;
 	bool			bringup;
 	struct hlist_node	*node;
+	struct hlist_node	*last;
 	enum cpuhp_state	cb_state;
 	int			result;
 	struct completion	done;
@@ -112,6 +113,14 @@ static bool cpuhp_is_ap_state(enum cpuhp_state state)
 	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
 }
 
+/*
+ * The former STARTING/DYING states, ran with IRQs disabled and must not fail.
+ */
+static bool cpuhp_is_atomic_state(enum cpuhp_state state)
+{
+	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
+}
+
 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
 {
 	struct cpuhp_step *sp;
@@ -286,7 +295,72 @@ void cpu_hotplug_enable(void)
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 #endif	/* CONFIG_HOTPLUG_CPU */
 
-static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st);
+static inline enum cpuhp_state
+cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+{
+	enum cpuhp_state prev_state = st->state;
+
+	st->rollback = false;
+	st->last = NULL;
+
+	st->target = target;
+	st->single = false;
+	st->bringup = st->state < target;
+
+	return prev_state;
+}
+
+static inline void
+cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
+{
+	st->rollback = true;
+
+	/*
+	 * If we have st->last we need to undo partial multi_instance of this
+	 * state first. Otherwise start undo at the previous state.
+	 */
+	if (!st->last) {
+		if (st->bringup)
+			st->state--;
+		else
+			st->state++;
+	}
+
+	st->target = prev_state;
+	st->bringup = !st->bringup;
+}
+
+/* Regular hotplug invocation of the AP hotplug thread */
+static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
+{
+	if (!st->single && st->state == st->target)
+		return;
+
+	st->result = 0;
+	/*
+	 * Make sure the above stores are visible before should_run becomes
+	 * true. Paired with the mb() above in cpuhp_thread_fun()
+	 */
+	smp_mb();
+	st->should_run = true;
+	wake_up_process(st->thread);
+	wait_for_completion(&st->done);
+}
+
+static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+{
+	enum cpuhp_state prev_state;
+	int ret;
+
+	prev_state = cpuhp_set_state(st, target);
+	__cpuhp_kick_ap(st);
+	if ((ret = st->result)) {
+		cpuhp_reset_state(st, prev_state);
+		__cpuhp_kick_ap(st);
+	}
+
+	return ret;
+}
 
 static int bringup_wait_for_ap(unsigned int cpu)
 {
@@ -301,12 +375,10 @@ static int bringup_wait_for_ap(unsigned int cpu)
 	stop_machine_unpark(cpu);
 	kthread_unpark(st->thread);
 
-	/* Should we go further up ? */
-	if (st->target > CPUHP_AP_ONLINE_IDLE) {
-		__cpuhp_kick_ap_work(st);
-		wait_for_completion(&st->done);
-	}
-	return st->result;
+	if (st->target <= CPUHP_AP_ONLINE_IDLE)
+		return 0;
+
+	return cpuhp_kick_ap(st, st->target);
 }
 
 static int bringup_cpu(unsigned int cpu)
@@ -332,32 +404,6 @@ static int bringup_cpu(unsigned int cpu)
 /*
  * Hotplug state machine related functions
  */
-static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-	for (st->state++; st->state < st->target; st->state++) {
-		struct cpuhp_step *step = cpuhp_get_step(st->state);
-
-		if (!step->skip_onerr)
-			cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
-	}
-}
-
-static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
-				enum cpuhp_state target)
-{
-	enum cpuhp_state prev_state = st->state;
-	int ret = 0;
-
-	for (; st->state > target; st->state--) {
-		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
-		if (ret) {
-			st->target = prev_state;
-			undo_cpu_down(cpu, st);
-			break;
-		}
-	}
-	return ret;
-}
 
 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
 {
@@ -404,71 +450,90 @@ static int cpuhp_should_run(unsigned int cpu)
 	return st->should_run;
 }
 
-/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
-static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-	enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
-
-	return cpuhp_down_callbacks(cpu, st, target);
-}
-
-/* Execute the online startup callbacks. Used to be CPU_ONLINE */
-static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-	return cpuhp_up_callbacks(cpu, st, st->target);
-}
-
 /*
  * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
  * callbacks when a state gets [un]installed at runtime.
+ *
+ * Each invocation of this function by the smpboot thread does a single AP
+ * state callback.
+ *
+ * It has 3 modes of operation:
+ *  - single: runs st->cb_state
+ *  - up:     runs ++st->state, while st->state < st->target
+ *  - down:   runs st->state--, while st->state > st->target
+ *
+ * When complete or on error, should_run is cleared and the completion is fired.
  */
 static void cpuhp_thread_fun(unsigned int cpu)
 {
 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
-	int ret = 0;
+	bool bringup = st->bringup;
+	enum cpuhp_state state;
 
 	/*
-	 * Paired with the mb() in cpuhp_kick_ap_work and
-	 * cpuhp_invoke_ap_callback, so the work set is consistent visible.
+	 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
+	 * that if we see ->should_run we also see the rest of the state.
 	 */
 	smp_mb();
-	if (!st->should_run)
-		return;
 
-	st->should_run = false;
+	if (WARN_ON_ONCE(!st->should_run))
+		return;
 
 	lock_map_acquire(&cpuhp_state_lock_map);
-	/* Single callback invocation for [un]install ? */
+
 	if (st->single) {
-		if (st->cb_state < CPUHP_AP_ONLINE) {
-			local_irq_disable();
-			ret = cpuhp_invoke_callback(cpu, st->cb_state,
-						    st->bringup, st->node,
-						    NULL);
-			local_irq_enable();
+		state = st->cb_state;
+		st->should_run = false;
+	} else {
+		if (bringup) {
+			st->state++;
+			state = st->state;
+			st->should_run = (st->state < st->target);
+			WARN_ON_ONCE(st->state > st->target);
 		} else {
-			ret = cpuhp_invoke_callback(cpu, st->cb_state,
-						    st->bringup, st->node,
-						    NULL);
+			state = st->state;
+			st->state--;
+			st->should_run = (st->state > st->target);
+			WARN_ON_ONCE(st->state < st->target);
 		}
-	} else if (st->rollback) {
-		BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
+	}
+
+	WARN_ON_ONCE(!cpuhp_is_ap_state(state));
+
+	if (st->rollback) {
+		struct cpuhp_step *step = cpuhp_get_step(state);
+		if (step->skip_onerr)
+			goto next;
+	}
+
+	if (cpuhp_is_atomic_state(state)) {
+		local_irq_disable();
+		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
+		local_irq_enable();
 
-		undo_cpu_down(cpu, st);
-		st->rollback = false;
+		/*
+		 * STARTING/DYING must not fail!
+		 */
+		WARN_ON_ONCE(st->result);
 	} else {
-		/* Cannot happen .... */
-		BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
-
-		/* Regular hotplug work */
-		if (st->state < st->target)
-			ret = cpuhp_ap_online(cpu, st);
-		else if (st->state > st->target)
-			ret = cpuhp_ap_offline(cpu, st);
+		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
+	}
+
+	if (st->result) {
+		/*
+		 * If we fail on a rollback, we're up a creek without no
+		 * paddle, no way forward, no way back. We loose, thanks for
+		 * playing.
+		 */
+		WARN_ON_ONCE(st->rollback);
+		st->should_run = false;
 	}
+
+next:
 	lock_map_release(&cpuhp_state_lock_map);
-	st->result = ret;
-	complete(&st->done);
+
+	if (!st->should_run)
+		complete(&st->done);
 }
 
 /* Invoke a single callback on a remote cpu */
@@ -477,6 +542,7 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
 			 struct hlist_node *node)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+	int ret;
 
 	if (!cpu_online(cpu))
 		return 0;
@@ -491,48 +557,43 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
 	if (!st->thread)
 		return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 
+	st->rollback = false;
+	st->last = NULL;
+
+	st->node = node;
+	st->bringup = bringup;
 	st->cb_state = state;
 	st->single = true;
-	st->bringup = bringup;
-	st->node = node;
 
-	/*
-	 * Make sure the above stores are visible before should_run becomes
-	 * true. Paired with the mb() above in cpuhp_thread_fun()
-	 */
-	smp_mb();
-	st->should_run = true;
-	wake_up_process(st->thread);
-	wait_for_completion(&st->done);
-	return st->result;
-}
+	__cpuhp_kick_ap(st);
 
-/* Regular hotplug invocation of the AP hotplug thread */
-static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
-{
-	st->result = 0;
-	st->single = false;
 	/*
-	 * Make sure the above stores are visible before should_run becomes
-	 * true. Paired with the mb() above in cpuhp_thread_fun()
+	 * If we failed and did a partial, do a rollback.
 	 */
-	smp_mb();
-	st->should_run = true;
-	wake_up_process(st->thread);
+	if ((ret = st->result) && st->last) {
+		st->rollback = true;
+		st->bringup = !bringup;
+
+		__cpuhp_kick_ap(st);
+	}
+
+	return ret;
 }
 
 static int cpuhp_kick_ap_work(unsigned int cpu)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-	enum cpuhp_state state = st->state;
+	enum cpuhp_state prev_state = st->state;
+	int ret;
 
-	trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
 	lock_map_acquire(&cpuhp_state_lock_map);
 	lock_map_release(&cpuhp_state_lock_map);
-	__cpuhp_kick_ap_work(st);
-	wait_for_completion(&st->done);
-	trace_cpuhp_exit(cpu, st->state, state, st->result);
-	return st->result;
+
+	trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
+	ret = cpuhp_kick_ap(st, st->target);
+	trace_cpuhp_exit(cpu, st->state, prev_state, ret);
+
+	return ret;
 }
 
 static struct smp_hotplug_thread cpuhp_threads = {
@@ -693,11 +754,32 @@ void cpuhp_report_idle_dead(void)
 				 cpuhp_complete_idle_dead, st, 0);
 }
 
-#else
-#define takedown_cpu		NULL
-#endif
+static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
+{
+	for (st->state++; st->state < st->target; st->state++) {
+		struct cpuhp_step *step = cpuhp_get_step(st->state);
 
-#ifdef CONFIG_HOTPLUG_CPU
+		if (!step->skip_onerr)
+			cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
+	}
+}
+
+static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+				enum cpuhp_state target)
+{
+	enum cpuhp_state prev_state = st->state;
+	int ret = 0;
+
+	for (; st->state > target; st->state--) {
+		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+		if (ret) {
+			st->target = prev_state;
+			undo_cpu_down(cpu, st);
+			break;
+		}
+	}
+	return ret;
+}
 
 /* Requires cpu_add_remove_lock to be held */
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
@@ -716,13 +798,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 
 	cpuhp_tasks_frozen = tasks_frozen;
 
-	prev_state = st->state;
-	st->target = target;
+	prev_state = cpuhp_set_state(st, target);
 	/*
 	 * If the current CPU state is in the range of the AP hotplug thread,
 	 * then we need to kick the thread.
 	 */
 	if (st->state > CPUHP_TEARDOWN_CPU) {
+		st->target = max((int)target, CPUHP_TEARDOWN_CPU);
 		ret = cpuhp_kick_ap_work(cpu);
 		/*
 		 * The AP side has done the error rollback already. Just
@@ -737,6 +819,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 		 */
 		if (st->state > CPUHP_TEARDOWN_CPU)
 			goto out;
+
+		st->target = target;
 	}
 	/*
 	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
@@ -744,9 +828,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 	 */
 	ret = cpuhp_down_callbacks(cpu, st, target);
 	if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
-		st->target = prev_state;
-		st->rollback = true;
-		cpuhp_kick_ap_work(cpu);
+		cpuhp_reset_state(st, prev_state);
+		__cpuhp_kick_ap(st);
 	}
 
 out:
@@ -771,11 +854,15 @@ out:
 	cpu_maps_update_done();
 	return err;
 }
+
 int cpu_down(unsigned int cpu)
 {
 	return do_cpu_down(cpu, CPUHP_OFFLINE);
 }
 EXPORT_SYMBOL(cpu_down);
+
+#else
+#define takedown_cpu		NULL
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 /**
@@ -846,7 +933,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 
 	cpuhp_tasks_frozen = tasks_frozen;
 
-	st->target = target;
+	cpuhp_set_state(st, target);
 	/*
 	 * If the current CPU state is in the range of the AP hotplug thread,
 	 * then we need to kick the thread once more.
@@ -1313,6 +1400,10 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
 	struct cpuhp_step *sp = cpuhp_get_step(state);
 	int ret;
 
+	/*
+	 * If there's nothing to do, we done.
+	 * Relies on the union for multi_instance.
+	 */
 	if ((bringup && !sp->startup.single) ||
 	    (!bringup && !sp->teardown.single))
 		return 0;
-- 
cgit 


From 724a86881d03ee5794148e65142e24ed3621be66 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 20 Sep 2017 19:00:18 +0200
Subject: smp/hotplug: Callback vs state-machine consistency

While the generic callback functions have an 'int' return and thus
appear to be allowed to return error, this is not true for all states.

Specifically, what used to be STARTING/DYING are ran with IRQs
disabled from critical parts of CPU bringup/teardown and are not
allowed to fail. Add WARNs to enforce this rule.

But since some callbacks are indeed allowed to fail, we have the
situation where a state-machine rollback encounters a failure, in this
case we're stuck, we can't go forward and we can't go back. Also add a
WARN for that case.

AFAICT this is a fundamental 'problem' with no real obvious solution.
We want the 'prepare' callbacks to allow failure on either up or down.
Typically on prepare-up this would be things like -ENOMEM from
resource allocations, and the typical usage in prepare-down would be
something like -EBUSY to avoid CPUs being taken away.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: bigeasy@linutronix.de
Cc: efault@gmx.de
Cc: rostedt@goodmis.org
Cc: max.byungchul.park@gmail.com
Link: https://lkml.kernel.org/r/20170920170546.819539119@infradead.org
---
 kernel/cpu.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1139063de5af..d6f1b8c36400 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -202,7 +202,14 @@ err:
 	hlist_for_each(node, &step->list) {
 		if (!cnt--)
 			break;
-		cbm(cpu, node);
+
+		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+		ret = cbm(cpu, node);
+		trace_cpuhp_exit(cpu, st->state, state, ret);
+		/*
+		 * Rollback must not fail,
+		 */
+		WARN_ON_ONCE(ret);
 	}
 	return ret;
 }
@@ -659,6 +666,7 @@ static int take_cpu_down(void *_param)
 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
 	int err, cpu = smp_processor_id();
+	int ret;
 
 	/* Ensure this CPU doesn't handle any more interrupts. */
 	err = __cpu_disable();
@@ -672,8 +680,13 @@ static int take_cpu_down(void *_param)
 	WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
 	st->state--;
 	/* Invoke the former CPU_DYING callbacks */
-	for (; st->state > target; st->state--)
-		cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+	for (; st->state > target; st->state--) {
+		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+		/*
+		 * DYING must not fail!
+		 */
+		WARN_ON_ONCE(ret);
+	}
 
 	/* Give up timekeeping duties */
 	tick_handover_do_timer();
@@ -876,11 +889,16 @@ void notify_cpu_starting(unsigned int cpu)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
+	int ret;
 
 	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
 	while (st->state < target) {
 		st->state++;
-		cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
+		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
+		/*
+		 * STARTING must not fail!
+		 */
+		WARN_ON_ONCE(ret);
 	}
 }
 
-- 
cgit 


From 5f4b55e10645b7371322c800a5ec745cab487a6c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 20 Sep 2017 19:00:20 +0200
Subject: smp/hotplug: Differentiate the AP-work lockdep class between up and
 down

With lockdep-crossrelease we get deadlock reports that span cpu-up and
cpu-down chains. Such deadlocks cannot possibly happen because cpu-up
and cpu-down are globally serialized.

  CPU0                  CPU1                    CPU2
  cpuhp_up_callbacks:   takedown_cpu:           cpuhp_thread_fun:

  cpuhp_state
                        irq_lock_sparse()
    irq_lock_sparse()
                        wait_for_completion()
                                                cpuhp_state
                                                complete()

Now that we have consistent AP state, we can trivially separate the
AP-work class between up and down using st->bringup.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: max.byungchul.park@gmail.com
Cc: bigeasy@linutronix.de
Cc: efault@gmx.de
Cc: rostedt@goodmis.org
Link: https://lkml.kernel.org/r/20170920170546.922524234@infradead.org
---
 kernel/cpu.c | 41 ++++++++++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index d6f1b8c36400..d5c09985fbb6 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -68,9 +68,26 @@ struct cpuhp_cpu_state {
 static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
 
 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
-static struct lock_class_key cpuhp_state_key;
-static struct lockdep_map cpuhp_state_lock_map =
-	STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
+static struct lockdep_map cpuhp_state_up_map =
+	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
+static struct lockdep_map cpuhp_state_down_map =
+	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
+
+
+static void inline cpuhp_lock_acquire(bool bringup)
+{
+	lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
+}
+
+static void inline cpuhp_lock_release(bool bringup)
+{
+	lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
+}
+#else
+
+static void inline cpuhp_lock_acquire(bool bringup) { }
+static void inline cpuhp_lock_release(bool bringup) { }
+
 #endif
 
 /**
@@ -486,7 +503,7 @@ static void cpuhp_thread_fun(unsigned int cpu)
 	if (WARN_ON_ONCE(!st->should_run))
 		return;
 
-	lock_map_acquire(&cpuhp_state_lock_map);
+	cpuhp_lock_acquire(bringup);
 
 	if (st->single) {
 		state = st->cb_state;
@@ -537,7 +554,7 @@ static void cpuhp_thread_fun(unsigned int cpu)
 	}
 
 next:
-	lock_map_release(&cpuhp_state_lock_map);
+	cpuhp_lock_release(bringup);
 
 	if (!st->should_run)
 		complete(&st->done);
@@ -554,8 +571,11 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
 	if (!cpu_online(cpu))
 		return 0;
 
-	lock_map_acquire(&cpuhp_state_lock_map);
-	lock_map_release(&cpuhp_state_lock_map);
+	cpuhp_lock_acquire(false);
+	cpuhp_lock_release(false);
+
+	cpuhp_lock_acquire(true);
+	cpuhp_lock_release(true);
 
 	/*
 	 * If we are up and running, use the hotplug thread. For early calls
@@ -593,8 +613,11 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
 	enum cpuhp_state prev_state = st->state;
 	int ret;
 
-	lock_map_acquire(&cpuhp_state_lock_map);
-	lock_map_release(&cpuhp_state_lock_map);
+	cpuhp_lock_acquire(false);
+	cpuhp_lock_release(false);
+
+	cpuhp_lock_acquire(true);
+	cpuhp_lock_release(true);
 
 	trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
 	ret = cpuhp_kick_ap(st, st->target);
-- 
cgit 


From 5ebe7742fff8be5f1359bc50f5d43fb6ff7bd060 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 20 Sep 2017 19:00:19 +0200
Subject: smp/hotplug: Differentiate the AP completion between up and down

With lockdep-crossrelease we get deadlock reports that span cpu-up and
cpu-down chains. Such deadlocks cannot possibly happen because cpu-up
and cpu-down are globally serialized.

  takedown_cpu()
    irq_lock_sparse()
    wait_for_completion(&st->done)

                                cpuhp_thread_fun
                                  cpuhp_up_callback
                                    cpuhp_invoke_callback
                                      irq_affinity_online_cpu
                                        irq_local_spare()
                                        irq_unlock_sparse()
                                  complete(&st->done)

Now that we have consistent AP state, we can trivially separate the
AP completion between up and down using st->bringup.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: max.byungchul.park@gmail.com
Cc: bigeasy@linutronix.de
Cc: efault@gmx.de
Cc: rostedt@goodmis.org
Link: https://lkml.kernel.org/r/20170920170546.872472799@infradead.org
---
 kernel/cpu.c | 49 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 17 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index d5c09985fbb6..6bbe261b851f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -46,7 +46,8 @@
  * @bringup:	Single callback bringup or teardown selector
  * @cb_state:	The state for a single callback (install/uninstall)
  * @result:	Result of the operation
- * @done:	Signal completion to the issuer of the task
+ * @done_up:	Signal completion to the issuer of the task for cpu-up
+ * @done_down:	Signal completion to the issuer of the task for cpu-down
  */
 struct cpuhp_cpu_state {
 	enum cpuhp_state	state;
@@ -61,7 +62,8 @@ struct cpuhp_cpu_state {
 	struct hlist_node	*last;
 	enum cpuhp_state	cb_state;
 	int			result;
-	struct completion	done;
+	struct completion	done_up;
+	struct completion	done_down;
 #endif
 };
 
@@ -130,14 +132,6 @@ static bool cpuhp_is_ap_state(enum cpuhp_state state)
 	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
 }
 
-/*
- * The former STARTING/DYING states, ran with IRQs disabled and must not fail.
- */
-static bool cpuhp_is_atomic_state(enum cpuhp_state state)
-{
-	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
-}
-
 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
 {
 	struct cpuhp_step *sp;
@@ -232,6 +226,26 @@ err:
 }
 
 #ifdef CONFIG_SMP
+static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
+{
+	struct completion *done = bringup ? &st->done_up : &st->done_down;
+	wait_for_completion(done);
+}
+
+static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
+{
+	struct completion *done = bringup ? &st->done_up : &st->done_down;
+	complete(done);
+}
+
+/*
+ * The former STARTING/DYING states, ran with IRQs disabled and must not fail.
+ */
+static bool cpuhp_is_atomic_state(enum cpuhp_state state)
+{
+	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
+}
+
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 bool cpuhp_tasks_frozen;
@@ -368,7 +382,7 @@ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
 	smp_mb();
 	st->should_run = true;
 	wake_up_process(st->thread);
-	wait_for_completion(&st->done);
+	wait_for_ap_thread(st, st->bringup);
 }
 
 static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
@@ -391,7 +405,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 
 	/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
-	wait_for_completion(&st->done);
+	wait_for_ap_thread(st, true);
 	if (WARN_ON_ONCE((!cpu_online(cpu))))
 		return -ECANCELED;
 
@@ -464,7 +478,8 @@ static void cpuhp_create(unsigned int cpu)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 
-	init_completion(&st->done);
+	init_completion(&st->done_up);
+	init_completion(&st->done_down);
 }
 
 static int cpuhp_should_run(unsigned int cpu)
@@ -557,7 +572,7 @@ next:
 	cpuhp_lock_release(bringup);
 
 	if (!st->should_run)
-		complete(&st->done);
+		complete_ap_thread(st, bringup);
 }
 
 /* Invoke a single callback on a remote cpu */
@@ -753,7 +768,7 @@ static int takedown_cpu(unsigned int cpu)
 	 *
 	 * Wait for the stop thread to go away.
 	 */
-	wait_for_completion(&st->done);
+	wait_for_ap_thread(st, false);
 	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
 
 	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
@@ -772,7 +787,7 @@ static void cpuhp_complete_idle_dead(void *arg)
 {
 	struct cpuhp_cpu_state *st = arg;
 
-	complete(&st->done);
+	complete_ap_thread(st, false);
 }
 
 void cpuhp_report_idle_dead(void)
@@ -939,7 +954,7 @@ void cpuhp_online_idle(enum cpuhp_state state)
 		return;
 
 	st->state = CPUHP_AP_ONLINE_IDLE;
-	complete(&st->done);
+	complete_ap_thread(st, true);
 }
 
 /* Requires cpu_add_remove_lock to be held */
-- 
cgit 


From 1db49484f21ed0fcdadd0635a3669f5f386546fa Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 20 Sep 2017 19:00:21 +0200
Subject: smp/hotplug: Hotplug state fail injection

Add a sysfs file to one-time fail a specific state. This can be used
to test the state rollback code paths.

Something like this (hotplug-up.sh):

  #!/bin/bash

  echo 0 > /debug/sched_debug
  echo 1 > /debug/tracing/events/cpuhp/enable

  ALL_STATES=`cat /sys/devices/system/cpu/hotplug/states | cut -d':' -f1`
  STATES=${1:-$ALL_STATES}

  for state in $STATES
  do
	  echo 0 > /sys/devices/system/cpu/cpu1/online
	  echo 0 > /debug/tracing/trace
	  echo Fail state: $state
	  echo $state > /sys/devices/system/cpu/cpu1/hotplug/fail
	  cat /sys/devices/system/cpu/cpu1/hotplug/fail
	  echo 1 > /sys/devices/system/cpu/cpu1/online

	  cat /debug/tracing/trace > hotfail-${state}.trace

	  sleep 1
  done

Can be used to test for all possible rollback (barring multi-instance)
scenarios on CPU-up, CPU-down is a trivial modification of the above.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: bigeasy@linutronix.de
Cc: efault@gmx.de
Cc: rostedt@goodmis.org
Cc: max.byungchul.park@gmail.com
Link: https://lkml.kernel.org/r/20170920170546.972581715@infradead.org
---
 include/linux/cpuhotplug.h |  3 ++-
 kernel/cpu.c               | 60 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 477b2e6f60f7..6d508767e144 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -22,7 +22,8 @@
  */
 
 enum cpuhp_state {
-	CPUHP_OFFLINE,
+	CPUHP_INVALID = -1,
+	CPUHP_OFFLINE = 0,
 	CPUHP_CREATE_THREADS,
 	CPUHP_PERF_PREPARE,
 	CPUHP_PERF_X86_PREPARE,
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6bbe261b851f..8de11a29e495 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -52,6 +52,7 @@
 struct cpuhp_cpu_state {
 	enum cpuhp_state	state;
 	enum cpuhp_state	target;
+	enum cpuhp_state	fail;
 #ifdef CONFIG_SMP
 	struct task_struct	*thread;
 	bool			should_run;
@@ -67,7 +68,9 @@ struct cpuhp_cpu_state {
 #endif
 };
 
-static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
+	.fail = CPUHP_INVALID,
+};
 
 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
 static struct lockdep_map cpuhp_state_up_map =
@@ -160,6 +163,15 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
 	int (*cb)(unsigned int cpu);
 	int ret, cnt;
 
+	if (st->fail == state) {
+		st->fail = CPUHP_INVALID;
+
+		if (!(bringup ? step->startup.single : step->teardown.single))
+			return 0;
+
+		return -EAGAIN;
+	}
+
 	if (!step->multi_instance) {
 		WARN_ON_ONCE(lastp && *lastp);
 		cb = bringup ? step->startup.single : step->teardown.single;
@@ -1805,9 +1817,55 @@ static ssize_t show_cpuhp_target(struct device *dev,
 }
 static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
 
+
+static ssize_t write_cpuhp_fail(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+	struct cpuhp_step *sp;
+	int fail, ret;
+
+	ret = kstrtoint(buf, 10, &fail);
+	if (ret)
+		return ret;
+
+	/*
+	 * Cannot fail STARTING/DYING callbacks.
+	 */
+	if (cpuhp_is_atomic_state(fail))
+		return -EINVAL;
+
+	/*
+	 * Cannot fail anything that doesn't have callbacks.
+	 */
+	mutex_lock(&cpuhp_state_mutex);
+	sp = cpuhp_get_step(fail);
+	if (!sp->startup.single && !sp->teardown.single)
+		ret = -EINVAL;
+	mutex_unlock(&cpuhp_state_mutex);
+	if (ret)
+		return ret;
+
+	st->fail = fail;
+
+	return count;
+}
+
+static ssize_t show_cpuhp_fail(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+
+	return sprintf(buf, "%d\n", st->fail);
+}
+
+static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
+
 static struct attribute *cpuhp_cpu_attrs[] = {
 	&dev_attr_state.attr,
 	&dev_attr_target.attr,
+	&dev_attr_fail.attr,
 	NULL
 };
 
-- 
cgit 


From 910801809b2e40a4baedd080ef5d80b4a180e70e Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 20 Sep 2017 16:58:38 +0200
Subject: security/keys: properly zero out sensitive key material in big_key

Error paths forgot to zero out sensitive material, so this patch changes
some kfrees into a kzfrees.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Eric Biggers <ebiggers3@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Kirill Marinushkin <k.marinushkin@gmail.com>
Cc: security@kernel.org
Cc: stable@vger.kernel.org
---
 security/keys/big_key.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index 6acb00f6f22c..507d6fb86a4f 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -195,7 +195,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
 		*path = file->f_path;
 		path_get(path);
 		fput(file);
-		kfree(data);
+		kzfree(data);
 	} else {
 		/* Just store the data in a buffer */
 		void *data = kmalloc(datalen, GFP_KERNEL);
@@ -211,9 +211,9 @@ int big_key_preparse(struct key_preparsed_payload *prep)
 err_fput:
 	fput(file);
 err_enckey:
-	kfree(enckey);
+	kzfree(enckey);
 error:
-	kfree(data);
+	kzfree(data);
 	return ret;
 }
 
@@ -227,7 +227,7 @@ void big_key_free_preparse(struct key_preparsed_payload *prep)
 
 		path_put(path);
 	}
-	kfree(prep->payload.data[big_key_data]);
+	kzfree(prep->payload.data[big_key_data]);
 }
 
 /*
@@ -259,7 +259,7 @@ void big_key_destroy(struct key *key)
 		path->mnt = NULL;
 		path->dentry = NULL;
 	}
-	kfree(key->payload.data[big_key_data]);
+	kzfree(key->payload.data[big_key_data]);
 	key->payload.data[big_key_data] = NULL;
 }
 
@@ -328,7 +328,7 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
 err_fput:
 		fput(file);
 error:
-		kfree(data);
+		kzfree(data);
 	} else {
 		ret = datalen;
 		if (copy_to_user(buffer, key->payload.data[big_key_data],
-- 
cgit 


From 428490e38b2e352812e0b765d8bceafab0ec441d Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 20 Sep 2017 16:58:39 +0200
Subject: security/keys: rewrite all of big_key crypto

This started out as just replacing the use of crypto/rng with
get_random_bytes_wait, so that we wouldn't use bad randomness at boot
time. But, upon looking further, it appears that there were even deeper
underlying cryptographic problems, and that this seems to have been
committed with very little crypto review. So, I rewrote the whole thing,
trying to keep to the conventions introduced by the previous author, to
fix these cryptographic flaws.

It makes no sense to seed crypto/rng at boot time and then keep
using it like this, when in fact there's already get_random_bytes_wait,
which can ensure there's enough entropy and be a much more standard way
of generating keys. Since this sensitive material is being stored
untrusted, using ECB and no authentication is simply not okay at all. I
find it surprising and a bit horrifying that this code even made it past
basic crypto review, which perhaps points to some larger issues. This
patch moves from using AES-ECB to using AES-GCM. Since keys are uniquely
generated each time, we can set the nonce to zero. There was also a race
condition in which the same key would be reused at the same time in
different threads. A mutex fixes this issue now.

So, to summarize, this commit fixes the following vulnerabilities:

  * Low entropy key generation, allowing an attacker to potentially
    guess or predict keys.
  * Unauthenticated encryption, allowing an attacker to modify the
    cipher text in particular ways in order to manipulate the plaintext,
    which is is even more frightening considering the next point.
  * Use of ECB mode, allowing an attacker to trivially swap blocks or
    compare identical plaintext blocks.
  * Key re-use.
  * Faulty memory zeroing.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Eric Biggers <ebiggers3@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Kirill Marinushkin <k.marinushkin@gmail.com>
Cc: security@kernel.org
Cc: stable@vger.kernel.org
---
 security/keys/Kconfig   |   4 +-
 security/keys/big_key.c | 127 ++++++++++++++++++++++--------------------------
 2 files changed, 60 insertions(+), 71 deletions(-)

diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index a7a23b5541f8..91eafada3164 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -45,10 +45,8 @@ config BIG_KEYS
 	bool "Large payload keys"
 	depends on KEYS
 	depends on TMPFS
-	depends on (CRYPTO_ANSI_CPRNG = y || CRYPTO_DRBG = y)
 	select CRYPTO_AES
-	select CRYPTO_ECB
-	select CRYPTO_RNG
+	select CRYPTO_GCM
 	help
 	  This option provides support for holding large keys within the kernel
 	  (for example Kerberos ticket caches).  The data may be stored out to
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index 507d6fb86a4f..e607830b6154 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -1,5 +1,6 @@
 /* Large capacity key type
  *
+ * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
  * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
@@ -16,10 +17,10 @@
 #include <linux/shmem_fs.h>
 #include <linux/err.h>
 #include <linux/scatterlist.h>
+#include <linux/random.h>
 #include <keys/user-type.h>
 #include <keys/big_key-type.h>
-#include <crypto/rng.h>
-#include <crypto/skcipher.h>
+#include <crypto/aead.h>
 
 /*
  * Layout of key payload words.
@@ -49,7 +50,12 @@ enum big_key_op {
 /*
  * Key size for big_key data encryption
  */
-#define ENC_KEY_SIZE	16
+#define ENC_KEY_SIZE 32
+
+/*
+ * Authentication tag length
+ */
+#define ENC_AUTHTAG_SIZE 16
 
 /*
  * big_key defined keys take an arbitrary string as the description and an
@@ -64,57 +70,62 @@ struct key_type key_type_big_key = {
 	.destroy		= big_key_destroy,
 	.describe		= big_key_describe,
 	.read			= big_key_read,
+	/* no ->update(); don't add it without changing big_key_crypt() nonce */
 };
 
 /*
- * Crypto names for big_key data encryption
+ * Crypto names for big_key data authenticated encryption
  */
-static const char big_key_rng_name[] = "stdrng";
-static const char big_key_alg_name[] = "ecb(aes)";
+static const char big_key_alg_name[] = "gcm(aes)";
 
 /*
- * Crypto algorithms for big_key data encryption
+ * Crypto algorithms for big_key data authenticated encryption
  */
-static struct crypto_rng *big_key_rng;
-static struct crypto_skcipher *big_key_skcipher;
+static struct crypto_aead *big_key_aead;
 
 /*
- * Generate random key to encrypt big_key data
+ * Since changing the key affects the entire object, we need a mutex.
  */
-static inline int big_key_gen_enckey(u8 *key)
-{
-	return crypto_rng_get_bytes(big_key_rng, key, ENC_KEY_SIZE);
-}
+static DEFINE_MUTEX(big_key_aead_lock);
 
 /*
  * Encrypt/decrypt big_key data
  */
 static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key)
 {
-	int ret = -EINVAL;
+	int ret;
 	struct scatterlist sgio;
-	SKCIPHER_REQUEST_ON_STACK(req, big_key_skcipher);
-
-	if (crypto_skcipher_setkey(big_key_skcipher, key, ENC_KEY_SIZE)) {
+	struct aead_request *aead_req;
+	/* We always use a zero nonce. The reason we can get away with this is
+	 * because we're using a different randomly generated key for every
+	 * different encryption. Notably, too, key_type_big_key doesn't define
+	 * an .update function, so there's no chance we'll wind up reusing the
+	 * key to encrypt updated data. Simply put: one key, one encryption.
+	 */
+	u8 zero_nonce[crypto_aead_ivsize(big_key_aead)];
+
+	aead_req = aead_request_alloc(big_key_aead, GFP_KERNEL);
+	if (!aead_req)
+		return -ENOMEM;
+
+	memset(zero_nonce, 0, sizeof(zero_nonce));
+	sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? ENC_AUTHTAG_SIZE : 0));
+	aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce);
+	aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+	aead_request_set_ad(aead_req, 0);
+
+	mutex_lock(&big_key_aead_lock);
+	if (crypto_aead_setkey(big_key_aead, key, ENC_KEY_SIZE)) {
 		ret = -EAGAIN;
 		goto error;
 	}
-
-	skcipher_request_set_tfm(req, big_key_skcipher);
-	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
-				      NULL, NULL);
-
-	sg_init_one(&sgio, data, datalen);
-	skcipher_request_set_crypt(req, &sgio, &sgio, datalen, NULL);
-
 	if (op == BIG_KEY_ENC)
-		ret = crypto_skcipher_encrypt(req);
+		ret = crypto_aead_encrypt(aead_req);
 	else
-		ret = crypto_skcipher_decrypt(req);
-
-	skcipher_request_zero(req);
-
+		ret = crypto_aead_decrypt(aead_req);
 error:
+	mutex_unlock(&big_key_aead_lock);
+	aead_request_free(aead_req);
 	return ret;
 }
 
@@ -146,16 +157,13 @@ int big_key_preparse(struct key_preparsed_payload *prep)
 		 *
 		 * File content is stored encrypted with randomly generated key.
 		 */
-		size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher));
+		size_t enclen = datalen + ENC_AUTHTAG_SIZE;
 		loff_t pos = 0;
 
-		/* prepare aligned data to encrypt */
 		data = kmalloc(enclen, GFP_KERNEL);
 		if (!data)
 			return -ENOMEM;
-
 		memcpy(data, prep->data, datalen);
-		memset(data + datalen, 0x00, enclen - datalen);
 
 		/* generate random key */
 		enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL);
@@ -163,13 +171,12 @@ int big_key_preparse(struct key_preparsed_payload *prep)
 			ret = -ENOMEM;
 			goto error;
 		}
-
-		ret = big_key_gen_enckey(enckey);
-		if (ret)
+		ret = get_random_bytes_wait(enckey, ENC_KEY_SIZE);
+		if (unlikely(ret))
 			goto err_enckey;
 
 		/* encrypt aligned data */
-		ret = big_key_crypt(BIG_KEY_ENC, data, enclen, enckey);
+		ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey);
 		if (ret)
 			goto err_enckey;
 
@@ -295,7 +302,7 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
 		struct file *file;
 		u8 *data;
 		u8 *enckey = (u8 *)key->payload.data[big_key_data];
-		size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher));
+		size_t enclen = datalen + ENC_AUTHTAG_SIZE;
 		loff_t pos = 0;
 
 		data = kmalloc(enclen, GFP_KERNEL);
@@ -344,47 +351,31 @@ error:
  */
 static int __init big_key_init(void)
 {
-	struct crypto_skcipher *cipher;
-	struct crypto_rng *rng;
 	int ret;
 
-	rng = crypto_alloc_rng(big_key_rng_name, 0, 0);
-	if (IS_ERR(rng)) {
-		pr_err("Can't alloc rng: %ld\n", PTR_ERR(rng));
-		return PTR_ERR(rng);
-	}
-
-	big_key_rng = rng;
-
-	/* seed RNG */
-	ret = crypto_rng_reset(rng, NULL, crypto_rng_seedsize(rng));
-	if (ret) {
-		pr_err("Can't reset rng: %d\n", ret);
-		goto error_rng;
-	}
-
 	/* init block cipher */
-	cipher = crypto_alloc_skcipher(big_key_alg_name, 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(cipher)) {
-		ret = PTR_ERR(cipher);
+	big_key_aead = crypto_alloc_aead(big_key_alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(big_key_aead)) {
+		ret = PTR_ERR(big_key_aead);
 		pr_err("Can't alloc crypto: %d\n", ret);
-		goto error_rng;
+		return ret;
+	}
+	ret = crypto_aead_setauthsize(big_key_aead, ENC_AUTHTAG_SIZE);
+	if (ret < 0) {
+		pr_err("Can't set crypto auth tag len: %d\n", ret);
+		goto free_aead;
 	}
-
-	big_key_skcipher = cipher;
 
 	ret = register_key_type(&key_type_big_key);
 	if (ret < 0) {
 		pr_err("Can't register type: %d\n", ret);
-		goto error_cipher;
+		goto free_aead;
 	}
 
 	return 0;
 
-error_cipher:
-	crypto_free_skcipher(big_key_skcipher);
-error_rng:
-	crypto_free_rng(big_key_rng);
+free_aead:
+	crypto_free_aead(big_key_aead);
 	return ret;
 }
 
-- 
cgit 


From e4d8ae00169f7686e1da5a62e5cf797d12bf8822 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 21 Sep 2017 10:44:36 -0700
Subject: PM / OPP: Call notifier without holding opp_table->lock

The notifier callbacks may want to call some OPP helper routines which
may try to take the same opp_table->lock again and cause a deadlock. One
such usecase was reported by Chanwoo Choi, where calling
dev_pm_opp_disable() leads us to the devfreq's OPP notifier handler,
which further calls dev_pm_opp_find_freq_floor() and it deadlocks.

We don't really need the opp_table->lock to be held across the notifier
call though, all we want to make sure is that the 'opp' doesn't get
freed while being used from within the notifier chain. We can do it with
help of dev_pm_opp_get/put() as well. Let's do it.

Cc: 4.11+ <stable@vger.kernel.org> # 4.11+
Fixes: 5b650b388844 "PM / OPP: Take kref from _find_opp_table()"
Reported-by: Chanwoo Choi <cw00.choi@samsung.com>
Tested-by: Chanwoo Choi <cw00.choi@samsung.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/opp/core.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index a8cc14fd8ae4..a6de32530693 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -1581,6 +1581,9 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
 
 	opp->available = availability_req;
 
+	dev_pm_opp_get(opp);
+	mutex_unlock(&opp_table->lock);
+
 	/* Notify the change of the OPP availability */
 	if (availability_req)
 		blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ENABLE,
@@ -1589,8 +1592,12 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
 		blocking_notifier_call_chain(&opp_table->head,
 					     OPP_EVENT_DISABLE, opp);
 
+	dev_pm_opp_put(opp);
+	goto put_table;
+
 unlock:
 	mutex_unlock(&opp_table->lock);
+put_table:
 	dev_pm_opp_put_opp_table(opp_table);
 	return r;
 }
-- 
cgit 


From 675195d0be27391d48d8d23c7c62991505168528 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.com>
Date: Wed, 20 Sep 2017 08:58:53 +0200
Subject: scsi: scsi_transport_fc: set scsi_target_id upon rescan

When an rport is found in the bindings array there is no guarantee that
it had been a target port, so we need to call fc_remote_port_rolechg()
here to ensure the scsi_target_id is set correctly.  Otherwise the port
will never be scanned.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Tested-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_transport_fc.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index ba9d70f8a6a1..e74fffc32c75 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -2876,7 +2876,6 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 			memcpy(&rport->port_name, &ids->port_name,
 				sizeof(rport->port_name));
 			rport->port_id = ids->port_id;
-			rport->roles = ids->roles;
 			rport->port_state = FC_PORTSTATE_ONLINE;
 			rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
 
@@ -2885,15 +2884,7 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 						fci->f->dd_fcrport_size);
 			spin_unlock_irqrestore(shost->host_lock, flags);
 
-			if (ids->roles & FC_PORT_ROLE_FCP_TARGET) {
-				scsi_target_unblock(&rport->dev, SDEV_RUNNING);
-
-				/* initiate a scan of the target */
-				spin_lock_irqsave(shost->host_lock, flags);
-				rport->flags |= FC_RPORT_SCAN_PENDING;
-				scsi_queue_work(shost, &rport->scan_work);
-				spin_unlock_irqrestore(shost->host_lock, flags);
-			}
+			fc_remote_port_rolechg(rport, ids->roles);
 			return rport;
 		}
 	}
-- 
cgit 


From d477bf3af1e88fe27c893f84136647fe11963198 Mon Sep 17 00:00:00 2001
From: Suniel Mahesh <sunil.m@techveda.org>
Date: Thu, 21 Sep 2017 19:09:03 +0530
Subject: cpufreq: dt: Fix sysfs duplicate filename creation for
 platform-device

ti-cpufreq and cpufreq-dt-platdev drivers are registering platform-device
with same name "cpufreq-dt" using platform_device_register_*() routines.
This is leading to build warnings appended below.

Providing hardware information to OPP framework along with the platform-
device creation should be done by ti-cpufreq driver before cpufreq-dt
driver comes into place.

This patch add's TI am33xx, am43 and dra7 platforms (which use opp-v2
property) to the blacklist of devices in cpufreq-dt-platform driver to
avoid creating platform-device twice and remove build warnings.

[    2.370167] ------------[ cut here ]------------
[    2.375087] WARNING: CPU: 0 PID: 1 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x58/0x78
[    2.383112] sysfs: cannot create duplicate filename '/devices/platform/cpufreq-dt'
[    2.391219] Modules linked in:
[    2.394506] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.13.0-next-20170912 #1
[    2.402006] Hardware name: Generic AM33XX (Flattened Device Tree)
[    2.408437] [<c0110a28>] (unwind_backtrace) from [<c010ca84>] (show_stack+0x10/0x14)
[    2.416568] [<c010ca84>] (show_stack) from [<c0827d64>] (dump_stack+0xac/0xe0)
[    2.424165] [<c0827d64>] (dump_stack) from [<c0137470>] (__warn+0xd8/0x104)
[    2.431488] [<c0137470>] (__warn) from [<c01374d0>] (warn_slowpath_fmt+0x34/0x44)
[    2.439351] [<c01374d0>] (warn_slowpath_fmt) from [<c03459d0>] (sysfs_warn_dup+0x58/0x78)
[    2.447938] [<c03459d0>] (sysfs_warn_dup) from [<c0345ab8>] (sysfs_create_dir_ns+0x80/0x98)
[    2.456719] [<c0345ab8>] (sysfs_create_dir_ns) from [<c082c554>] (kobject_add_internal+0x9c/0x2d4)
[    2.466124] [<c082c554>] (kobject_add_internal) from [<c082c7d8>] (kobject_add+0x4c/0x9c)
[    2.474712] [<c082c7d8>] (kobject_add) from [<c05803e4>] (device_add+0xcc/0x57c)
[    2.482489] [<c05803e4>] (device_add) from [<c0584b74>] (platform_device_add+0x100/0x220)
[    2.491085] [<c0584b74>] (platform_device_add) from [<c05855a8>] (platform_device_register_full+0xf4/0x118)
[    2.501305] [<c05855a8>] (platform_device_register_full) from [<c067023c>] (ti_cpufreq_init+0x150/0x22c)
[    2.511253] [<c067023c>] (ti_cpufreq_init) from [<c0101df4>] (do_one_initcall+0x3c/0x170)
[    2.519838] [<c0101df4>] (do_one_initcall) from [<c0c00eb4>] (kernel_init_freeable+0x1fc/0x2c4)
[    2.528974] [<c0c00eb4>] (kernel_init_freeable) from [<c083bcac>] (kernel_init+0x8/0x110)
[    2.537565] [<c083bcac>] (kernel_init) from [<c0107d18>] (ret_from_fork+0x14/0x3c)
[    2.545981] ---[ end trace 2fc00e213c13ab20 ]---
[    2.551051] ------------[ cut here ]------------
[    2.555931] WARNING: CPU: 0 PID: 1 at lib/kobject.c:240 kobject_add_internal+0x254/0x2d4
[    2.564578] kobject_add_internal failed for cpufreq-dt with -EEXIST, don't try to register
things with the same name in the same directory.
[    2.577977] Modules linked in:
[    2.581261] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G        W       4.13.0-next-20170912 #1
[    2.590013] Hardware name: Generic AM33XX (Flattened Device Tree)
[    2.596437] [<c0110a28>] (unwind_backtrace) from [<c010ca84>] (show_stack+0x10/0x14)
[    2.604573] [<c010ca84>] (show_stack) from [<c0827d64>] (dump_stack+0xac/0xe0)
[    2.612172] [<c0827d64>] (dump_stack) from [<c0137470>] (__warn+0xd8/0x104)
[    2.619494] [<c0137470>] (__warn) from [<c01374d0>] (warn_slowpath_fmt+0x34/0x44)
[    2.627362] [<c01374d0>] (warn_slowpath_fmt) from [<c082c70c>] (kobject_add_internal+0x254/0x2d4)
[    2.636666] [<c082c70c>] (kobject_add_internal) from [<c082c7d8>] (kobject_add+0x4c/0x9c)
[    2.645255] [<c082c7d8>] (kobject_add) from [<c05803e4>] (device_add+0xcc/0x57c)
[    2.653027] [<c05803e4>] (device_add) from [<c0584b74>] (platform_device_add+0x100/0x220)
[    2.661615] [<c0584b74>] (platform_device_add) from [<c05855a8>] (platform_device_register_full+0xf4/0x118)
[    2.671833] [<c05855a8>] (platform_device_register_full) from [<c067023c>] (ti_cpufreq_init+0x150/0x22c)
[    2.681779] [<c067023c>] (ti_cpufreq_init) from [<c0101df4>] (do_one_initcall+0x3c/0x170)
[    2.690377] [<c0101df4>] (do_one_initcall) from [<c0c00eb4>] (kernel_init_freeable+0x1fc/0x2c4)
[    2.699510] [<c0c00eb4>] (kernel_init_freeable) from [<c083bcac>] (kernel_init+0x8/0x110)
[    2.708106] [<c083bcac>] (kernel_init) from [<c0107d18>] (ret_from_fork+0x14/0x3c)
[    2.716217] ---[ end trace 2fc00e213c13ab21 ]---

Fixes: edeec420de24 (cpufreq: dt-cpufreq: platdev Automatically create device with OPP v2)
Signed-off-by: Suniel Mahesh <sunil.m@techveda.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq-dt-platdev.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 430edadca527..a753c50e9e41 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -118,6 +118,10 @@ static const struct of_device_id blacklist[] __initconst = {
 
 	{ .compatible = "sigma,tango4", },
 
+	{ .compatible = "ti,am33xx", },
+	{ .compatible = "ti,am43", },
+	{ .compatible = "ti,dra7", },
+
 	{ }
 };
 
-- 
cgit 


From 9561475db680f7144d2223a409dd3d7e322aca03 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 11 Sep 2017 09:45:40 +0200
Subject: PCI: Fix race condition with driver_override

The driver_override implementation is susceptible to a race condition when
different threads are reading vs. storing a different driver override.  Add
locking to avoid the race condition.

This is in close analogy to commit 6265539776a0 ("driver core: platform:
fix race condition with driver_override") from Adrian Salido.

Fixes: 782a985d7af2 ("PCI: Introduce new device binding path using pci_dev.driver_override")
Signed-off-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable@vger.kernel.org	# v3.16+
---
 drivers/pci/pci-sysfs.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 1eecfa301f7f..8e075ea2743e 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -686,7 +686,7 @@ static ssize_t driver_override_store(struct device *dev,
 				     const char *buf, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	char *driver_override, *old = pdev->driver_override, *cp;
+	char *driver_override, *old, *cp;
 
 	/* We need to keep extra room for a newline */
 	if (count >= (PAGE_SIZE - 1))
@@ -700,12 +700,15 @@ static ssize_t driver_override_store(struct device *dev,
 	if (cp)
 		*cp = '\0';
 
+	device_lock(dev);
+	old = pdev->driver_override;
 	if (strlen(driver_override)) {
 		pdev->driver_override = driver_override;
 	} else {
 		kfree(driver_override);
 		pdev->driver_override = NULL;
 	}
+	device_unlock(dev);
 
 	kfree(old);
 
@@ -716,8 +719,12 @@ static ssize_t driver_override_show(struct device *dev,
 				    struct device_attribute *attr, char *buf)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+	ssize_t len;
 
-	return snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override);
+	device_lock(dev);
+	len = snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override);
+	device_unlock(dev);
+	return len;
 }
 static DEVICE_ATTR_RW(driver_override);
 
-- 
cgit 


From b776e4b1a990045a7c70798f1f353c3160c26594 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 25 Sep 2017 20:38:45 -0400
Subject: fix a typo in put_compat_shm_info()

"uip" misspelled as "up"; unfortunately, the latter happens to be
a function and gcc is happy to convert it to void *...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 ipc/shm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ipc/shm.c b/ipc/shm.c
index 1e2b1692ba2c..badac463e2c8 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1154,7 +1154,7 @@ static int put_compat_shm_info(struct shm_info *ip,
 	info.shm_swp = ip->shm_swp;
 	info.swap_attempts = ip->swap_attempts;
 	info.swap_successes = ip->swap_successes;
-	return copy_to_user(up, &info, sizeof(info));
+	return copy_to_user(uip, &info, sizeof(info));
 }
 
 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
-- 
cgit 


From cc6f77710a6de6210f9feda7cd53e2f5ee7a7e69 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 18 Sep 2017 09:41:16 -0700
Subject: xfs: don't unconditionally clear the reflink flag on zero-block files

If we have speculative cow preallocations hanging around in the cow
fork, don't let a truncate operation clear the reflink flag because if
we do then there's a chance we'll forget to free those extents when we
destroy the incore inode.

Reported-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_inode.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5599dda4727a..4ec5b7f45401 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1624,10 +1624,12 @@ xfs_itruncate_extents(
 		goto out;
 
 	/*
-	 * Clear the reflink flag if we truncated everything.
+	 * Clear the reflink flag if there are no data fork blocks and
+	 * there are no extents staged in the cow fork.
 	 */
-	if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) {
-		ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+	if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
+		if (ip->i_d.di_nblocks == 0)
+			ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
 		xfs_inode_clear_cowblocks_tag(ip);
 	}
 
-- 
cgit 


From 3af423b03435c81036fa710623d3ae92fbe346a3 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 18 Sep 2017 09:41:17 -0700
Subject: xfs: evict CoW fork extents when performing finsert/fcollapse

When we perform an finsert/fcollapse operation, cancel all the CoW
extents for the affected file offset range so that they don't end up
pointing to the wrong blocks.

Reported-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index cd9a5400ba4f..bc6c6e10a969 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1459,7 +1459,19 @@ xfs_shift_file_space(
 		return error;
 
 	/*
-	 * The extent shiting code works on extent granularity. So, if
+	 * Clean out anything hanging around in the cow fork now that
+	 * we've flushed all the dirty data out to disk to avoid having
+	 * CoW extents at the wrong offsets.
+	 */
+	if (xfs_is_reflink_inode(ip)) {
+		error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
+				true);
+		if (error)
+			return error;
+	}
+
+	/*
+	 * The extent shifting code works on extent granularity. So, if
 	 * stop_fsb is not the starting block of extent, we need to split
 	 * the extent at stop_fsb.
 	 */
-- 
cgit 


From e150dcd459e1b441eaf08f341a986f04e61bf3b8 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Mon, 18 Sep 2017 11:34:16 -0700
Subject: fs/xfs: Use %pS printk format for direct addresses

Use the %pS instead of the %pF printk format specifier for printing symbols
from direct addresses. This is needed for the ia64, ppc64 and parisc64
architectures.

Signed-off-by: Helge Deller <deller@gmx.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_error.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index bd786a9ac2c3..eaf86f55b7f2 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -347,7 +347,7 @@ xfs_verifier_error(
 {
 	struct xfs_mount *mp = bp->b_target->bt_mount;
 
-	xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
+	xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
 		  bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
 		  __return_address, bp->b_ops->name, bp->b_bn);
 
-- 
cgit 


From 64671bafbdd984535aa382bccadd91fbe7be0e80 Mon Sep 17 00:00:00 2001
From: Eryu Guan <eguan@redhat.com>
Date: Mon, 18 Sep 2017 11:38:58 -0700
Subject: xfs: kill meaningless variable 'zero'

In xfs_file_aio_write_checks(), variable 'zero' is there only to
satisfy xfs_zero_eof(), the result of it is ignored. Now, with
iomap_zero_range() based xfs_zero_eof(), we can safely pass NULL as
the last param of it and kill 'zero'.

Signed-off-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_file.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ebdd0bd2b261..261d83f1db76 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -377,8 +377,6 @@ restart:
 	 */
 	spin_lock(&ip->i_flags_lock);
 	if (iocb->ki_pos > i_size_read(inode)) {
-		bool	zero = false;
-
 		spin_unlock(&ip->i_flags_lock);
 		if (!drained_dio) {
 			if (*iolock == XFS_IOLOCK_SHARED) {
@@ -399,7 +397,7 @@ restart:
 			drained_dio = true;
 			goto restart;
 		}
-		error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
+		error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
 		if (error)
 			return error;
 	} else
-- 
cgit 


From d20a5e3851969fa685f118a80e4df670255a4e8d Mon Sep 17 00:00:00 2001
From: Eryu Guan <eguan@redhat.com>
Date: Mon, 18 Sep 2017 11:39:23 -0700
Subject: xfs: report zeroed or not correctly in xfs_zero_range()

The 'did_zero' param of xfs_zero_range() was not passed to
iomap_zero_range() correctly. This was introduced by commit
7bb41db3ea16 ("xfs: handle 64-bit length in xfs_iozero"), and found
by code inspection.

Signed-off-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 261d83f1db76..350b6d43ba23 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -58,7 +58,7 @@ xfs_zero_range(
 	xfs_off_t		count,
 	bool			*did_zero)
 {
-	return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
+	return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
 }
 
 int
-- 
cgit 


From 1e6fa688bffc0ff419a4c3e78dbaf7aabfb55183 Mon Sep 17 00:00:00 2001
From: Kenjiro Nakayama <nakayamakenjiro@gmail.com>
Date: Mon, 18 Sep 2017 12:03:56 -0700
Subject: xfs: Output warning message when discard option was enabled even
 though the device does not support discard

In order to using discard function, it is necessary that not only xfs
is mounted with discard option, but also the discard function is
supported by the device. Current code doesn't output any message when
users mount with discard option on unsupported device, so it is
difficult to notice that it was not enabled actually.

This patch adds the warning message to notice that discard option is
not enabled due to unsupported device when the filesystem is mounted.

Changes in v2 (Suggested by Brian Foster):
  - Move the unsupported device check into xfs_fs_fill_super().
  - Clear the discard flag when device is unsupported.

Signed-off-by: Kenjiro Nakayama <nakayamakenjiro@gmail.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_super.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c996f4ae4a5f..584cf2d573ba 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1654,6 +1654,16 @@ xfs_fs_fill_super(
 		"DAX and reflink have not been tested together!");
 	}
 
+	if (mp->m_flags & XFS_MOUNT_DISCARD) {
+		struct request_queue *q = bdev_get_queue(sb->s_bdev);
+
+		if (!blk_queue_discard(q)) {
+			xfs_warn(mp, "mounting with \"discard\" option, but "
+					"the device does not support discard");
+			mp->m_flags &= ~XFS_MOUNT_DISCARD;
+		}
+	}
+
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
 		if (mp->m_sb.sb_rblocks) {
 			xfs_alert(mp,
-- 
cgit 


From 60915f83cd1e021a66fc1503a446aef5c772553a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 18 Sep 2017 13:38:46 -0700
Subject: xfs: remove redundant re-initialization of total_nr_pages

Variable total_nr_pages is being initialized and then updated with
the same value, this latter assignment is redundant and can be
removed.  Cleans up clang build warning:

Value stored to 'total_nr_pages' during its initialization is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_buf.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index da14658da310..2f97c12ca75e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1258,8 +1258,6 @@ xfs_buf_ioapply_map(
 	int		size;
 	int		offset;
 
-	total_nr_pages = bp->b_page_count;
-
 	/* skip the pages in the buffer before the start offset */
 	page_index = 0;
 	offset = *buf_offset;
-- 
cgit 


From f091fb8c344ce13cbf058d304c6cbb042be97058 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Mon, 25 Sep 2017 13:47:23 +0200
Subject: scsi: scsi_transport_fc: Also check for NOTPRESENT in
 fc_remote_port_add()

During failover there is a small race window between fc_remote_port_add()
and fc_timeout_deleted_rport(); the latter drops the lock after setting the
port to NOTPRESENT, so if fc_remote_port_add() is called right at that time
it will fail to detect the existing rport and happily adding a new
structure, causing rports to get registered twice.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_transport_fc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index e74fffc32c75..cbd4495d0ff9 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -2739,7 +2739,8 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 
 	list_for_each_entry(rport, &fc_host->rports, peers) {
 
-		if ((rport->port_state == FC_PORTSTATE_BLOCKED) &&
+		if ((rport->port_state == FC_PORTSTATE_BLOCKED ||
+		     rport->port_state == FC_PORTSTATE_NOTPRESENT) &&
 			(rport->channel == channel)) {
 
 			switch (fc_host->tgtid_bind_type) {
-- 
cgit 


From 4618e90965f272fe522f2af2523a60d0d4bc78f3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:10 +0200
Subject: x86/fpu: Fix fpu__activate_fpstate_read() and update comments

fpu__activate_fpstate_read() can be called for the current task
when coredumping - or for stopped tasks when ptrace-ing.

Implement this properly in the code and update the comments.

This also fixes an incorrect (but harmless) warning introduced by
one of the earlier patches.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-28-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/core.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 93103a909c47..afd3f2a5c64e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -254,18 +254,21 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr);
 /*
  * This function must be called before we read a task's fpstate.
  *
- * If the task has not used the FPU before then initialize its
- * fpstate.
+ * There's two cases where this gets called:
+ *
+ * - for the current task (when coredumping), in which case we have
+ *   to save the latest FPU registers into the fpstate,
+ *
+ * - or it's called for stopped tasks (ptrace), in which case the
+ *   registers were already saved by the context-switch code when
+ *   the task scheduled out - we only have to initialize the registers
+ *   if they've never been initialized.
  *
  * If the task has used the FPU before then save it.
  */
 void fpu__activate_fpstate_read(struct fpu *fpu)
 {
-	/*
-	 * If fpregs are active (in the current CPU), then
-	 * copy them to the fpstate:
-	 */
-	if (fpu->fpstate_active) {
+	if (fpu == &current->thread.fpu) {
 		fpu__save(fpu);
 	} else {
 		if (!fpu->fpstate_active) {
-- 
cgit 


From 685c930d6e58e31e251ec354f9dca3958a4c5040 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:11 +0200
Subject: x86/fpu: Remove fpu__current_fpstate_write_begin/end()

These functions are not used anymore, so remove them.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Bobby Powers <bobbypowers@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-29-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h |  2 --
 arch/x86/kernel/fpu/core.c          | 63 -------------------------------------
 2 files changed, 65 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index cf290d424e48..508e4181c4af 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -26,8 +26,6 @@
 extern void fpu__activate_curr(struct fpu *fpu);
 extern void fpu__activate_fpstate_read(struct fpu *fpu);
 extern void fpu__activate_fpstate_write(struct fpu *fpu);
-extern void fpu__current_fpstate_write_begin(void);
-extern void fpu__current_fpstate_write_end(void);
 extern void fpu__save(struct fpu *fpu);
 extern void fpu__restore(struct fpu *fpu);
 extern int  fpu__restore_sig(void __user *buf, int ia32_frame);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index afd3f2a5c64e..b2cdeb3b1860 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -316,69 +316,6 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 	}
 }
 
-/*
- * This function must be called before we write the current
- * task's fpstate.
- *
- * This call gets the current FPU register state and moves
- * it in to the 'fpstate'.  Preemption is disabled so that
- * no writes to the 'fpstate' can occur from context
- * swiches.
- *
- * Must be followed by a fpu__current_fpstate_write_end().
- */
-void fpu__current_fpstate_write_begin(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	/*
-	 * Ensure that the context-switching code does not write
-	 * over the fpstate while we are doing our update.
-	 */
-	preempt_disable();
-
-	/*
-	 * Move the fpregs in to the fpu's 'fpstate'.
-	 */
-	fpu__activate_fpstate_read(fpu);
-
-	/*
-	 * The caller is about to write to 'fpu'.  Ensure that no
-	 * CPU thinks that its fpregs match the fpstate.  This
-	 * ensures we will not be lazy and skip a XRSTOR in the
-	 * future.
-	 */
-	__fpu_invalidate_fpregs_state(fpu);
-}
-
-/*
- * This function must be paired with fpu__current_fpstate_write_begin()
- *
- * This will ensure that the modified fpstate gets placed back in
- * the fpregs if necessary.
- *
- * Note: This function may be called whether or not an _actual_
- * write to the fpstate occurred.
- */
-void fpu__current_fpstate_write_end(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	/*
-	 * 'fpu' now has an updated copy of the state, but the
-	 * registers may still be out of date.  Update them with
-	 * an XRSTOR if they are active.
-	 */
-	if (fpu->fpstate_active)
-		copy_kernel_to_fpregs(&fpu->state);
-
-	/*
-	 * Our update is done and the fpregs/fpstate are in sync
-	 * if necessary.  Context switches can happen again.
-	 */
-	preempt_enable();
-}
-
 /*
  * 'fpu__restore()' is called to copy FPU registers from
  * the FPU fpstate to the live hw registers and to activate
-- 
cgit 


From e4a81bfcaae1ebbdc6efe74e8ea563144d90e9a9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 26 Sep 2017 09:43:36 +0200
Subject: x86/fpu: Rename fpu::fpstate_active to fpu::initialized

The x86 FPU code used to have a complex state machine where both the FPU
registers and the FPU state context could be 'active' (or inactive)
independently of each other - which enabled features like lazy FPU restore.

Much of this complexity is gone in the current code: now we basically can
have FPU-less tasks (kernel threads) that don't use (and save/restore) FPU
state at all, plus full FPU users that save/restore directly with no laziness
whatsoever.

But the fpu::fpstate_active still carries bits of the old complexity - meanwhile
this flag has become a simple flag that shows whether the FPU context saving
area in the thread struct is initialized and used, or not.

Rename it to fpu::initialized to express this simplicity in the name as well.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-30-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/ia32/ia32_signal.c         |  2 +-
 arch/x86/include/asm/fpu/internal.h |  4 ++--
 arch/x86/include/asm/fpu/types.h    |  6 +++---
 arch/x86/include/asm/trace/fpu.h    |  8 ++++----
 arch/x86/kernel/fpu/core.c          | 24 ++++++++++++------------
 arch/x86/kernel/fpu/init.c          |  2 +-
 arch/x86/kernel/fpu/regset.c        |  6 +++---
 arch/x86/kernel/fpu/signal.c        |  8 ++++----
 arch/x86/kernel/fpu/xstate.c        |  2 +-
 arch/x86/kernel/signal.c            |  6 +++---
 arch/x86/mm/pkeys.c                 |  2 +-
 11 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index e0bb46c02857..0e2a5edbce00 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -231,7 +231,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 		 ksig->ka.sa.sa_restorer)
 		sp = (unsigned long) ksig->ka.sa.sa_restorer;
 
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		unsigned long fx_aligned, math_size;
 
 		sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 508e4181c4af..b26ae05da18a 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -527,7 +527,7 @@ static inline void fpregs_activate(struct fpu *fpu)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	if (old_fpu->fpstate_active) {
+	if (old_fpu->initialized) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
 		else
@@ -550,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
 {
 	bool preload = static_cpu_has(X86_FEATURE_FPU) &&
-		       new_fpu->fpstate_active;
+		       new_fpu->initialized;
 
 	if (preload) {
 		if (!fpregs_state_valid(new_fpu, cpu))
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 71db45ca8870..a1520575d86b 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -293,13 +293,13 @@ struct fpu {
 	unsigned int			last_cpu;
 
 	/*
-	 * @fpstate_active:
+	 * @initialized:
 	 *
-	 * This flag indicates whether this context is active: if the task
+	 * This flag indicates whether this context is initialized: if the task
 	 * is not running then we can restore from this context, if the task
 	 * is running then we should save into this context.
 	 */
-	unsigned char			fpstate_active;
+	unsigned char			initialized;
 
 	/*
 	 * @state:
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index da565aae9fd2..39f7a27bef13 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -12,22 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
 	TP_STRUCT__entry(
 		__field(struct fpu *, fpu)
-		__field(bool, fpstate_active)
+		__field(bool, initialized)
 		__field(u64, xfeatures)
 		__field(u64, xcomp_bv)
 		),
 
 	TP_fast_assign(
 		__entry->fpu		= fpu;
-		__entry->fpstate_active	= fpu->fpstate_active;
+		__entry->initialized	= fpu->initialized;
 		if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 			__entry->xfeatures = fpu->state.xsave.header.xfeatures;
 			__entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
 		}
 	),
-	TP_printk("x86/fpu: %p fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
+	TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
 			__entry->fpu,
-			__entry->fpstate_active,
+			__entry->initialized,
 			__entry->xfeatures,
 			__entry->xcomp_bv
 	)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index b2cdeb3b1860..c8d6032f04d0 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -100,7 +100,7 @@ void __kernel_fpu_begin(void)
 
 	kernel_fpu_disable();
 
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		/*
 		 * Ignore return value -- we don't care if reg state
 		 * is clobbered.
@@ -116,7 +116,7 @@ void __kernel_fpu_end(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	if (fpu->fpstate_active)
+	if (fpu->initialized)
 		copy_kernel_to_fpregs(&fpu->state);
 
 	kernel_fpu_enable();
@@ -148,7 +148,7 @@ void fpu__save(struct fpu *fpu)
 
 	preempt_disable();
 	trace_x86_fpu_before_save(fpu);
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
 			copy_kernel_to_fpregs(&fpu->state);
 		}
@@ -191,7 +191,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
 	dst_fpu->last_cpu = -1;
 
-	if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
+	if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
 		return 0;
 
 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -240,13 +240,13 @@ void fpu__activate_curr(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
-	if (!fpu->fpstate_active) {
+	if (!fpu->initialized) {
 		fpstate_init(&fpu->state);
 		trace_x86_fpu_init_state(fpu);
 
 		trace_x86_fpu_activate_state(fpu);
 		/* Safe to do for the current task: */
-		fpu->fpstate_active = 1;
+		fpu->initialized = 1;
 	}
 }
 EXPORT_SYMBOL_GPL(fpu__activate_curr);
@@ -271,13 +271,13 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
 	if (fpu == &current->thread.fpu) {
 		fpu__save(fpu);
 	} else {
-		if (!fpu->fpstate_active) {
+		if (!fpu->initialized) {
 			fpstate_init(&fpu->state);
 			trace_x86_fpu_init_state(fpu);
 
 			trace_x86_fpu_activate_state(fpu);
 			/* Safe to do for current and for stopped child tasks: */
-			fpu->fpstate_active = 1;
+			fpu->initialized = 1;
 		}
 	}
 }
@@ -303,7 +303,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 	 */
 	WARN_ON_FPU(fpu == &current->thread.fpu);
 
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		/* Invalidate any lazy state: */
 		__fpu_invalidate_fpregs_state(fpu);
 	} else {
@@ -312,7 +312,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 
 		trace_x86_fpu_activate_state(fpu);
 		/* Safe to do for stopped child tasks: */
-		fpu->fpstate_active = 1;
+		fpu->initialized = 1;
 	}
 }
 
@@ -354,7 +354,7 @@ void fpu__drop(struct fpu *fpu)
 	preempt_disable();
 
 	if (fpu == &current->thread.fpu) {
-		if (fpu->fpstate_active) {
+		if (fpu->initialized) {
 			/* Ignore delayed exceptions from user space */
 			asm volatile("1: fwait\n"
 				     "2:\n"
@@ -363,7 +363,7 @@ void fpu__drop(struct fpu *fpu)
 		}
 	}
 
-	fpu->fpstate_active = 0;
+	fpu->initialized = 0;
 
 	trace_x86_fpu_dropped(fpu);
 
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index d5d44c452624..7affb7e3d9a5 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -240,7 +240,7 @@ static void __init fpu__init_system_ctx_switch(void)
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
 
-	WARN_ON_FPU(current->thread.fpu.fpstate_active);
+	WARN_ON_FPU(current->thread.fpu.initialized);
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index c764f7405322..19e82334e811 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -16,14 +16,14 @@ int regset_fpregs_active(struct task_struct *target, const struct user_regset *r
 {
 	struct fpu *target_fpu = &target->thread.fpu;
 
-	return target_fpu->fpstate_active ? regset->n : 0;
+	return target_fpu->initialized ? regset->n : 0;
 }
 
 int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
 {
 	struct fpu *target_fpu = &target->thread.fpu;
 
-	if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+	if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
 		return regset->n;
 	else
 		return 0;
@@ -380,7 +380,7 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
 	struct fpu *fpu = &tsk->thread.fpu;
 	int fpvalid;
 
-	fpvalid = fpu->fpstate_active;
+	fpvalid = fpu->initialized;
 	if (fpvalid)
 		fpvalid = !fpregs_get(tsk, NULL,
 				      0, sizeof(struct user_i387_ia32_struct),
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index da68ea1c3a44..ab2dd24cfea4 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -171,7 +171,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpu->fpstate_active || using_compacted_format()) {
+	if (fpu->initialized || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
 			return -1;
@@ -315,12 +315,12 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		int err = 0;
 
 		/*
-		 * Drop the current fpu which clears fpu->fpstate_active. This ensures
+		 * Drop the current fpu which clears fpu->initialized. This ensures
 		 * that any context-switch during the copy of the new state,
 		 * avoids the intermediate state from getting restored/saved.
 		 * Thus avoiding the new restored state from getting corrupted.
 		 * We will be ready to restore/save the state only after
-		 * fpu->fpstate_active is again set.
+		 * fpu->initialized is again set.
 		 */
 		fpu__drop(fpu);
 
@@ -342,7 +342,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 			sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
 		}
 
-		fpu->fpstate_active = 1;
+		fpu->initialized = 1;
 		preempt_disable();
 		fpu__restore(fpu);
 		preempt_enable();
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index fda1109cc355..703e76d027ee 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -867,7 +867,7 @@ const void *get_xsave_field_ptr(int xsave_state)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	if (!fpu->fpstate_active)
+	if (!fpu->initialized)
 		return NULL;
 	/*
 	 * fpu__save() takes the CPU's xstate registers
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index e04442345fc0..4e188fda5961 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -263,7 +263,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 		sp = (unsigned long) ka->sa.sa_restorer;
 	}
 
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
 					  &buf_fx, &math_size);
 		*fpstate = (void __user *)sp;
@@ -279,7 +279,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 		return (void __user *)-1L;
 
 	/* save i387 and extended state */
-	if (fpu->fpstate_active &&
+	if (fpu->initialized &&
 	    copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
 		return (void __user *)-1L;
 
@@ -755,7 +755,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 		/*
 		 * Ensure the signal handler starts with the new fpu state.
 		 */
-		if (fpu->fpstate_active)
+		if (fpu->initialized)
 			fpu__clear(fpu);
 	}
 	signal_setup_done(failed, ksig, stepping);
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 4d24269c071f..d7bc0eea20a5 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -44,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm)
 	 */
 	preempt_disable();
 	if (!need_to_set_mm_pkey &&
-	    current->thread.fpu.fpstate_active &&
+	    current->thread.fpu.initialized &&
 	    !__pkru_allows_read(read_pkru(), execute_only_pkey)) {
 		preempt_enable();
 		return execute_only_pkey;
-- 
cgit 


From 7f1487c59b7c6dcb20155f4302985da2659a2997 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:13 +0200
Subject: x86/fpu: Fix stale comments about lazy FPU logic

We don't do any lazy restore anymore, what we have are two pieces of optimization:

 - no-FPU tasks that don't save/restore the FPU context (kernel threads are such)

 - cached FPU registers maintained via the fpu->last_cpu field. This means that
   if an FPU task context switches to a non-FPU task then we can maintain the
   FPU registers as an in-FPU copies (cache), and skip the restoration of them
   once we switch back to the original FPU-using task.

Update all the comments that still referred to old 'lazy' and 'unlazy' concepts.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-31-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/core.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index c8d6032f04d0..77668d91fdc1 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -205,9 +205,6 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	/*
 	 * Save current FPU registers directly into the child
 	 * FPU context, without any memory-to-memory copying.
-	 * In lazy mode, if the FPU context isn't loaded into
-	 * fpregs, CR0.TS will be set and do_device_not_available
-	 * will load the FPU context.
 	 *
 	 * We have to do all this with preemption disabled,
 	 * mostly because of the FNSAVE case, because in that
@@ -285,13 +282,13 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
 /*
  * This function must be called before we write a task's fpstate.
  *
- * If the task has used the FPU before then unlazy it.
+ * If the task has used the FPU before then invalidate any cached FPU registers.
  * If the task has not used the FPU before then initialize its fpstate.
  *
  * After this function call, after registers in the fpstate are
  * modified and the child task has woken up, the child task will
  * restore the modified FPU state from the modified context. If we
- * didn't clear its lazy status here then the lazy in-registers
+ * didn't clear its cached status here then the cached in-registers
  * state pending on its former CPU could be restored, corrupting
  * the modifications.
  */
@@ -304,7 +301,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 	WARN_ON_FPU(fpu == &current->thread.fpu);
 
 	if (fpu->initialized) {
-		/* Invalidate any lazy state: */
+		/* Invalidate any cached state: */
 		__fpu_invalidate_fpregs_state(fpu);
 	} else {
 		fpstate_init(&fpu->state);
-- 
cgit 


From e10078eba69859359ce8644dd423b4132a6a8913 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:14 +0200
Subject: x86/fpu: Simplify and speed up fpu__copy()

fpu__copy() has a preempt_disable()/enable() pair, which it had to do to
be able to atomically unlazy the current task when doing an FNSAVE.

But we don't unlazy tasks anymore, we always do direct saves/restores of
FPU context.

So remove both the unnecessary critical section, and update the comments.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-32-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/core.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 77668d91fdc1..52122dd418ae 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -206,22 +206,13 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 * Save current FPU registers directly into the child
 	 * FPU context, without any memory-to-memory copying.
 	 *
-	 * We have to do all this with preemption disabled,
-	 * mostly because of the FNSAVE case, because in that
-	 * case we must not allow preemption in the window
-	 * between the FNSAVE and us marking the context lazy.
-	 *
-	 * It shouldn't be an issue as even FNSAVE is plenty
-	 * fast in terms of critical section length.
+	 * ( The function 'fails' in the FNSAVE case, which destroys
+	 *   register contents so we have to copy them back. )
 	 */
-	preempt_disable();
 	if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&src_fpu->state, &dst_fpu->state,
-		       fpu_kernel_xstate_size);
-
+		memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
 		copy_kernel_to_fpregs(&src_fpu->state);
 	}
-	preempt_enable();
 
 	trace_x86_fpu_copy_src(src_fpu);
 	trace_x86_fpu_copy_dst(dst_fpu);
-- 
cgit 


From 2ce03d850b9a2f17d55596ecfa86e72b5687a627 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:15 +0200
Subject: x86/fpu: Rename fpu__activate_curr() to fpu__initialize()

Rename this function to better express that it's all about
initializing the FPU state of a task which goes hand in hand
with the fpu::initialized field.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-33-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h | 2 +-
 arch/x86/kernel/fpu/core.c          | 8 ++++----
 arch/x86/kernel/fpu/signal.c        | 2 +-
 arch/x86/kvm/x86.c                  | 2 +-
 arch/x86/math-emu/fpu_entry.c       | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index b26ae05da18a..7c980aafb8aa 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -23,7 +23,7 @@
 /*
  * High level FPU state handling functions:
  */
-extern void fpu__activate_curr(struct fpu *fpu);
+extern void fpu__initialize(struct fpu *fpu);
 extern void fpu__activate_fpstate_read(struct fpu *fpu);
 extern void fpu__activate_fpstate_write(struct fpu *fpu);
 extern void fpu__save(struct fpu *fpu);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 52122dd418ae..07db9d94b68b 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -224,7 +224,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
  * Activate the current task's in-memory FPU context,
  * if it has not been used before:
  */
-void fpu__activate_curr(struct fpu *fpu)
+void fpu__initialize(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
@@ -237,7 +237,7 @@ void fpu__activate_curr(struct fpu *fpu)
 		fpu->initialized = 1;
 	}
 }
-EXPORT_SYMBOL_GPL(fpu__activate_curr);
+EXPORT_SYMBOL_GPL(fpu__initialize);
 
 /*
  * This function must be called before we read a task's fpstate.
@@ -316,7 +316,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
  */
 void fpu__restore(struct fpu *fpu)
 {
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
 	kernel_fpu_disable();
@@ -392,7 +392,7 @@ void fpu__clear(struct fpu *fpu)
 	 */
 	if (static_cpu_has(X86_FEATURE_FPU)) {
 		preempt_disable();
-		fpu__activate_curr(fpu);
+		fpu__initialize(fpu);
 		user_fpu_begin();
 		copy_init_fpstate_to_fpregs();
 		preempt_enable();
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index ab2dd24cfea4..7fa3bdb331e9 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -280,7 +280,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	if (!access_ok(VERIFY_READ, buf, size))
 		return -EACCES;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 	if (!static_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_set(current, NULL,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cd17b7d9a107..03869eb7fcd6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7225,7 +7225,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	int r;
 	sigset_t sigsaved;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index d4a7df2205b8..220638a4cb94 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -114,7 +114,7 @@ void math_emulate(struct math_emu_info *info)
 	struct desc_struct code_descriptor;
 	struct fpu *fpu = &current->thread.fpu;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 #ifdef RE_ENTRANT_CHECKING
 	if (emulating) {
-- 
cgit 


From 369a036de206710ff27a66f9bffe78ef657648c3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 13:37:45 +0200
Subject: x86/fpu: Rename fpu__activate_fpstate_read/write() to
 fpu__prepare_[read|write]()

As per the new nomenclature we don't 'activate' the FPU state
anymore, we initialize it. So drop the _activate_fpstate name
from these functions, which were a bit of a mouthful anyway,
and name them:

	fpu__prepare_read()
	fpu__prepare_write()

Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h |  4 ++--
 arch/x86/kernel/fpu/core.c          |  4 ++--
 arch/x86/kernel/fpu/regset.c        | 12 ++++++------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 7c980aafb8aa..e3221ffa304e 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -24,8 +24,8 @@
  * High level FPU state handling functions:
  */
 extern void fpu__initialize(struct fpu *fpu);
-extern void fpu__activate_fpstate_read(struct fpu *fpu);
-extern void fpu__activate_fpstate_write(struct fpu *fpu);
+extern void fpu__prepare_read(struct fpu *fpu);
+extern void fpu__prepare_write(struct fpu *fpu);
 extern void fpu__save(struct fpu *fpu);
 extern void fpu__restore(struct fpu *fpu);
 extern int  fpu__restore_sig(void __user *buf, int ia32_frame);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 07db9d94b68b..f92a6593de1e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(fpu__initialize);
  *
  * If the task has used the FPU before then save it.
  */
-void fpu__activate_fpstate_read(struct fpu *fpu)
+void fpu__prepare_read(struct fpu *fpu)
 {
 	if (fpu == &current->thread.fpu) {
 		fpu__save(fpu);
@@ -283,7 +283,7 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
  * state pending on its former CPU could be restored, corrupting
  * the modifications.
  */
-void fpu__activate_fpstate_write(struct fpu *fpu)
+void fpu__prepare_write(struct fpu *fpu)
 {
 	/*
 	 * Only stopped child tasks can be used to modify the FPU
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 19e82334e811..ee8d2f049818 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -38,7 +38,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return -ENODEV;
 
-	fpu__activate_fpstate_read(fpu);
+	fpu__prepare_read(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -55,7 +55,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return -ENODEV;
 
-	fpu__activate_fpstate_write(fpu);
+	fpu__prepare_write(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -89,7 +89,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 
 	xsave = &fpu->state.xsave;
 
-	fpu__activate_fpstate_read(fpu);
+	fpu__prepare_read(fpu);
 
 	if (using_compacted_format()) {
 		if (kbuf)
@@ -132,7 +132,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	xsave = &fpu->state.xsave;
 
-	fpu__activate_fpstate_write(fpu);
+	fpu__prepare_write(fpu);
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
 		if (kbuf)
@@ -310,7 +310,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 	struct fpu *fpu = &target->thread.fpu;
 	struct user_i387_ia32_struct env;
 
-	fpu__activate_fpstate_read(fpu);
+	fpu__prepare_read(fpu);
 
 	if (!boot_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
@@ -340,7 +340,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	struct user_i387_ia32_struct env;
 	int ret;
 
-	fpu__activate_fpstate_write(fpu);
+	fpu__prepare_write(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 	if (!boot_cpu_has(X86_FEATURE_FPU))
-- 
cgit 


From e63e5d5c15c6b1dba26f7cbd1b1089a1d6155db5 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:04 +0200
Subject: x86/fpu: Introduce validate_xstate_header()

Move validation of user-supplied xstate_header into a helper function,
in preparation of calling it from both the ptrace and sigreturn syscall
paths.

The new function also considers it to be an error if *any* reserved bits
are set, whereas before we were just clearing most of them silently.

This should reduce the chance of bugs that fail to correctly validate
user-supplied XSAVE areas.  It also will expose any broken userspace
programs that set the other reserved bits; this is desirable because
such programs will lose compatibility with future CPUs and kernels if
those bits are ever used for anything.  (There shouldn't be any such
programs, and in fact in the case where the compacted format is in use
we were already validating xfeatures.  But you never know...)

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-2-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  4 ++++
 arch/x86/kernel/fpu/xstate.c      | 24 ++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 579ac2358e63..83fee2469eb7 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -52,4 +52,8 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
 int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
+
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+extern int validate_xstate_header(const struct xstate_header *hdr);
+
 #endif
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 703e76d027ee..2427aeea33b5 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -483,6 +483,30 @@ int using_compacted_format(void)
 	return boot_cpu_has(X86_FEATURE_XSAVES);
 }
 
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+int validate_xstate_header(const struct xstate_header *hdr)
+{
+	/* No unknown or supervisor features may be set */
+	if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
+		return -EINVAL;
+
+	/* Userspace must use the uncompacted format */
+	if (hdr->xcomp_bv)
+		return -EINVAL;
+
+	/*
+	 * If 'reserved' is shrunken to add a new field, make sure to validate
+	 * that new field here!
+	 */
+	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
+
+	/* No reserved bits may be set */
+	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
+		return -EINVAL;
+
+	return 0;
+}
+
 static void __xstate_dump_leaves(void)
 {
 	int i;
-- 
cgit 


From cf9df81b139b6ebaec188d73758f02ca3b2110e4 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:05 +0200
Subject: x86/fpu: Use validate_xstate_header() to validate the xstate_header
 in xstateregs_set()

Tighten the checks in xstateregs_set().

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-3-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/regset.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index ee8d2f049818..b831d5b9de99 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -141,27 +141,20 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 			ret = copy_user_to_xstate(xsave, ubuf);
 	} else {
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-
-		/* xcomp_bv must be 0 when using uncompacted format */
-		if (!ret && xsave->header.xcomp_bv)
-			ret = -EINVAL;
+		if (!ret)
+			ret = validate_xstate_header(&xsave->header);
 	}
 
-	/*
-	 * In case of failure, mark all states as init:
-	 */
-	if (ret)
-		fpstate_init(&fpu->state);
-
 	/*
 	 * mxcsr reserved bits must be masked to zero for security reasons.
 	 */
 	xsave->i387.mxcsr &= mxcsr_feature_mask;
-	xsave->header.xfeatures &= xfeatures_mask;
+
 	/*
-	 * These bits must be zero.
+	 * In case of failure, mark all states as init:
 	 */
-	memset(&xsave->header.reserved, 0, 48);
+	if (ret)
+		fpstate_init(&fpu->state);
 
 	return ret;
 }
-- 
cgit 


From b11e2e18a7fc8eaa3d592c260d50c7129e094ded Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:06 +0200
Subject: x86/fpu: Use validate_xstate_header() to validate the xstate_header
 in __fpu__restore_sig()

Tighten the checks in __fpu__restore_sig() and update comments.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-4-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/signal.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 7fa3bdb331e9..fb639e70048f 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -214,8 +214,11 @@ sanitize_restored_xstate(struct task_struct *tsk,
 	struct xstate_header *header = &xsave->header;
 
 	if (use_xsave()) {
-		/* These bits must be zero. */
-		memset(header->reserved, 0, 48);
+		/*
+		 * Note: we don't need to zero the reserved bits in the
+		 * xstate_header here because we either didn't copy them at all,
+		 * or we checked earlier that they aren't set.
+		 */
 
 		/*
 		 * Init the state that is not present in the memory
@@ -224,7 +227,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
 		if (fx_only)
 			header->xfeatures = XFEATURE_MASK_FPSSE;
 		else
-			header->xfeatures &= (xfeatures_mask & xfeatures);
+			header->xfeatures &= xfeatures;
 	}
 
 	if (use_fxsr()) {
@@ -308,7 +311,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		/*
 		 * For 32-bit frames with fxstate, copy the user state to the
 		 * thread's fpu state, reconstruct fxstate from the fsave
-		 * header. Sanitize the copied state etc.
+		 * header. Validate and sanitize the copied state.
 		 */
 		struct fpu *fpu = &tsk->thread.fpu;
 		struct user_i387_ia32_struct env;
@@ -329,9 +332,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		} else {
 			err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
 
-			/* xcomp_bv must be 0 when using uncompacted format */
-			if (!err && state_size > offsetof(struct xregs_state, header) && fpu->state.xsave.header.xcomp_bv)
-				err = -EINVAL;
+			if (!err && state_size > offsetof(struct xregs_state, header))
+				err = validate_xstate_header(&fpu->state.xsave.header);
 		}
 
 		if (err || __copy_from_user(&env, buf, sizeof(env))) {
-- 
cgit 


From 80d8ae86b36791a545ca28ddc95133ea59bba6e0 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:07 +0200
Subject: x86/fpu: Copy the full state_header in copy_kernel_to_xstate()

This is in preparation to verify the full xstate header as supplied by user-space.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-5-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 2427aeea33b5..02591b96bb25 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1148,11 +1148,13 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 	int i;
 	u64 xfeatures;
 	u64 allowed_features;
+	struct xstate_header hdr;
 
 	offset = offsetof(struct xregs_state, header);
-	size = sizeof(xfeatures);
+	size = sizeof(hdr);
 
-	memcpy(&xfeatures, kbuf + offset, size);
+	memcpy(&hdr, kbuf + offset, size);
+	xfeatures = hdr.xfeatures;
 
 	/*
 	 * Reject if the user sets any disabled or supervisor features:
-- 
cgit 


From b89eda482d7849a1c146b6d0a42f4e76369bb08e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:08 +0200
Subject: x86/fpu: Eliminate the 'xfeatures' local variable in
 copy_kernel_to_xstate()

We have this information in the xstate_header.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-6-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 02591b96bb25..c97c4a9db52a 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1146,7 +1146,6 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 {
 	unsigned int offset, size;
 	int i;
-	u64 xfeatures;
 	u64 allowed_features;
 	struct xstate_header hdr;
 
@@ -1154,20 +1153,19 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 	size = sizeof(hdr);
 
 	memcpy(&hdr, kbuf + offset, size);
-	xfeatures = hdr.xfeatures;
 
 	/*
 	 * Reject if the user sets any disabled or supervisor features:
 	 */
 	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
 
-	if (xfeatures & ~allowed_features)
+	if (hdr.xfeatures & ~allowed_features)
 		return -EINVAL;
 
 	for (i = 0; i < XFEATURE_MAX; i++) {
 		u64 mask = ((u64)1 << i);
 
-		if (xfeatures & mask) {
+		if (hdr.xfeatures & mask) {
 			void *dst = __raw_xsave_addr(xsave, 1 << i);
 
 			offset = xstate_offsets[i];
@@ -1177,7 +1175,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 		}
 	}
 
-	if (xfeatures_mxcsr_quirk(xfeatures)) {
+	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
 		offset = offsetof(struct fxregs_state, mxcsr);
 		size = MXCSR_AND_FLAGS_SIZE;
 		memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
@@ -1192,7 +1190,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 	/*
 	 * Add back in the features that came in from userspace:
 	 */
-	xsave->header.xfeatures |= xfeatures;
+	xsave->header.xfeatures |= hdr.xfeatures;
 
 	return 0;
 }
-- 
cgit 


From af95774b3ca080b0e1e651c0fc7680f3444ddda7 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:09 +0200
Subject: x86/fpu: Use validate_xstate_header() to validate the xstate_header
 in copy_kernel_to_xstate()

Tighten the checks in copy_kernel_to_xstate().

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-7-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c97c4a9db52a..325db7850335 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1138,15 +1138,12 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 
 /*
  * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
- * and copy to the target thread. This is called from xstateregs_set() and
- * there we check the CPU has XSAVES and a whole standard-sized buffer
- * exists.
+ * and copy to the target thread. This is called from xstateregs_set().
  */
 int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 {
 	unsigned int offset, size;
 	int i;
-	u64 allowed_features;
 	struct xstate_header hdr;
 
 	offset = offsetof(struct xregs_state, header);
@@ -1154,12 +1151,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 
 	memcpy(&hdr, kbuf + offset, size);
 
-	/*
-	 * Reject if the user sets any disabled or supervisor features:
-	 */
-	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
-
-	if (hdr.xfeatures & ~allowed_features)
+	if (validate_xstate_header(&hdr))
 		return -EINVAL;
 
 	for (i = 0; i < XFEATURE_MAX; i++) {
-- 
cgit 


From af2c4322d986a08a6e793b74b83a62b325019c20 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:10 +0200
Subject: x86/fpu: Copy the full header in copy_user_to_xstate()

This is in preparation to verify the full xstate header as supplied by user-space.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-8-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 325db7850335..0cd7b73c25e8 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1199,13 +1199,16 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 	int i;
 	u64 xfeatures;
 	u64 allowed_features;
+	struct xstate_header hdr;
 
 	offset = offsetof(struct xregs_state, header);
-	size = sizeof(xfeatures);
+	size = sizeof(hdr);
 
-	if (__copy_from_user(&xfeatures, ubuf + offset, size))
+	if (__copy_from_user(&hdr, ubuf + offset, size))
 		return -EFAULT;
 
+	xfeatures = hdr.xfeatures;
+
 	/*
 	 * Reject if the user sets any disabled or supervisor features:
 	 */
-- 
cgit 


From 3d703477bcfe8bb57079d97198cf1e342fe1fef9 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:11 +0200
Subject: x86/fpu: Eliminate the 'xfeatures' local variable in
 copy_user_to_xstate()

We now have this field in hdr.xfeatures.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-9-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 0cd7b73c25e8..b6d78b78b5c2 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1197,7 +1197,6 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 {
 	unsigned int offset, size;
 	int i;
-	u64 xfeatures;
 	u64 allowed_features;
 	struct xstate_header hdr;
 
@@ -1207,20 +1206,18 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 	if (__copy_from_user(&hdr, ubuf + offset, size))
 		return -EFAULT;
 
-	xfeatures = hdr.xfeatures;
-
 	/*
 	 * Reject if the user sets any disabled or supervisor features:
 	 */
 	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
 
-	if (xfeatures & ~allowed_features)
+	if (hdr.xfeatures & ~allowed_features)
 		return -EINVAL;
 
 	for (i = 0; i < XFEATURE_MAX; i++) {
 		u64 mask = ((u64)1 << i);
 
-		if (xfeatures & mask) {
+		if (hdr.xfeatures & mask) {
 			void *dst = __raw_xsave_addr(xsave, 1 << i);
 
 			offset = xstate_offsets[i];
@@ -1231,7 +1228,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 		}
 	}
 
-	if (xfeatures_mxcsr_quirk(xfeatures)) {
+	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
 		offset = offsetof(struct fxregs_state, mxcsr);
 		size = MXCSR_AND_FLAGS_SIZE;
 		if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
@@ -1247,7 +1244,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 	/*
 	 * Add back in the features that came in from userspace:
 	 */
-	xsave->header.xfeatures |= xfeatures;
+	xsave->header.xfeatures |= hdr.xfeatures;
 
 	return 0;
 }
-- 
cgit 


From 98c0fad9d60e8b2cd47e15b7bee7df343648f5bb Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:12 +0200
Subject: x86/fpu: Use validate_xstate_header() to validate the xstate_header
 in copy_user_to_xstate()

Tighten the checks in copy_user_to_xstate().

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-10-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b6d78b78b5c2..f1d5476c9022 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1188,16 +1188,15 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 }
 
 /*
- * Convert from a ptrace standard-format user-space buffer to kernel XSAVES format
- * and copy to the target thread. This is called from xstateregs_set() and
- * there we check the CPU has XSAVES and a whole standard-sized buffer
- * exists.
+ * Convert from a ptrace or sigreturn standard-format user-space buffer to
+ * kernel XSAVES format and copy to the target thread. This is called from
+ * xstateregs_set(), as well as potentially from the sigreturn() and
+ * rt_sigreturn() system calls.
  */
 int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 {
 	unsigned int offset, size;
 	int i;
-	u64 allowed_features;
 	struct xstate_header hdr;
 
 	offset = offsetof(struct xregs_state, header);
@@ -1206,12 +1205,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 	if (__copy_from_user(&hdr, ubuf + offset, size))
 		return -EFAULT;
 
-	/*
-	 * Reject if the user sets any disabled or supervisor features:
-	 */
-	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
-
-	if (hdr.xfeatures & ~allowed_features)
+	if (validate_xstate_header(&hdr))
 		return -EINVAL;
 
 	for (i = 0; i < XFEATURE_MAX; i++) {
-- 
cgit 


From 738f48cb5fdd5878d11934f1898aa2bcf1578289 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:13 +0200
Subject: x86/fpu: Use using_compacted_format() instead of open coded
 X86_FEATURE_XSAVES

This is the canonical method to use.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-11-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/regset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index b831d5b9de99..3ea151372389 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -134,7 +134,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	fpu__prepare_write(fpu);
 
-	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+	if (using_compacted_format()) {
 		if (kbuf)
 			ret = copy_kernel_to_xstate(xsave, kbuf);
 		else
-- 
cgit 


From a98c75fcd0ec02623f4f56d824d76e659410a52b Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 23 Aug 2017 19:13:26 +0200
Subject: drm/tegra: trace: Fix path to include

The TRACE_INCLUDE_FILE macro needs to specify the path relative to the
define_trace.h header rather than relative to the file defining it.

Reported-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170823171326.23620-1-thierry.reding@gmail.com
---
 drivers/gpu/drm/tegra/trace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/trace.h b/drivers/gpu/drm/tegra/trace.h
index e9b7cdad5c4c..5a1ab4046e92 100644
--- a/drivers/gpu/drm/tegra/trace.h
+++ b/drivers/gpu/drm/tegra/trace.h
@@ -63,6 +63,6 @@ DEFINE_EVENT(register_access, sor_readl,
 
 /* This part must be outside protection */
 #undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/tegra
 #define TRACE_INCLUDE_FILE trace
 #include <trace/define_trace.h>
-- 
cgit 


From ff40adf7fbdff96860b1153332c0b1c7bab6e0c1 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Thu, 24 Aug 2017 18:19:48 -0600
Subject: Btrfs: use the new helper wbc_to_write_flags

This updates btrfs to use the helper wbc_to_write_flags which has been
applied in ext4/xfs/f2fs/block.

Please note that, with this, btrfs's dirty pages written by a
writeback job will carry the flag REQ_BACKGROUND, which is currently
used by writeback-throttle to determine whether it should go to get a
request or wait.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d17783d70228..4ead6da5a645 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3471,8 +3471,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	unsigned int write_flags = 0;
 	unsigned long nr_written = 0;
 
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		write_flags = REQ_SYNC;
+	write_flags = wbc_to_write_flags(wbc);
 
 	trace___extent_writepage(page, inode, wbc);
 
@@ -3718,7 +3717,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 	unsigned long i, num_pages;
 	unsigned long bio_flags = 0;
 	unsigned long start, end;
-	unsigned int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META;
+	unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
 	int ret = 0;
 
 	clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
-- 
cgit 


From 5f14efd3d437205143dcffcf776e0122eae1755a Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 23 Aug 2017 12:15:09 -0600
Subject: Btrfs: do not reset bio->bi_ops while writing bio

flush_epd_write_bio() sets bio->bi_opf by itself to honor REQ_SYNC,
but it's not needed at all since bio->bi_opf has set up properly in
both __extent_writepage() and write_one_eb(), and in the case of
write_one_eb(), it also sets REQ_META, which we will lose in
flush_epd_write_bio().

This remove this unnecessary bio->bi_opf setting.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4ead6da5a645..3738d245518c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4062,9 +4062,6 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
 	if (epd->bio) {
 		int ret;
 
-		bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
-				 epd->sync_io ? REQ_SYNC : 0);
-
 		ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
 		BUG_ON(ret < 0); /* -ENOMEM */
 		epd->bio = NULL;
-- 
cgit 


From bea7eafdbda3ba1d4b2ccb9cca829eefb7989bb9 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 22 Aug 2017 23:46:00 -0700
Subject: Btrfs: fix incorrect {node,sector}size endianness from
 BTRFS_IOC_FS_INFO

fs_info->super_copy->{node,sector}size are little-endian, but the ioctl
should return the values in native endianness. Use the cached values in
btrfs_fs_info instead. Found with sparse.

Fixes: 80a773fbfc2d ("btrfs: retrieve more info from FS_INFO ioctl")
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ae8fbf9d3de2..cf1c2ee030dd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2769,9 +2769,9 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);
 
-	fi_args->nodesize = fs_info->super_copy->nodesize;
-	fi_args->sectorsize = fs_info->super_copy->sectorsize;
-	fi_args->clone_alignment = fs_info->super_copy->sectorsize;
+	fi_args->nodesize = fs_info->nodesize;
+	fi_args->sectorsize = fs_info->sectorsize;
+	fi_args->clone_alignment = fs_info->sectorsize;
 
 	if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
 		ret = -EFAULT;
-- 
cgit 


From 63d71450c8d817649a79e37d685523f988b9cc98 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Fri, 1 Sep 2017 17:58:47 +0900
Subject: btrfs: clear ordered flag on cleaning up ordered extents

Commit 524272607e88 ("btrfs: Handle delalloc error correctly to avoid
ordered extent hang") introduced btrfs_cleanup_ordered_extents() to cleanup
submitted ordered extents. However, it does not clear the ordered bit
(Private2) of corresponding pages. Thus, the following BUG occurs from
free_pages_check_bad() (on btrfs/125 with nospace_cache).

BUG: Bad page state in process btrfs  pfn:3fa787
page:ffffdf2acfe9e1c0 count:0 mapcount:0 mapping:          (null) index:0xd
flags: 0x8000000000002008(uptodate|private_2)
raw: 8000000000002008 0000000000000000 000000000000000d 00000000ffffffff
raw: ffffdf2acf5c1b20 ffffb443802238b0 0000000000000000 0000000000000000
page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set
bad because of flags: 0x2000(private_2)

This patch clears the flag same as other places calling
btrfs_dec_test_ordered_pending() for every page in the specified range.

Fixes: 524272607e88 ("btrfs: Handle delalloc error correctly to avoid ordered extent hang")
Cc: <stable@vger.kernel.org> # 4.12
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d184a46e46c4..455c0f22fe2d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -135,6 +135,18 @@ static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
 						 const u64 offset,
 						 const u64 bytes)
 {
+	unsigned long index = offset >> PAGE_SHIFT;
+	unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
+	struct page *page;
+
+	while (index <= end_index) {
+		page = find_get_page(inode->i_mapping, index);
+		index++;
+		if (!page)
+			continue;
+		ClearPagePrivate2(page);
+		put_page(page);
+	}
 	return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
 					    bytes - PAGE_SIZE, false);
 }
-- 
cgit 


From 67c003f90fd68062d92a7ffade36f9b2a9098bd8 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Fri, 1 Sep 2017 17:59:07 +0900
Subject: btrfs: finish ordered extent cleaning if no progress is found

__endio_write_update_ordered() repeats the search until it reaches the end
of the specified range. This works well with direct IO path, because before
the function is called, it's ensured that there are ordered extents filling
whole the range. It's not the case, however, when it's called from
run_delalloc_range(): it is possible to have error in the midle of the loop
in e.g. run_delalloc_nocow(), so that there exisits the range not covered
by any ordered extents. By cleaning such "uncomplete" range,
__endio_write_update_ordered() stucks at offset where there're no ordered
extents.

Since the ordered extents are created from head to tail, we can stop the
search if there are no offset progress.

Fixes: 524272607e88 ("btrfs: Handle delalloc error correctly to avoid ordered extent hang")
Cc: <stable@vger.kernel.org> # 4.12
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 455c0f22fe2d..f78c5640c6dc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8396,6 +8396,7 @@ static void __endio_write_update_ordered(struct inode *inode,
 	btrfs_work_func_t func;
 	u64 ordered_offset = offset;
 	u64 ordered_bytes = bytes;
+	u64 last_offset;
 	int ret;
 
 	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
@@ -8407,6 +8408,7 @@ static void __endio_write_update_ordered(struct inode *inode,
 	}
 
 again:
+	last_offset = ordered_offset;
 	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
 						   &ordered_offset,
 						   ordered_bytes,
@@ -8417,6 +8419,12 @@ again:
 	btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL);
 	btrfs_queue_work(wq, &ordered->work);
 out_test:
+	/*
+	 * If btrfs_dec_test_ordered_pending does not find any ordered extent
+	 * in the range, we can exit.
+	 */
+	if (ordered_offset == last_offset)
+		return;
 	/*
 	 * our bio might span multiple ordered extents.  If we haven't
 	 * completed the accounting for the whole dio, go back and try again
-- 
cgit 


From bb166d7207432d3c7d10c45dc052f12ba3a2121d Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Fri, 25 Aug 2017 14:15:14 +0900
Subject: btrfs: fix NULL pointer dereference from free_reloc_roots()

__del_reloc_root should be called before freeing up reloc_root->node.
If not, calling __del_reloc_root() dereference reloc_root->node, causing
the system BUG.

Fixes: 6bdf131fac23 ("Btrfs: don't leak reloc root nodes on error")
Cc: <stable@vger.kernel.org> # 4.9
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/relocation.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 3a49a3c2fca4..9841faef08ea 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2400,11 +2400,11 @@ void free_reloc_roots(struct list_head *list)
 	while (!list_empty(list)) {
 		reloc_root = list_entry(list->next, struct btrfs_root,
 					root_list);
+		__del_reloc_root(reloc_root);
 		free_extent_buffer(reloc_root->node);
 		free_extent_buffer(reloc_root->commit_root);
 		reloc_root->node = NULL;
 		reloc_root->commit_root = NULL;
-		__del_reloc_root(reloc_root);
 	}
 }
 
-- 
cgit 


From ca6842bf01dc1ad41195eac1e343b4f08c496ba8 Mon Sep 17 00:00:00 2001
From: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Date: Fri, 22 Jan 2016 09:13:25 +0900
Subject: Btrfs: send: fix error number for unknown inode types

ENOTSUPP should not be returned to the user program.
 (cf. include/linux/errno.h)
Therefore, EOPNOTSUPP is used instead of ENOTSUPP.

Signed-off-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/send.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 8f1d3d6e7087..43430e6c99aa 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -2640,7 +2640,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino)
 	} else {
 		btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
 				(int)(mode & S_IFMT));
-		ret = -ENOTSUPP;
+		ret = -EOPNOTSUPP;
 		goto out;
 	}
 
-- 
cgit 


From 6d6d282932d1a609e60dc4467677e0e863682f57 Mon Sep 17 00:00:00 2001
From: satoru takeuchi <satoru.takeuchi@gmail.com>
Date: Tue, 12 Sep 2017 22:42:52 +0900
Subject: btrfs: prevent to set invalid default subvolid

`btrfs sub set-default` succeeds to set an ID which isn't corresponding to any
fs/file tree. If such the bad ID is set to a filesystem, we can't mount this
filesystem without specifying `subvol` or `subvolid` mount options.

Fixes: 6ef5ed0d386b ("Btrfs: add ioctl and incompat flag to set the default mount subvol")
Cc: <stable@vger.kernel.org>
Signed-off-by: Satoru Takeuchi <satoru.takeuchi@gmail.com>
Reviewed-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cf1c2ee030dd..d4a77993e52f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4057,6 +4057,10 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
 		ret = PTR_ERR(new_root);
 		goto out;
 	}
+	if (!is_fstree(new_root->objectid)) {
+		ret = -ENOENT;
+		goto out;
+	}
 
 	path = btrfs_alloc_path();
 	if (!path) {
-- 
cgit 


From 78ad4ce014d025f41b8dde3a81876832ead643cf Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Fri, 8 Sep 2017 17:48:55 +0900
Subject: btrfs: propagate error to btrfs_cmp_data_prepare caller

btrfs_cmp_data_prepare() (almost) always returns 0 i.e. ignoring errors
from gather_extent_pages(). While the pages are freed by
btrfs_cmp_data_free(), cmp->num_pages still has > 0. Then,
btrfs_extent_same() try to access the already freed pages causing faults
(or violates PageLocked assertion).

This patch just return the error as is so that the caller stop the process.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Fixes: f441460202cb ("btrfs: fix deadlock with extent-same and readpage")
Cc: <stable@vger.kernel.org> # 4.2
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d4a77993e52f..802df5755cd3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3028,7 +3028,7 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
 out:
 	if (ret)
 		btrfs_cmp_data_free(cmp);
-	return 0;
+	return ret;
 }
 
 static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp)
-- 
cgit 


From c2faff790ccd11ea5be8e3ca99713f116fcd6030 Mon Sep 17 00:00:00 2001
From: "Misono, Tomohiro" <misono.tomohiro@jp.fujitsu.com>
Date: Wed, 30 Aug 2017 16:33:16 +0900
Subject: btrfs: remove BTRFS_FS_QUOTA_DISABLING flag

Currently, "btrfs quota enable" would fail after "btrfs quota disable" on
the first time with syslog output "qgroup_rescan_init failed with -22", but
it would succeed on the second time.

When "quota disable" is called, BTRFS_FS_QUOTA_DISABLING flag bit will be
set in fs_info->flags in btrfs_quota_disable(), but it will not be droppd
in btrfs_run_qgroups() (which is called in btrfs_commit_transaction())
because quota_root has already been freed. If "quota enable" is called
after that, both BTRFS_FS_QUOTA_DISABLING and BTRFS_FS_QUOTA_ENABLED flag
would be dropped in the btrfs_run_qgroups() since quota_root is not NULL.
This leads to the failure of "quota enable" on the first time.

BTRFS_FS_QUOTA_DISABLING flag is not used outside of "quota disable"
context and is equivalent to whether quota_root is NULL or not.
btrfs_run_qgroups() checks whether quota_root is NULL or not in the first
place.

So, let's remove BTRFS_FS_QUOTA_DISABLING flag.

Signed-off-by: Tomohiro Misono <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h  | 1 -
 fs/btrfs/qgroup.c | 4 ----
 2 files changed, 5 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2add002662f4..b7ccfcc01732 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -708,7 +708,6 @@ struct btrfs_delayed_root;
 #define BTRFS_FS_OPEN				5
 #define BTRFS_FS_QUOTA_ENABLED			6
 #define BTRFS_FS_QUOTA_ENABLING			7
-#define BTRFS_FS_QUOTA_DISABLING		8
 #define BTRFS_FS_UPDATE_UUID_TREE_GEN		9
 #define BTRFS_FS_CREATING_FREE_SPACE_TREE	10
 #define BTRFS_FS_BTREE_ERR			11
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5c8b61c86e61..770f667269f5 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -807,7 +807,6 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
 	}
 	ret = 0;
 out:
-	set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags);
 	btrfs_free_path(path);
 	return ret;
 }
@@ -953,7 +952,6 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 	if (!fs_info->quota_root)
 		goto out;
 	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
-	set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags);
 	btrfs_qgroup_wait_for_completion(fs_info, false);
 	spin_lock(&fs_info->qgroup_lock);
 	quota_root = fs_info->quota_root;
@@ -2086,8 +2084,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 
 	if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags))
 		set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
-	if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags))
-		clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
 
 	spin_lock(&fs_info->qgroup_lock);
 	while (!list_empty(&fs_info->dirty_qgroups)) {
-- 
cgit 


From fed3b381145e2e7c66b0b3f640851e1633ebd07f Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 13 Sep 2017 12:25:21 -0600
Subject: Btrfs: do not backup tree roots when fsync

It doesn't make sense to backup tree roots when doing fsync, since
during fsync those tree roots have not been consistent on disk.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 27d458640536..0f2271815eb6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3641,7 +3641,14 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
 	u64 flags;
 
 	do_barriers = !btrfs_test_opt(fs_info, NOBARRIER);
-	backup_super_roots(fs_info);
+
+	/*
+	 * max_mirrors == 0 indicates we're from commit_transaction,
+	 * not from fsync where the tree roots in fs_info have not
+	 * been consistent on disk.
+	 */
+	if (max_mirrors == 0)
+		backup_super_roots(fs_info);
 
 	sb = fs_info->super_for_commit;
 	dev_item = &sb->dev_item;
-- 
cgit 


From bd7d63c2ceaf737eeb21630a2b62fc5fe34dba29 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Tue, 19 Sep 2017 17:50:09 -0600
Subject: Btrfs: use btrfs_op instead of bio_op in __btrfs_map_block

This seems to be a leftover of commit cf8cddd38bab ("btrfs: don't
abuse REQ_OP_* flags for btrfs_map_block").

It should use btrfs_op() helper to provide one of 'enum btrfs_map_op'
types.

Fixes: cf8cddd38bab ("btrfs: don't abuse REQ_OP_* flags for btrfs_map_block")
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: Satoru Takeuchi <satoru.takeuchi@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d024f1b07282..6a72f88f77b6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6166,7 +6166,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	map_length = length;
 
 	btrfs_bio_counter_inc_blocked(fs_info);
-	ret = __btrfs_map_block(fs_info, bio_op(bio), logical,
+	ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
 				&map_length, &bbio, mirror_num, 1);
 	if (ret) {
 		btrfs_bio_counter_dec(fs_info);
-- 
cgit 


From cf1167d5c1abf3bc42b2a1562bfa7937c05337e2 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 20 Sep 2017 17:50:18 -0600
Subject: Btrfs: fix kernel oops while reading compressed data

The kernel oops happens at

kernel BUG at fs/btrfs/extent_io.c:2104!
...
RIP: clean_io_failure+0x263/0x2a0 [btrfs]

It's showing that read-repair code is using an improper mirror index.
This is due to the fact that compression read's endio hasn't recorded
the failed mirror index in %cb->orig_bio.

With this, btrfs's read-repair can work properly on reading compressed
data.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reported-by: Paul Jones <paul@pauljones.id.au>
Tested-by: Paul Jones <paul@pauljones.id.au>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 883ecc58fd0d..c6aa53c4c102 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -107,6 +107,7 @@ static void end_compressed_bio_read(struct bio *bio)
 	struct inode *inode;
 	struct page *page;
 	unsigned long index;
+	unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
 	int ret;
 
 	if (bio->bi_status)
@@ -118,6 +119,14 @@ static void end_compressed_bio_read(struct bio *bio)
 	if (!refcount_dec_and_test(&cb->pending_bios))
 		goto out;
 
+	/*
+	 * Record the correct mirror_num in cb->orig_bio so that
+	 * read-repair can work properly.
+	 */
+	ASSERT(btrfs_io_bio(cb->orig_bio));
+	btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
+	cb->mirror_num = mirror;
+
 	inode = cb->inode;
 	ret = check_compressed_csum(BTRFS_I(inode), cb,
 				    (u64)bio->bi_iter.bi_sector << 9);
-- 
cgit 


From e6311f240c946788131ba2b97e14f37312688072 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 20 Sep 2017 17:50:19 -0600
Subject: Btrfs: skip checksum when reading compressed data if some IO have
 failed

Currently even if the underlying disk reports failure on IO,
compressed read endio still gets to verify checksum and reports it as
a checksum error.

In fact, if some IO have failed during reading a compressed data
extent , there's no way the checksum could match, therefore, we can
skip that in order to return error quickly to the upper layer.

Please note that we need to do this after recording the failed mirror
index so that read-repair in the upper layer's endio can work
properly.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Tested-by: Paul Jones <paul@pauljones.id.au>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c6aa53c4c102..fc31af98a41b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -108,7 +108,7 @@ static void end_compressed_bio_read(struct bio *bio)
 	struct page *page;
 	unsigned long index;
 	unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
-	int ret;
+	int ret = 0;
 
 	if (bio->bi_status)
 		cb->errors = 1;
@@ -127,6 +127,13 @@ static void end_compressed_bio_read(struct bio *bio)
 	btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
 	cb->mirror_num = mirror;
 
+	/*
+	 * Some IO in this cb have failed, just skip checksum as there
+	 * is no way it could be correct.
+	 */
+	if (cb->errors == 1)
+		goto csum_failed;
+
 	inode = cb->inode;
 	ret = check_compressed_csum(BTRFS_I(inode), cb,
 				    (u64)bio->bi_iter.bi_sector << 9);
-- 
cgit 


From 36b96fdc6b2dc6f4a0fedc563fa7508c91b90a10 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Sun, 17 Sep 2017 09:02:29 +0000
Subject: btrfs: Report error on removing qgroup if del_qgroup_item fails

Previously, we were calling del_qgroup_item, and ignoring the return code
resulting in a potential to have divergent in-memory state without an
error. Perhaps, it makes sense to handle this error code, and put the
filesystem into a read only, or similar state.

This patch only adds reporting of the error if the error is fatal,
(any error other than qgroup not found).

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Reviewed-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 770f667269f5..e172d4843eae 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1305,6 +1305,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
 		}
 	}
 	ret = del_qgroup_item(trans, quota_root, qgroupid);
+	if (ret && ret != -ENOENT)
+		goto out;
 
 	while (!list_empty(&qgroup->groups)) {
 		list = list_first_entry(&qgroup->groups,
-- 
cgit 


From 99c4e3b96c797f047be4e6b7c03cfca01959f146 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Fri, 15 Sep 2017 15:06:51 -0600
Subject: Btrfs: fix unexpected result when dio reading corrupted blocks

commit 4246a0b63bd8 ("block: add a bi_error field to struct bio")
changed the logic of how dio read endio reports errors.

For single stripe dio read, %bio->bi_status reflects the error before
verifying checksum, and now we're updating it when data block matches
with its checksum, while in the mismatching case, %bio->bi_status is
not updated to relfect that.

When some blocks in a file have been corrupted on disk, reading such a
file ends up with

1) checksum errors are reported in kernel log
2) read(2) returns successfully with some content being 0x01.

In order to fix it, we need to report its checksum mismatch error to
the upper layer (dio layer in this case) as well.

Fixes: 4246a0b63bd8 ("block: add a bi_error field to struct bio")
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reported-by: Goffredo Baroncelli <kreijack@inwind.it>
Tested-by: Goffredo Baroncelli <kreijack@inwind.it>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f78c5640c6dc..c242d0230db9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8366,11 +8366,8 @@ static void btrfs_endio_direct_read(struct bio *bio)
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 	blk_status_t err = bio->bi_status;
 
-	if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) {
+	if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
 		err = btrfs_subio_endio_read(inode, io_bio, err);
-		if (!err)
-			bio->bi_status = 0;
-	}
 
 	unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
 		      dip->logical_offset + dip->bytes - 1);
@@ -8378,7 +8375,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
 
 	kfree(dip);
 
-	dio_bio->bi_status = bio->bi_status;
+	dio_bio->bi_status = err;
 	dio_end_io(dio_bio);
 
 	if (io_bio->end_io)
-- 
cgit 


From 8c6c592831a09a28428448e68fb08c6bbb8b9b8b Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 29 Aug 2017 10:11:39 -0400
Subject: btrfs: log csums for all modified extents

Amir reported a bug discovered by his cleaned up version of my
dm-log-writes xfstests where we were missing csums at certain replay
points.  This is because fsx was doing an msync(), which essentially
fsync()'s a specific range of a file.  We will log all modified extents,
but only search for the checksums in the range we are being asked to
sync.  We cannot simply log the extents in the range we're being asked
because we are logging the inode item as it is currently, which if it
has had a i_size update before the msync means we will miss extents when
replaying.  We could possibly get around this by marking the inode with
the transaction that extended the i_size to see if we have this case,
but this would be racy and we'd have to lock the whole range of the
inode to make sure we didn't have an ordered extent outside of our range
that was in the middle of completing.

Fix this simply by keeping track of the modified extents range and
logging the csums for the entire range of extents that we are logging.
This makes the xfstest pass.

Reported-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ad7f4bab640b..c800d067fcbf 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4181,6 +4181,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	struct extent_map *em, *n;
 	struct list_head extents;
 	struct extent_map_tree *tree = &inode->extent_tree;
+	u64 logged_start, logged_end;
 	u64 test_gen;
 	int ret = 0;
 	int num = 0;
@@ -4190,10 +4191,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	down_write(&inode->dio_sem);
 	write_lock(&tree->lock);
 	test_gen = root->fs_info->last_trans_committed;
+	logged_start = start;
+	logged_end = end;
 
 	list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
 		list_del_init(&em->list);
-
 		/*
 		 * Just an arbitrary number, this can be really CPU intensive
 		 * once we start getting a lot of extents, and really once we
@@ -4208,6 +4210,12 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 
 		if (em->generation <= test_gen)
 			continue;
+
+		if (em->start < logged_start)
+			logged_start = em->start;
+		if ((em->start + em->len - 1) > logged_end)
+			logged_end = em->start + em->len - 1;
+
 		/* Need a ref to keep it from getting evicted from cache */
 		refcount_inc(&em->refs);
 		set_bit(EXTENT_FLAG_LOGGING, &em->flags);
@@ -4216,7 +4224,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	}
 
 	list_sort(NULL, &extents, extent_cmp);
-	btrfs_get_logged_extents(inode, logged_list, start, end);
+	btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
 	/*
 	 * Some ordered extents started by fsync might have completed
 	 * before we could collect them into the list logged_list, which
-- 
cgit 


From 6851a3db7e224bbb85e23b3c64a506c9e0904382 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Mon, 18 Sep 2017 14:46:03 -0700
Subject: xfs: validate bdev support for DAX inode flag

Currently only the blocksize is checked, but we should really be calling
bdev_dax_supported() which also tests to make sure we can get a
struct dax_device and that the dax_direct_access() path is working.

This is the same check that we do for the "-o dax" mount option in
xfs_fs_fill_super().

This does not fix the race issues that caused the XFS DAX inode option to
be disabled, so that option will still be disabled.  If/when we re-enable
it, though, I think we will want this issue to have been fixed.  I also do
think that we want to fix this in stable kernels.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
CC: stable@vger.kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_ioctl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 5049e8ab6e30..aa75389be8cf 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1088,6 +1088,7 @@ xfs_ioctl_setattr_dax_invalidate(
 	int			*join_flags)
 {
 	struct inode		*inode = VFS_I(ip);
+	struct super_block	*sb = inode->i_sb;
 	int			error;
 
 	*join_flags = 0;
@@ -1100,7 +1101,7 @@ xfs_ioctl_setattr_dax_invalidate(
 	if (fa->fsx_xflags & FS_XFLAG_DAX) {
 		if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
 			return -EINVAL;
-		if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE)
+		if (bdev_dax_supported(sb, sb->s_blocksize) < 0)
 			return -EINVAL;
 	}
 
-- 
cgit 


From 546e7be8244dc050effef0555df5b8d94d10dafc Mon Sep 17 00:00:00 2001
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Date: Fri, 22 Sep 2017 11:47:33 -0700
Subject: iomap_dio_rw: Allocate AIO completion queue before submitting dio

Executing xfs/104 test in a loop on Linux-v4.13 kernel on a ppc64
machine can cause the following NULL pointer dereference,

.queue_work_on+0x4c/0x80
.iomap_dio_bio_end_io+0xbc/0x1f0
.bio_endio+0x118/0x1f0
.blk_update_request+0xd0/0x470
.blk_mq_end_request+0x24/0xc0
.lo_complete_rq+0x40/0xe0
.__blk_mq_complete_request_remote+0x28/0x40
.flush_smp_call_function_queue+0xc4/0x1e0
.smp_ipi_demux_relaxed+0x8c/0x100
.icp_hv_ipi_action+0x54/0xa0
.__handle_irq_event_percpu+0x84/0x2c0
.handle_irq_event_percpu+0x28/0x80
.handle_percpu_irq+0x78/0xc0
.generic_handle_irq+0x40/0x70
.__do_irq+0x88/0x200
.call_do_irq+0x14/0x24
.do_IRQ+0x84/0x130

This occurs due to the following sequence of events,

1. Allocate dio for Direct I/O write.
2. Invoke iomap_apply() until iov_iter_count() bytes have been submitted.
   - Assume that we have submitted atleast one bio. Hence iomap_dio->ref value
     will be >= 2.
   - If during the second iteration, iomap_apply() ends up returning -ENOSPC, we would
     break out of the loop and since the 'ret' value is a negative number we
     end up not allocating memory for super_block->s_dio_done_wq.
3. Meanwhile, iomap_dio_bio_end_io() is invoked for bios that have been
   submitted and here the code ends up dereferencing the NULL pointer stored
   at super_block->s_dio_done_wq.

This commit fixes the bug by allocating memory for
super_block->s_dio_done_wq before iomap_apply() is invoked.

Reported-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Eryu Guan <eguan@redhat.com>
Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index 269b24a01f32..d4f526a3f5b2 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -993,6 +993,13 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	WARN_ON_ONCE(ret);
 	ret = 0;
 
+	if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+	    !inode->i_sb->s_dio_done_wq) {
+		ret = sb_init_dio_done_wq(inode->i_sb);
+		if (ret < 0)
+			goto out_free_dio;
+	}
+
 	inode_dio_begin(inode);
 
 	blk_start_plug(&plug);
@@ -1015,13 +1022,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (ret < 0)
 		iomap_dio_set_error(dio, ret);
 
-	if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
-			!inode->i_sb->s_dio_done_wq) {
-		ret = sb_init_dio_done_wq(inode->i_sb);
-		if (ret < 0)
-			iomap_dio_set_error(dio, ret);
-	}
-
 	if (!atomic_dec_and_test(&dio->ref)) {
 		if (!is_sync_kiocb(iocb))
 			return -EIOCBQUEUED;
-- 
cgit 


From ee70daaba82d70766d0723b743d9fdeb3b06102a Mon Sep 17 00:00:00 2001
From: Eryu Guan <eguan@redhat.com>
Date: Thu, 21 Sep 2017 11:26:18 -0700
Subject: xfs: update i_size after unwritten conversion in dio completion

Since commit d531d91d6990 ("xfs: always use unwritten extents for
direct I/O writes"), we start allocating unwritten extents for all
direct writes to allow appending aio in XFS.

But for dio writes that could extend file size we update the in-core
inode size first, then convert the unwritten extents to real
allocations at dio completion time in xfs_dio_write_end_io(). Thus a
racing direct read could see the new i_size and find the unwritten
extents first and read zeros instead of actual data, if the direct
writer also takes a shared iolock.

Fix it by updating the in-core inode size after the unwritten extent
conversion. To do this, introduce a new boolean argument to
xfs_iomap_write_unwritten() to tell if we want to update in-core
i_size or not.

Suggested-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c  |  3 ++-
 fs/xfs/xfs_file.c  | 33 +++++++++++++++++++--------------
 fs/xfs/xfs_iomap.c |  7 +++++--
 fs/xfs/xfs_iomap.h |  2 +-
 fs/xfs/xfs_pnfs.c  |  2 +-
 5 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 29172609f2a3..f18e5932aec4 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -343,7 +343,8 @@ xfs_end_io(
 		error = xfs_reflink_end_cow(ip, offset, size);
 		break;
 	case XFS_IO_UNWRITTEN:
-		error = xfs_iomap_write_unwritten(ip, offset, size);
+		/* writeback should never update isize */
+		error = xfs_iomap_write_unwritten(ip, offset, size, false);
 		break;
 	default:
 		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 350b6d43ba23..309e26c9dddb 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -434,7 +434,6 @@ xfs_dio_write_end_io(
 	struct inode		*inode = file_inode(iocb->ki_filp);
 	struct xfs_inode	*ip = XFS_I(inode);
 	loff_t			offset = iocb->ki_pos;
-	bool			update_size = false;
 	int			error = 0;
 
 	trace_xfs_end_io_direct_write(ip, offset, size);
@@ -445,6 +444,21 @@ xfs_dio_write_end_io(
 	if (size <= 0)
 		return size;
 
+	if (flags & IOMAP_DIO_COW) {
+		error = xfs_reflink_end_cow(ip, offset, size);
+		if (error)
+			return error;
+	}
+
+	/*
+	 * Unwritten conversion updates the in-core isize after extent
+	 * conversion but before updating the on-disk size. Updating isize any
+	 * earlier allows a racing dio read to find unwritten extents before
+	 * they are converted.
+	 */
+	if (flags & IOMAP_DIO_UNWRITTEN)
+		return xfs_iomap_write_unwritten(ip, offset, size, true);
+
 	/*
 	 * We need to update the in-core inode size here so that we don't end up
 	 * with the on-disk inode size being outside the in-core inode size. We
@@ -459,20 +473,11 @@ xfs_dio_write_end_io(
 	spin_lock(&ip->i_flags_lock);
 	if (offset + size > i_size_read(inode)) {
 		i_size_write(inode, offset + size);
-		update_size = true;
-	}
-	spin_unlock(&ip->i_flags_lock);
-
-	if (flags & IOMAP_DIO_COW) {
-		error = xfs_reflink_end_cow(ip, offset, size);
-		if (error)
-			return error;
-	}
-
-	if (flags & IOMAP_DIO_UNWRITTEN)
-		error = xfs_iomap_write_unwritten(ip, offset, size);
-	else if (update_size)
+		spin_unlock(&ip->i_flags_lock);
 		error = xfs_setfilesize(ip, offset, size);
+	} else {
+		spin_unlock(&ip->i_flags_lock);
+	}
 
 	return error;
 }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index a1909bc064e9..f179bdf1644d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -829,7 +829,8 @@ int
 xfs_iomap_write_unwritten(
 	xfs_inode_t	*ip,
 	xfs_off_t	offset,
-	xfs_off_t	count)
+	xfs_off_t	count,
+	bool		update_isize)
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_fileoff_t	offset_fsb;
@@ -840,6 +841,7 @@ xfs_iomap_write_unwritten(
 	xfs_trans_t	*tp;
 	xfs_bmbt_irec_t imap;
 	struct xfs_defer_ops dfops;
+	struct inode	*inode = VFS_I(ip);
 	xfs_fsize_t	i_size;
 	uint		resblks;
 	int		error;
@@ -899,7 +901,8 @@ xfs_iomap_write_unwritten(
 		i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
 		if (i_size > offset + count)
 			i_size = offset + count;
-
+		if (update_isize && i_size > i_size_read(inode))
+			i_size_write(inode, i_size);
 		i_size = xfs_new_eof(ip, i_size);
 		if (i_size) {
 			ip->i_d.di_size = i_size;
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 00db3ecea084..ee535065c5d0 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 			struct xfs_bmbt_irec *, int);
 int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
 			struct xfs_bmbt_irec *);
-int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
+int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
 		struct xfs_bmbt_irec *);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 2f2dc3c09ad0..4246876df7b7 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -274,7 +274,7 @@ xfs_fs_commit_blocks(
 					(end - 1) >> PAGE_SHIFT);
 		WARN_ON_ONCE(error);
 
-		error = xfs_iomap_write_unwritten(ip, start, length);
+		error = xfs_iomap_write_unwritten(ip, start, length, false);
 		if (error)
 			goto out_drop_iolock;
 	}
-- 
cgit 


From 9789dd9e1d939232e8ff4c50ef8e75aa6781b3fb Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 18 Sep 2017 09:42:09 -0700
Subject: xfs: perag initialization should only touch m_ag_max_usable for AG 0

We call __xfs_ag_resv_init to make a per-AG reservation for each AG.
This makes the reservation per-AG, not per-filesystem.  Therefore, it
is incorrect to adjust m_ag_max_usable for each AG.  Adjust it only
when we're reserving AG 0's blocks so that we only do it once per fs.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_ag_resv.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index b008ff3250eb..df3e600835e8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -156,7 +156,8 @@ __xfs_ag_resv_free(
 	trace_xfs_ag_resv_free(pag, type, 0);
 
 	resv = xfs_perag_resv(pag, type);
-	pag->pag_mount->m_ag_max_usable += resv->ar_asked;
+	if (pag->pag_agno == 0)
+		pag->pag_mount->m_ag_max_usable += resv->ar_asked;
 	/*
 	 * AGFL blocks are always considered "free", so whatever
 	 * was reserved at mount time must be given back at umount.
@@ -216,7 +217,14 @@ __xfs_ag_resv_init(
 		return error;
 	}
 
-	mp->m_ag_max_usable -= ask;
+	/*
+	 * Reduce the maximum per-AG allocation length by however much we're
+	 * trying to reserve for an AG.  Since this is a filesystem-wide
+	 * counter, we only make the adjustment for AG 0.  This assumes that
+	 * there aren't any AGs hungrier for per-AG reservation than AG 0.
+	 */
+	if (pag->pag_agno == 0)
+		mp->m_ag_max_usable -= ask;
 
 	resv = xfs_perag_resv(pag, type);
 	resv->ar_asked = ask;
-- 
cgit 


From 842f6e9f786226c58fcbd5ef80eadca72fdfe652 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Fri, 22 Sep 2017 11:47:46 -0700
Subject: xfs: Capture state of the right inode in xfs_iflush_done

My previous patch: d3a304b6292168b83b45d624784f973fdc1ca674 check for
XFS_LI_FAILED flag xfs_iflush done, so the failed item can be properly
resubmitted.

In the loop scanning other inodes being completed, it should check the
current item for the XFS_LI_FAILED, and not the initial one.

The state of the initial inode is checked after the loop ends

Kudos to Eric for catching this.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_inode_item.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6d0f74ec31e8..a705f34b58fa 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -745,7 +745,7 @@ xfs_iflush_done(
 		 */
 		iip = INODE_ITEM(blip);
 		if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
-		    lip->li_flags & XFS_LI_FAILED)
+		    (blip->li_flags & XFS_LI_FAILED))
 			need_ail++;
 
 		blip = next;
-- 
cgit 


From 5e5c943c1f257c2b3424fc3f8a7b18570152dab3 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 18 Sep 2017 09:41:17 -0700
Subject: xfs: revert "xfs: factor rmap btree size into the indlen
 calculations"

In commit fd26a88093ba we added a worst case estimate for rmapbt blocks
needed to satisfy the block mapping request.  Since then, we added the
ability to reserve enough space in each AG such that we should never run
out of blocks to grow the rmapbt, which makes this calculation
unnecessary.  Revert the commit because it makes the extra delalloc
indlen accounting unnecessary and incorrect.

Reported-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 459f4b4f08fe..044a363119be 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -49,7 +49,6 @@
 #include "xfs_rmap.h"
 #include "xfs_ag_resv.h"
 #include "xfs_refcount.h"
-#include "xfs_rmap_btree.h"
 #include "xfs_icache.h"
 
 
@@ -192,12 +191,8 @@ xfs_bmap_worst_indlen(
 	int		maxrecs;	/* maximum record count at this level */
 	xfs_mount_t	*mp;		/* mount structure */
 	xfs_filblks_t	rval;		/* return value */
-	xfs_filblks_t   orig_len;
 
 	mp = ip->i_mount;
-
-	/* Calculate the worst-case size of the bmbt. */
-	orig_len = len;
 	maxrecs = mp->m_bmap_dmxr[0];
 	for (level = 0, rval = 0;
 	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
@@ -205,20 +200,12 @@ xfs_bmap_worst_indlen(
 		len += maxrecs - 1;
 		do_div(len, maxrecs);
 		rval += len;
-		if (len == 1) {
-			rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
+		if (len == 1)
+			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
 				level - 1;
-			break;
-		}
 		if (level == 0)
 			maxrecs = mp->m_bmap_dmxr[1];
 	}
-
-	/* Calculate the worst-case size of the rmapbt. */
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-		rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
-				mp->m_rmap_maxlevels;
-
 	return rval;
 }
 
-- 
cgit 


From fc46820b27a2d9a46f7e90c9ceb4a64a1bc5fab8 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 25 Sep 2017 12:23:03 +0200
Subject: vfs: Return -ENXIO for negative SEEK_HOLE / SEEK_DATA offsets

In generic_file_llseek_size, return -ENXIO for negative offsets as well
as offsets beyond EOF.  This affects filesystems which don't implement
SEEK_HOLE / SEEK_DATA internally, possibly because they don't support
holes.

Fixes xfstest generic/448.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/read_write.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index a2b9a47235c5..f0d4b16873e8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -112,7 +112,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
 		 * In the generic case the entire file is data, so as long as
 		 * offset isn't at the end of the file then the offset is data.
 		 */
-		if (offset >= eof)
+		if ((unsigned long long)offset >= eof)
 			return -ENXIO;
 		break;
 	case SEEK_HOLE:
@@ -120,7 +120,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
 		 * There is a virtual hole at the end of the file, so as long as
 		 * offset isn't i_size or larger, return i_size.
 		 */
-		if (offset >= eof)
+		if ((unsigned long long)offset >= eof)
 			return -ENXIO;
 		offset = eof;
 		break;
-- 
cgit 


From ce7c47d60bda6c7f09ccf16e978d971c8fa16ff0 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Mon, 18 Sep 2017 23:00:59 +0300
Subject: platform/x86: fujitsu-laptop: Don't oops when FUJ02E3 is not presnt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

My Fujitsu-Siemens Lifebook S6120 doesn't have the FUJ02E3 device,
but it does have FUJ02B1. That means we do register the backlight
device (and it even seems to work), but the code will oops as soon
as we try to set the backlight brightness because it's trying to
call call_fext_func() with a NULL device. Let's just skip those
function calls when the FUJ02E3 device is not present.

Cc: Jonathan Woithe <jwoithe@just42.net>
Cc: Andy Shevchenko <andy@infradead.org>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: <stable@vger.kernel.org> # 4.13.x
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/fujitsu-laptop.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 85de30f93a9c..56a8195096a2 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -254,10 +254,12 @@ static int bl_update_status(struct backlight_device *b)
 {
 	struct acpi_device *device = bl_get_data(b);
 
-	if (b->props.power == FB_BLANK_POWERDOWN)
-		call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3);
-	else
-		call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0);
+	if (fext) {
+		if (b->props.power == FB_BLANK_POWERDOWN)
+			call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3);
+		else
+			call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0);
+	}
 
 	return set_lcd_level(device, b->props.brightness);
 }
-- 
cgit 


From 4c6bb69663b3a3f2db8f488356e96acb5460f25f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 26 Sep 2017 10:36:05 +0200
Subject: quota: Fix quota corruption with generic/232 test

Eric has reported that since commit d2faa415166b "quota: Do not acquire
dqio_sem for dquot overwrites in v2 format" test generic/232
occasionally fails due to quota information being incorrect. Indeed that
commit was too eager to remove dqio_sem completely from the path that
just overwrites quota structure with updated information. Although that
is innocent on its own, another process that inserts new quota structure
to the same block can perform read-modify-write cycle of that block thus
effectively discarding quota information update if they race in a wrong
way.

Fix the problem by acquiring dqio_sem for reading for overwrites of
quota structure. Note that it *is* possible to completely avoid taking
dqio_sem in the overwrite path however that will require modifying path
inserting / deleting quota structures to avoid RMW cycles of the full
block and for now it is not clear whether it is worth the hassle.

Fixes: d2faa415166b2883428efa92f451774ef44373ac
Reported-and-tested-by: Eric Whitney <enwlinux@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/quota_v2.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index c0187cda2c1e..a73e5b34db41 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -328,12 +328,16 @@ static int v2_write_dquot(struct dquot *dquot)
 	if (!dquot->dq_off) {
 		alloc = true;
 		down_write(&dqopt->dqio_sem);
+	} else {
+		down_read(&dqopt->dqio_sem);
 	}
 	ret = qtree_write_dquot(
 			sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv,
 			dquot);
 	if (alloc)
 		up_write(&dqopt->dqio_sem);
+	else
+		up_read(&dqopt->dqio_sem);
 	return ret;
 }
 
-- 
cgit 


From 5371513fb338fb9989c569dc071326d369d6ade8 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 26 Sep 2017 15:57:16 +0100
Subject: arm64: Make sure SPsel is always set

When the kernel is entered at EL2 on an ARMv8.0 system, we construct
the EL1 pstate and make sure this uses the the EL1 stack pointer
(we perform an exception return to EL1h).

But if the kernel is either entered at EL1 or stays at EL2 (because
we're on a VHE-capable system), we fail to set SPsel, and use whatever
stack selection the higher exception level has choosen for us.

Let's not take any chance, and make sure that SPsel is set to one
before we decide the mode we're going to run in.

Cc: <stable@vger.kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/head.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 7434ec0c7a27..0b243ecaf7ac 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -384,6 +384,7 @@ ENTRY(kimage_vaddr)
  * booted in EL1 or EL2 respectively.
  */
 ENTRY(el2_setup)
+	msr	SPsel, #1			// We want to use SP_EL{1,2}
 	mrs	x0, CurrentEL
 	cmp	x0, #CurrentEL_EL2
 	b.eq	1f
-- 
cgit 


From cd39e1176d320157831ce030b4c869bd2d5eb142 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 6 Jun 2017 12:57:04 +0200
Subject: KVM: VMX: extract __pi_post_block
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Simple code movement patch, preparing for the next one.

Cc: Huangweidong <weidong.huang@huawei.com>
Cc: Gonglei <arei.gonglei@huawei.com>
Cc: wangxin <wangxinxin.wang@huawei.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Tested-by: Longpeng (Mike) <longpeng2@huawei.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 71 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 38 insertions(+), 33 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c83d28b0ab05..0002b14307ab 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11705,6 +11705,43 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
 	kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+	struct pi_desc old, new;
+	unsigned int dest;
+	unsigned long flags;
+
+	do {
+		old.control = new.control = pi_desc->control;
+
+		dest = cpu_physical_id(vcpu->cpu);
+
+		if (x2apic_enabled())
+			new.ndst = dest;
+		else
+			new.ndst = (dest << 8) & 0xFF00;
+
+		/* Allow posting non-urgent interrupts */
+		new.sn = 0;
+
+		/* set 'NV' to 'notification vector' */
+		new.nv = POSTED_INTR_VECTOR;
+	} while (cmpxchg(&pi_desc->control, old.control,
+			new.control) != old.control);
+
+	if(vcpu->pre_pcpu != -1) {
+		spin_lock_irqsave(
+			&per_cpu(blocked_vcpu_on_cpu_lock,
+			vcpu->pre_pcpu), flags);
+		list_del(&vcpu->blocked_vcpu_list);
+		spin_unlock_irqrestore(
+			&per_cpu(blocked_vcpu_on_cpu_lock,
+			vcpu->pre_pcpu), flags);
+		vcpu->pre_pcpu = -1;
+	}
+}
+
 /*
  * This routine does the following things for vCPU which is going
  * to be blocked if VT-d PI is enabled.
@@ -11798,44 +11835,12 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
 
 static void pi_post_block(struct kvm_vcpu *vcpu)
 {
-	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-	struct pi_desc old, new;
-	unsigned int dest;
-	unsigned long flags;
-
 	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
 		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
 		!kvm_vcpu_apicv_active(vcpu))
 		return;
 
-	do {
-		old.control = new.control = pi_desc->control;
-
-		dest = cpu_physical_id(vcpu->cpu);
-
-		if (x2apic_enabled())
-			new.ndst = dest;
-		else
-			new.ndst = (dest << 8) & 0xFF00;
-
-		/* Allow posting non-urgent interrupts */
-		new.sn = 0;
-
-		/* set 'NV' to 'notification vector' */
-		new.nv = POSTED_INTR_VECTOR;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
-
-	if(vcpu->pre_pcpu != -1) {
-		spin_lock_irqsave(
-			&per_cpu(blocked_vcpu_on_cpu_lock,
-			vcpu->pre_pcpu), flags);
-		list_del(&vcpu->blocked_vcpu_list);
-		spin_unlock_irqrestore(
-			&per_cpu(blocked_vcpu_on_cpu_lock,
-			vcpu->pre_pcpu), flags);
-		vcpu->pre_pcpu = -1;
-	}
+	__pi_post_block(vcpu);
 }
 
 static void vmx_post_block(struct kvm_vcpu *vcpu)
-- 
cgit 


From 8b306e2f3c41939ea528e6174c88cfbfff893ce1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 6 Jun 2017 12:57:05 +0200
Subject: KVM: VMX: avoid double list add with VT-d posted interrupts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In some cases, for example involving hot-unplug of assigned
devices, pi_post_block can forget to remove the vCPU from the
blocked_vcpu_list.  When this happens, the next call to
pi_pre_block corrupts the list.

Fix this in two ways.  First, check vcpu->pre_pcpu in pi_pre_block
and WARN instead of adding the element twice in the list.  Second,
always do the list removal in pi_post_block if vcpu->pre_pcpu is
set (not -1).

The new code keeps interrupts disabled for the whole duration of
pi_pre_block/pi_post_block.  This is not strictly necessary, but
easier to follow.  For the same reason, PI.ON is checked only
after the cmpxchg, and to handle it we just call the post-block
code.  This removes duplication of the list removal code.

Cc: Huangweidong <weidong.huang@huawei.com>
Cc: Gonglei <arei.gonglei@huawei.com>
Cc: wangxin <wangxinxin.wang@huawei.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Tested-by: Longpeng (Mike) <longpeng2@huawei.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 62 ++++++++++++++++++++++--------------------------------
 1 file changed, 25 insertions(+), 37 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0002b14307ab..0bfe97e50a40 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11710,10 +11710,11 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 	struct pi_desc old, new;
 	unsigned int dest;
-	unsigned long flags;
 
 	do {
 		old.control = new.control = pi_desc->control;
+		WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+		     "Wakeup handler not enabled while the VCPU is blocked\n");
 
 		dest = cpu_physical_id(vcpu->cpu);
 
@@ -11730,14 +11731,10 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
 	} while (cmpxchg(&pi_desc->control, old.control,
 			new.control) != old.control);
 
-	if(vcpu->pre_pcpu != -1) {
-		spin_lock_irqsave(
-			&per_cpu(blocked_vcpu_on_cpu_lock,
-			vcpu->pre_pcpu), flags);
+	if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
 		list_del(&vcpu->blocked_vcpu_list);
-		spin_unlock_irqrestore(
-			&per_cpu(blocked_vcpu_on_cpu_lock,
-			vcpu->pre_pcpu), flags);
+		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
 		vcpu->pre_pcpu = -1;
 	}
 }
@@ -11757,7 +11754,6 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
  */
 static int pi_pre_block(struct kvm_vcpu *vcpu)
 {
-	unsigned long flags;
 	unsigned int dest;
 	struct pi_desc old, new;
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -11767,34 +11763,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
 		!kvm_vcpu_apicv_active(vcpu))
 		return 0;
 
-	vcpu->pre_pcpu = vcpu->cpu;
-	spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-			  vcpu->pre_pcpu), flags);
-	list_add_tail(&vcpu->blocked_vcpu_list,
-		      &per_cpu(blocked_vcpu_on_cpu,
-		      vcpu->pre_pcpu));
-	spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
-			       vcpu->pre_pcpu), flags);
+	WARN_ON(irqs_disabled());
+	local_irq_disable();
+	if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+		vcpu->pre_pcpu = vcpu->cpu;
+		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+		list_add_tail(&vcpu->blocked_vcpu_list,
+			      &per_cpu(blocked_vcpu_on_cpu,
+				       vcpu->pre_pcpu));
+		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+	}
 
 	do {
 		old.control = new.control = pi_desc->control;
 
-		/*
-		 * We should not block the vCPU if
-		 * an interrupt is posted for it.
-		 */
-		if (pi_test_on(pi_desc) == 1) {
-			spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-					  vcpu->pre_pcpu), flags);
-			list_del(&vcpu->blocked_vcpu_list);
-			spin_unlock_irqrestore(
-					&per_cpu(blocked_vcpu_on_cpu_lock,
-					vcpu->pre_pcpu), flags);
-			vcpu->pre_pcpu = -1;
-
-			return 1;
-		}
-
 		WARN((pi_desc->sn == 1),
 		     "Warning: SN field of posted-interrupts "
 		     "is set before blocking\n");
@@ -11819,7 +11801,12 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
 	} while (cmpxchg(&pi_desc->control, old.control,
 			new.control) != old.control);
 
-	return 0;
+	/* We should not block the vCPU if an interrupt is posted for it.  */
+	if (pi_test_on(pi_desc) == 1)
+		__pi_post_block(vcpu);
+
+	local_irq_enable();
+	return (vcpu->pre_pcpu == -1);
 }
 
 static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -11835,12 +11822,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
 
 static void pi_post_block(struct kvm_vcpu *vcpu)
 {
-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
-		!kvm_vcpu_apicv_active(vcpu))
+	if (vcpu->pre_pcpu == -1)
 		return;
 
+	WARN_ON(irqs_disabled());
+	local_irq_disable();
 	__pi_post_block(vcpu);
+	local_irq_enable();
 }
 
 static void vmx_post_block(struct kvm_vcpu *vcpu)
-- 
cgit 


From 31afb2ea2b10a7d17ce3db4cdb0a12b63b2fe08a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 6 Jun 2017 12:57:06 +0200
Subject: KVM: VMX: simplify and fix vmx_vcpu_pi_load
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The simplify part: do not touch pi_desc.nv, we can set it when the
VCPU is first created.  Likewise, pi_desc.sn is only handled by
vmx_vcpu_pi_load, do not touch it in __pi_post_block.

The fix part: do not check kvm_arch_has_assigned_device, instead
check the SN bit to figure out whether vmx_vcpu_pi_put ran before.
This matches what the previous patch did in pi_post_block.

Cc: Huangweidong <weidong.huang@huawei.com>
Cc: Gonglei <arei.gonglei@huawei.com>
Cc: wangxin <wangxinxin.wang@huawei.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Tested-by: Longpeng (Mike) <longpeng2@huawei.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 68 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0bfe97e50a40..b9d2140eb212 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2202,43 +2202,41 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 	struct pi_desc old, new;
 	unsigned int dest;
 
-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
-		!kvm_vcpu_apicv_active(vcpu))
+	/*
+	 * In case of hot-plug or hot-unplug, we may have to undo
+	 * vmx_vcpu_pi_put even if there is no assigned device.  And we
+	 * always keep PI.NDST up to date for simplicity: it makes the
+	 * code easier, and CPU migration is not a fast path.
+	 */
+	if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
+		return;
+
+	/*
+	 * First handle the simple case where no cmpxchg is necessary; just
+	 * allow posting non-urgent interrupts.
+	 *
+	 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+	 * PI.NDST: pi_post_block will do it for us and the wakeup_handler
+	 * expects the VCPU to be on the blocked_vcpu_list that matches
+	 * PI.NDST.
+	 */
+	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
+	    vcpu->cpu == cpu) {
+		pi_clear_sn(pi_desc);
 		return;
+	}
 
+	/* The full case.  */
 	do {
 		old.control = new.control = pi_desc->control;
 
-		/*
-		 * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
-		 * are two possible cases:
-		 * 1. After running 'pre_block', context switch
-		 *    happened. For this case, 'sn' was set in
-		 *    vmx_vcpu_put(), so we need to clear it here.
-		 * 2. After running 'pre_block', we were blocked,
-		 *    and woken up by some other guy. For this case,
-		 *    we don't need to do anything, 'pi_post_block'
-		 *    will do everything for us. However, we cannot
-		 *    check whether it is case #1 or case #2 here
-		 *    (maybe, not needed), so we also clear sn here,
-		 *    I think it is not a big deal.
-		 */
-		if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
-			if (vcpu->cpu != cpu) {
-				dest = cpu_physical_id(cpu);
-
-				if (x2apic_enabled())
-					new.ndst = dest;
-				else
-					new.ndst = (dest << 8) & 0xFF00;
-			}
+		dest = cpu_physical_id(cpu);
 
-			/* set 'NV' to 'notification vector' */
-			new.nv = POSTED_INTR_VECTOR;
-		}
+		if (x2apic_enabled())
+			new.ndst = dest;
+		else
+			new.ndst = (dest << 8) & 0xFF00;
 
-		/* Allow posting non-urgent interrupts */
 		new.sn = 0;
 	} while (cmpxchg(&pi_desc->control, old.control,
 			new.control) != old.control);
@@ -9592,6 +9590,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
 
+	/*
+	 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
+	 * or POSTED_INTR_WAKEUP_VECTOR.
+	 */
+	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+	vmx->pi_desc.sn = 1;
+
 	return &vmx->vcpu;
 
 free_vmcs:
@@ -11723,9 +11728,6 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
 		else
 			new.ndst = (dest << 8) & 0xFF00;
 
-		/* Allow posting non-urgent interrupts */
-		new.sn = 0;
-
 		/* set 'NV' to 'notification vector' */
 		new.nv = POSTED_INTR_VECTOR;
 	} while (cmpxchg(&pi_desc->control, old.control,
-- 
cgit 


From 7e439681af82984045efc215437ebb2ca8d33a4c Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Mon, 25 Sep 2017 10:19:57 +0200
Subject: mtd: Fix partition alignment check on multi-erasesize devices

Commit 1eeef2d7483a ("mtd: handle partitioning on devices with 0
erasesize") introduced a regression on heterogeneous erase region
devices. Alignment of the partition was tested against the master
eraseblock size which can be bigger than the slave one, thus leading
to some partitions being marked as read-only.

Update wr_alignment to match this slave erasesize after this erasesize
has been determined by picking the biggest erasesize of all the regions
embedded in the MTD partition.

Reported-by: Mathias Thore <Mathias.Thore@infinera.com>
Fixes: 1eeef2d7483a ("mtd: handle partitioning on devices with 0 erasesize")
Cc: <stable@vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Tested-by: Mathias Thore <Mathias.Thore@infinera.com>
Reviewed-by: Mathias Thore <Mathias.Thore@infinera.com>
---
 drivers/mtd/mtdpart.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 5736b0c90b33..a308e707392d 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -581,6 +581,14 @@ static struct mtd_part *allocate_partition(struct mtd_info *parent,
 		slave->mtd.erasesize = parent->erasesize;
 	}
 
+	/*
+	 * Slave erasesize might differ from the master one if the master
+	 * exposes several regions with different erasesize. Adjust
+	 * wr_alignment accordingly.
+	 */
+	if (!(slave->mtd.flags & MTD_NO_ERASE))
+		wr_alignment = slave->mtd.erasesize;
+
 	tmp = slave->offset;
 	remainder = do_div(tmp, wr_alignment);
 	if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {
-- 
cgit 


From 5c62c1c67903621cfa715d6f690548ee53301620 Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Mon, 25 Sep 2017 17:28:47 +0800
Subject: iommu/io-pgtable-arm-v7s: Need dma-sync while there is no
 QUIRK_NO_DMA

Fix the commit 81b3c2521844 ("iommu/io-pgtable: Introduce explicit
coherency"). If there is no IO_PGTABLE_QUIRK_NO_DMA, we should call
dma_sync_single_for_device for cache synchronization.

Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Fixes: 81b3c2521844 ('iommu/io-pgtable: Introduce explicit coherency')
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/io-pgtable-arm-v7s.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index d665d0dc16e8..6961fc393f0b 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -245,7 +245,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
 static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
 			       struct io_pgtable_cfg *cfg)
 {
-	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
+	if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)
 		return;
 
 	dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
-- 
cgit 


From 1ff9b17cedb39bc78f9e3f82485765f9b467177d Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Mon, 25 Sep 2017 18:15:26 +0800
Subject: iommu/mediatek: Limit the physical address in 32bit for v7s

The ARM short descriptor has already limited the physical address
to 32bit after the commit <76557391433c> ("iommu/io-pgtable: Sanitise
map/unmap addresses"). But in MediaTek 4GB mode, the physical address
is from 0x1_0000_0000 to 0x1_ffff_ffff. this will cause:

WARNING: CPU: 4 PID: 3900 at
xxx/drivers/iommu/io-pgtable-arm-v7s.c:482 arm_v7s_map+0x40/0xf8
Modules linked in:

CPU: 4 PID: 3900 Comm: weston Tainted: G S      W       4.9.44 #1
Hardware name: MediaTek MT2712m1v1 board (DT)
task: ffffffc0eaa5b280 task.stack: ffffffc0e9858000
PC is at arm_v7s_map+0x40/0xf8
LR is at mtk_iommu_map+0x64/0x90
pc : [<ffffff80085b09e8>] lr : [<ffffff80085b29fc>] pstate: 000001c5
sp : ffffffc0e985b920
x29: ffffffc0e985b920 x28: 0000000127d00000
x27: 0000000000100000 x26: ffffff8008f9e000
x25: 0000000000000003 x24: 0000000000100000
x23: 0000000127d00000 x22: 00000000ff800000
x21: ffffffc0f7ec8ce0 x20: 0000000000000003
x19: 0000000000000003 x18: 0000000000000002
x17: 0000007f7e5d72c0 x16: ffffff80082b0f08
x15: 0000000000000001 x14: 000000000000003f
x13: 0000000000000000 x12: 0000000000000028
x11: 0088000000000000 x10: 0000000000000000
x9 : ffffff80092fa000 x8 : ffffffc0e9858000
x7 : ffffff80085b29d8 x6 : 0000000000000000
x5 : ffffff80085b09a8 x4 : 0000000000000003
x3 : 0000000000100000 x2 : 0000000127d00000
x1 : 00000000ff800000 x0 : 0000000000000001
...
Call trace:
[<ffffff80085b09e8>] arm_v7s_map+0x40/0xf8
[<ffffff80085b29fc>] mtk_iommu_map+0x64/0x90
[<ffffff80085ab5f8>] iommu_map+0x100/0x3a0
[<ffffff80085ab99c>] default_iommu_map_sg+0x104/0x168
[<ffffff80085aead8>] iommu_dma_alloc+0x238/0x3f8
[<ffffff8008098b30>] __iommu_alloc_attrs+0xa8/0x260
[<ffffff80085f364c>] mtk_drm_gem_create+0xac/0x180
[<ffffff80085f3894>] mtk_drm_gem_dumb_create+0x54/0xc8
[<ffffff80085d576c>] drm_mode_create_dumb_ioctl+0xa4/0xd8
[<ffffff80085cb2a0>] drm_ioctl+0x1c0/0x490

In order to satify this, Limit the physical address to 32bit.

Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index bd515be5b380..16d33ac19db0 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -371,7 +371,8 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
 	int ret;
 
 	spin_lock_irqsave(&dom->pgtlock, flags);
-	ret = dom->iop->map(dom->iop, iova, paddr, size, prot);
+	ret = dom->iop->map(dom->iop, iova, paddr & DMA_BIT_MASK(32),
+			    size, prot);
 	spin_unlock_irqrestore(&dom->pgtlock, flags);
 
 	return ret;
-- 
cgit 


From 3c6bae62136ba5b24f0b113e68121b783457ca4b Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 26 Sep 2017 13:07:46 +0530
Subject: iommu/amd: pr_err() strings should end with newlines

pr_err() messages should end with a new-line to avoid other messages
being concatenated. So replace '/n' with '\n'.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Fixes: 45a01c42933b ('iommu/amd: Add function copy_dev_tables()')
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu_init.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 382de42b8359..6fe2d0346073 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -874,7 +874,7 @@ static bool copy_device_table(void)
 		hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
 		entry = (((u64) hi) << 32) + lo;
 		if (last_entry && last_entry != entry) {
-			pr_err("IOMMU:%d should use the same dev table as others!/n",
+			pr_err("IOMMU:%d should use the same dev table as others!\n",
 				iommu->index);
 			return false;
 		}
@@ -882,7 +882,7 @@ static bool copy_device_table(void)
 
 		old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
 		if (old_devtb_size != dev_table_size) {
-			pr_err("The device table size of IOMMU:%d is not expected!/n",
+			pr_err("The device table size of IOMMU:%d is not expected!\n",
 				iommu->index);
 			return false;
 		}
@@ -890,7 +890,7 @@ static bool copy_device_table(void)
 
 	old_devtb_phys = entry & PAGE_MASK;
 	if (old_devtb_phys >= 0x100000000ULL) {
-		pr_err("The address of old device table is above 4G, not trustworthy!/n");
+		pr_err("The address of old device table is above 4G, not trustworthy!\n");
 		return false;
 	}
 	old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
@@ -901,7 +901,7 @@ static bool copy_device_table(void)
 	old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
 				get_order(dev_table_size));
 	if (old_dev_tbl_cpy == NULL) {
-		pr_err("Failed to allocate memory for copying old device table!/n");
+		pr_err("Failed to allocate memory for copying old device table!\n");
 		return false;
 	}
 
-- 
cgit 


From 50ce6312f293e129eedf2affc7bd791c71d8287e Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Date: Tue, 26 Sep 2017 19:32:52 +0100
Subject: iommu: Fix comment for iommu_ops.map_sg

The definition of map_sg was split during a recent addition to iommu_ops.
Put it back together.

Fixes: add02cfdc9bc ("iommu: Introduce Interface for IOMMU TLB Flushing")
Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a7f2ac689d29..41b8c5757859 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -167,11 +167,11 @@ struct iommu_resv_region {
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @map_sg: map a scatter-gather list of physically contiguous memory chunks
+ *          to an iommu domain
  * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain
  * @tlb_range_add: Add a given iova range to the flush queue for this domain
  * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
  *            queue
- * to an iommu domain
  * @iova_to_phys: translate iova to physical address
  * @add_device: add device to iommu grouping
  * @remove_device: remove device from iommu grouping
-- 
cgit 


From df5efdd97029f2cff7e5c91ea1c9f2b94d009b0f Mon Sep 17 00:00:00 2001
From: Sebastian Sanchez <sebastian.sanchez@intel.com>
Date: Tue, 26 Sep 2017 06:05:57 -0700
Subject: IB/hfi1: Turn off AOC TX after offline substates

Offline.quietDuration was added in the 8051 firmware, and the driver
only turns off the AOC transmitters when offline.quiet is reached.
However, the AOC transmitters need to be turned off at the new state.
Therefore, turn off the AOC transmitters at any offline substates
including offline.quiet and offline.quietDuration, then recheck we
reached offline.quiet to support backwards compatibility.

Reviewed-by: Jakub Byczkowski <jakub.byczkowski@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/chip.c | 85 +++++++++++++++++++++++++++++----------
 drivers/infiniband/hw/hfi1/chip.h |  1 +
 2 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index b2ed4b9cda6e..1c810d65721a 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1066,6 +1066,8 @@ static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
 static int thermal_init(struct hfi1_devdata *dd);
 
 static void update_statusp(struct hfi1_pportdata *ppd, u32 state);
+static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
+					    int msecs);
 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
 				  int msecs);
 static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
@@ -10305,6 +10307,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 {
 	struct hfi1_devdata *dd = ppd->dd;
 	u32 previous_state;
+	int offline_state_ret;
 	int ret;
 
 	update_lcb_cache(dd);
@@ -10326,28 +10329,11 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 		ppd->offline_disabled_reason =
 		HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
 
-	/*
-	 * Wait for offline transition. It can take a while for
-	 * the link to go down.
-	 */
-	ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
-	if (ret < 0)
-		return ret;
-
-	/*
-	 * Now in charge of LCB - must be after the physical state is
-	 * offline.quiet and before host_link_state is changed.
-	 */
-	set_host_lcb_access(dd);
-	write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
-
-	/* make sure the logical state is also down */
-	ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
-	if (ret)
-		force_logical_link_state_down(ppd);
-
-	ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
+	offline_state_ret = wait_phys_link_offline_substates(ppd, 10000);
+	if (offline_state_ret < 0)
+		return offline_state_ret;
 
+	/* Disabling AOC transmitters */
 	if (ppd->port_type == PORT_TYPE_QSFP &&
 	    ppd->qsfp_info.limiting_active &&
 	    qsfp_mod_present(ppd)) {
@@ -10364,6 +10350,30 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 		}
 	}
 
+	/*
+	 * Wait for the offline.Quiet transition if it hasn't happened yet. It
+	 * can take a while for the link to go down.
+	 */
+	if (offline_state_ret != PLS_OFFLINE_QUIET) {
+		ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 30000);
+		if (ret < 0)
+			return ret;
+	}
+
+	/*
+	 * Now in charge of LCB - must be after the physical state is
+	 * offline.quiet and before host_link_state is changed.
+	 */
+	set_host_lcb_access(dd);
+	write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
+
+	/* make sure the logical state is also down */
+	ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
+	if (ret)
+		force_logical_link_state_down(ppd);
+
+	ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
+
 	/*
 	 * The LNI has a mandatory wait time after the physical state
 	 * moves to Offline.Quiet.  The wait time may be different
@@ -12804,6 +12814,39 @@ static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
 	return 0;
 }
 
+/*
+ * wait_phys_link_offline_quiet_substates - wait for any offline substate
+ * @ppd: port device
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for any offline physical link
+ * state change to occur.
+ * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT.
+ */
+static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
+					    int msecs)
+{
+	u32 read_state;
+	unsigned long timeout;
+
+	timeout = jiffies + msecs_to_jiffies(msecs);
+	while (1) {
+		read_state = read_physical_state(ppd->dd);
+		if ((read_state & 0xF0) == PLS_OFFLINE)
+			break;
+		if (time_after(jiffies, timeout)) {
+			dd_dev_err(ppd->dd,
+				   "timeout waiting for phy link offline.quiet substates. Read state 0x%x, %dms\n",
+				   read_state, msecs);
+			return -ETIMEDOUT;
+		}
+		usleep_range(1950, 2050); /* sleep 2ms-ish */
+	}
+
+	log_state_transition(ppd, read_state);
+	return read_state;
+}
+
 #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
 (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
 
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index b8345a60a0fb..461f937fe110 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -204,6 +204,7 @@
 #define PLS_OFFLINE_READY_TO_QUIET_LT	   0x92
 #define PLS_OFFLINE_REPORT_FAILURE		   0x93
 #define PLS_OFFLINE_READY_TO_QUIET_BCC	   0x94
+#define PLS_OFFLINE_QUIET_DURATION	   0x95
 #define PLS_POLLING				   0x20
 #define PLS_POLLING_QUIET			   0x20
 #define PLS_POLLING_ACTIVE			   0x21
-- 
cgit 


From 30e10527bcce376114e627abb7fabfbe9bfee91e Mon Sep 17 00:00:00 2001
From: Sebastian Sanchez <sebastian.sanchez@intel.com>
Date: Tue, 26 Sep 2017 06:06:03 -0700
Subject: IB/hfi1: Only reset QSFP after link up and turn off AOC TX

QSFP reset enables AOC transmitters by default. They should be off
before moving to high power mode to complete the setup. There is no
need to reset the QSFP during LNI failure as it was reset at link down.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Jakub Byczkowski <jakub.byczkowski@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/chip.c     | 12 +++++++++++-
 drivers/infiniband/hw/hfi1/chip.h     |  2 +-
 drivers/infiniband/hw/hfi1/platform.c |  4 +++-
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 1c810d65721a..27b75a8f5097 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -9415,7 +9415,7 @@ static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
 	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
 }
 
-void reset_qsfp(struct hfi1_pportdata *ppd)
+int reset_qsfp(struct hfi1_pportdata *ppd)
 {
 	struct hfi1_devdata *dd = ppd->dd;
 	u64 mask, qsfp_mask;
@@ -9445,6 +9445,13 @@ void reset_qsfp(struct hfi1_pportdata *ppd)
 	 * for alarms and warnings
 	 */
 	set_qsfp_int_n(ppd, 1);
+
+	/*
+	 * After the reset, AOC transmitters are enabled by default. They need
+	 * to be turned off to complete the QSFP setup before they can be
+	 * enabled again.
+	 */
+	return set_qsfp_tx(ppd, 0);
 }
 
 static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
@@ -10406,6 +10413,9 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 			& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
 		/* went down while attempting link up */
 		check_lni_states(ppd);
+
+		/* The QSFP doesn't need to be reset on LNI failure */
+		ppd->qsfp_info.reset_needed = 0;
 	}
 
 	/* the active link width (downgrade) is 0 on link down */
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 461f937fe110..50b8645d0b87 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -723,7 +723,7 @@ void handle_link_downgrade(struct work_struct *work);
 void handle_link_bounce(struct work_struct *work);
 void handle_start_link(struct work_struct *work);
 void handle_sma_message(struct work_struct *work);
-void reset_qsfp(struct hfi1_pportdata *ppd);
+int reset_qsfp(struct hfi1_pportdata *ppd);
 void qsfp_event(struct work_struct *work);
 void start_freeze_handling(struct hfi1_pportdata *ppd, int flags);
 int send_idle_sma(struct hfi1_devdata *dd, u64 message);
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index a8af96d2b1b0..d486355880cb 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -790,7 +790,9 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
 	 * reuse of stale settings established in our previous pass through.
 	 */
 	if (ppd->qsfp_info.reset_needed) {
-		reset_qsfp(ppd);
+		ret = reset_qsfp(ppd);
+		if (ret)
+			return ret;
 		refresh_qsfp_cache(ppd, &ppd->qsfp_info);
 	} else {
 		ppd->qsfp_info.reset_needed = 1;
-- 
cgit 


From 753b19afb19dd97d0767df5e8afb13faff605315 Mon Sep 17 00:00:00 2001
From: Jan Sokolowski <jan.sokolowski@intel.com>
Date: Tue, 26 Sep 2017 06:06:09 -0700
Subject: IB/hfi1: Check eeprom config partition validity

Relying on a trailing magic value is incorrect. There are instances where
this is not present as trailing magic value has a specific purpose which is
not partition validation. Instead use the header magic value which is
present in all variants of the platform configuration and is intended for
validation. This is also used in other locations in the driver.

Fixes: bc5214ee2922 (IB/hfi1: Handle missing magic values in config file)
Reviewed-by: Jakub Byczkowski <jakub.byczkowski@intel.com>
Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/eprom.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index d46b17107901..1613af1c58d9 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -204,7 +204,10 @@ done_asic:
 	return ret;
 }
 
-/* magic character sequence that trails an image */
+/* magic character sequence that begins an image */
+#define IMAGE_START_MAGIC "APO="
+
+/* magic character sequence that might trail an image */
 #define IMAGE_TRAIL_MAGIC "egamiAPO"
 
 /* EPROM file types */
@@ -250,6 +253,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
 {
 	void *buffer;
 	void *p;
+	u32 length;
 	int ret;
 
 	buffer = kmalloc(P1_SIZE, GFP_KERNEL);
@@ -262,15 +266,21 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
 		return ret;
 	}
 
-	/* scan for image magic that may trail the actual data */
-	p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
-	if (!p) {
+	/* config partition is valid only if it starts with IMAGE_START_MAGIC */
+	if (memcmp(buffer, IMAGE_START_MAGIC, strlen(IMAGE_START_MAGIC))) {
 		kfree(buffer);
 		return -ENOENT;
 	}
 
+	/* scan for image magic that may trail the actual data */
+	p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
+	if (p)
+		length = p - buffer;
+	else
+		length = P1_SIZE;
+
 	*data = buffer;
-	*size = p - buffer;
+	*size = length;
 	return 0;
 }
 
-- 
cgit 


From 09592af5fdd722615ebe435fb34308de26c74bcf Mon Sep 17 00:00:00 2001
From: Kamenee Arumugam <kamenee.arumugam@intel.com>
Date: Tue, 26 Sep 2017 06:06:15 -0700
Subject: IB/hfi1: Return correct value in general interrupt handler

The general interrupt handler returns IRQ_HANDLED whether an IRQ
was handled or not.
Determine if an IRQ was handled and return the correct value.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Kamenee Arumugam <kamenee.arumugam@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/chip.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 27b75a8f5097..0be42787759f 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -8240,6 +8240,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
 	u64 regs[CCE_NUM_INT_CSRS];
 	u32 bit;
 	int i;
+	irqreturn_t handled = IRQ_NONE;
 
 	this_cpu_inc(*dd->int_counter);
 
@@ -8260,9 +8261,10 @@ static irqreturn_t general_interrupt(int irq, void *data)
 	for_each_set_bit(bit, (unsigned long *)&regs[0],
 			 CCE_NUM_INT_CSRS * 64) {
 		is_interrupt(dd, bit);
+		handled = IRQ_HANDLED;
 	}
 
-	return IRQ_HANDLED;
+	return handled;
 }
 
 static irqreturn_t sdma_interrupt(int irq, void *data)
-- 
cgit 


From 612601d0013f03de9dc134809f242ba6da9ca252 Mon Sep 17 00:00:00 2001
From: Alex Estrin <alex.estrin@intel.com>
Date: Tue, 26 Sep 2017 06:06:22 -0700
Subject: Revert "IB/ipoib: Update broadcast object if PKey value was changed
 in index 0"

commit 9a9b8112699d will cause core to fail UD QP from being destroyed
on ipoib unload, therefore cause resources leakage.
On pkey change event above patch modifies mgid before calling underlying
driver to detach it from QP. Drivers' detach_mcast() will fail to find
modified mgid it was never given to attach in a first place.
Core qp->usecnt will never go down, so ib_destroy_qp() will fail.

IPoIB driver actually does take care of new broadcast mgid based on new
pkey by destroying an old mcast object in ipoib_mcast_dev_flush())
....
	if (priv->broadcast) {
		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
		list_add_tail(&priv->broadcast->list, &remove_list);
		priv->broadcast = NULL;
	}
...

then in restarted ipoib_macst_join_task() creating a new broadcast mcast
object, sending join request and on completion tells the driver to attach
to reinitialized QP:
...
if (!priv->broadcast) {
...
	broadcast = ipoib_mcast_alloc(dev, 0);
...
	memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
	       sizeof (union ib_gid));
	priv->broadcast = broadcast;
...

Fixes: 9a9b8112699d ("IB/ipoib: Update broadcast object if PKey value was changed in index 0")
Cc: stable@vger.kernel.org
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Alex Estrin <alex.estrin@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_ib.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 2e075377242e..6cd61638b441 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1000,19 +1000,6 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
 		 */
 		priv->dev->broadcast[8] = priv->pkey >> 8;
 		priv->dev->broadcast[9] = priv->pkey & 0xff;
-
-		/*
-		 * Update the broadcast address in the priv->broadcast object,
-		 * in case it already exists, otherwise no one will do that.
-		 */
-		if (priv->broadcast) {
-			spin_lock_irq(&priv->lock);
-			memcpy(priv->broadcast->mcmember.mgid.raw,
-			       priv->dev->broadcast + 4,
-			sizeof(union ib_gid));
-			spin_unlock_irq(&priv->lock);
-		}
-
 		return 0;
 	}
 
-- 
cgit 


From b8f42738acaddf67731c34935c0994e09a588ca7 Mon Sep 17 00:00:00 2001
From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Date: Tue, 26 Sep 2017 06:06:28 -0700
Subject: IB/hfi1: On error, fix use after free during user context setup

During base context setup, if setup_base_ctxt() fails, the context is
deallocated. This is incorrect because the context is referenced on
return, to notify any waiting subcontext.  If there are no subcontexts
the pointer will be invalid.

Reorganize the error path so that deallocate_ctxt() is called after all
the possible subcontexts have been notified.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/file_ops.c | 41 +++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 2bc89260235a..d9a1e9893136 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -930,15 +930,8 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
 	switch (ret) {
 	case 0:
 		ret = setup_base_ctxt(fd, uctxt);
-		if (uctxt->subctxt_cnt) {
-			/*
-			 * Base context is done (successfully or not), notify
-			 * anybody using a sub-context that is waiting for
-			 * this completion.
-			 */
-			clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
-			wake_up(&uctxt->wait);
-		}
+		if (ret)
+			deallocate_ctxt(uctxt);
 		break;
 	case 1:
 		ret = complete_subctxt(fd);
@@ -1305,25 +1298,25 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
 	/* Now allocate the RcvHdr queue and eager buffers. */
 	ret = hfi1_create_rcvhdrq(dd, uctxt);
 	if (ret)
-		return ret;
+		goto done;
 
 	ret = hfi1_setup_eagerbufs(uctxt);
 	if (ret)
-		goto setup_failed;
+		goto done;
 
 	/* If sub-contexts are enabled, do the appropriate setup */
 	if (uctxt->subctxt_cnt)
 		ret = setup_subctxt(uctxt);
 	if (ret)
-		goto setup_failed;
+		goto done;
 
 	ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
 	if (ret)
-		goto setup_failed;
+		goto done;
 
 	ret = init_user_ctxt(fd, uctxt);
 	if (ret)
-		goto setup_failed;
+		goto done;
 
 	user_init(uctxt);
 
@@ -1331,12 +1324,22 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
 	fd->uctxt = uctxt;
 	hfi1_rcd_get(uctxt);
 
-	return 0;
+done:
+	if (uctxt->subctxt_cnt) {
+		/*
+		 * On error, set the failed bit so sub-contexts will clean up
+		 * correctly.
+		 */
+		if (ret)
+			set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
 
-setup_failed:
-	/* Set the failed bit so sub-context init can do the right thing */
-	set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
-	deallocate_ctxt(uctxt);
+		/*
+		 * Base context is done (successfully or not), notify anybody
+		 * using a sub-context that is waiting for this completion.
+		 */
+		clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
+		wake_up(&uctxt->wait);
+	}
 
 	return ret;
 }
-- 
cgit 


From 828bcbdc975fbcfb27946c33d4b1d1bfab70789b Mon Sep 17 00:00:00 2001
From: Harish Chegondi <harish.chegondi@intel.com>
Date: Tue, 26 Sep 2017 06:06:34 -0700
Subject: IB/hfi1: Unsuccessful PCIe caps tuning should not fail driver load

Failure to tune PCIe capabilities should not fail driver load. This can
cause the driver load to fail on systems with any of the following:
1. HFI's parent is not root. Example: HFI card is behind a PCIe bridge.
2. HFI's parent is not PCI Express capable.
In these situations, failure to tune PCIe capabilities should be logged
in the system message logs but not cause the driver load to fail.

This patch also ensures pcie capability word DevCtl is written only
after a successful read and the capability tuning process continues
even if read/write of the pcie capability word DevCtl fails.

Fixes: c53df62c7a9a ("IB/hfi1: Check return values from PCI config API calls")
Fixes: bf70a7757736 ("staging/rdma/hfi1: Enable WFR PCIe extended tags from the driver")
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Jakub Byczkowski <jakub.byczkowski@intel.com>
Signed-off-by: Harish Chegondi <harish.chegondi@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/pcie.c | 50 ++++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 29 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 82447b7cdda1..09e50fd2a08f 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -68,7 +68,7 @@
 /*
  * Code to adjust PCIe capabilities.
  */
-static int tune_pcie_caps(struct hfi1_devdata *);
+static void tune_pcie_caps(struct hfi1_devdata *);
 
 /*
  * Do all the common PCIe setup and initialization.
@@ -351,7 +351,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
  */
 int request_msix(struct hfi1_devdata *dd, u32 msireq)
 {
-	int nvec, ret;
+	int nvec;
 
 	nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq,
 				     PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
@@ -360,12 +360,7 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq)
 		return nvec;
 	}
 
-	ret = tune_pcie_caps(dd);
-	if (ret) {
-		dd_dev_err(dd, "tune_pcie_caps() failed: %d\n", ret);
-		pci_free_irq_vectors(dd->pcidev);
-		return ret;
-	}
+	tune_pcie_caps(dd);
 
 	/* check for legacy IRQ */
 	if (nvec == 1 && !dd->pcidev->msix_enabled)
@@ -502,7 +497,7 @@ uint aspm_mode = ASPM_MODE_DISABLED;
 module_param_named(aspm, aspm_mode, uint, S_IRUGO);
 MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
 
-static int tune_pcie_caps(struct hfi1_devdata *dd)
+static void tune_pcie_caps(struct hfi1_devdata *dd)
 {
 	struct pci_dev *parent;
 	u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
@@ -513,22 +508,14 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
 	 * Turn on extended tags in DevCtl in case the BIOS has turned it off
 	 * to improve WFR SDMA bandwidth
 	 */
-	ret = pcie_capability_read_word(dd->pcidev,
-					PCI_EXP_DEVCTL, &ectl);
-	if (ret) {
-		dd_dev_err(dd, "Unable to read from PCI config\n");
-		return ret;
-	}
-
-	if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
+	ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
+	if ((!ret) && !(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
 		dd_dev_info(dd, "Enabling PCIe extended tags\n");
 		ectl |= PCI_EXP_DEVCTL_EXT_TAG;
 		ret = pcie_capability_write_word(dd->pcidev,
 						 PCI_EXP_DEVCTL, ectl);
-		if (ret) {
-			dd_dev_err(dd, "Unable to write to PCI config\n");
-			return ret;
-		}
+		if (ret)
+			dd_dev_info(dd, "Unable to write to PCI config\n");
 	}
 	/* Find out supported and configured values for parent (root) */
 	parent = dd->pcidev->bus->self;
@@ -536,15 +523,22 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
 	 * The driver cannot perform the tuning if it does not have
 	 * access to the upstream component.
 	 */
-	if (!parent)
-		return -EINVAL;
+	if (!parent) {
+		dd_dev_info(dd, "Parent not found\n");
+		return;
+	}
 	if (!pci_is_root_bus(parent->bus)) {
 		dd_dev_info(dd, "Parent not root\n");
-		return -EINVAL;
+		return;
+	}
+	if (!pci_is_pcie(parent)) {
+		dd_dev_info(dd, "Parent is not PCI Express capable\n");
+		return;
+	}
+	if (!pci_is_pcie(dd->pcidev)) {
+		dd_dev_info(dd, "PCI device is not PCI Express capable\n");
+		return;
 	}
-
-	if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
-		return -EINVAL;
 	rc_mpss = parent->pcie_mpss;
 	rc_mps = ffs(pcie_get_mps(parent)) - 8;
 	/* Find out supported and configured values for endpoint (us) */
@@ -590,8 +584,6 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
 		ep_mrrs = max_mrrs;
 		pcie_set_readrq(dd->pcidev, ep_mrrs);
 	}
-
-	return 0;
 }
 
 /* End of PCIe capability tuning */
-- 
cgit 


From 36de80740008e6a4a55115b4a92e2059e47c1cba Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Wed, 27 Sep 2017 14:49:17 +0200
Subject: mtd: nand: atmel: fix buffer overflow in atmel_pmecc_user

When calculating the size needed by struct atmel_pmecc_user *user,
the dmu and delta buffer sizes were forgotten.
This lead to a memory corruption (especially with a large ecc_strength).

Link: http://lkml.kernel.org/r/1506503157.3016.5.camel@gmail.com
Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver")
Cc: stable@vger.kernel.org
Reported-by: Richard Genoud <richard.genoud@gmail.com>
Pointed-at-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Reviewed-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/atmel/pmecc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/atmel/pmecc.c b/drivers/mtd/nand/atmel/pmecc.c
index 146af8218314..8268636675ef 100644
--- a/drivers/mtd/nand/atmel/pmecc.c
+++ b/drivers/mtd/nand/atmel/pmecc.c
@@ -363,7 +363,7 @@ atmel_pmecc_create_user(struct atmel_pmecc *pmecc,
 	size += (req->ecc.strength + 1) * sizeof(u16);
 	/* Reserve space for mu, dmu and delta. */
 	size = ALIGN(size, sizeof(s32));
-	size += (req->ecc.strength + 1) * sizeof(s32);
+	size += (req->ecc.strength + 1) * sizeof(s32) * 3;
 
 	user = kzalloc(size, GFP_KERNEL);
 	if (!user)
-- 
cgit 


From aaf2c2fb0f51f91c699039440862b6ae9c25c10e Mon Sep 17 00:00:00 2001
From: Tyler Baicar <tbaicar@codeaurora.org>
Date: Mon, 28 Aug 2017 10:53:41 -0600
Subject: ACPI / APEI: clear error status before acknowledging the error

Currently we acknowledge errors before clearing the error status.
This could cause a new error to be populated by firmware in-between
the error acknowledgment and the error status clearing which would
cause the second error's status to be cleared without being handled.
So, clear the error status before acknowledging the errors.

Also, make sure to acknowledge the error if the error status read
fails.

Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/ghes.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 077f9bad6f44..3c3a37b8503b 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -743,17 +743,19 @@ static int ghes_proc(struct ghes *ghes)
 	}
 	ghes_do_proc(ghes, ghes->estatus);
 
+out:
+	ghes_clear_estatus(ghes);
+
+	if (rc == -ENOENT)
+		return rc;
+
 	/*
 	 * GHESv2 type HEST entries introduce support for error acknowledgment,
 	 * so only acknowledge the error if this support is present.
 	 */
-	if (is_hest_type_generic_v2(ghes)) {
-		rc = ghes_ack_error(ghes->generic_v2);
-		if (rc)
-			return rc;
-	}
-out:
-	ghes_clear_estatus(ghes);
+	if (is_hest_type_generic_v2(ghes))
+		return ghes_ack_error(ghes->generic_v2);
+
 	return rc;
 }
 
-- 
cgit 


From 8aba2333904f9b1c1ea038df261bf7ae8fefb98e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 28 Sep 2017 02:08:43 +0200
Subject: cpufreq: docs: Drop intel-pstate.txt from index.txt

Commit 33fc30b47098 (cpufreq: intel_pstate: Document the current
behavior and user interface) dropped the intel-pstate.txt file
from Documentation/cpu-freq/, but it did not update the index.txt
file in there accordingly, so do that now.

Fixes: 33fc30b47098 (cpufreq: intel_pstate: Document the current behavior and user interface)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/cpu-freq/index.txt | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Documentation/cpu-freq/index.txt b/Documentation/cpu-freq/index.txt
index 03a7cee6ac73..c15e75386a05 100644
--- a/Documentation/cpu-freq/index.txt
+++ b/Documentation/cpu-freq/index.txt
@@ -32,8 +32,6 @@ cpufreq-stats.txt -	General description of sysfs cpufreq stats.
 
 index.txt	-	File index, Mailing list and Links (this document)
 
-intel-pstate.txt -	Intel pstate cpufreq driver specific file.
-
 pcc-cpufreq.txt -	PCC cpufreq driver specific file.
 
 
-- 
cgit 


From d1b490939d8c117a06dfc562c41d933f71d30289 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@linux.vnet.ibm.com>
Date: Tue, 19 Sep 2017 12:11:55 -0300
Subject: scsi: aacraid: Add a small delay after IOP reset

Commit 0e9973ed3382 ("scsi: aacraid: Add periodic checks to see IOP reset
status") changed the way driver checks if a reset succeeded. Now, after an
IOP reset, aacraid immediately start polling a register to verify the reset
is complete.

This behavior cause regressions on the reset path in PowerPC (at least).
Since the delay after the IOP reset was removed by the aforementioned patch,
the fact driver just starts to read a register instantly after the reset
was issued (by writing in another register) "corrupts" the reset procedure,
which ends up failing all the time.

The issue highly impacted kdump on PowerPC, since on kdump path we
proactively issue a reset in adapter (through the reset_devices kernel
parameter).

This patch (re-)adds a delay right after IOP reset is issued. Empirically
we measured that 3 seconds is enough, but for safety reasons we delay
for 5s (and since it was 30s before, 5s is still a small amount).

For reference, without this patch we observe the following messages
on kdump kernel boot process:

  [ 76.294] aacraid 0003:01:00.0: IOP reset failed
  [ 76.294] aacraid 0003:01:00.0: ARC Reset attempt failed
  [ 86.524] aacraid 0003:01:00.0: adapter kernel panic'd ff.
  [ 86.524] aacraid 0003:01:00.0: Controller reset type is 3
  [ 86.524] aacraid 0003:01:00.0: Issuing IOP reset
  [146.534] aacraid 0003:01:00.0: IOP reset failed
  [146.534] aacraid 0003:01:00.0: ARC Reset attempt failed

Fixes: 0e9973ed3382 ("scsi: aacraid: Add periodic checks to see IOP reset status")
Cc: stable@vger.kernel.org # v4.13+
Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Acked-by: Dave Carroll <david.carroll@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aacraid/src.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 48c2b2b34b72..0c9361c87ec8 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -740,6 +740,8 @@ static void aac_send_iop_reset(struct aac_dev *dev)
 	aac_set_intx_mode(dev);
 
 	src_writel(dev, MUnit.IDR, IOP_SRC_RESET_MASK);
+
+	msleep(5000);
 }
 
 static void aac_send_hardware_soft_reset(struct aac_dev *dev)
-- 
cgit 


From d0b7a9095c0730b92a0a2eecaba2e6b77ed87339 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Wed, 27 Sep 2017 14:44:19 +0200
Subject: scsi: ILLEGAL REQUEST + ASC==27 => target failure

ASC 0x27 is "WRITE PROTECTED". This error code is returned e.g.  by
Fujitsu ETERNUS systems under certain conditions for WRITE SAME 16
commands with UNMAP bit set. It should not be treated as a path
error. In general, it makes sense to assume that being write protected
is a target rather than a path property.

Signed-off-by: Martin Wilck <mwilck@suse.com>
Acked-by: Lee Duncan <lduncan@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_error.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 38942050b265..dab876c65473 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -580,7 +580,8 @@ int scsi_check_sense(struct scsi_cmnd *scmd)
 		if (sshdr.asc == 0x20 || /* Invalid command operation code */
 		    sshdr.asc == 0x21 || /* Logical block address out of range */
 		    sshdr.asc == 0x24 || /* Invalid field in cdb */
-		    sshdr.asc == 0x26) { /* Parameter value invalid */
+		    sshdr.asc == 0x26 || /* Parameter value invalid */
+		    sshdr.asc == 0x27) { /* Write protected */
 			set_host_byte(scmd, DID_TARGET_FAILURE);
 		}
 		return SUCCESS;
-- 
cgit 


From 393debc23c7820211d1c8253dd6a8408a7628fe7 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 21 Sep 2017 10:23:35 -0700
Subject: md: separate request handling

With commit cc27b0c78c79, pers->make_request could bail out without handling
the bio. If that happens, we should retry.  The commit fixes md_make_request
but not other call sites. Separate the request handling part, so other call
sites can use it.

Reported-by: Nate Dailey <nate.dailey@stratus.com>
Fix: cc27b0c78c79(md: fix deadlock between mddev_suspend() and md_write_start())
Cc: stable@vger.kernel.org
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c | 58 ++++++++++++++++++++++++++++++++-------------------------
 drivers/md/md.h |  1 +
 2 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 08fcaebc61bd..1db1a22ed835 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -266,6 +266,37 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
  * call has finished, the bio has been linked into some internal structure
  * and so is visible to ->quiesce(), so we don't need the refcount any more.
  */
+void md_handle_request(struct mddev *mddev, struct bio *bio)
+{
+check_suspended:
+	rcu_read_lock();
+	if (mddev->suspended) {
+		DEFINE_WAIT(__wait);
+		for (;;) {
+			prepare_to_wait(&mddev->sb_wait, &__wait,
+					TASK_UNINTERRUPTIBLE);
+			if (!mddev->suspended)
+				break;
+			rcu_read_unlock();
+			schedule();
+			rcu_read_lock();
+		}
+		finish_wait(&mddev->sb_wait, &__wait);
+	}
+	atomic_inc(&mddev->active_io);
+	rcu_read_unlock();
+
+	if (!mddev->pers->make_request(mddev, bio)) {
+		atomic_dec(&mddev->active_io);
+		wake_up(&mddev->sb_wait);
+		goto check_suspended;
+	}
+
+	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
+		wake_up(&mddev->sb_wait);
+}
+EXPORT_SYMBOL(md_handle_request);
+
 static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
@@ -285,23 +316,6 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 		bio_endio(bio);
 		return BLK_QC_T_NONE;
 	}
-check_suspended:
-	rcu_read_lock();
-	if (mddev->suspended) {
-		DEFINE_WAIT(__wait);
-		for (;;) {
-			prepare_to_wait(&mddev->sb_wait, &__wait,
-					TASK_UNINTERRUPTIBLE);
-			if (!mddev->suspended)
-				break;
-			rcu_read_unlock();
-			schedule();
-			rcu_read_lock();
-		}
-		finish_wait(&mddev->sb_wait, &__wait);
-	}
-	atomic_inc(&mddev->active_io);
-	rcu_read_unlock();
 
 	/*
 	 * save the sectors now since our bio can
@@ -310,20 +324,14 @@ check_suspended:
 	sectors = bio_sectors(bio);
 	/* bio could be mergeable after passing to underlayer */
 	bio->bi_opf &= ~REQ_NOMERGE;
-	if (!mddev->pers->make_request(mddev, bio)) {
-		atomic_dec(&mddev->active_io);
-		wake_up(&mddev->sb_wait);
-		goto check_suspended;
-	}
+
+	md_handle_request(mddev, bio);
 
 	cpu = part_stat_lock();
 	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
 	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
 	part_stat_unlock();
 
-	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
-		wake_up(&mddev->sb_wait);
-
 	return BLK_QC_T_NONE;
 }
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 561d22b9a9a8..d8287d3cd1bf 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -692,6 +692,7 @@ extern void md_stop_writes(struct mddev *mddev);
 extern int md_rdev_init(struct md_rdev *rdev);
 extern void md_rdev_clear(struct md_rdev *rdev);
 
+extern void md_handle_request(struct mddev *mddev, struct bio *bio);
 extern void mddev_suspend(struct mddev *mddev);
 extern void mddev_resume(struct mddev *mddev);
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
-- 
cgit 


From 79bf31a3b2a7ca467cfec8ff97d359a77065d01f Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 21 Sep 2017 09:55:28 -0700
Subject: md: fix a race condition for flush request handling

md_submit_flush_data calls pers->make_request, which missed the suspend check.
Fix it with the new md_handle_request API.

Reported-by: Nate Dailey <nate.dailey@stratus.com>
Tested-by: Nate Dailey <nate.dailey@stratus.com>
Fix: cc27b0c78c79(md: fix deadlock between mddev_suspend() and md_write_start())
Cc: stable@vger.kernel.org
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1db1a22ed835..0ff1bbf6c90e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -447,16 +447,22 @@ static void md_submit_flush_data(struct work_struct *ws)
 	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
 	struct bio *bio = mddev->flush_bio;
 
+	/*
+	 * must reset flush_bio before calling into md_handle_request to avoid a
+	 * deadlock, because other bios passed md_handle_request suspend check
+	 * could wait for this and below md_handle_request could wait for those
+	 * bios because of suspend check
+	 */
+	mddev->flush_bio = NULL;
+	wake_up(&mddev->sb_wait);
+
 	if (bio->bi_iter.bi_size == 0)
 		/* an empty barrier - all done */
 		bio_endio(bio);
 	else {
 		bio->bi_opf &= ~REQ_PREFLUSH;
-		mddev->pers->make_request(mddev, bio);
+		md_handle_request(mddev, bio);
 	}
-
-	mddev->flush_bio = NULL;
-	wake_up(&mddev->sb_wait);
 }
 
 void md_flush_request(struct mddev *mddev, struct bio *bio)
-- 
cgit 


From c4d6a1b8e8ea79c439a4871cba540443c9eb13b9 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 21 Sep 2017 10:29:22 -0700
Subject: dm-raid: fix a race condition in request handling

raid_map calls pers->make_request, which missed the suspend check. Fix it with
the new md_handle_request API.

Fix: cc27b0c78c79(md: fix deadlock between mddev_suspend() and md_write_start())
Cc: Heinz Mauelshagen <heinzm@redhat.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/dm-raid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 5bfe285ea9d1..1ac58c5651b7 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3238,7 +3238,7 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
 	if (unlikely(bio_end_sector(bio) > mddev->array_sectors))
 		return DM_MAPIO_REQUEUE;
 
-	mddev->pers->make_request(mddev, bio);
+	md_handle_request(mddev, bio);
 
 	return DM_MAPIO_SUBMITTED;
 }
-- 
cgit 


From 7d5d7b5058fbd638914e42504677141a69f43011 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 21 Sep 2017 11:03:52 -0700
Subject: md/raid5: cap worker count

static checker reports a potential integer overflow. Cap the worker count to
avoid the overflow.

Reported:-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 076409455b60..928e24a07133 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6575,14 +6575,17 @@ static ssize_t
 raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
 {
 	struct r5conf *conf;
-	unsigned long new;
+	unsigned int new;
 	int err;
 	struct r5worker_group *new_groups, *old_groups;
 	int group_cnt, worker_cnt_per_group;
 
 	if (len >= PAGE_SIZE)
 		return -EINVAL;
-	if (kstrtoul(page, 10, &new))
+	if (kstrtouint(page, 10, &new))
+		return -EINVAL;
+	/* 8192 should be big enough */
+	if (new > 8192)
 		return -EINVAL;
 
 	err = mddev_lock(mddev);
-- 
cgit 


From da541b20021c781f8b65492eeaee824e729599eb Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 27 Sep 2017 17:34:23 -0500
Subject: objtool: Skip unreachable warnings for GCC 4.4 and older

The kbuild bot occasionally reports warnings like:

  drivers/scsi/pcmcia/aha152x_core.o: warning: objtool: seldo_run()+0x130: unreachable instruction

These warnings are always with GCC 4.4.  That version of GCC sometimes
places unreachable instructions after calls to noreturn functions.

The unreachable warnings aren't very important anyway.  Just ignore them
for old versions of GCC.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/bc89b807d965b98ec18a0bb94f96a594bd58f2f2.1506551639.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 scripts/Makefile.build | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 2e3a10e79ca9..061d0c3a420a 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -265,6 +265,8 @@ objtool_args += --no-fp
 endif
 ifdef CONFIG_GCOV_KERNEL
 objtool_args += --no-unreachable
+else
+objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
 endif
 
 # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
-- 
cgit 


From 607a4029d439cdfa258aff5da32bb9cd6ed1a66d Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 27 Sep 2017 10:36:38 -0500
Subject: objtool: Support unoptimized frame pointer setup

Arnd Bergmann reported a bunch of warnings like:

  crypto/jitterentropy.o: warning: objtool: jent_fold_time()+0x3b: call without frame pointer save/setup
  crypto/jitterentropy.o: warning: objtool: jent_stuck()+0x1d: call without frame pointer save/setup
  crypto/jitterentropy.o: warning: objtool: jent_unbiased_bit()+0x15: call without frame pointer save/setup
  crypto/jitterentropy.o: warning: objtool: jent_read_entropy()+0x32: call without frame pointer save/setup
  crypto/jitterentropy.o: warning: objtool: jent_entropy_collector_free()+0x19: call without frame pointer save/setup

and

  arch/x86/events/core.o: warning: objtool: collect_events uses BP as a scratch register
  arch/x86/events/core.o: warning: objtool: events_ht_sysfs_show()+0x22: call without frame pointer save/setup

With certain rare configurations, GCC sometimes sets up the frame
pointer with:

  lea    (%rsp),%rbp

instead of:

  mov    %rsp,%rbp

The instructions are equivalent, so treat the former like the latter.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/a468af8b28a69b83fffc6d7668be9b6fcc873699.1506526584.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/arch/x86/decode.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 0f22768c0d4d..34a579f806e3 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -284,11 +284,16 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 	case 0x8d:
 		if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
 
-			/* lea disp(%rsp), reg */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_ADD;
+			if (!insn.displacement.value) {
+				/* lea (%rsp), reg */
+				op->src.type = OP_SRC_REG;
+			} else {
+				/* lea disp(%rsp), reg */
+				op->src.type = OP_SRC_ADD;
+				op->src.offset = insn.displacement.value;
+			}
 			op->src.reg = CFI_SP;
-			op->src.offset = insn.displacement.value;
 			op->dest.type = OP_DEST_REG;
 			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
 
-- 
cgit 


From 66a733ea6b611aecf0119514d2dddab5f9d6c01e Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 27 Sep 2017 09:25:30 -0600
Subject: seccomp: fix the usage of get/put_seccomp_filter() in
 seccomp_get_filter()

As Chris explains, get_seccomp_filter() and put_seccomp_filter() can end
up using different filters. Once we drop ->siglock it is possible for
task->seccomp.filter to have been replaced by SECCOMP_FILTER_FLAG_TSYNC.

Fixes: f8e529ed941b ("seccomp, ptrace: add support for dumping seccomp filters")
Reported-by: Chris Salls <chrissalls5@gmail.com>
Cc: stable@vger.kernel.org # needs s/refcount_/atomic_/ for v4.12 and earlier
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
[tycho: add __get_seccomp_filter vs. open coding refcount_inc()]
Signed-off-by: Tycho Andersen <tycho@docker.com>
[kees: tweak commit log]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/seccomp.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index c24579dfa7a1..bb3a38005b9c 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -473,14 +473,19 @@ static long seccomp_attach_filter(unsigned int flags,
 	return 0;
 }
 
+void __get_seccomp_filter(struct seccomp_filter *filter)
+{
+	/* Reference count is bounded by the number of total processes. */
+	refcount_inc(&filter->usage);
+}
+
 /* get_seccomp_filter - increments the reference count of the filter on @tsk */
 void get_seccomp_filter(struct task_struct *tsk)
 {
 	struct seccomp_filter *orig = tsk->seccomp.filter;
 	if (!orig)
 		return;
-	/* Reference count is bounded by the number of total processes. */
-	refcount_inc(&orig->usage);
+	__get_seccomp_filter(orig);
 }
 
 static inline void seccomp_filter_free(struct seccomp_filter *filter)
@@ -491,10 +496,8 @@ static inline void seccomp_filter_free(struct seccomp_filter *filter)
 	}
 }
 
-/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
-void put_seccomp_filter(struct task_struct *tsk)
+static void __put_seccomp_filter(struct seccomp_filter *orig)
 {
-	struct seccomp_filter *orig = tsk->seccomp.filter;
 	/* Clean up single-reference branches iteratively. */
 	while (orig && refcount_dec_and_test(&orig->usage)) {
 		struct seccomp_filter *freeme = orig;
@@ -503,6 +506,12 @@ void put_seccomp_filter(struct task_struct *tsk)
 	}
 }
 
+/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
+void put_seccomp_filter(struct task_struct *tsk)
+{
+	__put_seccomp_filter(tsk->seccomp.filter);
+}
+
 static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason)
 {
 	memset(info, 0, sizeof(*info));
@@ -1025,13 +1034,13 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
 	if (!data)
 		goto out;
 
-	get_seccomp_filter(task);
+	__get_seccomp_filter(filter);
 	spin_unlock_irq(&task->sighand->siglock);
 
 	if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
 		ret = -EFAULT;
 
-	put_seccomp_filter(task);
+	__put_seccomp_filter(filter);
 	return ret;
 
 out:
-- 
cgit 


From 72364d320644c12948786962673772f271039a4a Mon Sep 17 00:00:00 2001
From: Jeffy Chen <jeffy.chen@rock-chips.com>
Date: Thu, 28 Sep 2017 12:37:31 +0800
Subject: irq/generic-chip: Don't replace domain's name

When generic irq chips are allocated for an irq domain the domain name is
set to the irq chip name. That was done to have named domains before the
recent changes which enforce domain naming were done.

Since then the overwrite causes a memory leak when the domain name is
dynamically allocated and even worse it would cause the domain free code to
free the wrong name pointer, which might point to a constant.

Remove the name assignment to prevent this.

Fixes: d59f6617eef0 ("genirq: Allow fwnode to carry name information only")
Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20170928043731.4764-1-jeffy.chen@rock-chips.com
---
 kernel/irq/generic-chip.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index f7086b78ad6e..5270a54b9fa4 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -322,7 +322,6 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
 		/* Calc pointer to the next generic chip */
 		tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
 	}
-	d->name = name;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips);
-- 
cgit 


From 8c28ef3f1c1c57b6f468343d5959e5125b30334d Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 25 Sep 2017 02:01:01 -0600
Subject: xen-pciback: relax BAR sizing write value check

Just like done in d2bd05d88d ("xen-pciback: return proper values during
BAR sizing") for the ROM BAR, ordinary ones also shouldn't compare the
written value directly against ~0, but consider the r/o bits at the
bottom (if any).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/xen-pciback/conf_space_header.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index 5fbfd9cfb6d6..5b3d57fc82d3 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -169,6 +169,9 @@ static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
 static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
 {
 	struct pci_bar_info *bar = data;
+	unsigned int pos = (offset - PCI_BASE_ADDRESS_0) / 4;
+	const struct resource *res = dev->resource;
+	u32 mask;
 
 	if (unlikely(!bar)) {
 		pr_warn(DRV_NAME ": driver data not found for %s\n",
@@ -179,7 +182,13 @@ static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
 	/* A write to obtain the length must happen as a 32-bit write.
 	 * This does not (yet) support writing individual bytes
 	 */
-	if (value == ~0)
+	if (res[pos].flags & IORESOURCE_IO)
+		mask = ~PCI_BASE_ADDRESS_IO_MASK;
+	else if (pos && (res[pos - 1].flags & IORESOURCE_MEM_64))
+		mask = 0;
+	else
+		mask = ~PCI_BASE_ADDRESS_MEM_MASK;
+	if ((value | mask) == ~0U)
 		bar->which = 1;
 	else {
 		u32 tmpval;
-- 
cgit 


From 0d805ee70a69eabd38160dc199e183ac2f13fe4b Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Date: Wed, 27 Sep 2017 02:41:25 -0700
Subject: xen/mmu: Call xen_cleanhighmap() with 4MB aligned for page tables
 mapping

When bootup a PVM guest with large memory(Ex.240GB), XEN provided initial
mapping overlaps with kernel module virtual space. When mapping in this space
is cleared by xen_cleanhighmap(), in certain case there could be an 2MB mapping
left. This is due to XEN initialize 4MB aligned mapping but xen_cleanhighmap()
finish at 2MB boundary.

When module loading is just on top of the 2MB space, got below warning:

WARNING: at mm/vmalloc.c:106 vmap_pte_range+0x14e/0x190()
Call Trace:
 [<ffffffff81117083>] warn_alloc_failed+0xf3/0x160
 [<ffffffff81146022>] __vmalloc_area_node+0x182/0x1c0
 [<ffffffff810ac91e>] ? module_alloc_update_bounds+0x1e/0x80
 [<ffffffff81145df7>] __vmalloc_node_range+0xa7/0x110
 [<ffffffff810ac91e>] ? module_alloc_update_bounds+0x1e/0x80
 [<ffffffff8103ca54>] module_alloc+0x64/0x70
 [<ffffffff810ac91e>] ? module_alloc_update_bounds+0x1e/0x80
 [<ffffffff810ac91e>] module_alloc_update_bounds+0x1e/0x80
 [<ffffffff810ac9a7>] move_module+0x27/0x150
 [<ffffffff810aefa0>] layout_and_allocate+0x120/0x1b0
 [<ffffffff810af0a8>] load_module+0x78/0x640
 [<ffffffff811ff90b>] ? security_file_permission+0x8b/0x90
 [<ffffffff810af6d2>] sys_init_module+0x62/0x1e0
 [<ffffffff815154c2>] system_call_fastpath+0x16/0x1b

Then the mapping of 2MB is cleared, finally oops when the page in that space is
accessed.

BUG: unable to handle kernel paging request at ffff880022600000
IP: [<ffffffff81260877>] clear_page_c_e+0x7/0x10
PGD 1788067 PUD 178c067 PMD 22434067 PTE 0
Oops: 0002 [#1] SMP
Call Trace:
 [<ffffffff81116ef7>] ? prep_new_page+0x127/0x1c0
 [<ffffffff81117d42>] get_page_from_freelist+0x1e2/0x550
 [<ffffffff81133010>] ? ii_iovec_copy_to_user+0x90/0x140
 [<ffffffff81119c9d>] __alloc_pages_nodemask+0x12d/0x230
 [<ffffffff81155516>] alloc_pages_vma+0xc6/0x1a0
 [<ffffffff81006ffd>] ? pte_mfn_to_pfn+0x7d/0x100
 [<ffffffff81134cfb>] do_anonymous_page+0x16b/0x350
 [<ffffffff81139c34>] handle_pte_fault+0x1e4/0x200
 [<ffffffff8100712e>] ? xen_pmd_val+0xe/0x10
 [<ffffffff810052c9>] ? __raw_callee_save_xen_pmd_val+0x11/0x1e
 [<ffffffff81139dab>] handle_mm_fault+0x15b/0x270
 [<ffffffff81510c10>] do_page_fault+0x140/0x470
 [<ffffffff8150d7d5>] page_fault+0x25/0x30

Call xen_cleanhighmap() with 4MB aligned for page tables mapping to fix it.
The unnecessory call of xen_cleanhighmap() in DEBUG mode is also removed.

-v2: add comment about XEN alignment from Juergen.

References: https://lists.xen.org/archives/html/xen-devel/2012-07/msg01562.html
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Reviewed-by: Juergen Gross <jgross@suse.com>

[boris: added 'xen/mmu' tag to commit subject]
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 arch/x86/xen/mmu_pv.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 509f560bd0c6..58b09fcadbaa 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1238,21 +1238,16 @@ static void __init xen_pagetable_cleanhighmap(void)
 	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
 	 * the ramdisk). We continue on, erasing PMD entries that point to page
 	 * tables - do note that they are accessible at this stage via __va.
-	 * For good measure we also round up to the PMD - which means that if
+	 * As Xen is aligning the memory end to a 4MB boundary, for good
+	 * measure we also round up to PMD_SIZE * 2 - which means that if
 	 * anybody is using __ka address to the initial boot-stack - and try
 	 * to use it - they are going to crash. The xen_start_info has been
 	 * taken care of already in xen_setup_kernel_pagetable. */
 	addr = xen_start_info->pt_base;
-	size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
+	size = xen_start_info->nr_pt_frames * PAGE_SIZE;
 
-	xen_cleanhighmap(addr, addr + size);
+	xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2));
 	xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
-#ifdef DEBUG
-	/* This is superfluous and is not necessary, but you know what
-	 * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
-	 * anything at this stage. */
-	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
-#endif
 }
 #endif
 
-- 
cgit 


From 686fef928bba6be13cabe639f154af7d72b63120 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 28 Sep 2017 06:38:17 -0700
Subject: timer: Prepare to change timer callback argument type

Modern kernel callback systems pass the structure associated with a
given callback to the callback function. The timer callback remains one
of the legacy cases where an arbitrary unsigned long argument continues
to be passed as the callback argument. This has several problems:

- This bloats the timer_list structure with a normally redundant
  .data field.

- No type checking is being performed, forcing callbacks to do
  explicit type casts of the unsigned long argument into the object
  that was passed, rather than using container_of(), as done in most
  of the other callback infrastructure.

- Neighboring buffer overflows can overwrite both the .function and
  the .data field, providing attackers with a way to elevate from a buffer
  overflow into a simplistic ROP-like mechanism that allows calling
  arbitrary functions with a controlled first argument.

- For future Control Flow Integrity work, this creates a unique function
  prototype for timer callbacks, instead of allowing them to continue to
  be clustered with other void functions that take a single unsigned long
  argument.

This adds a new timer initialization API, which will ultimately replace
the existing setup_timer(), setup_{deferrable,pinned,etc}_timer() family,
named timer_setup() (to mirror hrtimer_setup(), making instances of its
use much easier to grep for).

In order to support the migration of existing timers into the new
callback arguments, timer_setup() casts its arguments to the existing
legacy types, and explicitly passes the timer pointer as the legacy
data argument. Once all setup_*timer() callers have been replaced with
timer_setup(), the casts can be removed, and the data argument can be
dropped with the timer expiration code changed to just pass the timer
to the callback directly.

Since the regular pattern of using container_of() during local variable
declaration repeats the need for the variable type declaration
to be included, this adds a helper modeled after other from_*()
helpers that wrap container_of(), named from_timer(). This helper uses
typeof(*variable), removing the type redundancy and minimizing the need
for line wraps in forthcoming conversions from "unsigned data long" to
"struct timer_list *" in the timer callbacks:

-void callback(unsigned long data)
+void callback(struct timer_list *t)
{
-   struct some_data_structure *local = (struct some_data_structure *)data;
+   struct some_data_structure *local = from_timer(local, t, timer);

Finally, in order to support the handful of timer users that perform
open-coded assignments of the .function (and .data) fields, provide
cast macros (TIMER_FUNC_TYPE and TIMER_DATA_TYPE) that can be used
temporarily. Once conversion has been completed, these can be globally
trivially removed.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20170928133817.GA113410@beast
---
 include/linux/timer.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/linux/timer.h b/include/linux/timer.h
index e6789b8757d5..6383c528b148 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -168,6 +168,20 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
 #define setup_pinned_deferrable_timer_on_stack(timer, fn, data)		\
 	__setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED)
 
+#define TIMER_DATA_TYPE		unsigned long
+#define TIMER_FUNC_TYPE		void (*)(TIMER_DATA_TYPE)
+
+static inline void timer_setup(struct timer_list *timer,
+			       void (*callback)(struct timer_list *),
+			       unsigned int flags)
+{
+	__setup_timer(timer, (TIMER_FUNC_TYPE)callback,
+		      (TIMER_DATA_TYPE)timer, flags);
+}
+
+#define from_timer(var, callback_timer, timer_fieldname) \
+	container_of(callback_timer, typeof(*var), timer_fieldname)
+
 /**
  * timer_pending - is a timer pending?
  * @timer: the timer in question
-- 
cgit 


From c0a1666bcb2a33e84187a15eabdcd54056be9a97 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 28 Sep 2017 17:58:41 +0200
Subject: KVM: VMX: use cmpxchg64

This fixes a compilation failure on 32-bit systems.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b9d2140eb212..7f62c94196d1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2238,8 +2238,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 			new.ndst = (dest << 8) & 0xFF00;
 
 		new.sn = 0;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
+	} while (cmpxchg64(&pi_desc->control, old.control,
+			   new.control) != old.control);
 }
 
 static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
@@ -11730,8 +11730,8 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
 
 		/* set 'NV' to 'notification vector' */
 		new.nv = POSTED_INTR_VECTOR;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
+	} while (cmpxchg64(&pi_desc->control, old.control,
+			   new.control) != old.control);
 
 	if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
 		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
@@ -11800,8 +11800,8 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
 
 		/* set 'NV' to 'wakeup vector' */
 		new.nv = POSTED_INTR_WAKEUP_VECTOR;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
+	} while (cmpxchg64(&pi_desc->control, old.control,
+			   new.control) != old.control);
 
 	/* We should not block the vCPU if an interrupt is posted for it.  */
 	if (pi_test_on(pi_desc) == 1)
-- 
cgit 


From b28503a3fe6400757817e4460090f96bc1b9d6e7 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.vnet.ibm.com>
Date: Fri, 15 Sep 2017 09:14:03 +0200
Subject: perf test: Fix vmlinux failure on s390x

On s390x perf test 1 failed. It turned out that commit 4a084ecfc821
("perf report: Fix module symbol adjustment for s390x") was incorrect.
The previous implementation in dso__load_sym() is also suitable for
s390x.

Therefore this patch undoes commit 4a084ecfc821.

Signed-off-by: Thomas-Mich Richter <tmricht@linux.vnet.ibm.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Zvonko Kosic <zvonko.kosic@de.ibm.com>
Fixes: 4a084ecfc821 ("perf report: Fix module symbol adjustment for s390x")
LPU-Reference: 20170915071404.58398-1-tmricht@linux.vnet.ibm.com
Link: http://lkml.kernel.org/n/tip-5ani7ly57zji7s0hmzkx416l@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/s390/util/sym-handling.c | 8 --------
 tools/perf/util/symbol-elf.c             | 8 +-------
 tools/perf/util/symbol.h                 | 3 ---
 3 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/tools/perf/arch/s390/util/sym-handling.c b/tools/perf/arch/s390/util/sym-handling.c
index e103f6e46afe..581d4c5a896b 100644
--- a/tools/perf/arch/s390/util/sym-handling.c
+++ b/tools/perf/arch/s390/util/sym-handling.c
@@ -18,12 +18,4 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
 		return false;
 	return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN;
 }
-
-void arch__adjust_sym_map_offset(GElf_Sym *sym,
-				 GElf_Shdr *shdr __maybe_unused,
-				 struct map *map)
-{
-	if (map->type == MAP__FUNCTION)
-		sym->st_value += map->start;
-}
 #endif
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 5c39f420111e..9cf781f0d8a2 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -810,12 +810,6 @@ static u64 ref_reloc(struct kmap *kmap)
 void __weak arch__sym_update(struct symbol *s __maybe_unused,
 		GElf_Sym *sym __maybe_unused) { }
 
-void __weak arch__adjust_sym_map_offset(GElf_Sym *sym, GElf_Shdr *shdr,
-				       struct map *map __maybe_unused)
-{
-	sym->st_value -= shdr->sh_addr - shdr->sh_offset;
-}
-
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		  struct symsrc *runtime_ss, int kmodule)
 {
@@ -996,7 +990,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 
 			/* Adjust symbol to map to file offset */
 			if (adjust_kernel_syms)
-				arch__adjust_sym_map_offset(&sym, &shdr, map);
+				sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 
 			if (strcmp(section_name,
 				   (curr_dso->short_name +
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 2bd6a1f01a1c..aad99e7e179b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -344,9 +344,6 @@ int setup_intlist(struct intlist **list, const char *list_str,
 #ifdef HAVE_LIBELF_SUPPORT
 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
 void arch__sym_update(struct symbol *s, GElf_Sym *sym);
-void arch__adjust_sym_map_offset(GElf_Sym *sym,
-				 GElf_Shdr *shdr __maybe_unused,
-				 struct map *map __maybe_unused);
 #endif
 
 #define SYMBOL_A 0
-- 
cgit 


From 5357413f5c067f60933e4b8d79d483fbe62b2bb5 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.vnet.ibm.com>
Date: Fri, 15 Sep 2017 09:14:04 +0200
Subject: perf test: Fix vmlinux failure on s390x part 2

On s390x perf test 1 failed. It turned out that commit cf6383f73cf2
("perf report: Fix kernel symbol adjustment for s390x") was incorrect.

The previous implementation in dso__load_sym() is also suitable for
s390x.

Therefore this patch undoes commit cf6383f73cf2

Signed-off-by: Thomas-Mich Richter <tmricht@linux.vnet.ibm.com>
Cc: Zvonko Kosic <zvonko.kosic@de.ibm.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Fixes: cf6383f73cf2 ("perf report: Fix kernel symbol adjustment for s390x")
LPU-Reference: 20170915071404.58398-2-tmricht@linux.vnet.ibm.com
Link: http://lkml.kernel.org/n/tip-v101o8k25vuja2ogosgf15yy@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/s390/util/Build          |  1 -
 tools/perf/arch/s390/util/sym-handling.c | 21 ---------------------
 2 files changed, 22 deletions(-)
 delete mode 100644 tools/perf/arch/s390/util/sym-handling.c

diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build
index bd518b623d7a..5bd7b9260cc0 100644
--- a/tools/perf/arch/s390/util/Build
+++ b/tools/perf/arch/s390/util/Build
@@ -1,5 +1,4 @@
 libperf-y += header.o
-libperf-y += sym-handling.o
 libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/s390/util/sym-handling.c b/tools/perf/arch/s390/util/sym-handling.c
deleted file mode 100644
index 581d4c5a896b..000000000000
--- a/tools/perf/arch/s390/util/sym-handling.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Architecture specific ELF symbol handling and relocation mapping.
- *
- * Copyright 2017 IBM Corp.
- * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- */
-
-#include "symbol.h"
-
-#ifdef HAVE_LIBELF_SUPPORT
-bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
-{
-	if (ehdr.e_type == ET_EXEC)
-		return false;
-	return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN;
-}
-#endif
-- 
cgit 


From e49aa15ef6c179f69e5578a271801f31a09e9a3f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 28 Sep 2017 13:20:32 -0700
Subject: Revert "Bluetooth: Add option for disabling legacy ioctl interfaces"

This reverts commit dbbccdc4ced015cdd4051299bd87fbe0254ad351.

It turns out that the "legacy" users aren't so legacy at all, and that
turning off the legacy ioctl will break the current Qt bluetooth stack
for bluetooth LE devices that were released just a couple of months ago.

So it's simply not true that this was a legacy interface that hasn't
been needed and is only limited to old legacy BT devices.  Because I
actually read Kconfig help messages, and actively try to turn off
features that I don't need, I turned the option off.

Then I spent _way_ too much time debugging BLE issues until I realized
that it wasn't the Qt and subsurface development that had broken one of
my dive computer BLE downloads, but simply my broken kernel config.

Maybe in a decade it will be true that this is a legacy interface.  And
maybe with a better help-text and correct dependencies, this kind of
legacy removal might be acceptable.  But as things are right now both
the commit message and the Kconfig help text were misleading, and the
Kconfig option had the wrong dependenencies.

There's no reason to keep that broken Kconfig option in the tree.

Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/bluetooth/Kconfig    | 10 ----------
 net/bluetooth/hci_sock.c |  6 ------
 2 files changed, 16 deletions(-)

diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index c18115d22f00..db82a40875e8 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -126,14 +126,4 @@ config BT_DEBUGFS
 	  Provide extensive information about internal Bluetooth states
 	  in debugfs.
 
-config BT_LEGACY_IOCTL
-	bool "Enable legacy ioctl interfaces"
-	depends on BT && BT_BREDR
-	default y
-	help
-	  Enable support for legacy ioctl interfaces.  This is only needed
-	  for old and deprecated applications using direct ioctl calls for
-	  controller management.  Since Linux 3.4 all configuration and
-	  setup is done via mgmt interface and this is no longer needed.
-
 source "drivers/bluetooth/Kconfig"
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 0bad296fe0af..65d734c165bd 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -878,7 +878,6 @@ static int hci_sock_release(struct socket *sock)
 	return 0;
 }
 
-#ifdef CONFIG_BT_LEGACY_IOCTL
 static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
 {
 	bdaddr_t bdaddr;
@@ -1050,7 +1049,6 @@ done:
 	release_sock(sk);
 	return err;
 }
-#endif
 
 static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 			 int addr_len)
@@ -1971,11 +1969,7 @@ static const struct proto_ops hci_sock_ops = {
 	.getname	= hci_sock_getname,
 	.sendmsg	= hci_sock_sendmsg,
 	.recvmsg	= hci_sock_recvmsg,
-#ifdef CONFIG_BT_LEGACY_IOCTL
 	.ioctl		= hci_sock_ioctl,
-#else
-	.ioctl		= sock_no_ioctl,
-#endif
 	.poll		= datagram_poll,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
-- 
cgit 


From 441430eb54a00586f95f1aefc48e0801bbd6a923 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Wed, 6 Sep 2017 19:08:11 +0300
Subject: perf/aux: Only update ->aux_wakeup in non-overwrite mode

The following commit:

  d9a50b0256 ("perf/aux: Ensure aux_wakeup represents most recent wakeup index")

changed the AUX wakeup position calculation to rounddown(), which causes
a division-by-zero in AUX overwrite mode (aka "snapshot mode").

The zero denominator results from the fact that perf record doesn't set
aux_watermark to anything, in which case the kernel will set it to half
the AUX buffer size, but only for non-overwrite mode. In the overwrite
mode aux_watermark stays zero.

The good news is that, AUX overwrite mode, wakeups don't happen and
related bookkeeping is not relevant, so we can simply forego the whole
wakeup updates.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: will.deacon@arm.com
Link: http://lkml.kernel.org/r/20170906160811.16510-1-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/ring_buffer.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index af71a84e12ee..f684d8e5fa2b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -412,6 +412,19 @@ err:
 	return NULL;
 }
 
+static bool __always_inline rb_need_aux_wakeup(struct ring_buffer *rb)
+{
+	if (rb->aux_overwrite)
+		return false;
+
+	if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) {
+		rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark);
+		return true;
+	}
+
+	return false;
+}
+
 /*
  * Commit the data written by hardware into the ring buffer by adjusting
  * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
@@ -451,10 +464,8 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
 	}
 
 	rb->user_page->aux_head = rb->aux_head;
-	if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) {
+	if (rb_need_aux_wakeup(rb))
 		wakeup = true;
-		rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark);
-	}
 
 	if (wakeup) {
 		if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
@@ -484,9 +495,8 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
 	rb->aux_head += size;
 
 	rb->user_page->aux_head = rb->aux_head;
-	if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) {
+	if (rb_need_aux_wakeup(rb)) {
 		perf_output_wakeup(handle);
-		rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark);
 		handle->wakeup = rb->aux_wakeup + rb->aux_watermark;
 	}
 
-- 
cgit 


From 69b73e95982649a1f2dc63b8f08f2113d28f7fed Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 29 Sep 2017 10:07:44 +0200
Subject: um/time: Fixup namespace collision

The new timer_setup() function for struct timer_list collides with a
private um function. Rename it.

Fixes: 686fef928bba ("timer: Prepare to change timer callback argument type")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Weinberger <richard@nod.at>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: user-mode-linux-devel@lists.sourceforge.net
Cc: Kees Cook  <keescook@chromium.org>
---
 arch/um/kernel/time.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 0b034ebbda2a..7f69d17de354 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -98,7 +98,7 @@ static struct clocksource timer_clocksource = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static void __init timer_setup(void)
+static void __init um_timer_setup(void)
 {
 	int err;
 
@@ -132,5 +132,5 @@ void read_persistent_clock(struct timespec *ts)
 void __init time_init(void)
 {
 	timer_set_signal_handler();
-	late_time_init = timer_setup;
+	late_time_init = um_timer_setup;
 }
-- 
cgit 


From 1593baab910da72480d651ea7bf2ce6e3a25a484 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 22 Sep 2017 18:09:26 +0200
Subject: sched/debug: Implement consistent task-state printing

Currently get_task_state() and task_state_to_char() report different
states, create a number of common helpers and unify the reported state
space.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/proc/array.c       | 15 ++-------------
 include/linux/sched.h | 26 +++++++++++++++++++-------
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 525157ca25cb..01196d3ad452 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -130,19 +130,8 @@ static const char * const task_state_array[] = {
 
 static inline const char *get_task_state(struct task_struct *tsk)
 {
-	unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT;
-
-	/*
-	 * Parked tasks do not run; they sit in __kthread_parkme().
-	 * Without this check, we would report them as running, which is
-	 * clearly wrong, so we report them as sleeping instead.
-	 */
-	if (tsk->state == TASK_PARKED)
-		state = TASK_INTERRUPTIBLE;
-
-	BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1);
-
-	return task_state_array[fls(state)];
+	BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array) - 1);
+	return task_state_array[__get_task_state(tsk)];
 }
 
 static inline int get_task_umask(struct task_struct *tsk)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 92fb8dd5a9e4..163a0b738908 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1243,17 +1243,29 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
 	return task_pgrp_nr_ns(tsk, &init_pid_ns);
 }
 
-static inline char task_state_to_char(struct task_struct *task)
+static inline unsigned int __get_task_state(struct task_struct *tsk)
 {
-	const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
-	unsigned long state = task->state;
+	unsigned int tsk_state = READ_ONCE(tsk->state);
+	unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
 
-	state = state ? __ffs(state) + 1 : 0;
+	if (tsk_state == TASK_PARKED)
+		state = TASK_INTERRUPTIBLE;
 
-	/* Make sure the string lines up properly with the number of task states: */
-	BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1);
+	return fls(state);
+}
+
+static inline char __task_state_to_char(unsigned int state)
+{
+	static const char state_char[] = "RSDTtXZ";
+
+	BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != sizeof(state_char) - 2);
 
-	return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?';
+	return state_char[state];
+}
+
+static inline char task_state_to_char(struct task_struct *tsk)
+{
+	return __task_state_to_char(__get_task_state(tsk));
 }
 
 /**
-- 
cgit 


From 92c4bc9f9cd92a8581e36bc5105f03b569f37e36 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 22 Sep 2017 18:13:36 +0200
Subject: sched/debug: Convert TASK_state to hex

Bit patterns are easier in hex.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 163a0b738908..69bed5339ffa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -65,23 +65,23 @@ struct task_group;
  */
 
 /* Used in tsk->state: */
-#define TASK_RUNNING			0
-#define TASK_INTERRUPTIBLE		1
-#define TASK_UNINTERRUPTIBLE		2
-#define __TASK_STOPPED			4
-#define __TASK_TRACED			8
+#define TASK_RUNNING			0x0000
+#define TASK_INTERRUPTIBLE		0x0001
+#define TASK_UNINTERRUPTIBLE		0x0002
+#define __TASK_STOPPED			0x0004
+#define __TASK_TRACED			0x0008
 /* Used in tsk->exit_state: */
-#define EXIT_DEAD			16
-#define EXIT_ZOMBIE			32
+#define EXIT_DEAD			0x0010
+#define EXIT_ZOMBIE			0x0020
 #define EXIT_TRACE			(EXIT_ZOMBIE | EXIT_DEAD)
 /* Used in tsk->state again: */
-#define TASK_DEAD			64
-#define TASK_WAKEKILL			128
-#define TASK_WAKING			256
-#define TASK_PARKED			512
-#define TASK_NOLOAD			1024
-#define TASK_NEW			2048
-#define TASK_STATE_MAX			4096
+#define TASK_DEAD			0x0040
+#define TASK_WAKEKILL			0x0080
+#define TASK_WAKING			0x0100
+#define TASK_PARKED			0x0200
+#define TASK_NOLOAD			0x0400
+#define TASK_NEW			0x0800
+#define TASK_STATE_MAX			0x1000
 
 #define TASK_STATE_TO_CHAR_STR		"RSDTtXZxKWPNn"
 
-- 
cgit 


From 65d5dc47fe8530e17e318722fb4df676536c2bfc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 22 Sep 2017 18:14:08 +0200
Subject: sched/debug: Remove unused variable

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/debug.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 01217fb5a5de..2f93e4a2d9f6 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -466,8 +466,6 @@ static char *task_group_path(struct task_group *tg)
 }
 #endif
 
-static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
-
 static void
 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 {
-- 
cgit 


From efb40f588b4370ffaeffafbd50f6ff213d954254 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 22 Sep 2017 18:19:53 +0200
Subject: sched/tracing: Fix trace_sched_switch task-state printing

Convert trace_sched_switch to use the common task-state helpers and
fix the "X" and "Z" order, possibly they ended up in the wrong order
because TASK_REPORT has them in the wrong order too.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h        |  2 +-
 include/trace/events/sched.h | 18 +++++++++++-------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 69bed5339ffa..a2fe636b6825 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -99,7 +99,7 @@ struct task_group;
 /* get_task_state(): */
 #define TASK_REPORT			(TASK_RUNNING | TASK_INTERRUPTIBLE | \
 					 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
-					 __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
+					 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE)
 
 #define task_is_traced(task)		((task->state & __TASK_TRACED) != 0)
 
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index ae1409ffe99a..c63e20c9ef24 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -114,7 +114,10 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
 	 * Preemption ignores task state, therefore preempted tasks are always
 	 * RUNNING (we will not have dequeued if state != RUNNING).
 	 */
-	return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
+	if (preempt)
+		return TASK_STATE_MAX;
+
+	return __get_task_state(p);
 }
 #endif /* CREATE_TRACE_POINTS */
 
@@ -152,12 +155,13 @@ TRACE_EVENT(sched_switch,
 
 	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
-		__entry->prev_state & (TASK_STATE_MAX-1) ?
-		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
-				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
-				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
-				{ 128, "K" }, { 256, "W" }, { 512, "P" },
-				{ 1024, "N" }) : "R",
+
+		(__entry->prev_state & TASK_REPORT) ?
+		  __print_flags(__entry->prev_state & TASK_REPORT, "|",
+				{ 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" },
+				{ 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" }) :
+		  "R",
+
 		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
-- 
cgit 


From 9c29c31830a4eca724e137a9339137204bbb31be Mon Sep 17 00:00:00 2001
From: Prateek Sood <prsood@codeaurora.org>
Date: Thu, 7 Sep 2017 20:00:58 +0530
Subject: locking/rwsem-xadd: Fix missed wakeup due to reordering of load

If a spinner is present, there is a chance that the load of
rwsem_has_spinner() in rwsem_wake() can be reordered with
respect to decrement of rwsem count in __up_write() leading
to wakeup being missed:

 spinning writer                  up_write caller
 ---------------                  -----------------------
 [S] osq_unlock()                 [L] osq
  spin_lock(wait_lock)
  sem->count=0xFFFFFFFF00000001
            +0xFFFFFFFF00000000
  count=sem->count
  MB
                                   sem->count=0xFFFFFFFE00000001
                                             -0xFFFFFFFF00000001
                                   spin_trylock(wait_lock)
                                   return
 rwsem_try_write_lock(count)
 spin_unlock(wait_lock)
 schedule()

Reordering of atomic_long_sub_return_release() in __up_write()
and rwsem_has_spinner() in rwsem_wake() can cause missing of
wakeup in up_write() context. In spinning writer, sem->count
and local variable count is 0XFFFFFFFE00000001. It would result
in rwsem_try_write_lock() failing to acquire rwsem and spinning
writer going to sleep in rwsem_down_write_failed().

The smp_rmb() will make sure that the spinner state is
consulted after sem->count is updated in up_write context.

Signed-off-by: Prateek Sood <prsood@codeaurora.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Cc: longman@redhat.com
Cc: parri.andrea@gmail.com
Cc: sramana@codeaurora.org
Link: http://lkml.kernel.org/r/1504794658-15397-1-git-send-email-prsood@codeaurora.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/rwsem-xadd.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 02f660666ab8..1fefe6dcafd7 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -612,6 +612,33 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
 	unsigned long flags;
 	DEFINE_WAKE_Q(wake_q);
 
+	/*
+	* __rwsem_down_write_failed_common(sem)
+	*   rwsem_optimistic_spin(sem)
+	*     osq_unlock(sem->osq)
+	*   ...
+	*   atomic_long_add_return(&sem->count)
+	*
+	*      - VS -
+	*
+	*              __up_write()
+	*                if (atomic_long_sub_return_release(&sem->count) < 0)
+	*                  rwsem_wake(sem)
+	*                    osq_is_locked(&sem->osq)
+	*
+	* And __up_write() must observe !osq_is_locked() when it observes the
+	* atomic_long_add_return() in order to not miss a wakeup.
+	*
+	* This boils down to:
+	*
+	* [S.rel] X = 1                [RmW] r0 = (Y += 0)
+	*         MB                         RMB
+	* [RmW]   Y += 1               [L]   r1 = X
+	*
+	* exists (r0=1 /\ r1=0)
+	*/
+	smp_rmb();
+
 	/*
 	 * If a spinner is present, it is not necessary to do the wakeup.
 	 * Try to do wakeup only if the trylock succeeds to minimize
-- 
cgit 


From 5f6ad26ea353fdf3dad2328052cbee49e0b9c5b4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 22 Sep 2017 18:23:31 +0200
Subject: sched/tracing: Use common task-state helpers

Remove yet another task-state char instance.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h             |  2 --
 kernel/trace/trace_output.c       | 21 ++++++---------------
 kernel/trace/trace_sched_wakeup.c |  8 ++++----
 3 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a2fe636b6825..bc7807933415 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -83,8 +83,6 @@ struct task_group;
 #define TASK_NEW			0x0800
 #define TASK_STATE_MAX			0x1000
 
-#define TASK_STATE_TO_CHAR_STR		"RSDTtXZxKWPNn"
-
 /* Convenience macros for the sake of set_current_state: */
 #define TASK_KILLABLE			(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
 #define TASK_STOPPED			(TASK_WAKEKILL | __TASK_STOPPED)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index bac629af2285..c738e764e2a5 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -656,15 +656,6 @@ int trace_print_lat_context(struct trace_iterator *iter)
 	return !trace_seq_has_overflowed(s);
 }
 
-static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
-
-static int task_state_char(unsigned long state)
-{
-	int bit = state ? __ffs(state) + 1 : 0;
-
-	return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
-}
-
 /**
  * ftrace_find_event - find a registered event
  * @type: the type of event to look for
@@ -930,8 +921,8 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
 
 	trace_assign_type(field, iter->ent);
 
-	T = task_state_char(field->next_state);
-	S = task_state_char(field->prev_state);
+	T = __task_state_to_char(field->next_state);
+	S = __task_state_to_char(field->prev_state);
 	trace_find_cmdline(field->next_pid, comm);
 	trace_seq_printf(&iter->seq,
 			 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
@@ -966,8 +957,8 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
 	trace_assign_type(field, iter->ent);
 
 	if (!S)
-		S = task_state_char(field->prev_state);
-	T = task_state_char(field->next_state);
+		S = __task_state_to_char(field->prev_state);
+	T = __task_state_to_char(field->next_state);
 	trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
 			 field->prev_pid,
 			 field->prev_prio,
@@ -1002,8 +993,8 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
 	trace_assign_type(field, iter->ent);
 
 	if (!S)
-		S = task_state_char(field->prev_state);
-	T = task_state_char(field->next_state);
+		S = __task_state_to_char(field->prev_state);
+	T = __task_state_to_char(field->next_state);
 
 	SEQ_PUT_HEX_FIELD(s, field->prev_pid);
 	SEQ_PUT_HEX_FIELD(s, field->prev_prio);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index ddec53b67646..0c331978b1a6 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -397,10 +397,10 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry	= ring_buffer_event_data(event);
 	entry->prev_pid			= prev->pid;
 	entry->prev_prio		= prev->prio;
-	entry->prev_state		= prev->state;
+	entry->prev_state		= __get_task_state(prev);
 	entry->next_pid			= next->pid;
 	entry->next_prio		= next->prio;
-	entry->next_state		= next->state;
+	entry->next_state		= __get_task_state(next);
 	entry->next_cpu	= task_cpu(next);
 
 	if (!call_filter_check_discard(call, entry, buffer, event))
@@ -425,10 +425,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry	= ring_buffer_event_data(event);
 	entry->prev_pid			= curr->pid;
 	entry->prev_prio		= curr->prio;
-	entry->prev_state		= curr->state;
+	entry->prev_state		= __get_task_state(curr);
 	entry->next_pid			= wakee->pid;
 	entry->next_prio		= wakee->prio;
-	entry->next_state		= wakee->state;
+	entry->next_state		= __get_task_state(wakee);
 	entry->next_cpu			= task_cpu(wakee);
 
 	if (!call_filter_check_discard(call, entry, buffer, event))
-- 
cgit 


From 06eb61844d841d0032a9950ce7f8e783ee49c0d0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 22 Sep 2017 18:30:40 +0200
Subject: sched/debug: Add explicit TASK_IDLE printing

Markus reported that kthreads that idle using TASK_IDLE instead of
TASK_INTERRUPTIBLE are reported in as TASK_UNINTERRUPTIBLE and things
like htop mark those red.

This is undesirable, so add an explicit state for TASK_IDLE.

Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/proc/array.c              | 21 +++++++++++++--------
 include/linux/sched.h        | 12 ++++++++++--
 include/trace/events/sched.h |  7 ++++---
 3 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 01196d3ad452..a120a4549d48 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -119,18 +119,23 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
  * simple bit tests.
  */
 static const char * const task_state_array[] = {
-	"R (running)",		/*   0 */
-	"S (sleeping)",		/*   1 */
-	"D (disk sleep)",	/*   2 */
-	"T (stopped)",		/*   4 */
-	"t (tracing stop)",	/*   8 */
-	"X (dead)",		/*  16 */
-	"Z (zombie)",		/*  32 */
+
+	/* states in TASK_REPORT: */
+	"R (running)",		/* 0x00 */
+	"S (sleeping)",		/* 0x01 */
+	"D (disk sleep)",	/* 0x02 */
+	"T (stopped)",		/* 0x04 */
+	"t (tracing stop)",	/* 0x08 */
+	"X (dead)",		/* 0x10 */
+	"Z (zombie)",		/* 0x20 */
+
+	/* states beyond TASK_REPORT: */
+	"I (idle)",		/* 0x40 */
 };
 
 static inline const char *get_task_state(struct task_struct *tsk)
 {
-	BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array) - 1);
+	BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array));
 	return task_state_array[__get_task_state(tsk)];
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index bc7807933415..286fc1117046 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1241,22 +1241,30 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
 	return task_pgrp_nr_ns(tsk, &init_pid_ns);
 }
 
+#define TASK_REPORT_IDLE	(TASK_REPORT + 1)
+#define TASK_REPORT_MAX		(TASK_REPORT_IDLE << 1)
+
 static inline unsigned int __get_task_state(struct task_struct *tsk)
 {
 	unsigned int tsk_state = READ_ONCE(tsk->state);
 	unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
 
+	BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
+
 	if (tsk_state == TASK_PARKED)
 		state = TASK_INTERRUPTIBLE;
 
+	if (tsk_state == TASK_IDLE)
+		state = TASK_REPORT_IDLE;
+
 	return fls(state);
 }
 
 static inline char __task_state_to_char(unsigned int state)
 {
-	static const char state_char[] = "RSDTtXZ";
+	static const char state_char[] = "RSDTtXZI";
 
-	BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != sizeof(state_char) - 2);
+	BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1);
 
 	return state_char[state];
 }
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index c63e20c9ef24..b371ef8206e1 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -156,10 +156,11 @@ TRACE_EVENT(sched_switch,
 	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
 
-		(__entry->prev_state & TASK_REPORT) ?
-		  __print_flags(__entry->prev_state & TASK_REPORT, "|",
+		(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
+		  __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
 				{ 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" },
-				{ 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" }) :
+				{ 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" },
+				{ 0x40, "I" }) :
 		  "R",
 
 		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
-- 
cgit 


From 5d68cc95fb24b2f58060cc5340dd7402a11f054e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 22 Sep 2017 18:32:41 +0200
Subject: sched/debug: Ignore TASK_IDLE for SysRq-W

Markus reported that tasks in TASK_IDLE state are reported by SysRq-W,
which results in undesirable clutter.

Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 18a6966567da..d17c5da523a0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5166,6 +5166,28 @@ void sched_show_task(struct task_struct *p)
 	put_task_stack(p);
 }
 
+static inline bool
+state_filter_match(unsigned long state_filter, struct task_struct *p)
+{
+	/* no filter, everything matches */
+	if (!state_filter)
+		return true;
+
+	/* filter, but doesn't match */
+	if (!(p->state & state_filter))
+		return false;
+
+	/*
+	 * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows
+	 * TASK_KILLABLE).
+	 */
+	if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE)
+		return false;
+
+	return true;
+}
+
+
 void show_state_filter(unsigned long state_filter)
 {
 	struct task_struct *g, *p;
@@ -5188,7 +5210,7 @@ void show_state_filter(unsigned long state_filter)
 		 */
 		touch_nmi_watchdog();
 		touch_all_softlockup_watchdogs();
-		if (!state_filter || (p->state & state_filter))
+		if (state_filter_match(state_filter, p))
 			sched_show_task(p);
 	}
 
-- 
cgit 


From 8ef9925b02c23e3838d5e593c5cf37984141150f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 22 Sep 2017 18:37:28 +0200
Subject: sched/debug: Add explicit TASK_PARKED printing

Currently TASK_PARKED is masqueraded as TASK_INTERRUPTIBLE, give it
its own print state because it will not in fact get woken by regular
wakeups and is a long-term state.

This requires moving TASK_PARKED into the TASK_REPORT mask, and since
that latter needs to be a contiguous bitmask, we need to shuffle the
bits around a bit.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/proc/array.c              |  3 ++-
 include/linux/sched.h        | 16 +++++++---------
 include/trace/events/sched.h |  2 +-
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index a120a4549d48..77a8eacbe032 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -128,9 +128,10 @@ static const char * const task_state_array[] = {
 	"t (tracing stop)",	/* 0x08 */
 	"X (dead)",		/* 0x10 */
 	"Z (zombie)",		/* 0x20 */
+	"P (parked)",		/* 0x40 */
 
 	/* states beyond TASK_REPORT: */
-	"I (idle)",		/* 0x40 */
+	"I (idle)",		/* 0x80 */
 };
 
 static inline const char *get_task_state(struct task_struct *tsk)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 286fc1117046..26a7df4e558c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -75,10 +75,10 @@ struct task_group;
 #define EXIT_ZOMBIE			0x0020
 #define EXIT_TRACE			(EXIT_ZOMBIE | EXIT_DEAD)
 /* Used in tsk->state again: */
-#define TASK_DEAD			0x0040
-#define TASK_WAKEKILL			0x0080
-#define TASK_WAKING			0x0100
-#define TASK_PARKED			0x0200
+#define TASK_PARKED			0x0040
+#define TASK_DEAD			0x0080
+#define TASK_WAKEKILL			0x0100
+#define TASK_WAKING			0x0200
 #define TASK_NOLOAD			0x0400
 #define TASK_NEW			0x0800
 #define TASK_STATE_MAX			0x1000
@@ -97,7 +97,8 @@ struct task_group;
 /* get_task_state(): */
 #define TASK_REPORT			(TASK_RUNNING | TASK_INTERRUPTIBLE | \
 					 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
-					 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE)
+					 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
+					 TASK_PARKED)
 
 #define task_is_traced(task)		((task->state & __TASK_TRACED) != 0)
 
@@ -1251,9 +1252,6 @@ static inline unsigned int __get_task_state(struct task_struct *tsk)
 
 	BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
 
-	if (tsk_state == TASK_PARKED)
-		state = TASK_INTERRUPTIBLE;
-
 	if (tsk_state == TASK_IDLE)
 		state = TASK_REPORT_IDLE;
 
@@ -1262,7 +1260,7 @@ static inline unsigned int __get_task_state(struct task_struct *tsk)
 
 static inline char __task_state_to_char(unsigned int state)
 {
-	static const char state_char[] = "RSDTtXZI";
+	static const char state_char[] = "RSDTtXZPI";
 
 	BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1);
 
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index b371ef8206e1..3c8b7f625670 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -160,7 +160,7 @@ TRACE_EVENT(sched_switch,
 		  __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
 				{ 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" },
 				{ 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" },
-				{ 0x40, "I" }) :
+				{ 0x40, "P" }, { 0x80, "I" }) :
 		  "R",
 
 		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
-- 
cgit 


From 520a13c530aeb5f63e011d668c42db1af19ed349 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 28 Sep 2017 16:58:26 -0500
Subject: x86/asm: Fix inline asm call constraints for GCC 4.4

The kernel test bot (run by Xiaolong Ye) reported that the following commit:

  f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang")

is causing double faults in a kernel compiled with GCC 4.4.

Linus subsequently diagnosed the crash pattern and the buggy commit and found that
the issue is with this code:

  register unsigned int __asm_call_sp asm("esp");
  #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)

Even on a 64-bit kernel, it's using ESP instead of RSP.  That causes GCC
to produce the following bogus code:

  ffffffff8147461d:       89 e0                   mov    %esp,%eax
  ffffffff8147461f:       4c 89 f7                mov    %r14,%rdi
  ffffffff81474622:       4c 89 fe                mov    %r15,%rsi
  ffffffff81474625:       ba 20 00 00 00          mov    $0x20,%edx
  ffffffff8147462a:       89 c4                   mov    %eax,%esp
  ffffffff8147462c:       e8 bf 52 05 00          callq  ffffffff814c98f0 <copy_user_generic_unrolled>

Despite the absurdity of it backing up and restoring the stack pointer
for no reason, the bug is actually the fact that it's only backing up
and restoring the lower 32 bits of the stack pointer.  The upper 32 bits
are getting cleared out, corrupting the stack pointer.

So change the '__asm_call_sp' register variable to be associated with
the actual full-size stack pointer.

This also requires changing the __ASM_SEL() macro to be based on the
actual compiled arch size, rather than the CONFIG value, because
CONFIG_X86_64 compiles some files with '-m32' (e.g., realmode and vdso).
Otherwise Clang fails to build the kernel because it complains about the
use of a 64-bit register (RSP) in a 32-bit file.

Reported-and-Bisected-and-Tested-by: kernel test robot <xiaolong.ye@intel.com>
Diagnosed-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: LKP <lkp@01.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang")
Link: http://lkml.kernel.org/r/20170928215826.6sdpmwtkiydiytim@treble
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/asm.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index c1eadbaf1115..30c3c9ac784a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -11,10 +11,12 @@
 # define __ASM_FORM_COMMA(x) " " #x ","
 #endif
 
-#ifdef CONFIG_X86_32
+#ifndef __x86_64__
+/* 32 bit */
 # define __ASM_SEL(a,b) __ASM_FORM(a)
 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
 #else
+/* 64 bit */
 # define __ASM_SEL(a,b) __ASM_FORM(b)
 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
 #endif
@@ -139,7 +141,7 @@
  * gets set up by the containing function.  If you forget to do this, objtool
  * may print a "call without frame pointer save/setup" warning.
  */
-register unsigned int __asm_call_sp asm("esp");
+register unsigned long __asm_call_sp asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
 #endif
 
-- 
cgit 


From 5ccba44ba118a5000cccc50076b0344632459779 Mon Sep 17 00:00:00 2001
From: Ethan Zhao <ethan.zhao@oracle.com>
Date: Mon, 4 Sep 2017 13:59:34 +0800
Subject: sched/sysctl: Check user input value of sysctl_sched_time_avg

System will hang if user set sysctl_sched_time_avg to 0:

  [root@XXX ~]# sysctl kernel.sched_time_avg_ms=0

  Stack traceback for pid 0
  0xffff883f6406c600 0 0 1 3 R 0xffff883f6406cf50 *swapper/3
  ffff883f7ccc3ae8 0000000000000018 ffffffff810c4dd0 0000000000000000
  0000000000017800 ffff883f7ccc3d78 0000000000000003 ffff883f7ccc3bf8
  ffffffff810c4fc9 ffff883f7ccc3c08 00000000810c5043 ffff883f7ccc3c08
  Call Trace:
  <IRQ> [<ffffffff810c4dd0>] ? update_group_capacity+0x110/0x200
  [<ffffffff810c4fc9>] ? update_sd_lb_stats+0x109/0x600
  [<ffffffff810c5507>] ? find_busiest_group+0x47/0x530
  [<ffffffff810c5b84>] ? load_balance+0x194/0x900
  [<ffffffff810ad5ca>] ? update_rq_clock.part.83+0x1a/0xe0
  [<ffffffff810c6d42>] ? rebalance_domains+0x152/0x290
  [<ffffffff810c6f5c>] ? run_rebalance_domains+0xdc/0x1d0
  [<ffffffff8108a75b>] ? __do_softirq+0xfb/0x320
  [<ffffffff8108ac85>] ? irq_exit+0x125/0x130
  [<ffffffff810b3a17>] ? scheduler_ipi+0x97/0x160
  [<ffffffff81052709>] ? smp_reschedule_interrupt+0x29/0x30
  [<ffffffff8173a1be>] ? reschedule_interrupt+0x6e/0x80
   <EOI> [<ffffffff815bc83c>] ? cpuidle_enter_state+0xcc/0x230
  [<ffffffff815bc80c>] ? cpuidle_enter_state+0x9c/0x230
  [<ffffffff815bc9d7>] ? cpuidle_enter+0x17/0x20
  [<ffffffff810cd6dc>] ? cpu_startup_entry+0x38c/0x420
  [<ffffffff81053373>] ? start_secondary+0x173/0x1e0

Because divide-by-zero error happens in function:

update_group_capacity()
  update_cpu_capacity()
    scale_rt_capacity()
     {
          ...
          total = sched_avg_period() + delta;
          used = div_u64(avg, total);
          ...
     }

To fix this issue, check user input value of sysctl_sched_time_avg, keep
it unchanged when hitting invalid input, and set the minimum limit of
sysctl_sched_time_avg to 1 ms.

Reported-by: James Puthukattukaran <james.puthukattukaran@oracle.com>
Signed-off-by: Ethan Zhao <ethan.zhao@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: efault@gmx.de
Cc: ethan.kernel@gmail.com
Cc: keescook@chromium.org
Cc: mcgrof@kernel.org
Cc: <stable@vger.kernel.org>
Link: http://lkml.kernel.org/r/1504504774-18253-1-git-send-email-ethan.zhao@oracle.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sysctl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6648fbbb8157..423554ad3610 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -367,7 +367,8 @@ static struct ctl_table kern_table[] = {
 		.data		= &sysctl_sched_time_avg,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
 	},
 #ifdef CONFIG_SCHEDSTATS
 	{
-- 
cgit 


From 305d0ab4764d36a02c8e7cddb67099aca65351ce Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 28 Sep 2017 18:16:44 -0700
Subject: KVM: nVMX: Fix nested #PF intends to break L1's vmlauch/vmresume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

------------[ cut here ]------------
 WARNING: CPU: 4 PID: 5280 at /home/kernel/linux/arch/x86/kvm//vmx.c:11394 nested_vmx_vmexit+0xc2b/0xd70 [kvm_intel]
 CPU: 4 PID: 5280 Comm: qemu-system-x86 Tainted: G        W  OE   4.13.0+ #17
 RIP: 0010:nested_vmx_vmexit+0xc2b/0xd70 [kvm_intel]
 Call Trace:
  ? emulator_read_emulated+0x15/0x20 [kvm]
  ? segmented_read+0xae/0xf0 [kvm]
  vmx_inject_page_fault_nested+0x60/0x70 [kvm_intel]
  ? vmx_inject_page_fault_nested+0x60/0x70 [kvm_intel]
  x86_emulate_instruction+0x733/0x810 [kvm]
  vmx_handle_exit+0x2f4/0xda0 [kvm_intel]
  ? kvm_arch_vcpu_ioctl_run+0xd2f/0x1c60 [kvm]
  kvm_arch_vcpu_ioctl_run+0xdab/0x1c60 [kvm]
  ? kvm_arch_vcpu_load+0x62/0x230 [kvm]
  kvm_vcpu_ioctl+0x340/0x700 [kvm]
  ? kvm_vcpu_ioctl+0x340/0x700 [kvm]
  ? __fget+0xfc/0x210
  do_vfs_ioctl+0xa4/0x6a0
  ? __fget+0x11d/0x210
  SyS_ioctl+0x79/0x90
  entry_SYSCALL_64_fastpath+0x23/0xc2

A nested #PF is triggered during L0 emulating instruction for L2. However, it
doesn't consider we should not break L1's vmlauch/vmresme. This patch fixes
it by queuing the #PF exception instead ,requesting an immediate VM exit from
L2 and keeping the exception for L1 pending for a subsequent nested VM exit.

This should actually work all the time, making vmx_inject_page_fault_nested
totally unnecessary.  However, that's not working yet, so this patch can work
around the issue in the meanwhile.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7f62c94196d1..5bfa353f6354 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9845,7 +9845,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 
 	WARN_ON(!is_guest_mode(vcpu));
 
-	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
+	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
+		!to_vmx(vcpu)->nested.nested_run_pending) {
 		vmcs12->vm_exit_intr_error_code = fault->error_code;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
 				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
-- 
cgit 


From b862789aa5186d5ea3a024b7cfe0f80c3a38b980 Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Fri, 29 Sep 2017 19:01:45 +0800
Subject: kvm/x86: Handle async PF in RCU read-side critical sections

Sasha Levin reported a WARNING:

| WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329
| rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline]
| WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329
| rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458
...
| CPU: 0 PID: 6974 Comm: syz-fuzzer Not tainted 4.13.0-next-20170908+ #246
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
| 1.10.1-1ubuntu1 04/01/2014
| Call Trace:
...
| RIP: 0010:rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline]
| RIP: 0010:rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458
| RSP: 0018:ffff88003b2debc8 EFLAGS: 00010002
| RAX: 0000000000000001 RBX: 1ffff1000765bd85 RCX: 0000000000000000
| RDX: 1ffff100075d7882 RSI: ffffffffb5c7da20 RDI: ffff88003aebc410
| RBP: ffff88003b2def30 R08: dffffc0000000000 R09: 0000000000000001
| R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003b2def08
| R13: 0000000000000000 R14: ffff88003aebc040 R15: ffff88003aebc040
| __schedule+0x201/0x2240 kernel/sched/core.c:3292
| schedule+0x113/0x460 kernel/sched/core.c:3421
| kvm_async_pf_task_wait+0x43f/0x940 arch/x86/kernel/kvm.c:158
| do_async_page_fault+0x72/0x90 arch/x86/kernel/kvm.c:271
| async_page_fault+0x22/0x30 arch/x86/entry/entry_64.S:1069
| RIP: 0010:format_decode+0x240/0x830 lib/vsprintf.c:1996
| RSP: 0018:ffff88003b2df520 EFLAGS: 00010283
| RAX: 000000000000003f RBX: ffffffffb5d1e141 RCX: ffff88003b2df670
| RDX: 0000000000000001 RSI: dffffc0000000000 RDI: ffffffffb5d1e140
| RBP: ffff88003b2df560 R08: dffffc0000000000 R09: 0000000000000000
| R10: ffff88003b2df718 R11: 0000000000000000 R12: ffff88003b2df5d8
| R13: 0000000000000064 R14: ffffffffb5d1e140 R15: 0000000000000000
| vsnprintf+0x173/0x1700 lib/vsprintf.c:2136
| sprintf+0xbe/0xf0 lib/vsprintf.c:2386
| proc_self_get_link+0xfb/0x1c0 fs/proc/self.c:23
| get_link fs/namei.c:1047 [inline]
| link_path_walk+0x1041/0x1490 fs/namei.c:2127
...

This happened when the host hit a page fault, and delivered it as in an
async page fault, while the guest was in an RCU read-side critical
section.  The guest then tries to reschedule in kvm_async_pf_task_wait(),
but rcu_preempt_note_context_switch() would treat the reschedule as a
sleep in RCU read-side critical section, which is not allowed (even in
preemptible RCU).  Thus the WARN.

To cure this, make kvm_async_pf_task_wait() go to the halt path if the
PF happens in a RCU read-side critical section.

Reported-by: Sasha Levin <levinsasha928@gmail.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kernel/kvm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index aa60a08b65b1..e675704fa6f7 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -140,7 +140,8 @@ void kvm_async_pf_task_wait(u32 token)
 
 	n.token = token;
 	n.cpu = smp_processor_id();
-	n.halted = is_idle_task(current) || preempt_count() > 1;
+	n.halted = is_idle_task(current) || preempt_count() > 1 ||
+		   rcu_preempt_depth();
 	init_swait_queue_head(&n.wq);
 	hlist_add_head(&n.link, &b->list);
 	raw_spin_unlock(&b->lock);
-- 
cgit 


From f069faba688701c4d56b6c3452a130f97bf02e95 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 29 Sep 2017 11:29:55 +0100
Subject: arm64: mm: Use READ_ONCE when dereferencing pointer to pte table

On kernels built with support for transparent huge pages, different CPUs
can access the PMD concurrently due to e.g. fast GUP or page_vma_mapped_walk
and they must take care to use READ_ONCE to avoid value tearing or caching
of stale values by the compiler. Unfortunately, these functions call into
our pgtable macros, which don't use READ_ONCE, and compiler caching has
been observed to cause the following crash during ext4 writeback:

PC is at check_pte+0x20/0x170
LR is at page_vma_mapped_walk+0x2e0/0x540
[...]
Process doio (pid: 2463, stack limit = 0xffff00000f2e8000)
Call trace:
[<ffff000008233328>] check_pte+0x20/0x170
[<ffff000008233758>] page_vma_mapped_walk+0x2e0/0x540
[<ffff000008234adc>] page_mkclean_one+0xac/0x278
[<ffff000008234d98>] rmap_walk_file+0xf0/0x238
[<ffff000008236e74>] rmap_walk+0x64/0xa0
[<ffff0000082370c8>] page_mkclean+0x90/0xa8
[<ffff0000081f3c64>] clear_page_dirty_for_io+0x84/0x2a8
[<ffff00000832f984>] mpage_submit_page+0x34/0x98
[<ffff00000832fb4c>] mpage_process_page_bufs+0x164/0x170
[<ffff00000832fc8c>] mpage_prepare_extent_to_map+0x134/0x2b8
[<ffff00000833530c>] ext4_writepages+0x484/0xe30
[<ffff0000081f6ab4>] do_writepages+0x44/0xe8
[<ffff0000081e5bd4>] __filemap_fdatawrite_range+0xbc/0x110
[<ffff0000081e5e68>] file_write_and_wait_range+0x48/0xd8
[<ffff000008324310>] ext4_sync_file+0x80/0x4b8
[<ffff0000082bd434>] vfs_fsync_range+0x64/0xc0
[<ffff0000082332b4>] SyS_msync+0x194/0x1e8

This is because page_vma_mapped_walk loads the PMD twice before calling
pte_offset_map: the first time without READ_ONCE (where it gets all zeroes
due to a concurrent pmdp_invalidate) and the second time with READ_ONCE
(where it sees a valid table pointer due to a concurrent pmd_populate).
However, the compiler inlines everything and caches the first value in
a register, which is subsequently used in pte_offset_phys which returns
a junk pointer that is later dereferenced when attempting to access the
relevant pte.

This patch fixes the issue by using READ_ONCE in pte_offset_phys to ensure
that a stale value is not used. Whilst this is a point fix for a known
failure (and simple to backport), a full fix moving all of our page table
accessors over to {READ,WRITE}_ONCE and consistently using READ_ONCE in
page_vma_mapped_walk is in the works for a future kernel release.

Cc: Jon Masters <jcm@redhat.com>
Cc: Timur Tabi <timur@codeaurora.org>
Cc: <stable@vger.kernel.org>
Fixes: f27176cfc363 ("mm: convert page_mkclean_one() to use page_vma_mapped_walk()")
Tested-by: Richard Ruigrok <rruigrok@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index bc4e92337d16..b46e54c2399b 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -401,7 +401,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 /* Find an entry in the third-level page table. */
 #define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
-#define pte_offset_phys(dir,addr)	(pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t))
+#define pte_offset_phys(dir,addr)	(pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
 #define pte_offset_kernel(dir,addr)	((pte_t *)__va(pte_offset_phys((dir), (addr))))
 
 #define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
-- 
cgit 


From 760bfb47c36a07741a089bf6a28e854ffbee7dc9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 29 Sep 2017 12:27:41 +0100
Subject: arm64: fault: Route pte translation faults via do_translation_fault

We currently route pte translation faults via do_page_fault, which elides
the address check against TASK_SIZE before invoking the mm fault handling
code. However, this can cause issues with the path walking code in
conjunction with our word-at-a-time implementation because
load_unaligned_zeropad can end up faulting in kernel space if it reads
across a page boundary and runs into a page fault (e.g. by attempting to
read from a guard region).

In the case of such a fault, load_unaligned_zeropad has registered a
fixup to shift the valid data and pad with zeroes, however the abort is
reported as a level 3 translation fault and we dispatch it straight to
do_page_fault, despite it being a kernel address. This results in calling
a sleeping function from atomic context:

  BUG: sleeping function called from invalid context at arch/arm64/mm/fault.c:313
  in_atomic(): 0, irqs_disabled(): 0, pid: 10290
  Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
  [...]
  [<ffffff8e016cd0cc>] ___might_sleep+0x134/0x144
  [<ffffff8e016cd158>] __might_sleep+0x7c/0x8c
  [<ffffff8e016977f0>] do_page_fault+0x140/0x330
  [<ffffff8e01681328>] do_mem_abort+0x54/0xb0
  Exception stack(0xfffffffb20247a70 to 0xfffffffb20247ba0)
  [...]
  [<ffffff8e016844fc>] el1_da+0x18/0x78
  [<ffffff8e017f399c>] path_parentat+0x44/0x88
  [<ffffff8e017f4c9c>] filename_parentat+0x5c/0xd8
  [<ffffff8e017f5044>] filename_create+0x4c/0x128
  [<ffffff8e017f59e4>] SyS_mkdirat+0x50/0xc8
  [<ffffff8e01684e30>] el0_svc_naked+0x24/0x28
  Code: 36380080 d5384100 f9400800 9402566d (d4210000)
  ---[ end trace 2d01889f2bca9b9f ]---

Fix this by dispatching all translation faults to do_translation_faults,
which avoids invoking the page fault logic for faults on kernel addresses.

Cc: <stable@vger.kernel.org>
Reported-by: Ankit Jain <ankijain@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/fault.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 89993c4be1be..2069e9bc0fca 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -651,7 +651,7 @@ static const struct fault_info fault_info[] = {
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 0 translation fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 1 translation fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},
-	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
+	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
 	{ do_bad,		SIGBUS,  0,		"unknown 8"			},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
-- 
cgit 


From bc829ee36e0ec92383c9d9b88fe08f00d4d592f8 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Fri, 29 Sep 2017 11:24:19 -0500
Subject: x86/mm: Disable branch profiling in mem_encrypt.c

Some routines in mem_encrypt.c are called very early in the boot process,
e.g. sme_encrypt_kernel(). When CONFIG_TRACE_BRANCH_PROFILING=y is defined
the resulting branch profiling associated with the check to see if SME is
active results in a kernel crash. Disable branch profiling for
mem_encrypt.c by defining DISABLE_BRANCH_PROFILING before including any
header files.

Reported-by: kernel test robot <lkp@01.org>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Acked-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170929162419.6016.53390.stgit@tlendack-t1.amdoffice.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/mem_encrypt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 3fcc8e01683b..16c5f37933a2 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
  */
 
+#define DISABLE_BRANCH_PROFILING
+
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-- 
cgit 


From 196bd485ee4f03ce4c690bfcf38138abfcd0a4bc Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Fri, 29 Sep 2017 17:15:36 +0300
Subject: x86/asm: Use register variable to get stack pointer value

Currently we use current_stack_pointer() function to get the value
of the stack pointer register. Since commit:

  f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang")

... we have a stack register variable declared. It can be used instead of
current_stack_pointer() function which allows to optimize away some
excessive "mov %rsp, %<dst>" instructions:

 -mov    %rsp,%rdx
 -sub    %rdx,%rax
 -cmp    $0x3fff,%rax
 -ja     ffffffff810722fd <ist_begin_non_atomic+0x2d>

 +sub    %rsp,%rax
 +cmp    $0x3fff,%rax
 +ja     ffffffff810722fa <ist_begin_non_atomic+0x2a>

Remove current_stack_pointer(), rename __asm_call_sp to current_stack_pointer
and use it instead of the removed function.

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170929141537.29167-1-aryabinin@virtuozzo.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/asm.h         |  4 ++--
 arch/x86/include/asm/thread_info.h | 11 -----------
 arch/x86/kernel/irq_32.c           |  6 +++---
 arch/x86/kernel/traps.c            |  2 +-
 arch/x86/mm/tlb.c                  |  2 +-
 5 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 30c3c9ac784a..b0dc91f4bedc 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -141,8 +141,8 @@
  * gets set up by the containing function.  If you forget to do this, objtool
  * may print a "call without frame pointer save/setup" warning.
  */
-register unsigned long __asm_call_sp asm(_ASM_SP);
-#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+register unsigned long current_stack_pointer asm(_ASM_SP);
+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif
 
 #endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 5161da1a0fa0..89e7eeb5cec1 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -158,17 +158,6 @@ struct thread_info {
  */
 #ifndef __ASSEMBLY__
 
-static inline unsigned long current_stack_pointer(void)
-{
-	unsigned long sp;
-#ifdef CONFIG_X86_64
-	asm("mov %%rsp,%0" : "=g" (sp));
-#else
-	asm("mov %%esp,%0" : "=g" (sp));
-#endif
-	return sp;
-}
-
 /*
  * Walks up the stack frames to make sure that the specified object is
  * entirely contained by a single stack frame.
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 1f38d9a4d9de..d4eb450144fd 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -64,7 +64,7 @@ static void call_on_stack(void *func, void *stack)
 
 static inline void *current_stack(void)
 {
-	return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+	return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
 }
 
 static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
@@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
 
 	/* Save the next esp at the bottom of the stack */
 	prev_esp = (u32 *)irqstk;
-	*prev_esp = current_stack_pointer();
+	*prev_esp = current_stack_pointer;
 
 	if (unlikely(overflow))
 		call_on_stack(print_stack_overflow, isp);
@@ -139,7 +139,7 @@ void do_softirq_own_stack(void)
 
 	/* Push the previous esp onto the stack */
 	prev_esp = (u32 *)irqstk;
-	*prev_esp = current_stack_pointer();
+	*prev_esp = current_stack_pointer;
 
 	call_on_stack(__do_softirq, isp);
 }
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 34ea3651362e..67db4f43309e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -142,7 +142,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
 	 * from double_fault.
 	 */
 	BUG_ON((unsigned long)(current_top_of_stack() -
-			       current_stack_pointer()) >= THREAD_SIZE);
+			       current_stack_pointer) >= THREAD_SIZE);
 
 	preempt_enable_no_resched();
 }
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 93fe97cce581..49d9778376d7 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -191,7 +191,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			 * mapped in the new pgd, we'll double-fault.  Forcibly
 			 * map it.
 			 */
-			unsigned int index = pgd_index(current_stack_pointer());
+			unsigned int index = pgd_index(current_stack_pointer);
 			pgd_t *pgd = next->pgd + index;
 
 			if (unlikely(pgd_none(*pgd)))
-- 
cgit 


From 6c85501f2fabcfc4fc6ed976543d252c4eaf4be9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 29 Sep 2017 13:43:15 -0400
Subject: fix infoleak in waitid(2)

kernel_waitid() can return a PID, an error or 0.  rusage is filled in the first
case and waitid(2) rusage should've been copied out exactly in that case, *not*
whenever kernel_waitid() has not returned an error.  Compat variant shares that
braino; none of kernel_wait4() callers do, so the below ought to fix it.

Reported-and-tested-by: Alexander Potapenko <glider@google.com>
Fixes: ce72a16fa705 ("wait4(2)/waitid(2): separate copying rusage to userland")
Cc: stable@vger.kernel.org # v4.13
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/exit.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index 3481ababd06a..f2cd53e92147 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1600,12 +1600,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 	struct waitid_info info = {.status = 0};
 	long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
 	int signo = 0;
+
 	if (err > 0) {
 		signo = SIGCHLD;
 		err = 0;
-	}
-
-	if (!err) {
 		if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
 			return -EFAULT;
 	}
@@ -1723,16 +1721,15 @@ COMPAT_SYSCALL_DEFINE5(waitid,
 	if (err > 0) {
 		signo = SIGCHLD;
 		err = 0;
-	}
-
-	if (!err && uru) {
-		/* kernel_waitid() overwrites everything in ru */
-		if (COMPAT_USE_64BIT_TIME)
-			err = copy_to_user(uru, &ru, sizeof(ru));
-		else
-			err = put_compat_rusage(&ru, uru);
-		if (err)
-			return -EFAULT;
+		if (uru) {
+			/* kernel_waitid() overwrites everything in ru */
+			if (COMPAT_USE_64BIT_TIME)
+				err = copy_to_user(uru, &ru, sizeof(ru));
+			else
+				err = put_compat_rusage(&ru, uru);
+			if (err)
+				return -EFAULT;
+		}
 	}
 
 	if (!infop)
-- 
cgit 


From 9e66317d3c92ddaab330c125dfe9d06eee268aff Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 1 Oct 2017 14:54:54 -0700
Subject: Linux 4.14-rc3

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index f0c5b21fadb2..cf007a31d575 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
-- 
cgit 


From d522bb6a105ffabeb765ad890062199fd4d18245 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 4 Oct 2017 17:53:36 -0700
Subject: ALSA: sh: aica: Convert timers to use timer_setup()

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly. This requires adding a pointer to
hold the timer's target substream, as there won't be a way to pass this in
the future.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/sh/aica.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/sound/sh/aica.c b/sound/sh/aica.c
index fdc680ae8aa0..2b26311405a4 100644
--- a/sound/sh/aica.c
+++ b/sound/sh/aica.c
@@ -299,14 +299,14 @@ static void run_spu_dma(struct work_struct *work)
 	}
 }
 
-static void aica_period_elapsed(unsigned long timer_var)
+static void aica_period_elapsed(struct timer_list *t)
 {
+	struct snd_card_aica *dreamcastcard = from_timer(dreamcastcard,
+							      t, timer);
+	struct snd_pcm_substream *substream = dreamcastcard->timer_substream;
 	/*timer function - so cannot sleep */
 	int play_period;
 	struct snd_pcm_runtime *runtime;
-	struct snd_pcm_substream *substream;
-	struct snd_card_aica *dreamcastcard;
-	substream = (struct snd_pcm_substream *) timer_var;
 	runtime = substream->runtime;
 	dreamcastcard = substream->pcm->private_data;
 	/* Have we played out an additional period? */
@@ -336,12 +336,12 @@ static void spu_begin_dma(struct snd_pcm_substream *substream)
 	/*get the queue to do the work */
 	schedule_work(&(dreamcastcard->spu_dma_work));
 	/* Timer may already be running */
-	if (unlikely(dreamcastcard->timer.data)) {
+	if (unlikely(dreamcastcard->timer_substream)) {
 		mod_timer(&dreamcastcard->timer, jiffies + 4);
 		return;
 	}
-	setup_timer(&dreamcastcard->timer, aica_period_elapsed,
-		    (unsigned long) substream);
+	timer_setup(&dreamcastcard->timer, aica_period_elapsed, 0);
+	dreamcastcard->timer_substream = substream;
 	mod_timer(&dreamcastcard->timer, jiffies + 4);
 }
 
@@ -379,7 +379,7 @@ static int snd_aicapcm_pcm_close(struct snd_pcm_substream
 {
 	struct snd_card_aica *dreamcastcard = substream->pcm->private_data;
 	flush_work(&(dreamcastcard->spu_dma_work));
-	if (dreamcastcard->timer.data)
+	if (dreamcastcard->timer_substream)
 		del_timer(&dreamcastcard->timer);
 	kfree(dreamcastcard->channel);
 	spu_disable();
@@ -600,7 +600,7 @@ static int snd_aica_probe(struct platform_device *devptr)
 {
 	int err;
 	struct snd_card_aica *dreamcastcard;
-	dreamcastcard = kmalloc(sizeof(struct snd_card_aica), GFP_KERNEL);
+	dreamcastcard = kzalloc(sizeof(struct snd_card_aica), GFP_KERNEL);
 	if (unlikely(!dreamcastcard))
 		return -ENOMEM;
 	err = snd_card_new(&devptr->dev, index, SND_AICA_DRIVER,
@@ -619,8 +619,6 @@ static int snd_aica_probe(struct platform_device *devptr)
 	err = snd_aicapcmchip(dreamcastcard, 0);
 	if (unlikely(err < 0))
 		goto freedreamcast;
-	dreamcastcard->timer.data = 0;
-	dreamcastcard->channel = NULL;
 	/* Add basic controls */
 	err = add_aicamixer_controls(dreamcastcard);
 	if (unlikely(err < 0))
-- 
cgit 


From 38e9a80f66beb108d30f47dc856dd17b983c3dd6 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 4 Oct 2017 17:53:33 -0700
Subject: ALSA: timer: Convert timers to use timer_setup()

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly. This adds a pointer back to struct
snd_timer.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/timer.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/sound/core/timer.c b/sound/core/timer.c
index 6cdd04a45962..09acaf2b2e57 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -1028,15 +1028,17 @@ EXPORT_SYMBOL(snd_timer_global_register);
 
 struct snd_timer_system_private {
 	struct timer_list tlist;
+	struct snd_timer *snd_timer;
 	unsigned long last_expires;
 	unsigned long last_jiffies;
 	unsigned long correction;
 };
 
-static void snd_timer_s_function(unsigned long data)
+static void snd_timer_s_function(struct timer_list *t)
 {
-	struct snd_timer *timer = (struct snd_timer *)data;
-	struct snd_timer_system_private *priv = timer->private_data;
+	struct snd_timer_system_private *priv = from_timer(priv, t,
+								tlist);
+	struct snd_timer *timer = priv->snd_timer;
 	unsigned long jiff = jiffies;
 	if (time_after(jiff, priv->last_expires))
 		priv->correction += (long)jiff - (long)priv->last_expires;
@@ -1118,7 +1120,8 @@ static int snd_timer_register_system(void)
 		snd_timer_free(timer);
 		return -ENOMEM;
 	}
-	setup_timer(&priv->tlist, snd_timer_s_function, (unsigned long) timer);
+	priv->snd_timer = timer;
+	timer_setup(&priv->tlist, snd_timer_s_function, 0);
 	timer->private_data = priv;
 	timer->private_free = snd_timer_free_system;
 	return snd_timer_global_register(timer);
-- 
cgit 


From 394ca81cb4c1518e9463fe342fb1ae8a9f46a82d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 4 Oct 2017 17:53:40 -0700
Subject: ALSA: asihpi: Convert timers to use timer_setup()

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/asihpi/asihpi.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c
index 70d023a85bf5..614322171c79 100644
--- a/sound/pci/asihpi/asihpi.c
+++ b/sound/pci/asihpi/asihpi.c
@@ -749,9 +749,9 @@ static inline unsigned int modulo_min(unsigned int a, unsigned int b,
 
 /** Timer function, equivalent to interrupt service routine for cards
 */
-static void snd_card_asihpi_timer_function(unsigned long data)
+static void snd_card_asihpi_timer_function(struct timer_list *t)
 {
-	struct snd_card_asihpi_pcm *dpcm = (struct snd_card_asihpi_pcm *)data;
+	struct snd_card_asihpi_pcm *dpcm = from_timer(dpcm, t, timer);
 	struct snd_pcm_substream *substream = dpcm->substream;
 	struct snd_card_asihpi *card = snd_pcm_substream_chip(substream);
 	struct snd_pcm_runtime *runtime;
@@ -948,7 +948,7 @@ static void snd_card_asihpi_int_task(unsigned long data)
 	asihpi = (struct snd_card_asihpi *)a->snd_card->private_data;
 	if (asihpi->llmode_streampriv)
 		snd_card_asihpi_timer_function(
-			(unsigned long)asihpi->llmode_streampriv);
+			&asihpi->llmode_streampriv->timer);
 }
 
 static void snd_card_asihpi_isr(struct hpi_adapter *a)
@@ -1059,8 +1059,7 @@ static int snd_card_asihpi_playback_open(struct snd_pcm_substream *substream)
 	    If internal and other stream playing, can't switch
 	*/
 
-	setup_timer(&dpcm->timer, snd_card_asihpi_timer_function,
-		    (unsigned long) dpcm);
+	timer_setup(&dpcm->timer, snd_card_asihpi_timer_function, 0);
 	dpcm->substream = substream;
 	runtime->private_data = dpcm;
 	runtime->private_free = snd_card_asihpi_runtime_free;
@@ -1240,8 +1239,7 @@ static int snd_card_asihpi_capture_open(struct snd_pcm_substream *substream)
 	if (err)
 		return -EIO;
 
-	setup_timer(&dpcm->timer, snd_card_asihpi_timer_function,
-		    (unsigned long) dpcm);
+	timer_setup(&dpcm->timer, snd_card_asihpi_timer_function, 0);
 	dpcm->substream = substream;
 	runtime->private_data = dpcm;
 	runtime->private_free = snd_card_asihpi_runtime_free;
-- 
cgit