From 19e1d4e947cac3b5e08225d15ad7744e691c7376 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 9 Oct 2017 12:41:36 +0200
Subject: genirq: Warn when effective affinity is not updated

Emit a one time warning when the effective affinity mask is enabled in
Kconfig, but the interrupt chip does not update the mask in its
irq_set_affinity() callback,

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1710042208400.2406@nanos
---
 kernel/irq/manage.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d00132b5c325..ef89f7246656 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -168,6 +168,19 @@ void irq_set_thread_affinity(struct irq_desc *desc)
 			set_bit(IRQTF_AFFINITY, &action->thread_flags);
 }
 
+static void irq_validate_effective_affinity(struct irq_data *data)
+{
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	const struct cpumask *m = irq_data_get_effective_affinity_mask(data);
+	struct irq_chip *chip = irq_data_get_irq_chip(data);
+
+	if (!cpumask_empty(m))
+		return;
+	pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n",
+		     chip->name, data->irq);
+#endif
+}
+
 int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 			bool force)
 {
@@ -181,6 +194,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	case IRQ_SET_MASK_OK_DONE:
 		cpumask_copy(desc->irq_common_data.affinity, mask);
 	case IRQ_SET_MASK_OK_NOCOPY:
+		irq_validate_effective_affinity(data);
 		irq_set_thread_affinity(desc);
 		ret = 0;
 	}
-- 
cgit 


From 60b09c51bb4fb46e2331fdbb39f91520f31d35f7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 9 Oct 2017 12:47:24 +0200
Subject: genirq/cpuhotplug: Add sanity check for effective affinity mask

The effective affinity mask handling has no safety net when the mask is not
updated by the interrupt chip or the mask contains offline CPUs.

If that happens the CPU unplug code fails to migrate interrupts.

Add sanity checks and emit a warning when the mask contains only offline
CPUs.

Fixes: 415fcf1a2293 ("genirq/cpuhotplug: Use effective affinity mask")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1710042208400.2406@nanos
---
 kernel/irq/cpuhotplug.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 638eb9c83d9f..9eb09aef0313 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -18,8 +18,34 @@
 static inline bool irq_needs_fixup(struct irq_data *d)
 {
 	const struct cpumask *m = irq_data_get_effective_affinity_mask(d);
+	unsigned int cpu = smp_processor_id();
 
-	return cpumask_test_cpu(smp_processor_id(), m);
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	/*
+	 * The cpumask_empty() check is a workaround for interrupt chips,
+	 * which do not implement effective affinity, but the architecture has
+	 * enabled the config switch. Use the general affinity mask instead.
+	 */
+	if (cpumask_empty(m))
+		m = irq_data_get_affinity_mask(d);
+
+	/*
+	 * Sanity check. If the mask is not empty when excluding the outgoing
+	 * CPU then it must contain at least one online CPU. The outgoing CPU
+	 * has been removed from the online mask already.
+	 */
+	if (cpumask_any_but(m, cpu) < nr_cpu_ids &&
+	    cpumask_any_and(m, cpu_online_mask) >= nr_cpu_ids) {
+		/*
+		 * If this happens then there was a missed IRQ fixup at some
+		 * point. Warn about it and enforce fixup.
+		 */
+		pr_warn("Eff. affinity %*pbl of IRQ %u contains only offline CPUs after offlining CPU %u\n",
+			cpumask_pr_args(m), d->irq, cpu);
+		return true;
+	}
+#endif
+	return cpumask_test_cpu(cpu, m);
 }
 
 static bool migrate_one_irq(struct irq_desc *desc)
-- 
cgit 


From e43b3b58548051f8809391eb7bec7a27ed3003ea Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 4 Oct 2017 21:07:38 +0200
Subject: genirq/cpuhotplug: Enforce affinity setting on startup of managed
 irqs

Managed interrupts can end up in a stale state on CPU hotplug. If the
interrupt is not targeting a single CPU, i.e. the affinity mask spawns
multiple CPUs then the following can happen:

After boot:

dstate:   0x01601200
            IRQD_ACTIVATED
            IRQD_IRQ_STARTED
            IRQD_SINGLE_TARGET
            IRQD_AFFINITY_SET
            IRQD_AFFINITY_MANAGED
node:     0
affinity: 24-31
effectiv: 24
pending:  0

After offlining CPU 31 - 24

dstate:   0x01a31000
            IRQD_IRQ_DISABLED
            IRQD_IRQ_MASKED
            IRQD_SINGLE_TARGET
            IRQD_AFFINITY_SET
            IRQD_AFFINITY_MANAGED
            IRQD_MANAGED_SHUTDOWN
node:     0
affinity: 24-31
effectiv: 24
pending:  0

Now CPU 25 gets onlined again, so it should get the effective interrupt
affinity for this interruopt, but due to the x86 interrupt affinity setter
restrictions this ends up after restarting the interrupt with:

dstate:   0x01601300
            IRQD_ACTIVATED
            IRQD_IRQ_STARTED
            IRQD_SINGLE_TARGET
            IRQD_AFFINITY_SET
            IRQD_SETAFFINITY_PENDING
            IRQD_AFFINITY_MANAGED
node:     0
affinity: 24-31
effectiv: 24
pending:  24-31

So the interrupt is still affine to CPU 24, which was the last CPU to go
offline of that affinity set and the move to an online CPU within 24-31,
in this case 25, is pending. This mechanism is x86/ia64 specific as those
architectures cannot move interrupts from thread context and do this when
an interrupt is actually handled. So the move is set to pending.

Whats worse is that offlining CPU 25 again results in:

dstate:   0x01601300
            IRQD_ACTIVATED
            IRQD_IRQ_STARTED
            IRQD_SINGLE_TARGET
            IRQD_AFFINITY_SET
            IRQD_SETAFFINITY_PENDING
            IRQD_AFFINITY_MANAGED
node:     0
affinity: 24-31
effectiv: 24
pending:  24-31

This means the interrupt has not been shut down, because the outgoing CPU
is not in the effective affinity mask, but of course nothing notices that
the effective affinity mask is pointing at an offline CPU.

In the case of restarting a managed interrupt the move restriction does not
apply, so the affinity setting can be made unconditional. This needs to be
done _before_ the interrupt is started up as otherwise the condition for
moving it from thread context would not longer be fulfilled.

With that change applied onlining CPU 25 after offlining 31-24 results in:

dstate:   0x01600200
            IRQD_ACTIVATED
            IRQD_IRQ_STARTED
            IRQD_SINGLE_TARGET
            IRQD_AFFINITY_MANAGED
node:     0
affinity: 24-31
effectiv: 25
pending:

And after offlining CPU 25:

dstate:   0x01a30000
            IRQD_IRQ_DISABLED
            IRQD_IRQ_MASKED
            IRQD_SINGLE_TARGET
            IRQD_AFFINITY_MANAGED
            IRQD_MANAGED_SHUTDOWN
node:     0
affinity: 24-31
effectiv: 25
pending:

which is the correct and expected result.

Fixes: 761ea388e8c4 ("genirq: Handle managed irqs gracefully in irq_startup()")
Reported-by: YASUAKI ISHIMATSU <yasu.isimatu@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: axboe@kernel.dk
Cc: linux-scsi@vger.kernel.org
Cc: Sumit Saxena <sumit.saxena@broadcom.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: mpe@ellerman.id.au
Cc: Shivasharan Srikanteshwara <shivasharan.srikanteshwara@broadcom.com>
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: keith.busch@intel.com
Cc: peterz@infradead.org
Cc: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1710042208400.2406@nanos
---
 kernel/irq/chip.c   | 2 +-
 kernel/irq/manage.c | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6fc89fd93824..5a2ef92c2782 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -265,8 +265,8 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force)
 			irq_setup_affinity(desc);
 			break;
 		case IRQ_STARTUP_MANAGED:
+			irq_do_set_affinity(d, aff, false);
 			ret = __irq_startup(desc);
-			irq_set_affinity_locked(d, aff, false);
 			break;
 		case IRQ_STARTUP_ABORT:
 			return 0;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ef89f7246656..4bff6a10ae8e 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -188,6 +188,9 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	struct irq_chip *chip = irq_data_get_irq_chip(data);
 	int ret;
 
+	if (!chip || !chip->irq_set_affinity)
+		return -EINVAL;
+
 	ret = chip->irq_set_affinity(data, mask, force);
 	switch (ret) {
 	case IRQ_SET_MASK_OK:
-- 
cgit 


From 96ca579a1ecc943b75beba58bebb0356f6cc4b51 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 9 Oct 2017 11:36:52 -0700
Subject: waitid(): Add missing access_ok() checks

Adds missing access_ok() checks.

CVE-2017-5123

Reported-by: Chris Salls <chrissalls5@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Fixes: 4c48abe91be0 ("waitid(): switch copyout of siginfo to unsafe_put_user()")
Cc: stable@kernel.org # 4.13
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/exit.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index f2cd53e92147..cf28528842bc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1610,6 +1610,9 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 	if (!infop)
 		return err;
 
+	if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
+		goto Efault;
+
 	user_access_begin();
 	unsafe_put_user(signo, &infop->si_signo, Efault);
 	unsafe_put_user(0, &infop->si_errno, Efault);
@@ -1735,6 +1738,9 @@ COMPAT_SYSCALL_DEFINE5(waitid,
 	if (!infop)
 		return err;
 
+	if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
+		goto Efault;
+
 	user_access_begin();
 	unsafe_put_user(signo, &infop->si_signo, Efault);
 	unsafe_put_user(0, &infop->si_errno, Efault);
-- 
cgit 


From fbb1fb4ad415cb31ce944f65a5ca700aaf73a227 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 8 Oct 2017 21:44:52 -0700
Subject: net: defer call to cgroup_sk_alloc()

sk_clone_lock() might run while TCP/DCCP listener already vanished.

In order to prevent use after free, it is better to defer cgroup_sk_alloc()
to the point we know both parent and child exist, and from process context.

Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/cgroup/cgroup.c | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 44857278eb8a..3380a3e49af5 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5709,17 +5709,6 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 	if (cgroup_sk_alloc_disabled)
 		return;
 
-	/* Socket clone path */
-	if (skcd->val) {
-		/*
-		 * We might be cloning a socket which is left in an empty
-		 * cgroup and the cgroup might have already been rmdir'd.
-		 * Don't use cgroup_get_live().
-		 */
-		cgroup_get(sock_cgroup_ptr(skcd));
-		return;
-	}
-
 	rcu_read_lock();
 
 	while (true) {
-- 
cgit 


From 8b405d5c5d0996d3d16f70c42744a0500f5b6ec3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 4 Oct 2017 11:13:37 +0200
Subject: locking/lockdep: Fix stacktrace mess

There is some complication between check_prevs_add() and
check_prev_add() wrt. saving stack traces. The problem is that we want
to be frugal with saving stack traces, since it consumes static
resources.

We'll only know in check_prev_add() if we need the trace, but we can
call into it multiple times. So we want to do on-demand and re-use.

A further complication is that check_prev_add() can drop graph_lock
and mess with our static resources.

In any case, the current state; after commit:

  ce07a9415f26 ("locking/lockdep: Make check_prev_add() able to handle external stack_trace")

is that we'll assume the trace contains valid data once
check_prev_add() returns '2'. However, as noted by Josh, this is
false, check_prev_add() can return '2' before having saved a trace,
this then result in the possibility of using uninitialized data.
Testing, as reported by Wu, shows a NULL deref.

So simplify.

Since the graph_lock() thing is a debug path that hasn't
really been used in a long while, take it out back and avoid the
head-ache.

Further initialize the stack_trace to a known 'empty' state; as long
as nr_entries == 0, nothing should deref entries. We can then use the
'entries == NULL' test for a valid trace / on-demand saving.

Analyzed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Byungchul Park <byungchul.park@lge.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: ce07a9415f26 ("locking/lockdep: Make check_prev_add() able to handle external stack_trace")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/lockdep.c | 48 ++++++++++++++++++++----------------------------
 1 file changed, 20 insertions(+), 28 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 44c8d0d17170..e36e652d996f 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1873,10 +1873,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	       struct held_lock *next, int distance, struct stack_trace *trace,
 	       int (*save)(struct stack_trace *trace))
 {
+	struct lock_list *uninitialized_var(target_entry);
 	struct lock_list *entry;
-	int ret;
 	struct lock_list this;
-	struct lock_list *uninitialized_var(target_entry);
+	int ret;
 
 	/*
 	 * Prove that the new <prev> -> <next> dependency would not
@@ -1890,8 +1890,17 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	this.class = hlock_class(next);
 	this.parent = NULL;
 	ret = check_noncircular(&this, hlock_class(prev), &target_entry);
-	if (unlikely(!ret))
+	if (unlikely(!ret)) {
+		if (!trace->entries) {
+			/*
+			 * If @save fails here, the printing might trigger
+			 * a WARN but because of the !nr_entries it should
+			 * not do bad things.
+			 */
+			save(trace);
+		}
 		return print_circular_bug(&this, target_entry, next, prev, trace);
+	}
 	else if (unlikely(ret < 0))
 		return print_bfs_bug(ret);
 
@@ -1938,7 +1947,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 		return print_bfs_bug(ret);
 
 
-	if (save && !save(trace))
+	if (!trace->entries && !save(trace))
 		return 0;
 
 	/*
@@ -1958,20 +1967,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	if (!ret)
 		return 0;
 
-	/*
-	 * Debugging printouts:
-	 */
-	if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
-		graph_unlock();
-		printk("\n new dependency: ");
-		print_lock_name(hlock_class(prev));
-		printk(KERN_CONT " => ");
-		print_lock_name(hlock_class(next));
-		printk(KERN_CONT "\n");
-		dump_stack();
-		if (!graph_lock())
-			return 0;
-	}
 	return 2;
 }
 
@@ -1986,8 +1981,12 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 {
 	int depth = curr->lockdep_depth;
 	struct held_lock *hlock;
-	struct stack_trace trace;
-	int (*save)(struct stack_trace *trace) = save_trace;
+	struct stack_trace trace = {
+		.nr_entries = 0,
+		.max_entries = 0,
+		.entries = NULL,
+		.skip = 0,
+	};
 
 	/*
 	 * Debugging checks.
@@ -2018,17 +2017,10 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 			 */
 			if (hlock->read != 2 && hlock->check) {
 				int ret = check_prev_add(curr, hlock, next,
-							 distance, &trace, save);
+							 distance, &trace, save_trace);
 				if (!ret)
 					return 0;
 
-				/*
-				 * Stop saving stack_trace if save_trace() was
-				 * called at least once:
-				 */
-				if (save && ret == 2)
-					save = NULL;
-
 				/*
 				 * Stop after the first non-trylock entry,
 				 * as non-trylock entries have added their
-- 
cgit 


From df0062b27ebf473b372914a3e3574d93790e2b72 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 3 Oct 2017 15:20:50 +0100
Subject: perf/core: Avoid freeing static PMU contexts when PMU is unregistered

Since commit:

  1fd7e4169954 ("perf/core: Remove perf_cpu_context::unique_pmu")

... when a PMU is unregistered then its associated ->pmu_cpu_context is
unconditionally freed. Whilst this is fine for dynamically allocated
context types (i.e. those registered using perf_invalid_context), this
causes a problem for sharing of static contexts such as
perf_{sw,hw}_context, which are used by multiple built-in PMUs and
effectively have a global lifetime.

Whilst testing the ARM SPE driver, which must use perf_sw_context to
support per-task AUX tracing, unregistering the driver as a result of a
module unload resulted in:

 Unable to handle kernel NULL pointer dereference at virtual address 00000038
 Internal error: Oops: 96000004 [#1] PREEMPT SMP
 Modules linked in: [last unloaded: arm_spe_pmu]
 PC is at ctx_resched+0x38/0xe8
 LR is at perf_event_exec+0x20c/0x278
 [...]
 ctx_resched+0x38/0xe8
 perf_event_exec+0x20c/0x278
 setup_new_exec+0x88/0x118
 load_elf_binary+0x26c/0x109c
 search_binary_handler+0x90/0x298
 do_execveat_common.isra.14+0x540/0x618
 SyS_execve+0x38/0x48

since the software context has been freed and the ctx.pmu->pmu_disable_count
field has been set to NULL.

This patch fixes the problem by avoiding the freeing of static PMU contexts
altogether. Whilst the sharing of dynamic contexts is questionable, this
actually requires the caller to share their context pointer explicitly
and so the burden is on them to manage the object lifetime.

Reported-by: Kim Phillips <kim.phillips@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 1fd7e4169954 ("perf/core: Remove perf_cpu_context::unique_pmu")
Link: http://lkml.kernel.org/r/1507040450-7730-1-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6bc21e202ae4..243bfc68d0fe 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8955,6 +8955,14 @@ static struct perf_cpu_context __percpu *find_pmu_context(int ctxn)
 
 static void free_pmu_context(struct pmu *pmu)
 {
+	/*
+	 * Static contexts such as perf_sw_context have a global lifetime
+	 * and may be shared between different PMUs. Avoid freeing them
+	 * when a single PMU is going away.
+	 */
+	if (pmu->task_ctx_nr > perf_invalid_context)
+		return;
+
 	mutex_lock(&pmus_lock);
 	free_percpu(pmu->pmu_cpu_context);
 	mutex_unlock(&pmus_lock);
-- 
cgit 


From e6a5203399d19871021c1fa0eb2a08fc63b67e91 Mon Sep 17 00:00:00 2001
From: "leilei.lin" <leilei.lin@alibaba-inc.com>
Date: Fri, 29 Sep 2017 13:54:44 +0800
Subject: perf/core: Fix cgroup time when scheduling descendants

Update cgroup time when an event is scheduled in by descendants.

Reviewed-and-tested-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: leilei.lin <leilei.lin@alibaba-inc.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: alexander.shishkin@linux.intel.com
Cc: brendan.d.gregg@gmail.com
Cc: yang_oliver@hotmail.com
Link: http://lkml.kernel.org/r/CALPjY3mkHiekRkRECzMi9G-bjUQOvOjVBAqxmWkTzc-g+0LwMg@mail.gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 243bfc68d0fe..9d93db81fa36 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -662,7 +662,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
 	/*
 	 * Do not update time when cgroup is not active
 	 */
-	if (cgrp == event->cgrp)
+       if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
 		__update_cgrp_time(event->cgrp);
 }
 
-- 
cgit 


From 692b48258dda7c302e777d7d5f4217244478f1f6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 9 Oct 2017 08:04:13 -0700
Subject: workqueue: replace pool->manager_arb mutex with a flag

Josef reported a HARDIRQ-safe -> HARDIRQ-unsafe lock order detected by
lockdep:

 [ 1270.472259] WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected
 [ 1270.472783] 4.14.0-rc1-xfstests-12888-g76833e8 #110 Not tainted
 [ 1270.473240] -----------------------------------------------------
 [ 1270.473710] kworker/u5:2/5157 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
 [ 1270.474239]  (&(&lock->wait_lock)->rlock){+.+.}, at: [<ffffffff8da253d2>] __mutex_unlock_slowpath+0xa2/0x280
 [ 1270.474994]
 [ 1270.474994] and this task is already holding:
 [ 1270.475440]  (&pool->lock/1){-.-.}, at: [<ffffffff8d2992f6>] worker_thread+0x366/0x3c0
 [ 1270.476046] which would create a new lock dependency:
 [ 1270.476436]  (&pool->lock/1){-.-.} -> (&(&lock->wait_lock)->rlock){+.+.}
 [ 1270.476949]
 [ 1270.476949] but this new dependency connects a HARDIRQ-irq-safe lock:
 [ 1270.477553]  (&pool->lock/1){-.-.}
 ...
 [ 1270.488900] to a HARDIRQ-irq-unsafe lock:
 [ 1270.489327]  (&(&lock->wait_lock)->rlock){+.+.}
 ...
 [ 1270.494735]  Possible interrupt unsafe locking scenario:
 [ 1270.494735]
 [ 1270.495250]        CPU0                    CPU1
 [ 1270.495600]        ----                    ----
 [ 1270.495947]   lock(&(&lock->wait_lock)->rlock);
 [ 1270.496295]                                local_irq_disable();
 [ 1270.496753]                                lock(&pool->lock/1);
 [ 1270.497205]                                lock(&(&lock->wait_lock)->rlock);
 [ 1270.497744]   <Interrupt>
 [ 1270.497948]     lock(&pool->lock/1);

, which will cause a irq inversion deadlock if the above lock scenario
happens.

The root cause of this safe -> unsafe lock order is the
mutex_unlock(pool->manager_arb) in manage_workers() with pool->lock
held.

Unlocking mutex while holding an irq spinlock was never safe and this
problem has been around forever but it never got noticed because the
only time the mutex is usually trylocked while holding irqlock making
actual failures very unlikely and lockdep annotation missed the
condition until the recent b9c16a0e1f73 ("locking/mutex: Fix
lockdep_assert_held() fail").

Using mutex for pool->manager_arb has always been a bit of stretch.
It primarily is an mechanism to arbitrate managership between workers
which can easily be done with a pool flag.  The only reason it became
a mutex is that pool destruction path wants to exclude parallel
managing operations.

This patch replaces the mutex with a new pool flag POOL_MANAGER_ACTIVE
and make the destruction path wait for the current manager on a wait
queue.

v2: Drop unnecessary flag clearing before pool destruction as
    suggested by Boqun.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: stable@vger.kernel.org
---
 kernel/workqueue.c | 37 +++++++++++++++----------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

(limited to 'kernel')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 64d0edf428f8..a2dccfe1acec 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -68,6 +68,7 @@ enum {
 	 * attach_mutex to avoid changing binding state while
 	 * worker_attach_to_pool() is in progress.
 	 */
+	POOL_MANAGER_ACTIVE	= 1 << 0,	/* being managed */
 	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */
 
 	/* worker flags */
@@ -165,7 +166,6 @@ struct worker_pool {
 						/* L: hash of busy workers */
 
 	/* see manage_workers() for details on the two manager mutexes */
-	struct mutex		manager_arb;	/* manager arbitration */
 	struct worker		*manager;	/* L: purely informational */
 	struct mutex		attach_mutex;	/* attach/detach exclusion */
 	struct list_head	workers;	/* A: attached workers */
@@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
+static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
 
 static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
@@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-	bool managing = mutex_is_locked(&pool->manager_arb);
+	bool managing = pool->flags & POOL_MANAGER_ACTIVE;
 	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 	int nr_busy = pool->nr_workers - nr_idle;
 
@@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker)
 {
 	struct worker_pool *pool = worker->pool;
 
-	/*
-	 * Anyone who successfully grabs manager_arb wins the arbitration
-	 * and becomes the manager.  mutex_trylock() on pool->manager_arb
-	 * failure while holding pool->lock reliably indicates that someone
-	 * else is managing the pool and the worker which failed trylock
-	 * can proceed to executing work items.  This means that anyone
-	 * grabbing manager_arb is responsible for actually performing
-	 * manager duties.  If manager_arb is grabbed and released without
-	 * actual management, the pool may stall indefinitely.
-	 */
-	if (!mutex_trylock(&pool->manager_arb))
+	if (pool->flags & POOL_MANAGER_ACTIVE)
 		return false;
+
+	pool->flags |= POOL_MANAGER_ACTIVE;
 	pool->manager = worker;
 
 	maybe_create_worker(pool);
 
 	pool->manager = NULL;
-	mutex_unlock(&pool->manager_arb);
+	pool->flags &= ~POOL_MANAGER_ACTIVE;
+	wake_up(&wq_manager_wait);
 	return true;
 }
 
@@ -3248,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool)
 	setup_timer(&pool->mayday_timer, pool_mayday_timeout,
 		    (unsigned long)pool);
 
-	mutex_init(&pool->manager_arb);
 	mutex_init(&pool->attach_mutex);
 	INIT_LIST_HEAD(&pool->workers);
 
@@ -3318,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool)
 	hash_del(&pool->hash_node);
 
 	/*
-	 * Become the manager and destroy all workers.  Grabbing
-	 * manager_arb prevents @pool's workers from blocking on
-	 * attach_mutex.
+	 * Become the manager and destroy all workers.  This prevents
+	 * @pool's workers from blocking on attach_mutex.  We're the last
+	 * manager and @pool gets freed with the flag set.
 	 */
-	mutex_lock(&pool->manager_arb);
-
 	spin_lock_irq(&pool->lock);
+	wait_event_lock_irq(wq_manager_wait,
+			    !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
+	pool->flags |= POOL_MANAGER_ACTIVE;
+
 	while ((worker = first_idle_worker(pool)))
 		destroy_worker(worker);
 	WARN_ON(pool->nr_workers || pool->nr_idle);
@@ -3338,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool)
 	if (pool->detach_completion)
 		wait_for_completion(pool->detach_completion);
 
-	mutex_unlock(&pool->manager_arb);
-
 	/* shut down the timers */
 	del_timer_sync(&pool->idle_timer);
 	del_timer_sync(&pool->mayday_timer);
-- 
cgit 


From 084f5601c357e4ee59cf0712200d3f5c4710ba40 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 29 Sep 2017 14:26:48 +0100
Subject: seccomp: make function __get_seccomp_filter static

The function __get_seccomp_filter is local to the source and does
not need to be in global scope, so make it static.

Cleans up sparse warning:
symbol '__get_seccomp_filter' was not declared. Should it be static?

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Fixes: 66a733ea6b61 ("seccomp: fix the usage of get/put_seccomp_filter() in seccomp_get_filter()")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/seccomp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index bb3a38005b9c..0ae832e13b97 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -473,7 +473,7 @@ static long seccomp_attach_filter(unsigned int flags,
 	return 0;
 }
 
-void __get_seccomp_filter(struct seccomp_filter *filter)
+static void __get_seccomp_filter(struct seccomp_filter *filter)
 {
 	/* Reference count is bounded by the number of total processes. */
 	refcount_inc(&filter->usage);
-- 
cgit 


From 75cb070960ade40fba5de32138390f3c85c90941 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 10 Oct 2017 19:12:32 -0700
Subject: Revert "net: defer call to cgroup_sk_alloc()"

This reverts commit fbb1fb4ad415cb31ce944f65a5ca700aaf73a227.

This was not the proper fix, lets cleanly revert it, so that
following patch can be carried to stable versions.

sock_cgroup_ptr() callers do not expect a NULL return value.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/cgroup/cgroup.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'kernel')

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 3380a3e49af5..44857278eb8a 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5709,6 +5709,17 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 	if (cgroup_sk_alloc_disabled)
 		return;
 
+	/* Socket clone path */
+	if (skcd->val) {
+		/*
+		 * We might be cloning a socket which is left in an empty
+		 * cgroup and the cgroup might have already been rmdir'd.
+		 * Don't use cgroup_get_live().
+		 */
+		cgroup_get(sock_cgroup_ptr(skcd));
+		return;
+	}
+
 	rcu_read_lock();
 
 	while (true) {
-- 
cgit 


From ef8daf8eeb5b8ab6bc356656163d19f20fb827ed Mon Sep 17 00:00:00 2001
From: Joe Lawrence <joe.lawrence@redhat.com>
Date: Mon, 2 Oct 2017 11:56:48 -0400
Subject: livepatch: unpatch all klp_objects if klp_module_coming fails

When an incoming module is considered for livepatching by
klp_module_coming(), it iterates over multiple patches and multiple
kernel objects in this order:

	list_for_each_entry(patch, &klp_patches, list) {
		klp_for_each_object(patch, obj) {

which means that if one of the kernel objects fails to patch,
klp_module_coming()'s error path needs to unpatch and cleanup any kernel
objects that were already patched by a previous patch.

Reported-by: Miroslav Benes <mbenes@suse.cz>
Suggested-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Joe Lawrence <joe.lawrence@redhat.com>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 kernel/livepatch/core.c | 60 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 23 deletions(-)

(limited to 'kernel')

diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index b9628e43c78f..bf8c8fd72589 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -830,6 +830,41 @@ int klp_register_patch(struct klp_patch *patch)
 }
 EXPORT_SYMBOL_GPL(klp_register_patch);
 
+/*
+ * Remove parts of patches that touch a given kernel module. The list of
+ * patches processed might be limited. When limit is NULL, all patches
+ * will be handled.
+ */
+static void klp_cleanup_module_patches_limited(struct module *mod,
+					       struct klp_patch *limit)
+{
+	struct klp_patch *patch;
+	struct klp_object *obj;
+
+	list_for_each_entry(patch, &klp_patches, list) {
+		if (patch == limit)
+			break;
+
+		klp_for_each_object(patch, obj) {
+			if (!klp_is_module(obj) || strcmp(obj->name, mod->name))
+				continue;
+
+			/*
+			 * Only unpatch the module if the patch is enabled or
+			 * is in transition.
+			 */
+			if (patch->enabled || patch == klp_transition_patch) {
+				pr_notice("reverting patch '%s' on unloading module '%s'\n",
+					  patch->mod->name, obj->mod->name);
+				klp_unpatch_object(obj);
+			}
+
+			klp_free_object_loaded(obj);
+			break;
+		}
+	}
+}
+
 int klp_module_coming(struct module *mod)
 {
 	int ret;
@@ -894,7 +929,7 @@ err:
 	pr_warn("patch '%s' failed for module '%s', refusing to load module '%s'\n",
 		patch->mod->name, obj->mod->name, obj->mod->name);
 	mod->klp_alive = false;
-	klp_free_object_loaded(obj);
+	klp_cleanup_module_patches_limited(mod, patch);
 	mutex_unlock(&klp_mutex);
 
 	return ret;
@@ -902,9 +937,6 @@ err:
 
 void klp_module_going(struct module *mod)
 {
-	struct klp_patch *patch;
-	struct klp_object *obj;
-
 	if (WARN_ON(mod->state != MODULE_STATE_GOING &&
 		    mod->state != MODULE_STATE_COMING))
 		return;
@@ -917,25 +949,7 @@ void klp_module_going(struct module *mod)
 	 */
 	mod->klp_alive = false;
 
-	list_for_each_entry(patch, &klp_patches, list) {
-		klp_for_each_object(patch, obj) {
-			if (!klp_is_module(obj) || strcmp(obj->name, mod->name))
-				continue;
-
-			/*
-			 * Only unpatch the module if the patch is enabled or
-			 * is in transition.
-			 */
-			if (patch->enabled || patch == klp_transition_patch) {
-				pr_notice("reverting patch '%s' on unloading module '%s'\n",
-					  patch->mod->name, obj->mod->name);
-				klp_unpatch_object(obj);
-			}
-
-			klp_free_object_loaded(obj);
-			break;
-		}
-	}
+	klp_cleanup_module_patches_limited(mod, NULL);
 
 	mutex_unlock(&klp_mutex);
 }
-- 
cgit 


From 20608924cc2e6bdeaf6f58ccbe9ddfe12dbfa082 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Wed, 4 Oct 2017 14:26:26 +0200
Subject: genirq: generic chip: Add irq_gc_mask_disable_and_ack_set()

The irq_gc_mask_disable_reg_and_ack() function name implies that it
provides the combined functions of irq_gc_mask_disable_reg() and
irq_gc_ack().  However, the implementation does not actually do
that since it writes the mask instead of the disable register. It
also does not maintain the mask cache which makes it inappropriate
to use with other masking functions.

In addition, commit 659fb32d1b67 ("genirq: replace irq_gc_ack() with
{set,clr}_bit variants (fwd)") effectively renamed irq_gc_ack() to
irq_gc_ack_set_bit() so this function probably should have also been
renamed at that time.

The generic chip code currently provides three functions for use
with the irq_mask member of the irq_chip structure and two functions
for use with the irq_ack member of the irq_chip structure. These
functions could be combined into six functions for use with the
irq_mask_ack member of the irq_chip structure.  However, since only
one of the combinations is currently used, only the function
irq_gc_mask_disable_and_ack_set() is added by this commit.

The '_reg' and '_bit' portions of the base function name were left
out of the new combined function name in an attempt to keep the
function name length manageable with the 80 character source code
line length while still allowing the distinct aspects of each
combination to be captured by the name.

If other combinations are desired in the future please add them to
the irq generic chip library at that time.

Signed-off-by: Doug Berger <opendmb@gmail.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 kernel/irq/generic-chip.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'kernel')

diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 5270a54b9fa4..ec5fe9a0cb05 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -150,6 +150,31 @@ void irq_gc_mask_disable_reg_and_ack(struct irq_data *d)
 	irq_gc_unlock(gc);
 }
 
+/**
+ * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt
+ * @d: irq_data
+ *
+ * This generic implementation of the irq_mask_ack method is for chips
+ * with separate enable/disable registers instead of a single mask
+ * register and where a pending interrupt is acknowledged by setting a
+ * bit.
+ *
+ * Note: This is the only permutation currently used.  Similar generic
+ * functions should be added here if other permutations are required.
+ */
+void irq_gc_mask_disable_and_ack_set(struct irq_data *d)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	struct irq_chip_type *ct = irq_data_get_chip_type(d);
+	u32 mask = d->mask;
+
+	irq_gc_lock(gc);
+	irq_reg_writel(gc, mask, ct->regs.disable);
+	*ct->mask_cache &= ~mask;
+	irq_reg_writel(gc, mask, ct->regs.ack);
+	irq_gc_unlock(gc);
+}
+
 /**
  * irq_gc_eoi - EOI interrupt
  * @d: irq_data
-- 
cgit 


From 0d08af35f16a0cc418ad2afde3bc5f70ace82705 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Wed, 4 Oct 2017 14:28:17 +0200
Subject: genirq: generic chip: remove irq_gc_mask_disable_reg_and_ack()

Any usage of the irq_gc_mask_disable_reg_and_ack() function has
been replaced with the desired functionality.

The incorrect and ambiguously named function is removed here to
prevent accidental misuse.

Signed-off-by: Doug Berger <opendmb@gmail.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 kernel/irq/generic-chip.c | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index ec5fe9a0cb05..c26c5bb6b491 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -134,22 +134,6 @@ void irq_gc_ack_clr_bit(struct irq_data *d)
 	irq_gc_unlock(gc);
 }
 
-/**
- * irq_gc_mask_disable_reg_and_ack - Mask and ack pending interrupt
- * @d: irq_data
- */
-void irq_gc_mask_disable_reg_and_ack(struct irq_data *d)
-{
-	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
-	struct irq_chip_type *ct = irq_data_get_chip_type(d);
-	u32 mask = d->mask;
-
-	irq_gc_lock(gc);
-	irq_reg_writel(gc, mask, ct->regs.mask);
-	irq_reg_writel(gc, mask, ct->regs.ack);
-	irq_gc_unlock(gc);
-}
-
 /**
  * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt
  * @d: irq_data
-- 
cgit 


From ca182551857cc2c1e6a2b7f1e72090a137a15008 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Fri, 13 Oct 2017 15:58:22 -0700
Subject: kmemleak: clear stale pointers from task stacks

Kmemleak considers any pointers on task stacks as references.  This
patch clears newly allocated and reused vmap stacks.

Link: http://lkml.kernel.org/r/150728990124.744199.8403409836394318684.stgit@buzz
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/fork.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index e702cb9ffbd8..07cc743698d3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -215,6 +215,10 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 		if (!s)
 			continue;
 
+#ifdef CONFIG_DEBUG_KMEMLEAK
+		/* Clear stale pointers from reused stack. */
+		memset(s->addr, 0, THREAD_SIZE);
+#endif
 		tsk->stack_vm_area = s;
 		return s->addr;
 	}
-- 
cgit 


From 28e33f9d78eefe98ea86673ab31e988b37a9a738 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 16 Oct 2017 11:16:55 -0700
Subject: bpf: disallow arithmetic operations on context pointer

Commit f1174f77b50c ("bpf/verifier: rework value tracking")
removed the crafty selection of which pointer types are
allowed to be modified.  This is OK for most pointer types
since adjust_ptr_min_max_vals() will catch operations on
immutable pointers.  One exception is PTR_TO_CTX which is
now allowed to be offseted freely.

The intent of aforementioned commit was to allow context
access via modified registers.  The offset passed to
->is_valid_access() verifier callback has been adjusted
by the value of the variable offset.

What is missing, however, is taking the variable offset
into account when the context register is used.  Or in terms
of the code adding the offset to the value passed to the
->convert_ctx_access() callback.  This leads to the following
eBPF user code:

     r1 += 68
     r0 = *(u32 *)(r1 + 8)
     exit

being translated to this in kernel space:

   0: (07) r1 += 68
   1: (61) r0 = *(u32 *)(r1 +180)
   2: (95) exit

Offset 8 is corresponding to 180 in the kernel, but offset
76 is valid too.  Verifier will "accept" access to offset
68+8=76 but then "convert" access to offset 8 as 180.
Effective access to offset 248 is beyond the kernel context.
(This is a __sk_buff example on a debug-heavy kernel -
packet mark is 8 -> 180, 76 would be data.)

Dereferencing the modified context pointer is not as easy
as dereferencing other types, because we have to translate
the access to reading a field in kernel structures which is
usually at a different offset and often of a different size.
To allow modifying the pointer we would have to make sure
that given eBPF instruction will always access the same
field or the fields accessed are "compatible" in terms of
offset and size...

Disallow dereferencing modified context pointers and add
to selftests the test case described here.

Fixes: f1174f77b50c ("bpf/verifier: rework value tracking")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8b8d6ba39e23..20f3889c006e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1116,7 +1116,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		/* ctx accesses must be at a fixed offset, so that we can
 		 * determine what type of data were returned.
 		 */
-		if (!tnum_is_const(reg->var_off)) {
+		if (reg->off) {
+			verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
+				regno, reg->off, off - reg->off);
+			return -EACCES;
+		}
+		if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
 			char tn_buf[48];
 
 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
@@ -1124,7 +1129,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 				tn_buf, off, size);
 			return -EACCES;
 		}
-		off += reg->var_off.value;
 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
 		if (!err && t == BPF_READ && value_regno >= 0) {
 			/* ctx access returns either a scalar, or a
-- 
cgit 


From 82f8dd28bd3abe181b7a66ea4ea132134d37a400 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 17 Oct 2017 16:55:53 +0200
Subject: bpf: fix splat for illegal devmap percpu allocation

It was reported that syzkaller was able to trigger a splat on
devmap percpu allocation due to illegal/unsupported allocation
request size passed to __alloc_percpu():

  [   70.094249] illegal size (32776) or align (8) for percpu allocation
  [   70.094256] ------------[ cut here ]------------
  [   70.094259] WARNING: CPU: 3 PID: 3451 at mm/percpu.c:1365 pcpu_alloc+0x96/0x630
  [...]
  [   70.094325] Call Trace:
  [   70.094328]  __alloc_percpu_gfp+0x12/0x20
  [   70.094330]  dev_map_alloc+0x134/0x1e0
  [   70.094331]  SyS_bpf+0x9bc/0x1610
  [   70.094333]  ? selinux_task_setrlimit+0x5a/0x60
  [   70.094334]  ? security_task_setrlimit+0x43/0x60
  [   70.094336]  entry_SYSCALL_64_fastpath+0x1a/0xa5

This was due to too large max_entries for the map such that we
surpassed the upper limit of PCPU_MIN_UNIT_SIZE. It's fine to
fail naturally here, so switch to __alloc_percpu_gfp() and pass
__GFP_NOWARN instead.

Fixes: 11393cc9b9be ("xdp: Add batching support to redirect map")
Reported-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Shankara Pailoor <sp3485@columbia.edu>
Reported-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/devmap.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index e093d9a2c4dd..920428d84da2 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -111,8 +111,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	err = -ENOMEM;
 
 	/* A per cpu bitfield with a bit per possible net device */
-	dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
-					    __alignof__(unsigned long));
+	dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
+						__alignof__(unsigned long),
+						GFP_KERNEL | __GFP_NOWARN);
 	if (!dtab->flush_needed)
 		goto free_dtab;
 
-- 
cgit 


From bc6d5031b43a2291de638ab9304320b4cae61689 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 17 Oct 2017 16:55:54 +0200
Subject: bpf: do not test for PCPU_MIN_UNIT_SIZE before percpu allocations

PCPU_MIN_UNIT_SIZE is an implementation detail of the percpu
allocator. Given we support __GFP_NOWARN now, lets just let
the allocation request fail naturally instead. The two call
sites from BPF mistakenly assumed __GFP_NOWARN would work, so
no changes needed to their actual __alloc_percpu_gfp() calls
which use the flag already.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/arraymap.c | 2 +-
 kernel/bpf/hashtab.c  | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 98c0f00c3f5e..e2636737b69b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -98,7 +98,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
 
 	if (array_size >= U32_MAX - PAGE_SIZE ||
-	    elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
+	    bpf_array_alloc_percpu(array)) {
 		bpf_map_area_free(array);
 		return ERR_PTR(-ENOMEM);
 	}
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 431126f31ea3..6533f08d1238 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -317,10 +317,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		 */
 		goto free_htab;
 
-	if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
-		/* make sure the size for pcpu_alloc() is reasonable */
-		goto free_htab;
-
 	htab->elem_size = sizeof(struct htab_elem) +
 			  round_up(htab->map.key_size, 8);
 	if (percpu)
-- 
cgit 


From a961e40917fb14614d368d8bc9782ca4d6a8cd11 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 19 Oct 2017 13:30:15 -0400
Subject: membarrier: Provide register expedited private command

This introduces a "register private expedited" membarrier command which
allows eventual removal of important memory barrier constraints on the
scheduler fast-paths. It changes how the "private expedited" membarrier
command (new to 4.14) is used from user-space.

This new command allows processes to register their intent to use the
private expedited command.  This affects how the expedited private
command introduced in 4.14-rc is meant to be used, and should be merged
before 4.14 final.

Processes are now required to register before using
MEMBARRIER_CMD_PRIVATE_EXPEDITED, otherwise that command returns EPERM.

This fixes a problem that arose when designing requested extensions to
sys_membarrier() to allow JITs to efficiently flush old code from
instruction caches.  Several potential algorithms are much less painful
if the user register intent to use this functionality early on, for
example, before the process spawns the second thread.  Registering at
this time removes the need to interrupt each and every thread in that
process at the first expedited sys_membarrier() system call.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sched/membarrier.c | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index a92fddc22747..dd7908743dab 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -18,6 +18,7 @@
 #include <linux/membarrier.h>
 #include <linux/tick.h>
 #include <linux/cpumask.h>
+#include <linux/atomic.h>
 
 #include "sched.h"	/* for cpu_rq(). */
 
@@ -26,21 +27,26 @@
  * except MEMBARRIER_CMD_QUERY.
  */
 #define MEMBARRIER_CMD_BITMASK	\
-	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)
+	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED	\
+	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
 
 static void ipi_mb(void *info)
 {
 	smp_mb();	/* IPIs should be serializing but paranoid. */
 }
 
-static void membarrier_private_expedited(void)
+static int membarrier_private_expedited(void)
 {
 	int cpu;
 	bool fallback = false;
 	cpumask_var_t tmpmask;
 
+	if (!(atomic_read(&current->mm->membarrier_state)
+			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
+		return -EPERM;
+
 	if (num_online_cpus() == 1)
-		return;
+		return 0;
 
 	/*
 	 * Matches memory barriers around rq->curr modification in
@@ -94,6 +100,24 @@ static void membarrier_private_expedited(void)
 	 * rq->curr modification in scheduler.
 	 */
 	smp_mb();	/* exit from system call is not a mb */
+	return 0;
+}
+
+static void membarrier_register_private_expedited(void)
+{
+	struct task_struct *p = current;
+	struct mm_struct *mm = p->mm;
+
+	/*
+	 * We need to consider threads belonging to different thread
+	 * groups, which use the same mm. (CLONE_VM but not
+	 * CLONE_THREAD).
+	 */
+	if (atomic_read(&mm->membarrier_state)
+			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
+		return;
+	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
+			&mm->membarrier_state);
 }
 
 /**
@@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
 			synchronize_sched();
 		return 0;
 	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
-		membarrier_private_expedited();
+		return membarrier_private_expedited();
+	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
+		membarrier_register_private_expedited();
 		return 0;
 	default:
 		return -EINVAL;
-- 
cgit 


From 27fdb35fe99011d86bcc54f62fe84712c53f4d05 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 19 Oct 2017 14:26:21 -0700
Subject: doc: Fix various RCU docbook comment-header problems

Because many of RCU's files have not been included into docbook, a
number of errors have accumulated.  This commit fixes them.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/rcu/srcutree.c |  2 +-
 kernel/rcu/sync.c     |  9 ++++++---
 kernel/rcu/tree.c     | 18 ++++++++++--------
 3 files changed, 17 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 729a8706751d..6d5880089ff6 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -854,7 +854,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 /**
  * call_srcu() - Queue a callback for invocation after an SRCU grace period
  * @sp: srcu_struct in queue the callback
- * @head: structure to be used for queueing the SRCU callback.
+ * @rhp: structure to be used for queueing the SRCU callback.
  * @func: function to be invoked after the SRCU grace period
  *
  * The callback function will be invoked some time after a full SRCU
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 50d1861f7759..3f943efcf61c 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -85,6 +85,9 @@ void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type)
 }
 
 /**
+ * rcu_sync_enter_start - Force readers onto slow path for multiple updates
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
  * Must be called after rcu_sync_init() and before first use.
  *
  * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}()
@@ -142,7 +145,7 @@ void rcu_sync_enter(struct rcu_sync *rsp)
 
 /**
  * rcu_sync_func() - Callback function managing reader access to fastpath
- * @rsp: Pointer to rcu_sync structure to use for synchronization
+ * @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization
  *
  * This function is passed to one of the call_rcu() functions by
  * rcu_sync_exit(), so that it is invoked after a grace period following the
@@ -158,9 +161,9 @@ void rcu_sync_enter(struct rcu_sync *rsp)
  * rcu_sync_exit().  Otherwise, set all state back to idle so that readers
  * can again use their fastpaths.
  */
-static void rcu_sync_func(struct rcu_head *rcu)
+static void rcu_sync_func(struct rcu_head *rhp)
 {
-	struct rcu_sync *rsp = container_of(rcu, struct rcu_sync, cb_head);
+	struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head);
 	unsigned long flags;
 
 	BUG_ON(rsp->gp_state != GP_PASSED);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b0ad62b0e7b8..3e3650e94ae6 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3097,9 +3097,10 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
  * read-side critical sections have completed. call_rcu_sched() assumes
  * that the read-side critical sections end on enabling of preemption
  * or on voluntary preemption.
- * RCU read-side critical sections are delimited by :
- *  - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
- *  - anything that disables preemption.
+ * RCU read-side critical sections are delimited by:
+ *
+ * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
+ * - anything that disables preemption.
  *
  *  These may be nested.
  *
@@ -3124,11 +3125,12 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
  * handler. This means that read-side critical sections in process
  * context must not be interrupted by softirqs. This interface is to be
  * used when most of the read-side critical sections are in softirq context.
- * RCU read-side critical sections are delimited by :
- *  - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context.
- *  OR
- *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
- *  These may be nested.
+ * RCU read-side critical sections are delimited by:
+ *
+ * - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context, OR
+ * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
+ *
+ * These may be nested.
  *
  * See the description of call_rcu() for more detailed information on
  * memory ordering guarantees.
-- 
cgit 


From 435bf0d3f99a164df7e8c30428cef266b91d1d3b Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 18 Oct 2017 07:10:15 -0700
Subject: bpf: enforce TCP only support for sockmap

Only TCP sockets have been tested and at the moment the state change
callback only handles TCP sockets. This adds a check to ensure that
sockets actually being added are TCP sockets.

For net-next we can consider UDP support.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/sockmap.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel')

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 6424ce0e4969..c68899d5b246 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -840,6 +840,12 @@ static int sock_map_update_elem(struct bpf_map *map,
 		return -EINVAL;
 	}
 
+	if (skops.sk->sk_type != SOCK_STREAM ||
+	    skops.sk->sk_protocol != IPPROTO_TCP) {
+		fput(socket->file);
+		return -EOPNOTSUPP;
+	}
+
 	err = sock_map_ctx_update_elem(&skops, map, key, flags);
 	fput(socket->file);
 	return err;
-- 
cgit 


From 34f79502bbcfab659b8729da68b5e387f96eb4c1 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 18 Oct 2017 07:10:36 -0700
Subject: bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region

SK_SKB BPF programs are run from the socket/tcp context but early in
the stack before much of the TCP metadata is needed in tcp_skb_cb. So
we can use some unused fields to place BPF metadata needed for SK_SKB
programs when implementing the redirect function.

This allows us to drop the preempt disable logic. It does however
require an API change so sk_redirect_map() has been updated to
additionally provide ctx_ptr to skb. Note, we do however continue to
disable/enable preemption around actual BPF program running to account
for map updates.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/sockmap.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index c68899d5b246..beaabb21c3a3 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -39,6 +39,7 @@
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <net/strparser.h>
+#include <net/tcp.h>
 
 struct bpf_stab {
 	struct bpf_map map;
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 		return SK_DROP;
 
 	skb_orphan(skb);
+	/* We need to ensure that BPF metadata for maps is also cleared
+	 * when we orphan the skb so that we don't have the possibility
+	 * to reference a stale map.
+	 */
+	TCP_SKB_CB(skb)->bpf.map = NULL;
 	skb->sk = psock->sock;
 	bpf_compute_data_end(skb);
+	preempt_disable();
 	rc = (*prog->bpf_func)(skb, prog->insnsi);
+	preempt_enable();
 	skb->sk = NULL;
 
 	return rc;
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 	struct sock *sk;
 	int rc;
 
-	/* Because we use per cpu values to feed input from sock redirect
-	 * in BPF program to do_sk_redirect_map() call we need to ensure we
-	 * are not preempted. RCU read lock is not sufficient in this case
-	 * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
-	 */
-	preempt_disable();
 	rc = smap_verdict_func(psock, skb);
 	switch (rc) {
 	case SK_REDIRECT:
-		sk = do_sk_redirect_map();
-		preempt_enable();
+		sk = do_sk_redirect_map(skb);
 		if (likely(sk)) {
 			struct smap_psock *peer = smap_psock_sk(sk);
 
@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 	/* Fall through and free skb otherwise */
 	case SK_DROP:
 	default:
-		if (rc != SK_REDIRECT)
-			preempt_enable();
 		kfree_skb(skb);
 	}
 }
-- 
cgit 


From fb50df8d32283cd95932a182a46a10070c4a8832 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 18 Oct 2017 07:11:22 -0700
Subject: bpf: require CAP_NET_ADMIN when using sockmap maps

Restrict sockmap to CAP_NET_ADMIN.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/sockmap.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index beaabb21c3a3..2b6eb35ae5d3 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -486,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 	int err = -EINVAL;
 	u64 cost;
 
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
 	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
-- 
cgit 


From 9ef2a8cd5c0dcb8e1f1534615c56eb13b630c363 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 18 Oct 2017 07:11:44 -0700
Subject: bpf: require CAP_NET_ADMIN when using devmap

Devmap is used with XDP which requires CAP_NET_ADMIN so lets also
make CAP_NET_ADMIN required to use the map.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/devmap.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 920428d84da2..52e0548ba548 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	int err = -EINVAL;
 	u64 cost;
 
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
 	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
-- 
cgit 


From 1c9fec470b81ca5e89391c20a11ead31a1e9314b Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 20 Oct 2017 07:36:05 -0700
Subject: waitid(): Avoid unbalanced user_access_end() on access_ok() error

As pointed out by Linus and David, the earlier waitid() fix resulted in
a (currently harmless) unbalanced user_access_end() call.  This fixes it
to just directly return EFAULT on access_ok() failure.

Fixes: 96ca579a1ecc ("waitid(): Add missing access_ok() checks")
Acked-by: David Daney <david.daney@cavium.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/exit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index cf28528842bc..f6cad39f35df 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1611,7 +1611,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 		return err;
 
 	if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
-		goto Efault;
+		return -EFAULT;
 
 	user_access_begin();
 	unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1739,7 +1739,7 @@ COMPAT_SYSCALL_DEFINE5(waitid,
 		return err;
 
 	if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
-		goto Efault;
+		return -EFAULT;
 
 	user_access_begin();
 	unsafe_put_user(signo, &infop->si_signo, Efault);
-- 
cgit 


From 1f7c70d6b2bc5de301f30456621e1161fddf4242 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 21 Oct 2017 16:06:52 +0200
Subject: cpu/hotplug: Reset node state after operation

The recent rework of the cpu hotplug internals changed the usage of the per
cpu state->node field, but missed to clean it up after usage.

So subsequent hotplug operations use the stale pointer from a previous
operation and hand it into the callback functions. The callbacks then
dereference a pointer which either belongs to a different facility or
points to freed and potentially reused memory. In either case data
corruption and crashes are the obvious consequence.

Reset the node and the last pointers in the per cpu state to NULL after the
operation which set them has completed.

Fixes: 96abb968549c ("smp/hotplug: Allow external multi-instance rollback")
Reported-by: Tvrtko Ursulin <tursulin@ursulin.net>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1710211606130.3213@nanos
---
 kernel/cpu.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'kernel')

diff --git a/kernel/cpu.c b/kernel/cpu.c
index d851df22f5c5..04892a82f6ac 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -632,6 +632,11 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
 		__cpuhp_kick_ap(st);
 	}
 
+	/*
+	 * Clean up the leftovers so the next hotplug operation wont use stale
+	 * data.
+	 */
+	st->node = st->last = NULL;
 	return ret;
 }
 
-- 
cgit 


From 8695a5395661fbb4a4f26c97f801f3800ae4754e Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@gmail.com>
Date: Thu, 19 Oct 2017 09:03:52 -0700
Subject: bpf: devmap fix arithmetic overflow in bitmap_size calculation

An integer overflow is possible in dev_map_bitmap_size() when
calculating the BITS_TO_LONG logic which becomes, after macro
replacement,

	(((n) + (d) - 1)/ (d))

where 'n' is a __u32 and 'd' is (8 * sizeof(long)). To avoid
overflow cast to u64 before arithmetic.

Reported-by: Richard Weinberger <richard@nod.at>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/devmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 52e0548ba548..e745d6a88224 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -69,7 +69,7 @@ static LIST_HEAD(dev_map_list);
 
 static u64 dev_map_bitmap_size(const union bpf_attr *attr)
 {
-	return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
+	return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
 }
 
 static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
-- 
cgit 


From fb2a311a31d3457fe8c3ee16f5609877e2ead9f7 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 21 Oct 2017 02:34:21 +0200
Subject: bpf: fix off by one for range markings with L{T, E} patterns

During review I noticed that the current logic for direct packet
access marking in check_cond_jmp_op() has an off by one for the
upper right range border when marking in find_good_pkt_pointers()
with BPF_JLT and BPF_JLE. It's not really harmful given access
up to pkt_end is always safe, but we should nevertheless correct
the range marking before it becomes ABI. If pkt_data' denotes a
pkt_data derived pointer (pkt_data + X), then for pkt_data' < pkt_end
in the true branch as well as for pkt_end <= pkt_data' in the false
branch we mark the range with X although it should really be X - 1
in these cases. For example, X could be pkt_end - pkt_data, then
when testing for pkt_data' < pkt_end the verifier simulation cannot
deduce that a byte load of pkt_data' - 1 would succeed in this
branch.

Fixes: b4e432f1000a ("bpf: enable BPF_J{LT, LE, SLT, SLE} opcodes in verifier")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 20f3889c006e..49cb5ad14746 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2430,12 +2430,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 }
 
 static void find_good_pkt_pointers(struct bpf_verifier_state *state,
-				   struct bpf_reg_state *dst_reg)
+				   struct bpf_reg_state *dst_reg,
+				   bool range_right_open)
 {
 	struct bpf_reg_state *regs = state->regs, *reg;
+	u16 new_range;
 	int i;
 
-	if (dst_reg->off < 0)
+	if (dst_reg->off < 0 ||
+	    (dst_reg->off == 0 && range_right_open))
 		/* This doesn't give us any range */
 		return;
 
@@ -2446,9 +2449,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 		 */
 		return;
 
-	/* LLVM can generate four kind of checks:
+	new_range = dst_reg->off;
+	if (range_right_open)
+		new_range--;
+
+	/* Examples for register markings:
 	 *
-	 * Type 1/2:
+	 * pkt_data in dst register:
 	 *
 	 *   r2 = r3;
 	 *   r2 += 8;
@@ -2465,7 +2472,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	 *     r2=pkt(id=n,off=8,r=0)
 	 *     r3=pkt(id=n,off=0,r=0)
 	 *
-	 * Type 3/4:
+	 * pkt_data in src register:
 	 *
 	 *   r2 = r3;
 	 *   r2 += 8;
@@ -2483,7 +2490,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	 *     r3=pkt(id=n,off=0,r=0)
 	 *
 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
-	 * so that range of bytes [r3, r3 + 8) is safe to access.
+	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
+	 * and [r3, r3 + 8-1) respectively is safe to access depending on
+	 * the check.
 	 */
 
 	/* If our ids match, then we must have the same max_value.  And we
@@ -2494,14 +2503,14 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
 			/* keep the maximum range already checked */
-			regs[i].range = max_t(u16, regs[i].range, dst_reg->off);
+			regs[i].range = max(regs[i].range, new_range);
 
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] != STACK_SPILL)
 			continue;
 		reg = &state->spilled_regs[i / BPF_REG_SIZE];
 		if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
-			reg->range = max_t(u16, reg->range, dst_reg->off);
+			reg->range = max(reg->range, new_range);
 	}
 }
 
@@ -2865,19 +2874,19 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
 		   dst_reg->type == PTR_TO_PACKET &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
-		find_good_pkt_pointers(this_branch, dst_reg);
+		find_good_pkt_pointers(this_branch, dst_reg, false);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
 		   dst_reg->type == PTR_TO_PACKET &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
-		find_good_pkt_pointers(other_branch, dst_reg);
+		find_good_pkt_pointers(other_branch, dst_reg, true);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
 		   dst_reg->type == PTR_TO_PACKET_END &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET) {
-		find_good_pkt_pointers(other_branch, &regs[insn->src_reg]);
+		find_good_pkt_pointers(other_branch, &regs[insn->src_reg], false);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
 		   dst_reg->type == PTR_TO_PACKET_END &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET) {
-		find_good_pkt_pointers(this_branch, &regs[insn->src_reg]);
+		find_good_pkt_pointers(this_branch, &regs[insn->src_reg], true);
 	} else if (is_pointer_value(env, insn->dst_reg)) {
 		verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
 		return -EACCES;
-- 
cgit 


From 0fd4759c5515b7f2297d7fee5c45e5d9dd733001 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 21 Oct 2017 02:34:22 +0200
Subject: bpf: fix pattern matches for direct packet access

Alexander had a test program with direct packet access, where
the access test was in the form of data + X > data_end. In an
unrelated change to the program LLVM decided to swap the branches
and emitted code for the test in form of data + X <= data_end.
We hadn't seen these being generated previously, thus verifier
would reject the program. Therefore, fix up the verifier to
detect all test cases, so we don't run into such issues in the
future.

Fixes: b4e432f1000a ("bpf: enable BPF_J{LT, LE, SLT, SLE} opcodes in verifier")
Reported-by: Alexander Alemayhu <alexander@alemayhu.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 49cb5ad14746..c48ca2a34b5e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2874,18 +2874,42 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
 		   dst_reg->type == PTR_TO_PACKET &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+		/* pkt_data' > pkt_end */
 		find_good_pkt_pointers(this_branch, dst_reg, false);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
+		   dst_reg->type == PTR_TO_PACKET_END &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET) {
+		/* pkt_end > pkt_data' */
+		find_good_pkt_pointers(other_branch, &regs[insn->src_reg], true);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
 		   dst_reg->type == PTR_TO_PACKET &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+		/* pkt_data' < pkt_end */
 		find_good_pkt_pointers(other_branch, dst_reg, true);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
+		   dst_reg->type == PTR_TO_PACKET_END &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET) {
+		/* pkt_end < pkt_data' */
+		find_good_pkt_pointers(this_branch, &regs[insn->src_reg], false);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
+		   dst_reg->type == PTR_TO_PACKET &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+		/* pkt_data' >= pkt_end */
+		find_good_pkt_pointers(this_branch, dst_reg, true);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
 		   dst_reg->type == PTR_TO_PACKET_END &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET) {
+		/* pkt_end >= pkt_data' */
 		find_good_pkt_pointers(other_branch, &regs[insn->src_reg], false);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
+		   dst_reg->type == PTR_TO_PACKET &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+		/* pkt_data' <= pkt_end */
+		find_good_pkt_pointers(other_branch, dst_reg, false);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
 		   dst_reg->type == PTR_TO_PACKET_END &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET) {
+		/* pkt_end <= pkt_data' */
 		find_good_pkt_pointers(this_branch, &regs[insn->src_reg], true);
 	} else if (is_pointer_value(env, insn->dst_reg)) {
 		verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
-- 
cgit 


From 8108a77515126f6db4374e8593956e20430307c0 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 27 Oct 2017 09:45:34 -0700
Subject: bpf: bpf_compute_data uses incorrect cb structure

SK_SKB program types use bpf_compute_data to store the end of the
packet data. However, bpf_compute_data assumes the cb is stored in the
qdisc layer format. But, for SK_SKB this is the wrong layer of the
stack for this type.

It happens to work (sort of!) because in most cases nothing happens
to be overwritten today. This is very fragile and error prone.
Fortunately, we have another hole in tcp_skb_cb we can use so lets
put the data_end value there.

Note, SK_SKB program types do not use data_meta, they are failed by
sk_skb_is_valid_access().

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/sockmap.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 2b6eb35ae5d3..6778fb773934 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -93,6 +93,14 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
 	return rcu_dereference_sk_user_data(sk);
 }
 
+/* compute the linear packet data range [data, data_end) for skb when
+ * sk_skb type programs are in use.
+ */
+static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
+}
+
 static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 {
 	struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict);
@@ -108,7 +116,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 	 */
 	TCP_SKB_CB(skb)->bpf.map = NULL;
 	skb->sk = psock->sock;
-	bpf_compute_data_end(skb);
+	bpf_compute_data_end_sk_skb(skb);
 	preempt_disable();
 	rc = (*prog->bpf_func)(skb, prog->insnsi);
 	preempt_enable();
@@ -368,7 +376,7 @@ static int smap_parse_func_strparser(struct strparser *strp,
 	 * any socket yet.
 	 */
 	skb->sk = psock->sock;
-	bpf_compute_data_end(skb);
+	bpf_compute_data_end_sk_skb(skb);
 	rc = (*prog->bpf_func)(skb, prog->insnsi);
 	skb->sk = NULL;
 	rcu_read_unlock();
-- 
cgit 


From bfa640757e9378c2f26867e723f1287e94f5a7ad Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 27 Oct 2017 09:45:53 -0700
Subject: bpf: rename sk_actions to align with bpf infrastructure

Recent additions to support multiple programs in cgroups impose
a strict requirement, "all yes is yes, any no is no". To enforce
this the infrastructure requires the 'no' return code, SK_DROP in
this case, to be 0.

To apply these rules to SK_SKB program types the sk_actions return
codes need to be adjusted.

This fix adds SK_PASS and makes 'SK_DROP = 0'. Finally, remove
SK_ABORTED to remove any chance that the API may allow aborted
program flows to be passed up the stack. This would be incorrect
behavior and allow programs to break existing policies.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/sockmap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 6778fb773934..66f00a2b27f4 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -122,7 +122,8 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 	preempt_enable();
 	skb->sk = NULL;
 
-	return rc;
+	return rc == SK_PASS ?
+		(TCP_SKB_CB(skb)->bpf.map ? SK_REDIRECT : SK_PASS) : SK_DROP;
 }
 
 static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
-- 
cgit 


From be96b316deff35e119760982c43af74e606fa143 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 28 Oct 2017 09:49:37 -0700
Subject: perf/cgroup: Fix perf cgroup hierarchy support

The following commit:

  864c2357ca89 ("perf/core: Do not set cpuctx->cgrp for unscheduled cgroups")

made list_update_cgroup_event() skip setting cpuctx->cgrp if no cgroup event
targets %current's cgroup.

This breaks perf_event's hierarchical support because events which target one
of the ancestors get ignored.

Fix it by using cgroup_is_descendant() test instead of equality.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: David Carrillo-Cisneros <davidcc@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: kernel-team@fb.com
Cc: stable@vger.kernel.org # v4.9+
Fixes: 864c2357ca89 ("perf/core: Do not set cpuctx->cgrp for unscheduled cgroups")
Link: http://lkml.kernel.org/r/20171028164237.GA972780@devbig577.frc2.facebook.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9d93db81fa36..10cdb9c26b5d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -901,9 +901,11 @@ list_update_cgroup_event(struct perf_event *event,
 	cpuctx_entry = &cpuctx->cgrp_cpuctx_entry;
 	/* cpuctx->cgrp is NULL unless a cgroup event is active in this CPU .*/
 	if (add) {
+		struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
+
 		list_add(cpuctx_entry, this_cpu_ptr(&cgrp_cpuctx_list));
-		if (perf_cgroup_from_task(current, ctx) == event->cgrp)
-			cpuctx->cgrp = event->cgrp;
+		if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
+			cpuctx->cgrp = cgrp;
 	} else {
 		list_del(cpuctx_entry);
 		cpuctx->cgrp = NULL;
-- 
cgit 


From cef572ad9bd7f85035ba8272e5352040e8be0152 Mon Sep 17 00:00:00 2001
From: Li Bin <huawei.libin@huawei.com>
Date: Sat, 28 Oct 2017 11:07:28 +0800
Subject: workqueue: Fix NULL pointer dereference

When queue_work() is used in irq (not in task context), there is
a potential case that trigger NULL pointer dereference.
----------------------------------------------------------------
worker_thread()
|-spin_lock_irq()
|-process_one_work()
	|-worker->current_pwq = pwq
	|-spin_unlock_irq()
	|-worker->current_func(work)
	|-spin_lock_irq()
 	|-worker->current_pwq = NULL
|-spin_unlock_irq()

				//interrupt here
				|-irq_handler
					|-__queue_work()
						//assuming that the wq is draining
						|-is_chained_work(wq)
							|-current_wq_worker()
							//Here, 'current' is the interrupted worker!
								|-current->current_pwq is NULL here!
|-schedule()
----------------------------------------------------------------

Avoid it by checking for task context in current_wq_worker(), and
if not in task context, we shouldn't use the 'current' to check the
condition.

Reported-by: Xiaofei Tan <tanxiaofei@huawei.com>
Signed-off-by: Li Bin <huawei.libin@huawei.com>
Reviewed-by: Lai Jiangshan <jiangshanlai@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: 8d03ecfe4718 ("workqueue: reimplement is_chained_work() using current_wq_worker()")
Cc: stable@vger.kernel.org # v3.9+
---
 kernel/workqueue_internal.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 8635417c587b..29fa81f0f51a 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -9,6 +9,7 @@
 
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
+#include <linux/preempt.h>
 
 struct worker_pool;
 
@@ -59,7 +60,7 @@ struct worker {
  */
 static inline struct worker *current_wq_worker(void)
 {
-	if (current->flags & PF_WQ_WORKER)
+	if (in_task() && (current->flags & PF_WQ_WORKER))
 		return kthread_data(current);
 	return NULL;
 }
-- 
cgit 


From 04686ef299db5ff299bfc5a4504b189e46842078 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 31 Oct 2017 19:17:31 -0700
Subject: bpf: remove SK_REDIRECT from UAPI

Now that SK_REDIRECT is no longer a valid return code. Remove it
from the UAPI completely. Then do a namespace remapping internal
to sockmap so SK_REDIRECT is no longer externally visible.

Patchs primary change is to do a namechange from SK_REDIRECT to
__SK_REDIRECT

Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/sockmap.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 66f00a2b27f4..dbd7b322a86b 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -101,13 +101,19 @@ static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
 }
 
+enum __sk_action {
+	__SK_DROP = 0,
+	__SK_PASS,
+	__SK_REDIRECT,
+};
+
 static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 {
 	struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict);
 	int rc;
 
 	if (unlikely(!prog))
-		return SK_DROP;
+		return __SK_DROP;
 
 	skb_orphan(skb);
 	/* We need to ensure that BPF metadata for maps is also cleared
@@ -122,8 +128,10 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 	preempt_enable();
 	skb->sk = NULL;
 
+	/* Moving return codes from UAPI namespace into internal namespace */
 	return rc == SK_PASS ?
-		(TCP_SKB_CB(skb)->bpf.map ? SK_REDIRECT : SK_PASS) : SK_DROP;
+		(TCP_SKB_CB(skb)->bpf.map ? __SK_REDIRECT : __SK_PASS) :
+		__SK_DROP;
 }
 
 static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
@@ -133,7 +141,7 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 
 	rc = smap_verdict_func(psock, skb);
 	switch (rc) {
-	case SK_REDIRECT:
+	case __SK_REDIRECT:
 		sk = do_sk_redirect_map(skb);
 		if (likely(sk)) {
 			struct smap_psock *peer = smap_psock_sk(sk);
@@ -149,7 +157,7 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 			}
 		}
 	/* Fall through and free skb otherwise */
-	case SK_DROP:
+	case __SK_DROP:
 	default:
 		kfree_skb(skb);
 	}
-- 
cgit 


From 153fbd1226fb30b8630802aa5047b8af5ef53c9f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 31 Oct 2017 11:18:53 +0100
Subject: futex: Fix more put_pi_state() vs. exit_pi_state_list() races

Dmitry (through syzbot) reported being able to trigger the WARN in
get_pi_state() and a use-after-free on:

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

Both are due to this race:

  exit_pi_state_list()				put_pi_state()

  lock(&curr->pi_lock)
  while() {
	pi_state = list_first_entry(head);
	hb = hash_futex(&pi_state->key);
	unlock(&curr->pi_lock);

						dec_and_test(&pi_state->refcount);

	lock(&hb->lock)
	lock(&pi_state->pi_mutex.wait_lock)	// uaf if pi_state free'd
	lock(&curr->pi_lock);

	....

	unlock(&curr->pi_lock);
	get_pi_state();				// WARN; refcount==0

The problem is we take the reference count too late, and don't allow it
being 0. Fix it by using inc_not_zero() and simply retrying the loop
when we fail to get a refcount. In that case put_pi_state() should
remove the entry from the list.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Gratian Crisan <gratian.crisan@ni.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: dvhart@infradead.org
Cc: syzbot <bot+2af19c9e1ffe4d4ee1d16c56ae7580feaee75765@syzkaller.appspotmail.com>
Cc: syzkaller-bugs@googlegroups.com
Cc: <stable@vger.kernel.org>
Fixes: c74aef2d06a9 ("futex: Fix pi_state->owner serialization")
Link: http://lkml.kernel.org/r/20171031101853.xpfh72y643kdfhjs@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/futex.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/futex.c b/kernel/futex.c
index 0518a0bfc746..ca5bb9cba5cf 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -903,11 +903,27 @@ void exit_pi_state_list(struct task_struct *curr)
 	 */
 	raw_spin_lock_irq(&curr->pi_lock);
 	while (!list_empty(head)) {
-
 		next = head->next;
 		pi_state = list_entry(next, struct futex_pi_state, list);
 		key = pi_state->key;
 		hb = hash_futex(&key);
+
+		/*
+		 * We can race against put_pi_state() removing itself from the
+		 * list (a waiter going away). put_pi_state() will first
+		 * decrement the reference count and then modify the list, so
+		 * its possible to see the list entry but fail this reference
+		 * acquire.
+		 *
+		 * In that case; drop the locks to let put_pi_state() make
+		 * progress and retry the loop.
+		 */
+		if (!atomic_inc_not_zero(&pi_state->refcount)) {
+			raw_spin_unlock_irq(&curr->pi_lock);
+			cpu_relax();
+			raw_spin_lock_irq(&curr->pi_lock);
+			continue;
+		}
 		raw_spin_unlock_irq(&curr->pi_lock);
 
 		spin_lock(&hb->lock);
@@ -918,8 +934,10 @@ void exit_pi_state_list(struct task_struct *curr)
 		 * task still owns the PI-state:
 		 */
 		if (head->next != next) {
+			/* retain curr->pi_lock for the loop invariant */
 			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 			spin_unlock(&hb->lock);
+			put_pi_state(pi_state);
 			continue;
 		}
 
@@ -927,9 +945,8 @@ void exit_pi_state_list(struct task_struct *curr)
 		WARN_ON(list_empty(&pi_state->list));
 		list_del_init(&pi_state->list);
 		pi_state->owner = NULL;
-		raw_spin_unlock(&curr->pi_lock);
 
-		get_pi_state(pi_state);
+		raw_spin_unlock(&curr->pi_lock);
 		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 		spin_unlock(&hb->lock);
 
-- 
cgit 


From 9c388a5ed1960b2ebbebd3dbe7553092b0c15ec1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 31 Oct 2017 22:32:00 +0100
Subject: watchdog/harclockup/perf: Revert a33d44843d45
 ("watchdog/hardlockup/perf: Simplify deferred event destroy")

Guenter reported a crash in the watchdog/perf code, which is caused by
cleanup() and enable() running concurrently. The reason for this is:

The watchdog functions are serialized via the watchdog_mutex and cpu
hotplug locking, but the enable of the perf based watchdog happens in
context of the unpark callback of the smpboot thread. But that unpark
function is not synchronous inside the locking. The unparking of the thread
just wakes it up and leaves so there is no guarantee when the thread is
executing.

If it starts running _before_ the cleanup happened then it will create a
event and overwrite the dead event pointer. The new event is then cleaned
up because the event is marked dead.

    lock(watchdog_mutex);
    lockup_detector_reconfigure();
        cpus_read_lock();
	stop();
	   park()
	update();
	start();
	   unpark()
	cpus_read_unlock();		thread runs()
					  overwrite dead event ptr
	cleanup();
	  free new event, which is active inside perf....
    unlock(watchdog_mutex);

The park side is safe as that actually waits for the thread to reach
parked state.

Commit a33d44843d45 removed the protection against this kind of scenario
under the stupid assumption that the hotplug serialization and the
watchdog_mutex cover everything.

Bring it back.

Reverts: a33d44843d45 ("watchdog/hardlockup/perf: Simplify deferred event destroy")
Reported-and-tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Thomas Feels-stupid Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Don Zickus <dzickus@redhat.com>
Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1710312145190.1942@nanos
---
 kernel/watchdog_hld.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 71a62ceacdc8..a7f137c1933a 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -21,6 +21,7 @@
 static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
+static DEFINE_PER_CPU(struct perf_event *, dead_event);
 static struct cpumask dead_events_mask;
 
 static unsigned long hardlockup_allcpu_dumped;
@@ -203,6 +204,8 @@ void hardlockup_detector_perf_disable(void)
 
 	if (event) {
 		perf_event_disable(event);
+		this_cpu_write(watchdog_ev, NULL);
+		this_cpu_write(dead_event, event);
 		cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
 		watchdog_cpus--;
 	}
@@ -218,7 +221,7 @@ void hardlockup_detector_perf_cleanup(void)
 	int cpu;
 
 	for_each_cpu(cpu, &dead_events_mask) {
-		struct perf_event *event = per_cpu(watchdog_ev, cpu);
+		struct perf_event *event = per_cpu(dead_event, cpu);
 
 		/*
 		 * Required because for_each_cpu() reports  unconditionally
@@ -226,7 +229,7 @@ void hardlockup_detector_perf_cleanup(void)
 		 */
 		if (event)
 			perf_event_release_kernel(event);
-		per_cpu(watchdog_ev, cpu) = NULL;
+		per_cpu(dead_event, cpu) = NULL;
 	}
 	cpumask_clear(&dead_events_mask);
 }
-- 
cgit 


From 42f930da7f00c0ab23df4c7aed36137f35988980 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Wed, 1 Nov 2017 14:11:27 -0400
Subject: watchdog/hardlockup/perf: Use atomics to track in-use cpu counter

Guenter reported:
  There is still a problem. When running
    echo 6 > /proc/sys/kernel/watchdog_thresh
    echo 5 > /proc/sys/kernel/watchdog_thresh
  repeatedly, the message

   NMI watchdog: Enabled. Permanently consumes one hw-PMU counter.

  stops after a while (after ~10-30 iterations, with fluctuations).
  Maybe watchdog_cpus needs to be atomic ?

That's correct as this again is affected by the asynchronous nature of the
smpboot thread unpark mechanism.

CPU 0				CPU1			CPU2
write(watchdog_thresh, 6)
  stop()
    park()
  update()
  start()
    unpark()
				thread->unpark()
				  cnt++;
write(watchdog_thresh, 5)				thread->unpark()
  stop()
    park()			thread->park()
				   cnt--;		  cnt++;
  update()
  start()
    unpark()

That's not a functional problem, it just affects the informational message.

Convert watchdog_cpus to atomic_t to prevent the problem

Reported-and-tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20171101181126.j727fqjmdthjz4xk@redhat.com
---
 kernel/watchdog_hld.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index a7f137c1933a..a84b205fac9a 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -12,6 +12,7 @@
 #define pr_fmt(fmt) "NMI watchdog: " fmt
 
 #include <linux/nmi.h>
+#include <linux/atomic.h>
 #include <linux/module.h>
 #include <linux/sched/debug.h>
 
@@ -25,7 +26,7 @@ static DEFINE_PER_CPU(struct perf_event *, dead_event);
 static struct cpumask dead_events_mask;
 
 static unsigned long hardlockup_allcpu_dumped;
-static unsigned int watchdog_cpus;
+static atomic_t watchdog_cpus = ATOMIC_INIT(0);
 
 void arch_touch_nmi_watchdog(void)
 {
@@ -189,7 +190,8 @@ void hardlockup_detector_perf_enable(void)
 	if (hardlockup_detector_event_create())
 		return;
 
-	if (!watchdog_cpus++)
+	/* use original value for check */
+	if (!atomic_fetch_inc(&watchdog_cpus))
 		pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
 
 	perf_event_enable(this_cpu_read(watchdog_ev));
@@ -207,7 +209,7 @@ void hardlockup_detector_perf_disable(void)
 		this_cpu_write(watchdog_ev, NULL);
 		this_cpu_write(dead_event, event);
 		cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
-		watchdog_cpus--;
+		atomic_dec(&watchdog_cpus);
 	}
 }
 
-- 
cgit 


From c3aff086ea11f17f5c0bec77bdbf4365ba6b41fc Mon Sep 17 00:00:00 2001
From: Andrew Clayton <andrew@digital-domain.net>
Date: Wed, 1 Nov 2017 15:49:59 +0000
Subject: signal: Fix name of SIGEMT in #if defined() check

Commit cc731525f26a ("signal: Remove kernel interal si_code magic")
added a check for SIGMET and NSIGEMT being defined. That SIGMET should
in fact be SIGEMT, with SIGEMT being defined in
arch/{alpha,mips,sparc}/include/uapi/asm/signal.h

This was actually pointed out by BenHutchings in a lwn.net comment
here https://lwn.net/Comments/734608/

Fixes: cc731525f26a ("signal: Remove kernel interal si_code magic")
Signed-off-by: Andrew Clayton <andrew@digital-domain.net>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/signal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index 800a18f77732..8dcd8825b2de 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2698,7 +2698,7 @@ enum siginfo_layout siginfo_layout(int sig, int si_code)
 			[SIGSEGV] = { NSIGSEGV, SIL_FAULT },
 			[SIGBUS]  = { NSIGBUS,  SIL_FAULT },
 			[SIGTRAP] = { NSIGTRAP, SIL_FAULT },
-#if defined(SIGMET) && defined(NSIGEMT)
+#if defined(SIGEMT) && defined(NSIGEMT)
 			[SIGEMT]  = { NSIGEMT,  SIL_FAULT },
 #endif
 			[SIGCHLD] = { NSIGCHLD, SIL_CHLD },
-- 
cgit 


From b24413180f5600bcb3bb70fbed5cf186b60864bd Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 1 Nov 2017 15:07:57 +0100
Subject: License cleanup: add SPDX GPL-2.0 license identifier to files with no
 license

Many source files in the tree are missing licensing information, which
makes it harder for compliance tools to determine the correct license.

By default all files without license information are under the default
license of the kernel, which is GPL version 2.

Update the files which contain no license information with the 'GPL-2.0'
SPDX license identifier.  The SPDX identifier is a legally binding
shorthand, which can be used instead of the full boiler plate text.

This patch is based on work done by Thomas Gleixner and Kate Stewart and
Philippe Ombredanne.

How this work was done:

Patches were generated and checked against linux-4.14-rc6 for a subset of
the use cases:
 - file had no licensing information it it.
 - file was a */uapi/* one with no licensing information in it,
 - file was a */uapi/* one with existing licensing information,

Further patches will be generated in subsequent months to fix up cases
where non-standard license headers were used, and references to license
had to be inferred by heuristics based on keywords.

The analysis to determine which SPDX License Identifier to be applied to
a file was done in a spreadsheet of side by side results from of the
output of two independent scanners (ScanCode & Windriver) producing SPDX
tag:value files created by Philippe Ombredanne.  Philippe prepared the
base worksheet, and did an initial spot review of a few 1000 files.

The 4.13 kernel was the starting point of the analysis with 60,537 files
assessed.  Kate Stewart did a file by file comparison of the scanner
results in the spreadsheet to determine which SPDX license identifier(s)
to be applied to the file. She confirmed any determination that was not
immediately clear with lawyers working with the Linux Foundation.

Criteria used to select files for SPDX license identifier tagging was:
 - Files considered eligible had to be source code files.
 - Make and config files were included as candidates if they contained >5
   lines of source
 - File already had some variant of a license header in it (even if <5
   lines).

All documentation files were explicitly excluded.

The following heuristics were used to determine which SPDX license
identifiers to apply.

 - when both scanners couldn't find any license traces, file was
   considered to have no license information in it, and the top level
   COPYING file license applied.

   For non */uapi/* files that summary was:

   SPDX license identifier                            # files
   ---------------------------------------------------|-------
   GPL-2.0                                              11139

   and resulted in the first patch in this series.

   If that file was a */uapi/* path one, it was "GPL-2.0 WITH
   Linux-syscall-note" otherwise it was "GPL-2.0".  Results of that was:

   SPDX license identifier                            # files
   ---------------------------------------------------|-------
   GPL-2.0 WITH Linux-syscall-note                        930

   and resulted in the second patch in this series.

 - if a file had some form of licensing information in it, and was one
   of the */uapi/* ones, it was denoted with the Linux-syscall-note if
   any GPL family license was found in the file or had no licensing in
   it (per prior point).  Results summary:

   SPDX license identifier                            # files
   ---------------------------------------------------|------
   GPL-2.0 WITH Linux-syscall-note                       270
   GPL-2.0+ WITH Linux-syscall-note                      169
   ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause)    21
   ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)    17
   LGPL-2.1+ WITH Linux-syscall-note                      15
   GPL-1.0+ WITH Linux-syscall-note                       14
   ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause)    5
   LGPL-2.0+ WITH Linux-syscall-note                       4
   LGPL-2.1 WITH Linux-syscall-note                        3
   ((GPL-2.0 WITH Linux-syscall-note) OR MIT)              3
   ((GPL-2.0 WITH Linux-syscall-note) AND MIT)             1

   and that resulted in the third patch in this series.

 - when the two scanners agreed on the detected license(s), that became
   the concluded license(s).

 - when there was disagreement between the two scanners (one detected a
   license but the other didn't, or they both detected different
   licenses) a manual inspection of the file occurred.

 - In most cases a manual inspection of the information in the file
   resulted in a clear resolution of the license that should apply (and
   which scanner probably needed to revisit its heuristics).

 - When it was not immediately clear, the license identifier was
   confirmed with lawyers working with the Linux Foundation.

 - If there was any question as to the appropriate license identifier,
   the file was flagged for further research and to be revisited later
   in time.

In total, over 70 hours of logged manual review was done on the
spreadsheet to determine the SPDX license identifiers to apply to the
source files by Kate, Philippe, Thomas and, in some cases, confirmation
by lawyers working with the Linux Foundation.

Kate also obtained a third independent scan of the 4.13 code base from
FOSSology, and compared selected files where the other two scanners
disagreed against that SPDX file, to see if there was new insights.  The
Windriver scanner is based on an older version of FOSSology in part, so
they are related.

Thomas did random spot checks in about 500 files from the spreadsheets
for the uapi headers and agreed with SPDX license identifier in the
files he inspected. For the non-uapi files Thomas did random spot checks
in about 15000 files.

In initial set of patches against 4.14-rc6, 3 files were found to have
copy/paste license identifier errors, and have been fixed to reflect the
correct identifier.

Additionally Philippe spent 10 hours this week doing a detailed manual
inspection and review of the 12,461 patched files from the initial patch
version early this week with:
 - a full scancode scan run, collecting the matched texts, detected
   license ids and scores
 - reviewing anything where there was a license detected (about 500+
   files) to ensure that the applied SPDX license was correct
 - reviewing anything where there was no detection but the patch license
   was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied
   SPDX license was correct

This produced a worksheet with 20 files needing minor correction.  This
worksheet was then exported into 3 different .csv files for the
different types of files to be modified.

These .csv files were then reviewed by Greg.  Thomas wrote a script to
parse the csv files and add the proper SPDX tag to the file, in the
format that the file expected.  This script was further refined by Greg
based on the output to detect more types of files automatically and to
distinguish between header and source .c files (which need different
comment types.)  Finally Greg ran the script using the .csv files to
generate the patches.

Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org>
Reviewed-by: Philippe Ombredanne <pombredanne@nexb.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/Makefile                         | 1 +
 kernel/acct.c                           | 1 +
 kernel/audit_tree.c                     | 1 +
 kernel/bounds.c                         | 1 +
 kernel/bpf/Makefile                     | 1 +
 kernel/capability.c                     | 1 +
 kernel/cgroup/Makefile                  | 1 +
 kernel/cgroup/cgroup-internal.h         | 1 +
 kernel/cgroup/debug.c                   | 1 +
 kernel/cgroup/namespace.c               | 1 +
 kernel/dma.c                            | 1 +
 kernel/elfcore.c                        | 1 +
 kernel/events/Makefile                  | 1 +
 kernel/events/internal.h                | 1 +
 kernel/exec_domain.c                    | 1 +
 kernel/futex_compat.c                   | 1 +
 kernel/gcov/Makefile                    | 1 +
 kernel/gcov/base.c                      | 1 +
 kernel/gcov/fs.c                        | 1 +
 kernel/gcov/gcc_3_4.c                   | 1 +
 kernel/gcov/gcc_4_7.c                   | 1 +
 kernel/gcov/gcov.h                      | 1 +
 kernel/groups.c                         | 1 +
 kernel/irq/Makefile                     | 1 +
 kernel/irq/affinity.c                   | 1 +
 kernel/irq/autoprobe.c                  | 1 +
 kernel/irq/debug.h                      | 1 +
 kernel/irq/internals.h                  | 1 +
 kernel/irq/migration.c                  | 1 +
 kernel/irq/proc.c                       | 1 +
 kernel/irq/resend.c                     | 1 +
 kernel/irq/settings.h                   | 1 +
 kernel/irq/spurious.c                   | 1 +
 kernel/kcmp.c                           | 1 +
 kernel/kcov.c                           | 1 +
 kernel/kexec_internal.h                 | 1 +
 kernel/livepatch/core.h                 | 1 +
 kernel/livepatch/patch.h                | 1 +
 kernel/livepatch/transition.h           | 1 +
 kernel/locking/Makefile                 | 1 +
 kernel/locking/lockdep_internals.h      | 1 +
 kernel/locking/lockdep_proc.c           | 1 +
 kernel/locking/mcs_spinlock.h           | 1 +
 kernel/locking/mutex-debug.h            | 1 +
 kernel/locking/mutex.h                  | 1 +
 kernel/locking/osq_lock.c               | 1 +
 kernel/locking/qspinlock_paravirt.h     | 1 +
 kernel/locking/rtmutex-debug.c          | 1 +
 kernel/locking/rtmutex-debug.h          | 1 +
 kernel/locking/rtmutex.h                | 1 +
 kernel/locking/rtmutex_common.h         | 1 +
 kernel/locking/rwsem-spinlock.c         | 1 +
 kernel/locking/rwsem-xadd.c             | 1 +
 kernel/locking/rwsem.c                  | 1 +
 kernel/locking/rwsem.h                  | 1 +
 kernel/locking/spinlock.c               | 1 +
 kernel/power/Makefile                   | 1 +
 kernel/power/autosleep.c                | 1 +
 kernel/power/console.c                  | 1 +
 kernel/power/power.h                    | 1 +
 kernel/power/process.c                  | 1 +
 kernel/power/wakelock.c                 | 1 +
 kernel/printk/braille.c                 | 1 +
 kernel/printk/braille.h                 | 1 +
 kernel/printk/console_cmdline.h         | 1 +
 kernel/range.c                          | 1 +
 kernel/rcu/Makefile                     | 1 +
 kernel/sched/Makefile                   | 1 +
 kernel/sched/autogroup.c                | 1 +
 kernel/sched/autogroup.h                | 1 +
 kernel/sched/completion.c               | 1 +
 kernel/sched/cpuacct.c                  | 1 +
 kernel/sched/cpuacct.h                  | 1 +
 kernel/sched/cpudeadline.h              | 1 +
 kernel/sched/cpupri.h                   | 1 +
 kernel/sched/deadline.c                 | 1 +
 kernel/sched/fair.c                     | 1 +
 kernel/sched/features.h                 | 1 +
 kernel/sched/idle_task.c                | 1 +
 kernel/sched/loadavg.c                  | 1 +
 kernel/sched/rt.c                       | 1 +
 kernel/sched/sched-pelt.h               | 1 +
 kernel/sched/sched.h                    | 1 +
 kernel/sched/stats.c                    | 1 +
 kernel/sched/stats.h                    | 1 +
 kernel/sched/stop_task.c                | 1 +
 kernel/sched/swait.c                    | 1 +
 kernel/sched/topology.c                 | 1 +
 kernel/seccomp.c                        | 1 +
 kernel/smpboot.h                        | 1 +
 kernel/sys.c                            | 1 +
 kernel/sys_ni.c                         | 1 +
 kernel/sysctl_binary.c                  | 1 +
 kernel/task_work.c                      | 1 +
 kernel/time/Makefile                    | 1 +
 kernel/time/itimer.c                    | 1 +
 kernel/time/ntp.c                       | 1 +
 kernel/time/ntp_internal.h              | 1 +
 kernel/time/posix-cpu-timers.c          | 1 +
 kernel/time/posix-timers.h              | 1 +
 kernel/time/tick-broadcast-hrtimer.c    | 1 +
 kernel/time/tick-internal.h             | 1 +
 kernel/time/tick-sched.h                | 1 +
 kernel/time/timekeeping.h               | 1 +
 kernel/time/timekeeping_internal.h      | 1 +
 kernel/trace/Makefile                   | 1 +
 kernel/trace/power-traces.c             | 1 +
 kernel/trace/rpm-traces.c               | 1 +
 kernel/trace/trace.h                    | 1 +
 kernel/trace/trace_benchmark.c          | 1 +
 kernel/trace/trace_benchmark.h          | 1 +
 kernel/trace/trace_branch.c             | 1 +
 kernel/trace/trace_entries.h            | 1 +
 kernel/trace/trace_events_filter_test.h | 1 +
 kernel/trace/trace_export.c             | 1 +
 kernel/trace/trace_functions.c          | 1 +
 kernel/trace/trace_functions_graph.c    | 1 +
 kernel/trace/trace_kdb.c                | 1 +
 kernel/trace/trace_mmiotrace.c          | 1 +
 kernel/trace/trace_nop.c                | 1 +
 kernel/trace/trace_output.h             | 1 +
 kernel/trace/trace_sched_switch.c       | 1 +
 kernel/trace/trace_sched_wakeup.c       | 1 +
 kernel/trace/trace_selftest.c           | 1 +
 kernel/trace/trace_selftest_dynamic.c   | 1 +
 kernel/trace/trace_stack.c              | 1 +
 kernel/trace/trace_stat.c               | 1 +
 kernel/trace/trace_stat.h               | 1 +
 kernel/trace/trace_syscalls.c           | 1 +
 kernel/trace/tracing_map.h              | 1 +
 kernel/uid16.c                          | 1 +
 kernel/watchdog.c                       | 1 +
 kernel/watchdog_hld.c                   | 1 +
 kernel/workqueue_internal.h             | 1 +
 134 files changed, 134 insertions(+)

(limited to 'kernel')

diff --git a/kernel/Makefile b/kernel/Makefile
index ed470aac53da..172d151d429c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the linux kernel.
 #
diff --git a/kernel/acct.c b/kernel/acct.c
index 5e72af29ab73..6670fbd3e466 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  linux/kernel/acct.c
  *
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 011d46e5f73f..d4b050d9a66e 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "audit.h"
 #include <linux/fsnotify_backend.h>
 #include <linux/namei.h>
diff --git a/kernel/bounds.c b/kernel/bounds.c
index e1d1d1952bfa..c373e887c066 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Generate definitions needed by the preprocessor.
  * This code generates raw asm output which is post-processed
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 897daa005b23..af3ab6164ff5 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
diff --git a/kernel/capability.c b/kernel/capability.c
index f97fe77ceb88..1e1c0236f55b 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/kernel/capability.c
  *
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index ce693ccb8c58..ae448f7632cc 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-y := cgroup.o namespace.o cgroup-v1.o
 
 obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 5151ff256c29..bf54ade001be 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __CGROUP_INTERNAL_H
 #define __CGROUP_INTERNAL_H
 
diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c
index f661b4cc5efd..5f780d8f6a9d 100644
--- a/kernel/cgroup/debug.c
+++ b/kernel/cgroup/debug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Debug controller
  *
diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c
index 66129eb4371d..b05f1dd58a62 100644
--- a/kernel/cgroup/namespace.c
+++ b/kernel/cgroup/namespace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "cgroup-internal.h"
 
 #include <linux/sched/task.h>
diff --git a/kernel/dma.c b/kernel/dma.c
index 6c6262f86c17..3506fc34a712 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c.
  *
diff --git a/kernel/elfcore.c b/kernel/elfcore.c
index e556751d15d9..fc482c8e0bd8 100644
--- a/kernel/elfcore.c
+++ b/kernel/elfcore.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/elf.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 2925188f50ea..3c022e33c109 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE)
 endif
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 843e97047335..09b1537ae06c 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _KERNEL_EVENTS_INTERNAL_H
 #define _KERNEL_EVENTS_INTERNAL_H
 
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 6873bb3e6b7e..0975b0268545 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Handling of different ABIs (personalities).
  *
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 3f409968e466..83f830acbb5f 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/kernel/futex_compat.c
  *
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index 752d6486b67e..c6c50e5c680e 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
 
 obj-y := base.o fs.o
diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
index c51a49c9be70..9c7c8d5c18f2 100644
--- a/kernel/gcov/base.c
+++ b/kernel/gcov/base.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  This code maintains a list of active profiling data structures.
  *
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
index edf67c493a8e..6e40ff6be083 100644
--- a/kernel/gcov/fs.c
+++ b/kernel/gcov/fs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  This code exports profiling data as debugfs files to userspace.
  *
diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c
index 27bc88a35013..1e32e66c9563 100644
--- a/kernel/gcov/gcc_3_4.c
+++ b/kernel/gcov/gcc_3_4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  This code provides functions to handle gcc's profiling data format
  *  introduced with gcc 3.4. Future versions of gcc may change the gcov
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index 46a18e72bce6..ca5e5c0ef853 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  This code provides functions to handle gcc's profiling data format
  *  introduced with gcc 4.7.
diff --git a/kernel/gcov/gcov.h b/kernel/gcov/gcov.h
index 92c8e22a29ed..de118ad4a024 100644
--- a/kernel/gcov/gcov.h
+++ b/kernel/gcov/gcov.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *  Profiling infrastructure declarations.
  *
diff --git a/kernel/groups.c b/kernel/groups.c
index 434f6665f187..e357bc800111 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Supplementary group IDs
  */
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 1970cafe8f2a..ed15d142694b 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 
 obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
 obj-$(CONFIG_IRQ_TIMINGS) += timings.o
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index d69bd77252a7..e12d35108225 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2016 Thomas Gleixner.
  * Copyright (C) 2016-2017 Christoph Hellwig.
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index d30a0dd5cc02..befa671fba64 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/kernel/irq/autoprobe.c
  *
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
index e75e29e4434a..17f05ef8f575 100644
--- a/kernel/irq/debug.h
+++ b/kernel/irq/debug.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Debugging printout:
  */
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index a4aa39009f0d..44ed5f8c8759 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * IRQ subsystem internal functions and variables:
  *
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 6ca054a3f91d..86ae0eb80b53 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/irq.h>
 #include <linux/interrupt.h>
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6376b4a598d3..c010cc0daf79 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/kernel/irq/proc.c
  *
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index b86886beee4f..1d08f45135c2 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/kernel/irq/resend.c
  *
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index 320579d89091..e43795cd2ccf 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Internal header to deal with irq_desc->status which will be renamed
  * to irq_desc->settings.
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 061ba7eed4ed..987d7bca4864 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/kernel/irq/spurious.c
  *
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 055bb2962a0b..a0e3d7a0e8b8 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>
 #include <linux/syscalls.h>
 #include <linux/fdtable.h>
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 3f693a0f6f3e..fc6af9e1308b 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #define pr_fmt(fmt) "kcov: " fmt
 
 #define DISABLE_BRANCH_PROFILING
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 50dfcb039a41..48aaf2ac0d0d 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef LINUX_KEXEC_INTERNAL_H
 #define LINUX_KEXEC_INTERNAL_H
 
diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h
index c74f24c47837..a351601d7f76 100644
--- a/kernel/livepatch/core.h
+++ b/kernel/livepatch/core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LIVEPATCH_CORE_H
 #define _LIVEPATCH_CORE_H
 
diff --git a/kernel/livepatch/patch.h b/kernel/livepatch/patch.h
index 0db227170c36..e72d8250d04b 100644
--- a/kernel/livepatch/patch.h
+++ b/kernel/livepatch/patch.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LIVEPATCH_PATCH_H
 #define _LIVEPATCH_PATCH_H
 
diff --git a/kernel/livepatch/transition.h b/kernel/livepatch/transition.h
index ce09b326546c..0f6e27c481f9 100644
--- a/kernel/livepatch/transition.h
+++ b/kernel/livepatch/transition.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LIVEPATCH_TRANSITION_H
 #define _LIVEPATCH_TRANSITION_H
 
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 760158d9d98d..392c7f23af76 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 # Any varying coverage in these files is non-deterministic
 # and is generally not a function of system call inputs.
 KCOV_INSTRUMENT		:= n
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index 1da4669d57a7..d459d624ba2a 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * kernel/lockdep_internals.h
  *
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index 68d9e267ccd4..ad69bbc9bd28 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * kernel/lockdep_proc.c
  *
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 6a385aabcce7..f046b7ce9dd6 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * MCS lock defines
  *
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h
index 4174417d5309..1edd3f45a4ec 100644
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Mutexes: blocking mutual exclusion locks
  *
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 6ebc1902f779..1c2287d3fa71 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Mutexes: blocking mutual exclusion locks
  *
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index a74ee6abd039..6ef600aa0f47 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/osq_lock.h>
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 43555681c40b..15b6a39366c6 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _GEN_PV_LOCK_SLOWPATH
 #error "do not include this file"
 #endif
diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c
index f4a74e78d467..fd4fe1f5b458 100644
--- a/kernel/locking/rtmutex-debug.c
+++ b/kernel/locking/rtmutex-debug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * RT-Mutexes: blocking mutual exclusion locks with PI support
  *
diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h
index 5078c6ddf4a5..fc549713bba3 100644
--- a/kernel/locking/rtmutex-debug.h
+++ b/kernel/locking/rtmutex-debug.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * RT-Mutexes: blocking mutual exclusion locks with PI support
  *
diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h
index 5c253caffe91..732f96abf462 100644
--- a/kernel/locking/rtmutex.h
+++ b/kernel/locking/rtmutex.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * RT-Mutexes: blocking mutual exclusion locks with PI support
  *
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 7453be0485a5..124e98ca0b17 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * RT Mutexes: blocking mutual exclusion locks with PI support
  *
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 0848634c5512..a7ffb2a96ede 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* rwsem-spinlock.c: R/W semaphores: contention handling functions for
  * generic spinlock implementation
  *
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 1fefe6dcafd7..e795908f3607 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* rwsem.c: R/W semaphores: contention handling functions
  *
  * Written by David Howells (dhowells@redhat.com).
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 4d48b1c4870d..a6c76a4832b4 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* kernel/rwsem.c: R/W semaphores, public implementation
  *
  * Written by David Howells (dhowells@redhat.com).
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index a699f4048ba1..a883b8f1fdc6 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * The owner field of the rw_semaphore structure will be set to
  * RWSEM_READ_OWNED when a reader grabs the lock. A writer will clear
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 4b082b5cac9e..6e40fdfba326 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (2004) Linus Torvalds
  *
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index eb4f717705ba..a3f79f0eef36 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 
 ccflags-$(CONFIG_PM_DEBUG)	:= -DDEBUG
 
diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c
index 9012ecf7b814..41e83a779e19 100644
--- a/kernel/power/autosleep.c
+++ b/kernel/power/autosleep.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * kernel/power/autosleep.c
  *
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 0e781798b0b3..fcdf0e14a47d 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Functions for saving/restoring console.
  *
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 1d2d761e3c25..f29cd178df90 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/suspend.h>
 #include <linux/suspend_ioctls.h>
 #include <linux/utsname.h>
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 50f25cb370c6..7381d49a44db 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * drivers/power/process.c - Functions for starting/stopping processes on 
  *                           suspend transitions.
diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c
index 1896386e16bb..dfba59be190b 100644
--- a/kernel/power/wakelock.c
+++ b/kernel/power/wakelock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * kernel/power/wakelock.c
  *
diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c
index 61d41ca41844..1d21ebacfdb8 100644
--- a/kernel/printk/braille.c
+++ b/kernel/printk/braille.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kernel.h>
diff --git a/kernel/printk/braille.h b/kernel/printk/braille.h
index 749a6756843a..123154f86304 100644
--- a/kernel/printk/braille.h
+++ b/kernel/printk/braille.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _PRINTK_BRAILLE_H
 #define _PRINTK_BRAILLE_H
 
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
index 2ca4a8b5fe57..11f19c466af5 100644
--- a/kernel/printk/console_cmdline.h
+++ b/kernel/printk/console_cmdline.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _CONSOLE_CMDLINE_H
 #define _CONSOLE_CMDLINE_H
 
diff --git a/kernel/range.c b/kernel/range.c
index 82cfc285b046..d84de6766472 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Range add and subtract
  */
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 13c0fc852767..020e8b6a644b 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 # Any varying coverage in these files is non-deterministic
 # and is generally not a function of system call inputs.
 KCOV_INSTRUMENT := n
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 78f54932ea1d..a9ee16bbc693 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE)
 endif
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index de6d7f4dfcb5..a43df5193538 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "sched.h"
 
 #include <linux/proc_fs.h>
diff --git a/kernel/sched/autogroup.h b/kernel/sched/autogroup.h
index ce40c810cd5c..27cd22b89824 100644
--- a/kernel/sched/autogroup.h
+++ b/kernel/sched/autogroup.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifdef CONFIG_SCHED_AUTOGROUP
 
 #include <linux/kref.h>
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index cc873075c3bd..2ddaec40956f 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Generic wait-for-completion handler;
  *
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index f95ab29a45d0..44ab32a4fab6 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/cgroup.h>
 #include <linux/slab.h>
 #include <linux/percpu.h>
diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
index ba72807c73d4..a8358a57a316 100644
--- a/kernel/sched/cpuacct.h
+++ b/kernel/sched/cpuacct.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifdef CONFIG_CGROUP_CPUACCT
 
 extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index f7da8c55bba0..b010d26e108e 100644
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_CPUDL_H
 #define _LINUX_CPUDL_H
 
diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h
index 63cbb9ca0496..bab050019071 100644
--- a/kernel/sched/cpupri.h
+++ b/kernel/sched/cpupri.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_CPUPRI_H
 #define _LINUX_CPUPRI_H
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0191ec7667c3..4ae5c1ea90e2 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Deadline Scheduling Class (SCHED_DEADLINE)
  *
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d3f3094856fe..5c09ddf8c832 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Completely Fair Scheduling (CFS) Class (SCHED_NORMAL/SCHED_BATCH)
  *
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 319ed0e8a347..9552fd5854bf 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Only give sleepers 50% of their service deficit. This allows
  * them to run sooner, but does not allow tons of sleepers to
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 0c00172db63e..d518664cce4f 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "sched.h"
 
 /*
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index f14716a3522f..89a989e4d758 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * kernel/sched/loadavg.c
  *
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 0af5ca9e3e3f..3c96c80e0992 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
  * policies)
diff --git a/kernel/sched/sched-pelt.h b/kernel/sched/sched-pelt.h
index cd200d16529e..a26473674fb7 100644
--- a/kernel/sched/sched-pelt.h
+++ b/kernel/sched/sched-pelt.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Generated by Documentation/scheduler/sched-pelt; do not modify. */
 
 static const u32 runnable_avg_yN_inv[] = {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14db76cd496f..3b448ba82225 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 
 #include <linux/sched.h>
 #include <linux/sched/autogroup.h>
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index 87e2c9f0c33e..940b1fa1d2ce 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/slab.h>
 #include <linux/fs.h>
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index d5710651043b..baf500d12b7c 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 
 #ifdef CONFIG_SCHEDSTATS
 
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 9f69fb630853..45caf90b24cd 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "sched.h"
 
 /*
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index 2227e183e202..9ff1555341ed 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/sched/signal.h>
 #include <linux/swait.h>
 
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index f1cf4f306a82..6798276d29af 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Scheduler topology setup/handling methods
  */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 0ae832e13b97..418a1c045933 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/kernel/seccomp.c
  *
diff --git a/kernel/smpboot.h b/kernel/smpboot.h
index 485b81cfab34..34dd3d7ba40b 100644
--- a/kernel/smpboot.h
+++ b/kernel/smpboot.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef SMPBOOT_H
 #define SMPBOOT_H
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 9aebc2935013..524a4cb9bbe2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  linux/kernel/sys.c
  *
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 8acef8576ce9..b5189762d275 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/linkage.h>
 #include <linux/errno.h>
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 58ea8c03662e..e8c0dab4fd65 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/stat.h>
 #include <linux/sysctl.h>
 #include "../fs/xfs/xfs_sysctl.h"
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 836a72a66fba..5718b3ea202a 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/spinlock.h>
 #include <linux/task_work.h>
 #include <linux/tracehook.h>
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 938dbf33ef49..f1e46f338a9c 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-y += time.o timer.o hrtimer.o
 obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
 obj-y += timeconv.o timecounter.o alarmtimer.o
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 2ef98a02376a..f26acef5d7b4 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/kernel/itimer.c
  *
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index edf19cc53140..99e03bec68e4 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * NTP state machine interfaces and logic.
  *
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index d8a7c11fa71a..0a53e6ea47b1 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_NTP_INTERNAL_H
 #define _LINUX_NTP_INTERNAL_H
 
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 8585ad6e472a..5b117110b55b 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Implement CPU time clocks for the POSIX clock interface.
  */
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index fb303c3be4d3..151e28f5bf30 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #define TIMER_RETRY 1
 
 struct k_clock {
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index a7bb8f33ae07..58045eb976c3 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/kernel/time/tick-broadcast-hrtimer.c
  * This file emulates a local clock event device
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index be0ac01f2e12..f8e1845aa464 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * tick internal variable and functions used by low/high res code
  */
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 075444e3d48e..954b43dbf21c 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _TICK_SCHED_H
 #define _TICK_SCHED_H
 
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index d0914676d4c5..c9f9af339914 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _KERNEL_TIME_TIMEKEEPING_H
 #define _KERNEL_TIME_TIMEKEEPING_H
 /*
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index 9a18f121f399..fdbeeb02dde9 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _TIMEKEEPING_INTERNAL_H
 #define _TIMEKEEPING_INTERNAL_H
 /*
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 90f2701d92a7..19a15b2f1190 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 
 # Do not instrument the tracer itself:
 
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 0c7dee221dca..21bb161c2316 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Power trace points
  *
diff --git a/kernel/trace/rpm-traces.c b/kernel/trace/rpm-traces.c
index 4b3b5eaf94d1..25dec0b00280 100644
--- a/kernel/trace/rpm-traces.c
+++ b/kernel/trace/rpm-traces.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Power trace points
  *
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 652c682707cd..401b0639116f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 
 #ifndef _LINUX_KERNEL_TRACE_H
 #define _LINUX_KERNEL_TRACE_H
diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
index 16a8cf02eee9..79f838a75077 100644
--- a/kernel/trace/trace_benchmark.c
+++ b/kernel/trace/trace_benchmark.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/kthread.h>
diff --git a/kernel/trace/trace_benchmark.h b/kernel/trace/trace_benchmark.h
index ebdbfc2f2a64..be1d86ff753d 100644
--- a/kernel/trace/trace_benchmark.h
+++ b/kernel/trace/trace_benchmark.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM benchmark
 
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4d8fdf3184dc..4ad967453b6f 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * unlikely profiler
  *
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index adcdbbeae010..e954ae3d82c0 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * This file defines the trace event structures that go into the ring
  * buffer directly. They are created via macros so that changes for them
diff --git a/kernel/trace/trace_events_filter_test.h b/kernel/trace/trace_events_filter_test.h
index bfd4dba0d603..39d7ef4f57cb 100644
--- a/kernel/trace/trace_events_filter_test.h
+++ b/kernel/trace/trace_events_filter_test.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM test
 
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 39aa7aa66468..548e62eb5c46 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * trace_export.c - export basic ftrace utilities to user space
  *
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index a0910c0cdf2e..27f7ad12c4b1 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * ring buffer based function tracer
  *
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b8f1f54731af..23c0b0cb5fb9 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * Function graph tracer.
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index 57149bce6aad..d953c163a079 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * kdb helper for dumping the ftrace buffer
  *
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index dca78fc48439..b0388016b687 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Memory mapped I/O tracing
  *
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 49f61fe96a6b..50523f953a5d 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * nop tracer
  *
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index fabc49bcd493..dbba03ed96de 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __TRACE_EVENTS_H
 #define __TRACE_EVENTS_H
 
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index b341c02730be..e288168661e1 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * trace context switch
  *
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 0c331978b1a6..7d461dcd4831 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * trace task wakeup timings
  *
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index b17ec642793b..cd70eb5df38e 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Include in trace.c */
 
 #include <uapi/linux/sched/types.h>
diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c
index b4c475a0a48b..8cda06a10d66 100644
--- a/kernel/trace/trace_selftest_dynamic.c
+++ b/kernel/trace/trace_selftest_dynamic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "trace.h"
 
 int DYN_FTRACE_TEST_NAME(void)
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 49cb41412eec..719a52a4064a 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  *
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 413ff108fbd0..75bf1bcb4a8a 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Infrastructure for statistic tracing (histogram output).
  *
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
index 8f03914b9a6a..76d30b4ebe83 100644
--- a/kernel/trace/trace_stat.h
+++ b/kernel/trace/trace_stat.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __TRACE_STAT_H
 #define __TRACE_STAT_H
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 696afe72d3b1..a2a642f2c64f 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <trace/syscall.h>
 #include <trace/events/syscalls.h>
 #include <linux/syscalls.h>
diff --git a/kernel/trace/tracing_map.h b/kernel/trace/tracing_map.h
index 618838f5f30a..ab0ca77331d0 100644
--- a/kernel/trace/tracing_map.h
+++ b/kernel/trace/tracing_map.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __TRACING_MAP_H
 #define __TRACING_MAP_H
 
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 5c2dc5b2bf4f..ce74a4901d2b 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *	Wrapper functions for 16bit uid back compatibility. All nicely tied
  *	together in the faint hope we can take the out in five years time.
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6bcb854909c0..c8e06703e44c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Detect hard and soft lockups on a system
  *
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 71a62ceacdc8..4583feb66393 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Detect hard lockups on a system
  *
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 8635417c587b..efdd72e15794 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * kernel/workqueue_internal.h
  *
-- 
cgit 


From e78c38f6bdd900b2ad9ac9df8eff58b745dc5b3c Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 23 Oct 2017 13:41:51 +0200
Subject: futex: futex_wake_op, do not fail on invalid op

In commit 30d6e0a4190d ("futex: Remove duplicated code and fix undefined
behaviour"), I let FUTEX_WAKE_OP to fail on invalid op.  Namely when op
should be considered as shift and the shift is out of range (< 0 or > 31).

But strace's test suite does this madness:

  futex(0x7fabd78bcffc, 0x5, 0xfacefeed, 0xb, 0x7fabd78bcffc, 0xa0caffee);
  futex(0x7fabd78bcffc, 0x5, 0xfacefeed, 0xb, 0x7fabd78bcffc, 0xbadfaced);
  futex(0x7fabd78bcffc, 0x5, 0xfacefeed, 0xb, 0x7fabd78bcffc, 0xffffffff);

When I pick the first 0xa0caffee, it decodes as:

  0x80000000 & 0xa0caffee: oparg is shift
  0x70000000 & 0xa0caffee: op is FUTEX_OP_OR
  0x0f000000 & 0xa0caffee: cmp is FUTEX_OP_CMP_EQ
  0x00fff000 & 0xa0caffee: oparg is sign-extended 0xcaf = -849
  0x00000fff & 0xa0caffee: cmparg is sign-extended 0xfee = -18

That means the op tries to do this:

  (futex |= (1 << (-849))) == -18

which is completely bogus. The new check of op in the code is:

        if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
                if (oparg < 0 || oparg > 31)
                        return -EINVAL;
                oparg = 1 << oparg;
        }

which results obviously in the "Invalid argument" errno:

  FAIL: futex
  ===========

  futex(0x7fabd78bcffc, 0x5, 0xfacefeed, 0xb, 0x7fabd78bcffc, 0xa0caffee) = -1: Invalid argument
  futex.test: failed test: ../futex failed with code 1

So let us soften the failure to print only a (ratelimited) message, crop
the value and continue as if it were right.  When userspace keeps up, we
can switch this to return -EINVAL again.

[v2] Do not return 0 immediatelly, proceed with the cropped value.

Fixes: 30d6e0a4190d ("futex: Remove duplicated code and fix undefined behaviour")
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Darren Hart <dvhart@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/futex.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/futex.c b/kernel/futex.c
index 0518a0bfc746..0d638f008bb1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1570,8 +1570,16 @@ static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
 	int oldval, ret;
 
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
-		if (oparg < 0 || oparg > 31)
-			return -EINVAL;
+		if (oparg < 0 || oparg > 31) {
+			char comm[sizeof(current->comm)];
+			/*
+			 * kill this print and return -EINVAL when userspace
+			 * is sane again
+			 */
+			pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
+					get_task_comm(comm, current), oparg);
+			oparg &= 31;
+		}
 		oparg = 1 << oparg;
 	}
 
-- 
cgit